import json
import logging
import math
import io
from datetime import datetime, timedelta, timezone
from typing import List, Optional, Literal

from fastapi import (
    APIRouter,
    Depends,
    HTTPException,
    UploadFile,
    File,
    Form,
    Request,
    Query,
    Header,
    Response,
)
from fastapi.responses import StreamingResponse, RedirectResponse
from sqlalchemy.orm import Session
from sqlalchemy import or_, func, text

from .database import get_db
from .storage import (
    get_storage,
    S3Storage,
    MULTIPART_CHUNK_SIZE,
    StorageError,
    HashComputationError,
    FileSizeExceededError,
    S3ExistenceCheckError,
    S3UploadError,
    S3StorageUnavailableError,
    HashCollisionError,
)
from .models import (
    Project,
    Package,
    Artifact,
    Tag,
    TagHistory,
    Upload,
    Consumer,
    AuditLog,
)
from .schemas import (
    ProjectCreate,
    ProjectResponse,
    PackageCreate,
    PackageResponse,
    PackageDetailResponse,
    TagSummary,
    PACKAGE_FORMATS,
    PACKAGE_PLATFORMS,
    ArtifactDetailResponse,
    ArtifactTagInfo,
    PackageArtifactResponse,
    TagCreate,
    TagResponse,
    TagDetailResponse,
    TagHistoryResponse,
    UploadResponse,
    ConsumerResponse,
    HealthResponse,
    PaginatedResponse,
    PaginationMeta,
    ResumableUploadInitRequest,
    ResumableUploadInitResponse,
    ResumableUploadPartResponse,
    ResumableUploadCompleteRequest,
    ResumableUploadCompleteResponse,
    ResumableUploadStatusResponse,
    GlobalSearchResponse,
    SearchResultProject,
    SearchResultPackage,
    SearchResultArtifact,
    PresignedUrlResponse,
    GarbageCollectionResponse,
    OrphanedArtifactResponse,
    StorageStatsResponse,
    DeduplicationStatsResponse,
    ProjectStatsResponse,
    PackageStatsResponse,
    ArtifactStatsResponse,
    CrossProjectDeduplicationResponse,
    TimeBasedStatsResponse,
    StatsReportResponse,
)
from .metadata import extract_metadata
from .config import get_settings

logger = logging.getLogger(__name__)

router = APIRouter()


def get_user_id(request: Request) -> str:
    """Extract user ID from request (simplified for now)"""
    api_key = request.headers.get("X-Orchard-API-Key")
    if api_key:
        return "api-user"
    auth = request.headers.get("Authorization")
    if auth:
        return "bearer-user"
    return "anonymous"


def _increment_ref_count(db: Session, artifact_id: str) -> int:
    """
    Atomically increment ref_count for an artifact using row-level locking.
    Returns the new ref_count value.

    Uses SELECT FOR UPDATE to prevent race conditions when multiple
    requests try to modify the same artifact's ref_count simultaneously.
    """
    # Lock the row to prevent concurrent modifications
    artifact = (
        db.query(Artifact).filter(Artifact.id == artifact_id).with_for_update().first()
    )
    if not artifact:
        logger.warning(
            f"Attempted to increment ref_count for non-existent artifact: {artifact_id[:12]}..."
        )
        return 0

    artifact.ref_count += 1
    db.flush()  # Ensure the update is written but don't commit yet
    return artifact.ref_count


def _decrement_ref_count(db: Session, artifact_id: str) -> int:
    """
    Atomically decrement ref_count for an artifact using row-level locking.
    Returns the new ref_count value.

    Uses SELECT FOR UPDATE to prevent race conditions when multiple
    requests try to modify the same artifact's ref_count simultaneously.
    Will not decrement below 0.
    """
    # Lock the row to prevent concurrent modifications
    artifact = (
        db.query(Artifact).filter(Artifact.id == artifact_id).with_for_update().first()
    )
    if not artifact:
        logger.warning(
            f"Attempted to decrement ref_count for non-existent artifact: {artifact_id[:12]}..."
        )
        return 0

    # Prevent going below 0
    if artifact.ref_count > 0:
        artifact.ref_count -= 1
    else:
        logger.warning(
            f"Attempted to decrement ref_count below 0 for artifact: {artifact_id[:12]}... "
            f"(current: {artifact.ref_count})"
        )

    db.flush()  # Ensure the update is written but don't commit yet
    return artifact.ref_count
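

# Illustrative usage (not executed): both helpers flush but do not commit, so
# the caller owns the transaction boundary, e.g.
#
#   new_count = _decrement_ref_count(db, artifact_id)
#   db.commit()
#
# In the current code ref_count is normally maintained by SQL triggers on tag
# changes; these helpers are available for paths that need to adjust the count
# directly under a row lock.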


def _create_or_update_tag(
    db: Session,
    package_id: str,
    tag_name: str,
    new_artifact_id: str,
    user_id: str,
) -> tuple[Tag, bool, Optional[str]]:
    """
    Create or update a tag, handling ref_count and history.

    Returns:
        tuple of (tag, is_new, old_artifact_id)
        - tag: The created/updated Tag object
        - is_new: True if tag was created, False if updated
        - old_artifact_id: Previous artifact_id if tag was updated, None otherwise
    """
    existing_tag = (
        db.query(Tag).filter(Tag.package_id == package_id, Tag.name == tag_name).first()
    )

    if existing_tag:
        old_artifact_id = existing_tag.artifact_id

        # Only process if artifact actually changed
        if old_artifact_id != new_artifact_id:
            # Record history
            history = TagHistory(
                tag_id=existing_tag.id,
                old_artifact_id=old_artifact_id,
                new_artifact_id=new_artifact_id,
                change_type="update",
                changed_by=user_id,
            )
            db.add(history)

            # Update tag to point to new artifact
            # NOTE: SQL trigger (tags_ref_count_update_trigger) handles ref_count:
            # - Decrements old artifact's ref_count
            # - Increments new artifact's ref_count
            existing_tag.artifact_id = new_artifact_id
            existing_tag.created_by = user_id

            logger.info(
                f"Tag '{tag_name}' updated: {old_artifact_id[:12]}... -> {new_artifact_id[:12]}..."
            )

            return existing_tag, False, old_artifact_id
        else:
            # Same artifact, no change needed
            return existing_tag, False, None
    else:
        # Create new tag
        new_tag = Tag(
            package_id=package_id,
            name=tag_name,
            artifact_id=new_artifact_id,
            created_by=user_id,
        )
        db.add(new_tag)
        db.flush()  # Get the tag ID

        # Record history for creation
        history = TagHistory(
            tag_id=new_tag.id,
            old_artifact_id=None,
            new_artifact_id=new_artifact_id,
            change_type="create",
            changed_by=user_id,
        )
        db.add(history)

        return new_tag, True, None
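

# For reference, a sketch of the trigger semantics the code above relies on
# (tags_ref_count_update_trigger / tags_ref_count_delete_trigger). The real
# DDL lives in the database migrations; this illustrative PostgreSQL version
# may not match the deployed definitions exactly:
#
#   CREATE OR REPLACE FUNCTION sync_artifact_ref_count() RETURNS trigger AS $$
#   BEGIN
#       IF TG_OP = 'INSERT' THEN
#           UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
#       ELSIF TG_OP = 'UPDATE' AND NEW.artifact_id <> OLD.artifact_id THEN
#           UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
#           UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
#       ELSIF TG_OP = 'DELETE' THEN
#           UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
#       END IF;
#       RETURN NULL;
#   END;
#   $$ LANGUAGE plpgsql;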


def _log_audit(
    db: Session,
    action: str,
    resource: str,
    user_id: str,
    source_ip: Optional[str] = None,
    details: Optional[dict] = None,
):
    """Log an action to the audit_logs table."""
    audit_log = AuditLog(
        action=action,
        resource=resource,
        user_id=user_id,
        source_ip=source_ip,
        details=details or {},
    )
    db.add(audit_log)


# Health check
@router.get("/health", response_model=HealthResponse)
def health_check(
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Health check endpoint that verifies database and storage connectivity.
    """
    storage_healthy = None
    database_healthy = None

    # Check database connectivity
    try:
        db.execute(text("SELECT 1"))
        database_healthy = True
    except Exception as e:
        logger.warning(f"Database health check failed: {e}")
        database_healthy = False

    # Check storage connectivity with a HEAD request on the bucket (lightweight operation)
    try:
        storage.client.head_bucket(Bucket=storage.bucket)
        storage_healthy = True
    except Exception as e:
        logger.warning(f"Storage health check failed: {e}")
        storage_healthy = False

    overall_status = "ok" if (storage_healthy and database_healthy) else "degraded"

    return HealthResponse(
        status=overall_status,
        storage_healthy=storage_healthy,
        database_healthy=database_healthy,
    )
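

# Example (illustrative, assuming the API is served on localhost:8000):
#
#   $ curl -s http://localhost:8000/health
#   {"status": "ok", "storage_healthy": true, "database_healthy": true}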


# Global search
@router.get("/api/v1/search", response_model=GlobalSearchResponse)
def global_search(
    request: Request,
    q: str = Query(..., min_length=1, description="Search query"),
    limit: int = Query(default=5, ge=1, le=20, description="Results per type"),
    db: Session = Depends(get_db),
):
    """
    Search across all entity types (projects, packages, artifacts/tags).
    Returns limited results for each type plus total counts.
    """
    user_id = get_user_id(request)
    search_lower = q.lower()

    # Search projects (name and description)
    project_query = db.query(Project).filter(
        or_(Project.is_public == True, Project.created_by == user_id),
        or_(
            func.lower(Project.name).contains(search_lower),
            func.lower(Project.description).contains(search_lower),
        ),
    )
    project_count = project_query.count()
    projects = project_query.order_by(Project.name).limit(limit).all()

    # Search packages (name and description) with project name
    package_query = (
        db.query(Package, Project.name.label("project_name"))
        .join(Project, Package.project_id == Project.id)
        .filter(
            or_(Project.is_public == True, Project.created_by == user_id),
            or_(
                func.lower(Package.name).contains(search_lower),
                func.lower(Package.description).contains(search_lower),
            ),
        )
    )
    package_count = package_query.count()
    package_results = package_query.order_by(Package.name).limit(limit).all()

    # Search tags/artifacts (tag name and original filename)
    artifact_query = (
        db.query(
            Tag,
            Artifact,
            Package.name.label("package_name"),
            Project.name.label("project_name"),
        )
        .join(Artifact, Tag.artifact_id == Artifact.id)
        .join(Package, Tag.package_id == Package.id)
        .join(Project, Package.project_id == Project.id)
        .filter(
            or_(Project.is_public == True, Project.created_by == user_id),
            or_(
                func.lower(Tag.name).contains(search_lower),
                func.lower(Artifact.original_name).contains(search_lower),
            ),
        )
    )
    artifact_count = artifact_query.count()
    artifact_results = artifact_query.order_by(Tag.name).limit(limit).all()

    return GlobalSearchResponse(
        query=q,
        projects=[
            SearchResultProject(
                id=p.id, name=p.name, description=p.description, is_public=p.is_public
            )
            for p in projects
        ],
        packages=[
            SearchResultPackage(
                id=pkg.id,
                project_id=pkg.project_id,
                project_name=project_name,
                name=pkg.name,
                description=pkg.description,
                format=pkg.format,
            )
            for pkg, project_name in package_results
        ],
        artifacts=[
            SearchResultArtifact(
                tag_id=tag.id,
                tag_name=tag.name,
                artifact_id=artifact.id,
                package_id=tag.package_id,
                package_name=package_name,
                project_name=project_name,
                original_name=artifact.original_name,
            )
            for tag, artifact, package_name, project_name in artifact_results
        ],
        counts={
            "projects": project_count,
            "packages": package_count,
            "artifacts": artifact_count,
            "total": project_count + package_count + artifact_count,
        },
    )
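

# Example (illustrative): GET /api/v1/search?q=linux&limit=3 returns up to
# three matches per entity type plus full counts, roughly:
#
#   {"query": "linux", "projects": [...], "packages": [...], "artifacts": [...],
#    "counts": {"projects": 1, "packages": 4, "artifacts": 12, "total": 17}}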


# Project routes
@router.get("/api/v1/projects", response_model=PaginatedResponse[ProjectResponse])
def list_projects(
    request: Request,
    page: int = Query(default=1, ge=1, description="Page number"),
    limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
    search: Optional[str] = Query(
        default=None, description="Search by project name or description"
    ),
    visibility: Optional[str] = Query(
        default=None, description="Filter by visibility (public, private)"
    ),
    sort: str = Query(
        default="name", description="Sort field (name, created_at, updated_at)"
    ),
    order: str = Query(default="asc", description="Sort order (asc, desc)"),
    db: Session = Depends(get_db),
):
    user_id = get_user_id(request)

    # Validate sort field
    valid_sort_fields = {
        "name": Project.name,
        "created_at": Project.created_at,
        "updated_at": Project.updated_at,
    }
    if sort not in valid_sort_fields:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}",
        )

    # Validate order
    if order not in ("asc", "desc"):
        raise HTTPException(
            status_code=400, detail="Invalid order. Must be 'asc' or 'desc'"
        )

    # Base query - filter by access
    query = db.query(Project).filter(
        or_(Project.is_public == True, Project.created_by == user_id)
    )

    # Apply visibility filter
    if visibility == "public":
        query = query.filter(Project.is_public == True)
    elif visibility == "private":
        query = query.filter(Project.is_public == False, Project.created_by == user_id)

    # Apply search filter (case-insensitive on name and description)
    if search:
        search_lower = search.lower()
        query = query.filter(
            or_(
                func.lower(Project.name).contains(search_lower),
                func.lower(Project.description).contains(search_lower),
            )
        )

    # Get total count before pagination
    total = query.count()

    # Apply sorting
    sort_column = valid_sort_fields[sort]
    if order == "desc":
        query = query.order_by(sort_column.desc())
    else:
        query = query.order_by(sort_column.asc())

    # Apply pagination
    offset = (page - 1) * limit
    projects = query.offset(offset).limit(limit).all()

    # Calculate total pages
    total_pages = math.ceil(total / limit) if total > 0 else 1

    return PaginatedResponse(
        items=projects,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
        ),
    )
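

# Example (illustrative): the second page of your private projects, newest
# first:
#
#   GET /api/v1/projects?page=2&limit=20&visibility=private&sort=created_at&order=desc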


@router.post("/api/v1/projects", response_model=ProjectResponse)
def create_project(
    project: ProjectCreate, request: Request, db: Session = Depends(get_db)
):
    user_id = get_user_id(request)

    existing = db.query(Project).filter(Project.name == project.name).first()
    if existing:
        raise HTTPException(status_code=400, detail="Project already exists")

    db_project = Project(
        name=project.name,
        description=project.description,
        is_public=project.is_public,
        created_by=user_id,
    )
    db.add(db_project)
    db.commit()
    db.refresh(db_project)
    return db_project


@router.get("/api/v1/projects/{project_name}", response_model=ProjectResponse)
def get_project(project_name: str, db: Session = Depends(get_db)):
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")
    return project


@router.delete("/api/v1/projects/{project_name}", status_code=204)
def delete_project(
    project_name: str,
    request: Request,
    db: Session = Depends(get_db),
):
    """
    Delete a project and all its packages.

    Decrements ref_count for all artifacts referenced by tags in all packages
    within this project.
    """
    user_id = get_user_id(request)

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    # Get counts for logging
    packages = db.query(Package).filter(Package.project_id == project.id).all()
    package_count = len(packages)

    total_tags = 0
    artifact_ids = set()
    for package in packages:
        tags = db.query(Tag).filter(Tag.package_id == package.id).all()
        total_tags += len(tags)
        for tag in tags:
            artifact_ids.add(tag.artifact_id)

    logger.info(
        f"Project '{project_name}' deletion: {package_count} packages, "
        f"{total_tags} tags affecting {len(artifact_ids)} artifacts"
    )

    # Delete the project (cascade will delete packages, tags, etc.)
    # NOTE: SQL triggers (tags_ref_count_delete_trigger) handle ref_count automatically
    db.delete(project)
    db.commit()

    # Audit log (after commit)
    _log_audit(
        db,
        action="delete_project",
        resource=f"project/{project_name}",
        user_id=user_id,
        source_ip=request.client.host if request.client else None,
        details={
            "packages_deleted": package_count,
            "tags_deleted": total_tags,
            "artifacts_affected": list(artifact_ids),
        },
    )
    db.commit()

    return None


# Package routes
@router.get(
    "/api/v1/project/{project_name}/packages",
    response_model=PaginatedResponse[PackageDetailResponse],
)
def list_packages(
    project_name: str,
    page: int = Query(default=1, ge=1, description="Page number"),
    limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
    search: Optional[str] = Query(
        default=None, description="Search by name or description"
    ),
    sort: str = Query(
        default="name", description="Sort field (name, created_at, updated_at)"
    ),
    order: str = Query(default="asc", description="Sort order (asc, desc)"),
    format: Optional[str] = Query(default=None, description="Filter by package format"),
    platform: Optional[str] = Query(default=None, description="Filter by platform"),
    db: Session = Depends(get_db),
):
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    # Validate sort field
    valid_sort_fields = {
        "name": Package.name,
        "created_at": Package.created_at,
        "updated_at": Package.updated_at,
    }
    if sort not in valid_sort_fields:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}",
        )

    # Validate order
    if order not in ("asc", "desc"):
        raise HTTPException(
            status_code=400, detail="Invalid order. Must be 'asc' or 'desc'"
        )

    # Validate format filter
    if format and format not in PACKAGE_FORMATS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid format. Must be one of: {', '.join(PACKAGE_FORMATS)}",
        )

    # Validate platform filter
    if platform and platform not in PACKAGE_PLATFORMS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid platform. Must be one of: {', '.join(PACKAGE_PLATFORMS)}",
        )

    # Base query
    query = db.query(Package).filter(Package.project_id == project.id)

    # Apply search filter (case-insensitive on name and description)
    if search:
        search_lower = search.lower()
        query = query.filter(
            or_(
                func.lower(Package.name).contains(search_lower),
                func.lower(Package.description).contains(search_lower),
            )
        )

    # Apply format filter
    if format:
        query = query.filter(Package.format == format)

    # Apply platform filter
    if platform:
        query = query.filter(Package.platform == platform)

    # Get total count before pagination
    total = query.count()

    # Apply sorting
    sort_column = valid_sort_fields[sort]
    if order == "desc":
        query = query.order_by(sort_column.desc())
    else:
        query = query.order_by(sort_column.asc())

    # Apply pagination
    offset = (page - 1) * limit
    packages = query.offset(offset).limit(limit).all()

    # Calculate total pages
    total_pages = math.ceil(total / limit) if total > 0 else 1

    # Build detailed responses with aggregated data
    detailed_packages = []
    for pkg in packages:
        # Get tag count
        tag_count = (
            db.query(func.count(Tag.id)).filter(Tag.package_id == pkg.id).scalar() or 0
        )

        # Get unique artifact count and total size via uploads
        artifact_stats = (
            db.query(
                func.count(func.distinct(Upload.artifact_id)),
                func.coalesce(func.sum(Artifact.size), 0),
            )
            .join(Artifact, Upload.artifact_id == Artifact.id)
            .filter(Upload.package_id == pkg.id)
            .first()
        )
        artifact_count = artifact_stats[0] if artifact_stats else 0
        total_size = artifact_stats[1] if artifact_stats else 0

        # Get latest tag
        latest_tag_obj = (
            db.query(Tag)
            .filter(Tag.package_id == pkg.id)
            .order_by(Tag.created_at.desc())
            .first()
        )
        latest_tag = latest_tag_obj.name if latest_tag_obj else None

        # Get latest upload timestamp
        latest_upload = (
            db.query(func.max(Upload.uploaded_at))
            .filter(Upload.package_id == pkg.id)
            .scalar()
        )

        # Get recent tags (limit 5)
        recent_tags_objs = (
            db.query(Tag)
            .filter(Tag.package_id == pkg.id)
            .order_by(Tag.created_at.desc())
            .limit(5)
            .all()
        )
        recent_tags = [
            TagSummary(name=t.name, artifact_id=t.artifact_id, created_at=t.created_at)
            for t in recent_tags_objs
        ]

        detailed_packages.append(
            PackageDetailResponse(
                id=pkg.id,
                project_id=pkg.project_id,
                name=pkg.name,
                description=pkg.description,
                format=pkg.format,
                platform=pkg.platform,
                created_at=pkg.created_at,
                updated_at=pkg.updated_at,
                tag_count=tag_count,
                artifact_count=artifact_count,
                total_size=total_size,
                latest_tag=latest_tag,
                latest_upload_at=latest_upload,
                recent_tags=recent_tags,
            )
        )

    return PaginatedResponse(
        items=detailed_packages,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
        ),
    )


@router.get(
    "/api/v1/project/{project_name}/packages/{package_name}",
    response_model=PackageDetailResponse,
)
def get_package(
    project_name: str,
    package_name: str,
    include_tags: bool = Query(
        default=False, description="Include all tags (not just recent 5)"
    ),
    db: Session = Depends(get_db),
):
    """Get a single package with full metadata"""
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    pkg = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not pkg:
        raise HTTPException(status_code=404, detail="Package not found")

    # Get tag count
    tag_count = (
        db.query(func.count(Tag.id)).filter(Tag.package_id == pkg.id).scalar() or 0
    )

    # Get unique artifact count and total size via uploads
    artifact_stats = (
        db.query(
            func.count(func.distinct(Upload.artifact_id)),
            func.coalesce(func.sum(Artifact.size), 0),
        )
        .join(Artifact, Upload.artifact_id == Artifact.id)
        .filter(Upload.package_id == pkg.id)
        .first()
    )
    artifact_count = artifact_stats[0] if artifact_stats else 0
    total_size = artifact_stats[1] if artifact_stats else 0

    # Get latest tag
    latest_tag_obj = (
        db.query(Tag)
        .filter(Tag.package_id == pkg.id)
        .order_by(Tag.created_at.desc())
        .first()
    )
    latest_tag = latest_tag_obj.name if latest_tag_obj else None

    # Get latest upload timestamp
    latest_upload = (
        db.query(func.max(Upload.uploaded_at))
        .filter(Upload.package_id == pkg.id)
        .scalar()
    )

    # Get tags (all if include_tags=true, else limit 5)
    tags_query = (
        db.query(Tag).filter(Tag.package_id == pkg.id).order_by(Tag.created_at.desc())
    )
    if not include_tags:
        tags_query = tags_query.limit(5)
    tags_objs = tags_query.all()
    recent_tags = [
        TagSummary(name=t.name, artifact_id=t.artifact_id, created_at=t.created_at)
        for t in tags_objs
    ]

    return PackageDetailResponse(
        id=pkg.id,
        project_id=pkg.project_id,
        name=pkg.name,
        description=pkg.description,
        format=pkg.format,
        platform=pkg.platform,
        created_at=pkg.created_at,
        updated_at=pkg.updated_at,
        tag_count=tag_count,
        artifact_count=artifact_count,
        total_size=total_size,
        latest_tag=latest_tag,
        latest_upload_at=latest_upload,
        recent_tags=recent_tags,
    )


@router.post("/api/v1/project/{project_name}/packages", response_model=PackageResponse)
def create_package(
    project_name: str, package: PackageCreate, db: Session = Depends(get_db)
):
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    # Validate format
    if package.format not in PACKAGE_FORMATS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid format. Must be one of: {', '.join(PACKAGE_FORMATS)}",
        )

    # Validate platform
    if package.platform not in PACKAGE_PLATFORMS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid platform. Must be one of: {', '.join(PACKAGE_PLATFORMS)}",
        )

    existing = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package.name)
        .first()
    )
    if existing:
        raise HTTPException(
            status_code=400, detail="Package already exists in this project"
        )

    db_package = Package(
        project_id=project.id,
        name=package.name,
        description=package.description,
        format=package.format,
        platform=package.platform,
    )
    db.add(db_package)
    db.commit()
    db.refresh(db_package)
    return db_package
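

# Example (illustrative; "generic" and "linux" are placeholder values, the
# accepted sets come from PACKAGE_FORMATS and PACKAGE_PLATFORMS):
#
#   POST /api/v1/project/demo/packages
#   {"name": "app", "description": "Demo app", "format": "generic", "platform": "linux"}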


@router.delete(
    "/api/v1/project/{project_name}/packages/{package_name}",
    status_code=204,
)
def delete_package(
    project_name: str,
    package_name: str,
    request: Request,
    db: Session = Depends(get_db),
):
    """
    Delete a package and all its tags.

    Decrements ref_count for all artifacts referenced by tags in this package.
    The package's uploads records are preserved for audit purposes but will
    have null package_id after cascade.
    """
    user_id = get_user_id(request)

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Get tags count and affected artifacts for logging
    tags = db.query(Tag).filter(Tag.package_id == package.id).all()
    artifact_ids = list(set(tag.artifact_id for tag in tags))
    tag_count = len(tags)

    logger.info(
        f"Package '{package_name}' deletion: {tag_count} tags affecting "
        f"{len(artifact_ids)} artifacts"
    )

    # Delete the package (cascade will delete tags, which triggers ref_count decrements)
    # NOTE: SQL triggers (tags_ref_count_delete_trigger) handle ref_count automatically
    db.delete(package)
    db.commit()

    # Audit log (after commit)
    _log_audit(
        db,
        action="delete_package",
        resource=f"project/{project_name}/{package_name}",
        user_id=user_id,
        source_ip=request.client.host if request.client else None,
        details={
            "tags_deleted": tag_count,
            "artifacts_affected": artifact_ids,
        },
    )
    db.commit()

    return None


# Upload artifact
@router.post(
    "/api/v1/project/{project_name}/{package_name}/upload",
    response_model=UploadResponse,
)
def upload_artifact(
    project_name: str,
    package_name: str,
    request: Request,
    file: UploadFile = File(...),
    tag: Optional[str] = Form(None),
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    content_length: Optional[int] = Header(None, alias="Content-Length"),
):
    user_id = get_user_id(request)

    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Validate file size
    settings = get_settings()
    if content_length is not None:
        if content_length > settings.max_file_size:
            raise HTTPException(
                status_code=413,
                detail=f"File too large. Maximum size is {settings.max_file_size // (1024 * 1024 * 1024)}GB",
            )
        if content_length < settings.min_file_size:
            raise HTTPException(
                status_code=422,
                detail="Empty files are not allowed",
            )

    # Extract format-specific metadata before storing
    file_metadata = {}
    if file.filename:
        # Read file into memory for metadata extraction
        file_content = file.file.read()
        file.file.seek(0)

        # Extract metadata
        file_metadata = extract_metadata(
            io.BytesIO(file_content), file.filename, file.content_type
        )

    # Store file (uses multipart for large files) with error handling
    try:
        storage_result = storage.store(file.file, content_length)
    except HashComputationError as e:
        logger.error(f"Hash computation failed during upload: {e}")
        raise HTTPException(
            status_code=422,
            detail=f"Failed to process file: hash computation error - {str(e)}",
        )
    except S3ExistenceCheckError as e:
        logger.error(f"S3 existence check failed during upload: {e}")
        raise HTTPException(
            status_code=503,
            detail="Storage service temporarily unavailable. Please retry.",
        )
    except S3UploadError as e:
        logger.error(f"S3 upload failed: {e}")
        raise HTTPException(
            status_code=503,
            detail="Storage service temporarily unavailable. Please retry.",
        )
    except S3StorageUnavailableError as e:
        logger.error(f"S3 storage unavailable: {e}")
        raise HTTPException(
            status_code=503,
            detail="Storage backend is unavailable. Please retry later.",
        )
    except HashCollisionError as e:
        # This is extremely rare - log critical alert
        logger.critical(f"HASH COLLISION DETECTED: {e}")
        raise HTTPException(
            status_code=500,
            detail="Data integrity error detected. Please contact support.",
        )
    except FileSizeExceededError as e:
        logger.warning(f"File size exceeded during upload: {e}")
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum size is {settings.max_file_size // (1024 * 1024 * 1024)}GB",
        )
    except StorageError as e:
        logger.error(f"Storage error during upload: {e}")
        raise HTTPException(status_code=500, detail="Internal storage error")

    # Check if this is a deduplicated upload
    deduplicated = False
    saved_bytes = 0

    # Create or update artifact record
    # Use with_for_update() to lock the row and prevent race conditions
    artifact = (
        db.query(Artifact)
        .filter(Artifact.id == storage_result.sha256)
        .with_for_update()
        .first()
    )
    if artifact:
        # Artifact exists - this is a deduplicated upload
        # NOTE: ref_count is managed by SQL triggers on tag INSERT/DELETE
        # We don't manually increment here - the tag creation will trigger the increment
        deduplicated = True
        saved_bytes = storage_result.size
        # Merge metadata if new metadata was extracted
        if file_metadata and artifact.artifact_metadata:
            artifact.artifact_metadata = {**artifact.artifact_metadata, **file_metadata}
        elif file_metadata:
            artifact.artifact_metadata = file_metadata
        # Update checksums if not already set
        if not artifact.checksum_md5 and storage_result.md5:
            artifact.checksum_md5 = storage_result.md5
        if not artifact.checksum_sha1 and storage_result.sha1:
            artifact.checksum_sha1 = storage_result.sha1
        if not artifact.s3_etag and storage_result.s3_etag:
            artifact.s3_etag = storage_result.s3_etag
        # Refresh to get updated ref_count
        db.refresh(artifact)
    else:
        # Create new artifact with ref_count=0
        # NOTE: ref_count is managed by SQL triggers on tag INSERT/DELETE
        # When a tag is created for this artifact, the trigger will increment ref_count
        artifact = Artifact(
            id=storage_result.sha256,
            size=storage_result.size,
            content_type=file.content_type,
            original_name=file.filename,
            checksum_md5=storage_result.md5,
            checksum_sha1=storage_result.sha1,
            s3_etag=storage_result.s3_etag,
            created_by=user_id,
            s3_key=storage_result.s3_key,
            artifact_metadata=file_metadata or {},
            ref_count=0,  # Triggers will manage this
        )
        db.add(artifact)

    # Record upload
    upload = Upload(
        artifact_id=storage_result.sha256,
        package_id=package.id,
        original_name=file.filename,
        uploaded_by=user_id,
        source_ip=request.client.host if request.client else None,
        deduplicated=deduplicated,
    )
    db.add(upload)

    # Create or update tag if provided (with ref_count management and history)
    if tag:
        _create_or_update_tag(db, package.id, tag, storage_result.sha256, user_id)

    # Log deduplication event
    if deduplicated:
        logger.info(
            f"Deduplication: artifact {storage_result.sha256[:12]}... "
            f"ref_count={artifact.ref_count}, saved_bytes={saved_bytes}"
        )

    # Audit log
    _log_audit(
        db,
        action="upload",
        resource=f"project/{project_name}/{package_name}/artifact/{storage_result.sha256[:12]}",
        user_id=user_id,
        source_ip=request.client.host if request.client else None,
        details={
            "artifact_id": storage_result.sha256,
            "size": storage_result.size,
            "deduplicated": deduplicated,
            "saved_bytes": saved_bytes,
            "tag": tag,
        },
    )

    db.commit()

    return UploadResponse(
        artifact_id=storage_result.sha256,
        sha256=storage_result.sha256,
        size=storage_result.size,
        project=project_name,
        package=package_name,
        tag=tag,
        checksum_md5=storage_result.md5,
        checksum_sha1=storage_result.sha1,
        s3_etag=storage_result.s3_etag,
        format_metadata=artifact.artifact_metadata,
        deduplicated=deduplicated,
        ref_count=artifact.ref_count,
    )
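

# Example (illustrative, assuming the API is served on localhost:8000):
#
#   $ curl -X POST http://localhost:8000/api/v1/project/demo/app/upload \
#         -F "file=@app-1.0.0.tar.gz" -F "tag=v1.0.0"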


# Resumable upload endpoints
@router.post(
    "/api/v1/project/{project_name}/{package_name}/upload/init",
    response_model=ResumableUploadInitResponse,
)
def init_resumable_upload(
    project_name: str,
    package_name: str,
    init_request: ResumableUploadInitRequest,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Initialize a resumable upload session.
    Client must provide the SHA256 hash of the file in advance.
    """
    user_id = get_user_id(request)

    # Validate project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Validate file size
    settings = get_settings()
    if init_request.size > settings.max_file_size:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum size is {settings.max_file_size // (1024 * 1024 * 1024)}GB",
        )
    if init_request.size < settings.min_file_size:
        raise HTTPException(
            status_code=422,
            detail="Empty files are not allowed",
        )

    # Check if artifact already exists (deduplication)
    existing_artifact = (
        db.query(Artifact).filter(Artifact.id == init_request.expected_hash).first()
    )
    if existing_artifact:
        # File already exists - deduplicated upload
        # NOTE: ref_count is managed by SQL triggers on tag INSERT/DELETE/UPDATE
        # We do NOT manually increment here because:
        # 1. If a tag is provided, _create_or_update_tag will create/update a tag
        #    and the SQL trigger will handle ref_count
        # 2. If no tag is provided, ref_count shouldn't change (no new reference)

        # Record the upload
        upload = Upload(
            artifact_id=init_request.expected_hash,
            package_id=package.id,
            original_name=init_request.filename,
            uploaded_by=user_id,
            source_ip=request.client.host if request.client else None,
            deduplicated=True,
        )
        db.add(upload)

        # Create or update tag if provided (with ref_count management and history)
        if init_request.tag:
            _create_or_update_tag(
                db, package.id, init_request.tag, init_request.expected_hash, user_id
            )

        # Log deduplication event
        logger.info(
            f"Deduplication (resumable init): artifact {init_request.expected_hash[:12]}... "
            f"saved_bytes={init_request.size}"
        )

        # Audit log
        _log_audit(
            db,
            action="upload",
            resource=f"project/{project_name}/{package_name}/artifact/{init_request.expected_hash[:12]}",
            user_id=user_id,
            source_ip=request.client.host if request.client else None,
            details={
                "artifact_id": init_request.expected_hash,
                "size": init_request.size,
                "deduplicated": True,
                "saved_bytes": init_request.size,
                "tag": init_request.tag,
                "resumable": True,
            },
        )

        db.commit()

        return ResumableUploadInitResponse(
            upload_id=None,
            already_exists=True,
            artifact_id=init_request.expected_hash,
            chunk_size=MULTIPART_CHUNK_SIZE,
        )

    # Initialize resumable upload
    session = storage.initiate_resumable_upload(init_request.expected_hash)

    return ResumableUploadInitResponse(
        upload_id=session["upload_id"],
        already_exists=False,
        artifact_id=None,
        chunk_size=MULTIPART_CHUNK_SIZE,
    )


@router.put(
    "/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/part/{part_number}",
    response_model=ResumableUploadPartResponse,
)
async def upload_part(
    project_name: str,
    package_name: str,
    upload_id: str,
    part_number: int,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Upload a part of a resumable upload.
    Part numbers start at 1.
    """
    # Validate project and package exist
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    if part_number < 1:
        raise HTTPException(status_code=400, detail="Part number must be >= 1")

    # Read part data from the request body. The handler is async so the body
    # can be awaited on the server's event loop; spinning up a separate event
    # loop inside a sync handler does not work because the request's receive
    # channel belongs to the running loop.
    data = await request.body()

    if not data:
        raise HTTPException(status_code=400, detail="No data in request body")

    try:
        part_info = storage.upload_part(upload_id, part_number, data)
        return ResumableUploadPartResponse(
            part_number=part_info["PartNumber"],
            etag=part_info["ETag"],
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@router.post(
    "/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete"
)
def complete_resumable_upload(
    project_name: str,
    package_name: str,
    upload_id: str,
    complete_request: ResumableUploadCompleteRequest,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """Complete a resumable upload"""
    user_id = get_user_id(request)

    # Validate project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    try:
        sha256_hash, s3_key = storage.complete_resumable_upload(upload_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))

    # Get file size from S3
    obj_info = storage.get_object_info(s3_key)
    size = obj_info["size"] if obj_info else 0

    # Create artifact record (metadata extraction is skipped here since the
    # file never passes through the backend in one piece)
    artifact = Artifact(
        id=sha256_hash,
        size=size,
        s3_key=s3_key,
        created_by=user_id,
        artifact_metadata={},
    )
    db.add(artifact)

    # Record upload
    upload = Upload(
        artifact_id=sha256_hash,
        package_id=package.id,
        uploaded_by=user_id,
        source_ip=request.client.host if request.client else None,
    )
    db.add(upload)

    # Create or update tag if provided, via the shared helper so tag history
    # is recorded consistently with the other upload paths
    if complete_request.tag:
        _create_or_update_tag(
            db, package.id, complete_request.tag, sha256_hash, user_id
        )

    db.commit()

    return ResumableUploadCompleteResponse(
        artifact_id=sha256_hash,
        size=size,
        project=project_name,
        package=package_name,
        tag=complete_request.tag,
    )


@router.delete("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}")
def abort_resumable_upload(
    project_name: str,
    package_name: str,
    upload_id: str,
    storage: S3Storage = Depends(get_storage),
):
    """Abort a resumable upload"""
    try:
        storage.abort_resumable_upload(upload_id)
        return {"status": "aborted"}
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))


@router.get("/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/status")
def get_upload_status(
    project_name: str,
    package_name: str,
    upload_id: str,
    storage: S3Storage = Depends(get_storage),
):
    """Get status of a resumable upload"""
    try:
        parts = storage.list_upload_parts(upload_id)
        uploaded_parts = [p["PartNumber"] for p in parts]
        total_bytes = sum(p.get("Size", 0) for p in parts)

        return ResumableUploadStatusResponse(
            upload_id=upload_id,
            uploaded_parts=uploaded_parts,
            total_uploaded_bytes=total_bytes,
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
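

# Illustrative end-to-end client flow for the resumable endpoints above,
# sketched with the third-party `requests` library (the base URL and the
# project/package names are placeholders):
#
#   import hashlib
#   import requests
#
#   base = "http://localhost:8000/api/v1/project/demo/app"
#   data = open("app-1.0.0.tar.gz", "rb").read()
#   init = requests.post(f"{base}/upload/init", json={
#       "expected_hash": hashlib.sha256(data).hexdigest(),
#       "size": len(data),
#       "filename": "app-1.0.0.tar.gz",
#       "tag": "v1.0.0",
#   }).json()
#   if not init["already_exists"]:
#       uid, chunk = init["upload_id"], init["chunk_size"]
#       for n, off in enumerate(range(0, len(data), chunk), start=1):
#           requests.put(f"{base}/upload/{uid}/part/{n}", data=data[off:off + chunk])
#       requests.post(f"{base}/upload/{uid}/complete", json={"tag": "v1.0.0"})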


# Helper function to resolve artifact reference
def _resolve_artifact_ref(
    ref: str,
    package: Package,
    db: Session,
) -> Optional[Artifact]:
    """Resolve a reference (tag name, artifact:hash, tag:name) to an artifact"""
    artifact = None

    # Check for explicit prefixes
    if ref.startswith("artifact:"):
        artifact_id = ref[9:]
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    elif ref.startswith("tag:") or ref.startswith("version:"):
        tag_name = ref.split(":", 1)[1]
        tag = (
            db.query(Tag)
            .filter(Tag.package_id == package.id, Tag.name == tag_name)
            .first()
        )
        if tag:
            artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
    else:
        # Try as tag name first
        tag = (
            db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
        )
        if tag:
            artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
        else:
            # Try as direct artifact ID
            artifact = db.query(Artifact).filter(Artifact.id == ref).first()

    return artifact
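

# Accepted reference forms, as resolved above:
#
#   "v1.2.3"            -> tried as a tag name first, then as a raw artifact ID
#   "tag:v1.2.3"        -> explicit tag lookup ("version:" works as an alias)
#   "artifact:<sha256>" -> explicit artifact ID lookup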


# Download artifact with range request support and download modes
@router.get("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def download_artifact(
    project_name: str,
    package_name: str,
    ref: str,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    range: Optional[str] = Header(None),
    mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
        default=None,
        description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)",
    ),
):
    settings = get_settings()

    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Resolve reference to artifact
    artifact = _resolve_artifact_ref(ref, package, db)
    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    filename = artifact.original_name or f"{artifact.id}"

    # Determine download mode (query param overrides server default)
    download_mode = mode or settings.download_mode

    # Handle presigned mode - return JSON with presigned URL
    if download_mode == "presigned":
        presigned_url = storage.generate_presigned_url(
            artifact.s3_key,
            response_content_type=artifact.content_type,
            response_content_disposition=f'attachment; filename="{filename}"',
        )
        expires_at = datetime.now(timezone.utc) + timedelta(
            seconds=settings.presigned_url_expiry
        )

        return PresignedUrlResponse(
            url=presigned_url,
            expires_at=expires_at,
            method="GET",
            artifact_id=artifact.id,
            size=artifact.size,
            content_type=artifact.content_type,
            original_name=artifact.original_name,
            checksum_sha256=artifact.id,
            checksum_md5=artifact.checksum_md5,
        )

    # Handle redirect mode - return 302 redirect to presigned URL
    if download_mode == "redirect":
        presigned_url = storage.generate_presigned_url(
            artifact.s3_key,
            response_content_type=artifact.content_type,
            response_content_disposition=f'attachment; filename="{filename}"',
        )
        return RedirectResponse(url=presigned_url, status_code=302)

    # Proxy mode (default fallback) - stream through backend
    # Handle range requests
    if range:
        stream, content_length, content_range = storage.get_stream(
            artifact.s3_key, range
        )

        headers = {
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Accept-Ranges": "bytes",
            "Content-Length": str(content_length),
        }
        if content_range:
            headers["Content-Range"] = content_range

        return StreamingResponse(
            stream,
            status_code=206,  # Partial Content
            media_type=artifact.content_type or "application/octet-stream",
            headers=headers,
        )

    # Full download
    stream, content_length, _ = storage.get_stream(artifact.s3_key)

    return StreamingResponse(
        stream,
        media_type=artifact.content_type or "application/octet-stream",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Accept-Ranges": "bytes",
            "Content-Length": str(content_length),
        },
    )
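

# Examples (illustrative, assuming the API is served on localhost:8000):
#
#   # Proxy mode with a range request, resuming from byte 1048576:
#   $ curl -H "Range: bytes=1048576-" \
#         http://localhost:8000/api/v1/project/demo/app/+/v1.0.0 -o app.tar.gz
#
#   # Ask for a presigned S3 URL as JSON instead of the bytes:
#   $ curl "http://localhost:8000/api/v1/project/demo/app/+/v1.0.0?mode=presigned"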


# Get presigned URL endpoint (explicit endpoint for getting URL without redirect)
@router.get(
    "/api/v1/project/{project_name}/{package_name}/+/{ref}/url",
    response_model=PresignedUrlResponse,
)
def get_artifact_url(
    project_name: str,
    package_name: str,
    ref: str,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    expiry: Optional[int] = Query(
        default=None,
        description="Custom expiry time in seconds (defaults to server setting)",
    ),
):
    """
    Get a presigned URL for direct S3 download.
    This endpoint always returns a presigned URL regardless of server download mode.
    """
    settings = get_settings()

    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Resolve reference to artifact
    artifact = _resolve_artifact_ref(ref, package, db)
    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    filename = artifact.original_name or f"{artifact.id}"
    url_expiry = expiry or settings.presigned_url_expiry

    presigned_url = storage.generate_presigned_url(
        artifact.s3_key,
        expiry=url_expiry,
        response_content_type=artifact.content_type,
        response_content_disposition=f'attachment; filename="{filename}"',
    )
    expires_at = datetime.now(timezone.utc) + timedelta(seconds=url_expiry)

    return PresignedUrlResponse(
        url=presigned_url,
        expires_at=expires_at,
        method="GET",
        artifact_id=artifact.id,
        size=artifact.size,
        content_type=artifact.content_type,
        original_name=artifact.original_name,
        checksum_sha256=artifact.id,
        checksum_md5=artifact.checksum_md5,
    )


# HEAD request for download (to check file info without downloading)
@router.head("/api/v1/project/{project_name}/{package_name}/+/{ref}")
def head_artifact(
    project_name: str,
    package_name: str,
    ref: str,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Resolve reference to artifact
    artifact = _resolve_artifact_ref(ref, package, db)
    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    filename = artifact.original_name or f"{artifact.id}"

    return Response(
        content=b"",
        media_type=artifact.content_type or "application/octet-stream",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
            "Accept-Ranges": "bytes",
            "Content-Length": str(artifact.size),
            "X-Artifact-Id": artifact.id,
        },
    )


# Compatibility route
@router.get("/project/{project_name}/{package_name}/+/{ref}")
def download_artifact_compat(
    project_name: str,
    package_name: str,
    ref: str,
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    range: Optional[str] = Header(None),
):
    return download_artifact(
        project_name, package_name, ref, request, db, storage, range
    )


# Tag routes
@router.get(
    "/api/v1/project/{project_name}/{package_name}/tags",
    response_model=PaginatedResponse[TagDetailResponse],
)
def list_tags(
    project_name: str,
    package_name: str,
    page: int = Query(default=1, ge=1, description="Page number"),
    limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
    search: Optional[str] = Query(default=None, description="Search by tag name"),
    sort: str = Query(default="name", description="Sort field (name, created_at)"),
    order: str = Query(default="asc", description="Sort order (asc, desc)"),
    db: Session = Depends(get_db),
):
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Validate sort field
    valid_sort_fields = {"name": Tag.name, "created_at": Tag.created_at}
    if sort not in valid_sort_fields:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}",
        )

    # Validate order
    if order not in ("asc", "desc"):
        raise HTTPException(
            status_code=400, detail="Invalid order. Must be 'asc' or 'desc'"
        )

    # Base query with JOIN to artifact for metadata
    query = (
        db.query(Tag, Artifact)
        .join(Artifact, Tag.artifact_id == Artifact.id)
        .filter(Tag.package_id == package.id)
    )

    # Apply search filter (case-insensitive on tag name OR artifact original filename)
    if search:
        search_lower = search.lower()
        query = query.filter(
            or_(
                func.lower(Tag.name).contains(search_lower),
                func.lower(Artifact.original_name).contains(search_lower),
            )
        )

    # Get total count before pagination
    total = query.count()

    # Apply sorting
    sort_column = valid_sort_fields[sort]
    if order == "desc":
        query = query.order_by(sort_column.desc())
    else:
        query = query.order_by(sort_column.asc())

    # Apply pagination
    offset = (page - 1) * limit
    results = query.offset(offset).limit(limit).all()

    # Calculate total pages
    total_pages = math.ceil(total / limit) if total > 0 else 1

    # Build detailed responses with artifact metadata
    detailed_tags = []
    for tag, artifact in results:
        detailed_tags.append(
            TagDetailResponse(
                id=tag.id,
                package_id=tag.package_id,
                name=tag.name,
                artifact_id=tag.artifact_id,
                created_at=tag.created_at,
                created_by=tag.created_by,
                artifact_size=artifact.size,
                artifact_content_type=artifact.content_type,
                artifact_original_name=artifact.original_name,
                artifact_created_at=artifact.created_at,
                artifact_format_metadata=artifact.artifact_metadata,
            )
        )

    return PaginatedResponse(
        items=detailed_tags,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
        ),
    )


@router.post(
    "/api/v1/project/{project_name}/{package_name}/tags", response_model=TagResponse
)
def create_tag(
    project_name: str,
    package_name: str,
    tag: TagCreate,
    request: Request,
    db: Session = Depends(get_db),
):
    user_id = get_user_id(request)

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Verify artifact exists
    artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    # Create or update the tag via the shared helper so tag history is
    # recorded and ref_count handling stays consistent with the upload paths
    db_tag, _, _ = _create_or_update_tag(
        db, package.id, tag.name, tag.artifact_id, user_id
    )
    db.commit()
    db.refresh(db_tag)
    return db_tag


@router.get(
    "/api/v1/project/{project_name}/{package_name}/tags/{tag_name}",
    response_model=TagDetailResponse,
)
def get_tag(
    project_name: str,
    package_name: str,
    tag_name: str,
    db: Session = Depends(get_db),
):
    """Get a single tag with full artifact metadata"""
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    result = (
        db.query(Tag, Artifact)
        .join(Artifact, Tag.artifact_id == Artifact.id)
        .filter(Tag.package_id == package.id, Tag.name == tag_name)
        .first()
    )

    if not result:
        raise HTTPException(status_code=404, detail="Tag not found")

    tag, artifact = result
    return TagDetailResponse(
        id=tag.id,
        package_id=tag.package_id,
        name=tag.name,
        artifact_id=tag.artifact_id,
        created_at=tag.created_at,
        created_by=tag.created_by,
        artifact_size=artifact.size,
        artifact_content_type=artifact.content_type,
        artifact_original_name=artifact.original_name,
        artifact_created_at=artifact.created_at,
        artifact_format_metadata=artifact.artifact_metadata,
    )


@router.get(
    "/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history",
    response_model=List[TagHistoryResponse],
)
def get_tag_history(
    project_name: str,
    package_name: str,
    tag_name: str,
    db: Session = Depends(get_db),
):
    """Get the history of artifact assignments for a tag"""
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    tag = (
        db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
    )
    if not tag:
        raise HTTPException(status_code=404, detail="Tag not found")

    history = (
        db.query(TagHistory)
        .filter(TagHistory.tag_id == tag.id)
        .order_by(TagHistory.changed_at.desc())
        .all()
    )
    return history


@router.delete(
    "/api/v1/project/{project_name}/{package_name}/tags/{tag_name}",
    status_code=204,
)
def delete_tag(
    project_name: str,
    package_name: str,
    tag_name: str,
    request: Request,
    db: Session = Depends(get_db),
):
    """
    Delete a tag and decrement the artifact's ref_count.

    Records the deletion in tag history before removing the tag.
    """
    user_id = get_user_id(request)

    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    tag = (
        db.query(Tag).filter(Tag.package_id == package.id, Tag.name == tag_name).first()
    )
    if not tag:
        raise HTTPException(status_code=404, detail="Tag not found")

    artifact_id = tag.artifact_id

    # Record deletion in history
    history = TagHistory(
        tag_id=tag.id,
        old_artifact_id=artifact_id,
        new_artifact_id=artifact_id,  # Same artifact for delete record
        change_type="delete",
        changed_by=user_id,
    )
    db.add(history)
    db.flush()  # Flush history before deleting tag (cascade will delete history)

    # NOTE: ref_count decrement is handled by SQL trigger (tags_ref_count_delete_trigger)
    # when the tag is deleted below
    logger.info(f"Tag '{tag_name}' deleted for artifact {artifact_id[:12]}...")

    # Delete the tag (SQL trigger will decrement ref_count)
    db.delete(tag)
    db.commit()

    # Audit log (after commit so we can query the updated ref_count)
    artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    _log_audit(
        db,
        action="delete_tag",
        resource=f"project/{project_name}/{package_name}/tag/{tag_name}",
        user_id=user_id,
        source_ip=request.client.host if request.client else None,
        details={
            "artifact_id": artifact_id,
            "ref_count_after": artifact.ref_count if artifact else 0,
        },
    )
    db.commit()  # Commit the audit log

    return None
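
# Deletion flow, as implemented above: the TagHistory row is flushed
# first, the tag row is deleted, and the database-side trigger
# (tags_ref_count_delete_trigger) decrements the artifact's ref_count.
# Once ref_count reaches 0 the artifact becomes eligible for cleanup
# via /api/v1/admin/garbage-collect below.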


# Consumer routes
@router.get(
    "/api/v1/project/{project_name}/{package_name}/consumers",
    response_model=List[ConsumerResponse],
)
def get_consumers(project_name: str, package_name: str, db: Session = Depends(get_db)):
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    consumers = (
        db.query(Consumer)
        .filter(Consumer.package_id == package.id)
        .order_by(Consumer.last_access.desc())
        .all()
    )
    return consumers


# Package artifacts
@router.get(
    "/api/v1/project/{project_name}/{package_name}/artifacts",
    response_model=PaginatedResponse[PackageArtifactResponse],
)
def list_package_artifacts(
    project_name: str,
    package_name: str,
    page: int = Query(default=1, ge=1, description="Page number"),
    limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
    content_type: Optional[str] = Query(
        default=None, description="Filter by content type"
    ),
    created_after: Optional[datetime] = Query(
        default=None, description="Filter artifacts created after this date"
    ),
    created_before: Optional[datetime] = Query(
        default=None, description="Filter artifacts created before this date"
    ),
    db: Session = Depends(get_db),
):
    """List all unique artifacts uploaded to a package"""
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Get distinct artifacts uploaded to this package via uploads table
    artifact_ids_subquery = (
        db.query(func.distinct(Upload.artifact_id))
        .filter(Upload.package_id == package.id)
        .subquery()
    )

    query = db.query(Artifact).filter(Artifact.id.in_(artifact_ids_subquery))

    # Apply content_type filter
    if content_type:
        query = query.filter(Artifact.content_type == content_type)

    # Apply date range filters
    if created_after:
        query = query.filter(Artifact.created_at >= created_after)
    if created_before:
        query = query.filter(Artifact.created_at <= created_before)

    # Get total count before pagination
    total = query.count()

    # Apply pagination
    offset = (page - 1) * limit
    artifacts = (
        query.order_by(Artifact.created_at.desc()).offset(offset).limit(limit).all()
    )

    # Calculate total pages
    total_pages = math.ceil(total / limit) if total > 0 else 1

    # Build responses with tag info
    artifact_responses = []
    for artifact in artifacts:
        # Get tags pointing to this artifact in this package
        tags = (
            db.query(Tag.name)
            .filter(Tag.package_id == package.id, Tag.artifact_id == artifact.id)
            .all()
        )
        tag_names = [t.name for t in tags]

        artifact_responses.append(
            PackageArtifactResponse(
                id=artifact.id,
                size=artifact.size,
                content_type=artifact.content_type,
                original_name=artifact.original_name,
                created_at=artifact.created_at,
                created_by=artifact.created_by,
                format_metadata=artifact.format_metadata,
                tags=tag_names,
            )
        )

    return PaginatedResponse(
        items=artifact_responses,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
        ),
    )


# Artifact by ID
@router.get("/api/v1/artifact/{artifact_id}", response_model=ArtifactDetailResponse)
def get_artifact(artifact_id: str, db: Session = Depends(get_db)):
    """Get artifact metadata including list of packages/tags referencing it"""
    artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    # Get all tags referencing this artifact with package and project info
    tags_with_context = (
        db.query(Tag, Package, Project)
        .join(Package, Tag.package_id == Package.id)
        .join(Project, Package.project_id == Project.id)
        .filter(Tag.artifact_id == artifact_id)
        .all()
    )

    tag_infos = [
        ArtifactTagInfo(
            id=tag.id,
            name=tag.name,
            package_id=package.id,
            package_name=package.name,
            project_name=project.name,
        )
        for tag, package, project in tags_with_context
    ]

    return ArtifactDetailResponse(
        id=artifact.id,
        sha256=artifact.id,  # SHA256 hash is the artifact ID
        size=artifact.size,
        content_type=artifact.content_type,
        original_name=artifact.original_name,
        checksum_md5=artifact.checksum_md5,
        checksum_sha1=artifact.checksum_sha1,
        s3_etag=artifact.s3_etag,
        created_at=artifact.created_at,
        created_by=artifact.created_by,
        ref_count=artifact.ref_count,
        format_metadata=artifact.format_metadata,
        tags=tag_infos,
    )


# =============================================================================
# Garbage Collection Endpoints (ISSUE 36)
# =============================================================================


@router.get(
    "/api/v1/admin/orphaned-artifacts",
    response_model=List[OrphanedArtifactResponse],
)
def list_orphaned_artifacts(
    request: Request,
    limit: int = Query(
        default=100, ge=1, le=1000, description="Max artifacts to return"
    ),
    db: Session = Depends(get_db),
):
    """
    List orphaned artifacts (ref_count=0, not referenced by any tag).

    These artifacts can be safely removed by garbage collection.
    """
    orphaned = (
        db.query(Artifact)
        .filter(Artifact.ref_count == 0)
        .order_by(Artifact.created_at.asc())
        .limit(limit)
        .all()
    )

    return [
        OrphanedArtifactResponse(
            id=a.id,
            size=a.size,
            created_at=a.created_at,
            created_by=a.created_by,
            original_name=a.original_name,
        )
        for a in orphaned
    ]


@router.post(
    "/api/v1/admin/garbage-collect",
    response_model=GarbageCollectionResponse,
)
def garbage_collect(
    request: Request,
    dry_run: bool = Query(
        default=True, description="If true, only report what would be deleted"
    ),
    limit: int = Query(
        default=100, ge=1, le=1000, description="Max artifacts to delete per run"
    ),
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Clean up orphaned artifacts (ref_count=0) from storage and database.

    By default runs in dry-run mode (only reports what would be deleted).
    Set dry_run=false to actually delete artifacts.

    Returns list of deleted artifact IDs and total bytes freed.
    """
    user_id = get_user_id(request)

    # Find orphaned artifacts
    orphaned = (
        db.query(Artifact)
        .filter(Artifact.ref_count == 0)
        .order_by(Artifact.created_at.asc())
        .limit(limit)
        .all()
    )

    deleted_ids = []
    bytes_freed = 0

    for artifact in orphaned:
        if not dry_run:
            # Delete from S3
            try:
                storage.delete(artifact.s3_key)
            except Exception as e:
                logger.error(f"Failed to delete S3 object {artifact.s3_key}: {e}")
                continue

            # Delete from database
            db.delete(artifact)
            logger.info(
                f"Garbage collected artifact {artifact.id[:12]}... ({artifact.size} bytes)"
            )

        deleted_ids.append(artifact.id)
        bytes_freed += artifact.size

    if not dry_run:
        # Audit log
        _log_audit(
            db,
            action="garbage_collect",
            resource="artifacts",
            user_id=user_id,
            source_ip=request.client.host if request.client else None,
            details={
                "artifacts_deleted": len(deleted_ids),
                "bytes_freed": bytes_freed,
                "artifact_ids": deleted_ids[:10],  # Log first 10 for brevity
            },
        )
        db.commit()

    return GarbageCollectionResponse(
        artifacts_deleted=len(deleted_ids),
        bytes_freed=bytes_freed,
        artifact_ids=deleted_ids,
        dry_run=dry_run,
    )
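
# Suggested invocation pattern (illustrative): inspect first with
#   POST /api/v1/admin/garbage-collect?dry_run=true
# then reclaim space with
#   POST /api/v1/admin/garbage-collect?dry_run=false&limit=500
# An S3 deletion failure skips the database delete for that artifact,
# so it is retried on a later run instead of dangling in the database.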


# =============================================================================
# Statistics Endpoints (ISSUE 34)
# =============================================================================


@router.get("/api/v1/stats", response_model=StorageStatsResponse)
def get_storage_stats(db: Session = Depends(get_db)):
    """
    Get global storage statistics including deduplication metrics.
    """
    # Total artifacts and size
    total_stats = db.query(
        func.count(Artifact.id),
        func.coalesce(func.sum(Artifact.size), 0),
    ).first()
    total_artifacts = total_stats[0] or 0
    total_size_bytes = total_stats[1] or 0

    # Unique artifacts (ref_count > 0)
    unique_stats = (
        db.query(
            func.count(Artifact.id),
        )
        .filter(Artifact.ref_count > 0)
        .first()
    )
    unique_artifacts = unique_stats[0] or 0

    # Orphaned artifacts (ref_count = 0)
    orphaned_stats = (
        db.query(
            func.count(Artifact.id),
            func.coalesce(func.sum(Artifact.size), 0),
        )
        .filter(Artifact.ref_count == 0)
        .first()
    )
    orphaned_artifacts = orphaned_stats[0] or 0
    orphaned_size_bytes = orphaned_stats[1] or 0

    # Total uploads and deduplicated uploads
    upload_stats = db.query(
        func.count(Upload.id),
        func.count(Upload.id).filter(Upload.deduplicated == True),
    ).first()
    total_uploads = upload_stats[0] or 0
    deduplicated_uploads = upload_stats[1] or 0

    # Calculate deduplication ratio
    deduplication_ratio = (
        total_uploads / unique_artifacts if unique_artifacts > 0 else 0.0
    )

    # Calculate storage saved (sum of size * (ref_count - 1) for artifacts with ref_count > 1)
    # This represents bytes that would have been stored without deduplication
    saved_query = (
        db.query(func.coalesce(func.sum(Artifact.size * (Artifact.ref_count - 1)), 0))
        .filter(Artifact.ref_count > 1)
        .first()
    )
    storage_saved_bytes = saved_query[0] or 0

    return StorageStatsResponse(
        total_artifacts=total_artifacts,
        total_size_bytes=total_size_bytes,
        unique_artifacts=unique_artifacts,
        orphaned_artifacts=orphaned_artifacts,
        orphaned_size_bytes=orphaned_size_bytes,
        total_uploads=total_uploads,
        deduplicated_uploads=deduplicated_uploads,
        deduplication_ratio=deduplication_ratio,
        storage_saved_bytes=storage_saved_bytes,
    )
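
# Worked example (hypothetical numbers): 150 uploads resolving to 100
# unique artifacts give deduplication_ratio = 150 / 100 = 1.5; a 10 MiB
# artifact with ref_count=3 contributes 10 MiB * (3 - 1) = 20 MiB to
# storage_saved_bytes.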


@router.get("/api/v1/stats/storage", response_model=StorageStatsResponse)
def get_storage_stats_alias(db: Session = Depends(get_db)):
    """Alias for /api/v1/stats - get global storage statistics."""
    return get_storage_stats(db)


@router.get("/api/v1/stats/deduplication", response_model=DeduplicationStatsResponse)
def get_deduplication_stats(
    top_n: int = Query(
        default=10,
        ge=1,
        le=100,
        description="Number of top referenced artifacts to return",
    ),
    db: Session = Depends(get_db),
):
    """
    Get detailed deduplication effectiveness statistics.
    """
    # Total logical bytes (sum of all upload sizes - what would be stored without dedup)
    # We calculate this as: sum(artifact.size * artifact.ref_count) for all artifacts
    logical_query = db.query(
        func.coalesce(func.sum(Artifact.size * Artifact.ref_count), 0)
    ).first()
    total_logical_bytes = logical_query[0] or 0

    # Total physical bytes (actual storage used)
    physical_query = (
        db.query(func.coalesce(func.sum(Artifact.size), 0))
        .filter(Artifact.ref_count > 0)
        .first()
    )
    total_physical_bytes = physical_query[0] or 0

    # Bytes saved
    bytes_saved = total_logical_bytes - total_physical_bytes

    # Savings percentage
    savings_percentage = (
        (bytes_saved / total_logical_bytes * 100) if total_logical_bytes > 0 else 0.0
    )

    # Upload counts
    total_uploads = db.query(func.count(Upload.id)).scalar() or 0
    unique_artifacts = (
        db.query(func.count(Artifact.id)).filter(Artifact.ref_count > 0).scalar() or 0
    )
    duplicate_uploads = (
        total_uploads - unique_artifacts if total_uploads > unique_artifacts else 0
    )

    # Average and max ref_count
    ref_stats = (
        db.query(
            func.coalesce(func.avg(Artifact.ref_count), 0),
            func.coalesce(func.max(Artifact.ref_count), 0),
        )
        .filter(Artifact.ref_count > 0)
        .first()
    )
    average_ref_count = float(ref_stats[0] or 0)
    max_ref_count = ref_stats[1] or 0

    # Top N most referenced artifacts
    top_artifacts = (
        db.query(Artifact)
        .filter(Artifact.ref_count > 1)
        .order_by(Artifact.ref_count.desc())
        .limit(top_n)
        .all()
    )

    most_referenced = [
        {
            "artifact_id": a.id,
            "ref_count": a.ref_count,
            "size": a.size,
            "storage_saved": a.size * (a.ref_count - 1),
            "original_name": a.original_name,
            "content_type": a.content_type,
        }
        for a in top_artifacts
    ]

    return DeduplicationStatsResponse(
        total_logical_bytes=total_logical_bytes,
        total_physical_bytes=total_physical_bytes,
        bytes_saved=bytes_saved,
        savings_percentage=savings_percentage,
        total_uploads=total_uploads,
        unique_artifacts=unique_artifacts,
        duplicate_uploads=duplicate_uploads,
        average_ref_count=average_ref_count,
        max_ref_count=max_ref_count,
        most_referenced_artifacts=most_referenced,
    )
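
# Worked example (hypothetical numbers): three uploads of the same
# 100 MB file give total_logical_bytes = 300 MB (size * ref_count),
# total_physical_bytes = 100 MB, bytes_saved = 200 MB, and
# savings_percentage = 200 / 300 * 100 ≈ 66.7%.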


@router.get(
    "/api/v1/projects/{project_name}/stats", response_model=ProjectStatsResponse
)
def get_project_stats(
    project_name: str,
    db: Session = Depends(get_db),
):
    """
    Get statistics for a specific project.
    """
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    # Package count
    package_count = (
        db.query(func.count(Package.id))
        .filter(Package.project_id == project.id)
        .scalar()
        or 0
    )

    # Get all package IDs for this project
    package_ids = (
        db.query(Package.id).filter(Package.project_id == project.id).subquery()
    )

    # Tag count
    tag_count = (
        db.query(func.count(Tag.id)).filter(Tag.package_id.in_(package_ids)).scalar()
        or 0
    )

    # Unique artifact count and total size (via uploads)
    artifact_stats = (
        db.query(
            func.count(func.distinct(Upload.artifact_id)),
            func.coalesce(func.sum(Artifact.size), 0),
        )
        .join(Artifact, Upload.artifact_id == Artifact.id)
        .filter(Upload.package_id.in_(package_ids))
        .first()
    )
    artifact_count = artifact_stats[0] if artifact_stats else 0
    total_size_bytes = artifact_stats[1] if artifact_stats else 0

    # Upload counts and storage saved
    upload_stats = (
        db.query(
            func.count(Upload.id),
            func.count(Upload.id).filter(Upload.deduplicated == True),
            func.coalesce(
                func.sum(Artifact.size).filter(Upload.deduplicated == True), 0
            ),
        )
        .join(Artifact, Upload.artifact_id == Artifact.id)
        .filter(Upload.package_id.in_(package_ids))
        .first()
    )
    upload_count = upload_stats[0] if upload_stats else 0
    deduplicated_uploads = upload_stats[1] if upload_stats else 0
    storage_saved_bytes = upload_stats[2] if upload_stats else 0

    # Calculate deduplication ratio
    deduplication_ratio = upload_count / artifact_count if artifact_count > 0 else 1.0

    return ProjectStatsResponse(
        project_id=str(project.id),
        project_name=project.name,
        package_count=package_count,
        tag_count=tag_count,
        artifact_count=artifact_count,
        total_size_bytes=total_size_bytes,
        upload_count=upload_count,
        deduplicated_uploads=deduplicated_uploads,
        storage_saved_bytes=storage_saved_bytes,
        deduplication_ratio=deduplication_ratio,
    )
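
# Example (hypothetical numbers): a project whose packages received 40
# uploads resolving to 25 distinct artifacts reports
# deduplication_ratio = 40 / 25 = 1.6; storage_saved_bytes sums the
# sizes of the uploads flagged as deduplicated, i.e. bytes that never
# had to be written to storage again.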


# =============================================================================
# Package Statistics Endpoint
# =============================================================================


@router.get(
    "/api/v1/project/{project_name}/packages/{package_name}/stats",
    response_model=PackageStatsResponse,
)
def get_package_stats(
    project_name: str,
    package_name: str,
    db: Session = Depends(get_db),
):
    """Get statistics for a specific package."""
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Tag count
    tag_count = (
        db.query(func.count(Tag.id)).filter(Tag.package_id == package.id).scalar() or 0
    )

    # Artifact stats via uploads
    artifact_stats = (
        db.query(
            func.count(func.distinct(Upload.artifact_id)),
            func.coalesce(func.sum(Artifact.size), 0),
        )
        .join(Artifact, Upload.artifact_id == Artifact.id)
        .filter(Upload.package_id == package.id)
        .first()
    )
    artifact_count = artifact_stats[0] if artifact_stats else 0
    total_size_bytes = artifact_stats[1] if artifact_stats else 0

    # Upload stats
    upload_stats = (
        db.query(
            func.count(Upload.id),
            func.count(Upload.id).filter(Upload.deduplicated == True),
            func.coalesce(
                func.sum(Artifact.size).filter(Upload.deduplicated == True), 0
            ),
        )
        .join(Artifact, Upload.artifact_id == Artifact.id)
        .filter(Upload.package_id == package.id)
        .first()
    )
    upload_count = upload_stats[0] if upload_stats else 0
    deduplicated_uploads = upload_stats[1] if upload_stats else 0
    storage_saved_bytes = upload_stats[2] if upload_stats else 0

    deduplication_ratio = upload_count / artifact_count if artifact_count > 0 else 1.0

    return PackageStatsResponse(
        package_id=str(package.id),
        package_name=package.name,
        project_name=project.name,
        tag_count=tag_count,
        artifact_count=artifact_count,
        total_size_bytes=total_size_bytes,
        upload_count=upload_count,
        deduplicated_uploads=deduplicated_uploads,
        storage_saved_bytes=storage_saved_bytes,
        deduplication_ratio=deduplication_ratio,
    )


# =============================================================================
# Artifact Statistics Endpoint
# =============================================================================


@router.get(
    "/api/v1/artifact/{artifact_id}/stats", response_model=ArtifactStatsResponse
)
def get_artifact_stats(
    artifact_id: str,
    db: Session = Depends(get_db),
):
    """Get detailed statistics for a specific artifact."""
    artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
    if not artifact:
        raise HTTPException(status_code=404, detail="Artifact not found")

    # Get all tags referencing this artifact
    tags = (
        db.query(Tag, Package, Project)
        .join(Package, Tag.package_id == Package.id)
        .join(Project, Package.project_id == Project.id)
        .filter(Tag.artifact_id == artifact_id)
        .all()
    )

    tag_list = [
        {
            "tag_name": tag.name,
            "package_name": pkg.name,
            "project_name": proj.name,
            "created_at": tag.created_at.isoformat() if tag.created_at else None,
        }
        for tag, pkg, proj in tags
    ]

    # Get unique projects and packages
    projects = list(set(proj.name for _, _, proj in tags))
    packages = list(set(f"{proj.name}/{pkg.name}" for _, pkg, proj in tags))

    # Get first and last upload times
    upload_times = (
        db.query(func.min(Upload.uploaded_at), func.max(Upload.uploaded_at))
        .filter(Upload.artifact_id == artifact_id)
        .first()
    )

    return ArtifactStatsResponse(
        artifact_id=artifact.id,
        sha256=artifact.id,
        size=artifact.size,
        ref_count=artifact.ref_count,
        storage_savings=(artifact.ref_count - 1) * artifact.size
        if artifact.ref_count > 1
        else 0,
        tags=tag_list,
        projects=projects,
        packages=packages,
        first_uploaded=upload_times[0] if upload_times else None,
        last_referenced=upload_times[1] if upload_times else None,
    )


# =============================================================================
# Cross-Project Deduplication Endpoint
# =============================================================================


@router.get(
    "/api/v1/stats/cross-project", response_model=CrossProjectDeduplicationResponse
)
def get_cross_project_deduplication(
    limit: int = Query(default=20, ge=1, le=100),
    db: Session = Depends(get_db),
):
    """Get statistics about artifacts shared across multiple projects."""
    # Find artifacts that appear in multiple projects
    # Subquery to count distinct projects per artifact
    project_counts = (
        db.query(
            Upload.artifact_id,
            func.count(func.distinct(Package.project_id)).label("project_count"),
        )
        .join(Package, Upload.package_id == Package.id)
        .group_by(Upload.artifact_id)
        .subquery()
    )

    # Get artifacts with more than one project
    shared_artifacts_query = (
        db.query(Artifact, project_counts.c.project_count)
        .join(project_counts, Artifact.id == project_counts.c.artifact_id)
        .filter(project_counts.c.project_count > 1)
        .order_by(project_counts.c.project_count.desc(), Artifact.size.desc())
        .limit(limit)
    )

    shared_artifacts = []
    total_savings = 0

    for artifact, project_count in shared_artifacts_query:
        # Calculate savings: (project_count - 1) * size
        savings = (project_count - 1) * artifact.size
        total_savings += savings

        # Get project names
        project_names = (
            db.query(func.distinct(Project.name))
            .join(Package, Package.project_id == Project.id)
            .join(Upload, Upload.package_id == Package.id)
            .filter(Upload.artifact_id == artifact.id)
            .all()
        )

        shared_artifacts.append(
            {
                "artifact_id": artifact.id,
                "size": artifact.size,
                "project_count": project_count,
                "projects": [p[0] for p in project_names],
                "storage_savings": savings,
            }
        )

    # Total count of shared artifacts
    shared_count = (
        db.query(func.count())
        .select_from(project_counts)
        .filter(project_counts.c.project_count > 1)
        .scalar()
        or 0
    )

    return CrossProjectDeduplicationResponse(
        shared_artifacts_count=shared_count,
        total_cross_project_savings=total_savings,
        shared_artifacts=shared_artifacts,
    )
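
# Example (hypothetical numbers): a 50 MB artifact referenced from 3
# projects is stored once, so its cross-project saving is
# (3 - 1) * 50 MB = 100 MB. Note that total_cross_project_savings sums
# only the artifacts returned in this response (bounded by `limit`),
# while shared_artifacts_count counts all shared artifacts.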


# =============================================================================
# Time-Based Statistics Endpoint
# =============================================================================


@router.get("/api/v1/stats/timeline", response_model=TimeBasedStatsResponse)
def get_time_based_stats(
    period: str = Query(default="daily", regex="^(daily|weekly|monthly)$"),
    from_date: Optional[datetime] = Query(default=None),
    to_date: Optional[datetime] = Query(default=None),
    db: Session = Depends(get_db),
):
    """Get deduplication statistics over time."""
    # Default date range: last 30 days
    if to_date is None:
        to_date = datetime.utcnow()
    if from_date is None:
        from_date = to_date - timedelta(days=30)

    # Determine date truncation based on period
    if period == "daily":
        date_trunc = func.date_trunc("day", Upload.uploaded_at)
    elif period == "weekly":
        date_trunc = func.date_trunc("week", Upload.uploaded_at)
    else:  # monthly
        date_trunc = func.date_trunc("month", Upload.uploaded_at)

    # Query uploads grouped by period
    stats = (
        db.query(
            date_trunc.label("period_start"),
            func.count(Upload.id).label("total_uploads"),
            func.count(func.distinct(Upload.artifact_id)).label("unique_artifacts"),
            func.count(Upload.id)
            .filter(Upload.deduplicated == True)
            .label("duplicated"),
            func.coalesce(
                func.sum(Artifact.size).filter(Upload.deduplicated == True), 0
            ).label("bytes_saved"),
        )
        .join(Artifact, Upload.artifact_id == Artifact.id)
        .filter(Upload.uploaded_at >= from_date, Upload.uploaded_at <= to_date)
        .group_by(date_trunc)
        .order_by(date_trunc)
        .all()
    )

    data_points = [
        {
            "date": row.period_start.isoformat() if row.period_start else None,
            "total_uploads": row.total_uploads,
            "unique_artifacts": row.unique_artifacts,
            "duplicated_uploads": row.duplicated,
            "bytes_saved": row.bytes_saved,
        }
        for row in stats
    ]

    return TimeBasedStatsResponse(
        period=period,
        start_date=from_date,
        end_date=to_date,
        data_points=data_points,
    )
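
# Example: period="weekly" truncates uploaded_at with PostgreSQL's
# date_trunc("week", ...), so a data point whose date falls on a Monday
# (e.g. 2024-05-06, illustrative) aggregates every upload from that
# Monday through the following Sunday.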


# =============================================================================
# CSV Export Endpoint
# =============================================================================


@router.get("/api/v1/stats/export")
def export_stats(
    format: str = Query(default="json", regex="^(json|csv)$"),
    db: Session = Depends(get_db),
):
    """Export global statistics in JSON or CSV format."""
    # Gather all stats
    total_artifacts = db.query(func.count(Artifact.id)).scalar() or 0
    total_size = db.query(func.coalesce(func.sum(Artifact.size), 0)).scalar() or 0
    total_uploads = db.query(func.count(Upload.id)).scalar() or 0
    deduplicated_uploads = (
        db.query(func.count(Upload.id)).filter(Upload.deduplicated == True).scalar()
        or 0
    )
    unique_artifacts = (
        db.query(func.count(Artifact.id)).filter(Artifact.ref_count > 0).scalar() or 0
    )

    storage_saved = (
        db.query(func.coalesce(func.sum(Artifact.size), 0))
        .join(Upload, Upload.artifact_id == Artifact.id)
        .filter(Upload.deduplicated == True)
        .scalar()
        or 0
    )

    stats = {
        "generated_at": datetime.utcnow().isoformat(),
        "total_artifacts": total_artifacts,
        "total_size_bytes": total_size,
        "total_uploads": total_uploads,
        "unique_artifacts": unique_artifacts,
        "deduplicated_uploads": deduplicated_uploads,
        "storage_saved_bytes": storage_saved,
        "deduplication_ratio": total_uploads / unique_artifacts
        if unique_artifacts > 0
        else 1.0,
    }

    if format == "csv":
        import csv

        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow(["Metric", "Value"])
        for key, value in stats.items():
            writer.writerow([key, value])

        return Response(
            content=output.getvalue(),
            media_type="text/csv",
            headers={"Content-Disposition": "attachment; filename=orchard_stats.csv"},
        )

    return stats
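
# Example CSV output (values invented):
#   Metric,Value
#   generated_at,2024-01-01T00:00:00
#   total_artifacts,1234
#   total_size_bytes,5678901234
#   ...
# The JSON branch returns the same dict, serialized by FastAPI.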


# =============================================================================
# Summary Report Endpoint
# =============================================================================


@router.get("/api/v1/stats/report", response_model=StatsReportResponse)
def generate_stats_report(
    format: str = Query(default="markdown", regex="^(markdown|json)$"),
    db: Session = Depends(get_db),
):
    """Generate a summary report of storage and deduplication statistics."""
    # Gather stats
    total_artifacts = db.query(func.count(Artifact.id)).scalar() or 0
    total_size = int(db.query(func.coalesce(func.sum(Artifact.size), 0)).scalar() or 0)
    total_uploads = db.query(func.count(Upload.id)).scalar() or 0
    deduplicated_uploads = (
        db.query(func.count(Upload.id)).filter(Upload.deduplicated == True).scalar()
        or 0
    )
    unique_artifacts = (
        db.query(func.count(Artifact.id)).filter(Artifact.ref_count > 0).scalar() or 0
    )
    orphaned_artifacts = (
        db.query(func.count(Artifact.id)).filter(Artifact.ref_count == 0).scalar() or 0
    )

    storage_saved = int(
        db.query(func.coalesce(func.sum(Artifact.size), 0))
        .join(Upload, Upload.artifact_id == Artifact.id)
        .filter(Upload.deduplicated == True)
        .scalar()
        or 0
    )

    project_count = db.query(func.count(Project.id)).scalar() or 0
    package_count = db.query(func.count(Package.id)).scalar() or 0

    # Top 5 most referenced artifacts
    top_artifacts = (
        db.query(Artifact)
        .filter(Artifact.ref_count > 1)
        .order_by(Artifact.ref_count.desc())
        .limit(5)
        .all()
    )

    def format_bytes(b):
        for unit in ["B", "KB", "MB", "GB", "TB"]:
            if b < 1024:
                return f"{b:.2f} {unit}"
            b /= 1024
        return f"{b:.2f} PB"
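
    # e.g. format_bytes(0) -> "0.00 B", format_bytes(1536) -> "1.50 KB",
    # format_bytes(5 * 1024**3) -> "5.00 GB" (illustrative values).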

    generated_at = datetime.utcnow()

    if format == "markdown":
        report = f"""# Orchard Storage Report

Generated: {generated_at.strftime("%Y-%m-%d %H:%M:%S UTC")}

## Overview

| Metric | Value |
|--------|-------|
| Projects | {project_count} |
| Packages | {package_count} |
| Total Artifacts | {total_artifacts} |
| Unique Artifacts | {unique_artifacts} |
| Orphaned Artifacts | {orphaned_artifacts} |

## Storage

| Metric | Value |
|--------|-------|
| Total Storage Used | {format_bytes(total_size)} |
| Storage Saved | {format_bytes(storage_saved)} |
| Savings Percentage | {(storage_saved / (total_size + storage_saved) * 100) if (total_size + storage_saved) > 0 else 0:.1f}% |

## Uploads

| Metric | Value |
|--------|-------|
| Total Uploads | {total_uploads} |
| Deduplicated Uploads | {deduplicated_uploads} |
| Deduplication Ratio | {total_uploads / unique_artifacts if unique_artifacts > 0 else 1:.2f}x |

## Top Referenced Artifacts

| Artifact ID | Size | References | Savings |
|-------------|------|------------|---------|
"""
        for art in top_artifacts:
            savings = (art.ref_count - 1) * art.size
            report += f"| `{art.id[:12]}...` | {format_bytes(art.size)} | {art.ref_count} | {format_bytes(savings)} |\n"

        return StatsReportResponse(
            format="markdown",
            generated_at=generated_at,
            content=report,
        )

    # JSON format
    return StatsReportResponse(
        format="json",
        generated_at=generated_at,
        content=json.dumps(
            {
                "overview": {
                    "projects": project_count,
                    "packages": package_count,
                    "total_artifacts": total_artifacts,
                    "unique_artifacts": unique_artifacts,
                    "orphaned_artifacts": orphaned_artifacts,
                },
                "storage": {
                    "total_bytes": total_size,
                    "saved_bytes": storage_saved,
                    "savings_percentage": (
                        storage_saved / (total_size + storage_saved) * 100
                    )
                    if (total_size + storage_saved) > 0
                    else 0,
                },
                "uploads": {
                    "total": total_uploads,
                    "deduplicated": deduplicated_uploads,
                    "ratio": total_uploads / unique_artifacts
                    if unique_artifacts > 0
                    else 1,
                },
                "top_artifacts": [
                    {
                        "id": art.id,
                        "size": art.size,
                        "ref_count": art.ref_count,
                        "savings": (art.ref_count - 1) * art.size,
                    }
                    for art in top_artifacts
                ],
            },
            indent=2,
        ),
    )