# - Add StorageBackend protocol for backend-agnostic storage interface
# - Add health check with storage and database connectivity verification
# - Add garbage collection endpoints for orphaned artifacts (ref_count=0)
# - Add deduplication statistics endpoints (/api/v1/stats, /stats/storage, /stats/deduplication)
# - Add per-project statistics endpoint
# - Add verify_integrity method for post-upload hash validation
# - Set up pytest infrastructure with mock S3 client
# - Add unit tests for hash calculation and duplicate detection
459 lines
10 KiB
Python
459 lines
10 KiB
Python
from datetime import datetime
|
|
from typing import Optional, List, Dict, Any, Generic, TypeVar
|
|
from pydantic import BaseModel
|
|
from uuid import UUID
|
|
|
|
# Type parameter for the element type carried by generic wrappers in this
# module (PaginatedResponse uses it for its `items` list).
T = TypeVar("T")
|
|
|
|
|
|
# Pagination schemas
|
|
# Paging metadata; embedded as the `pagination` field of PaginatedResponse.
class PaginationMeta(BaseModel):
    page: int
    limit: int
    total: int
    total_pages: int
|
|
|
|
|
|
# Generic envelope pairing a list of items of type T with paging metadata.
class PaginatedResponse(BaseModel, Generic[T]):
    items: List[T]
    pagination: PaginationMeta
|
|
|
|
|
|
# Project schemas
|
|
# Fields supplied when creating a project; only `name` has no default.
class ProjectCreate(BaseModel):
    name: str
    description: Optional[str] = None
    is_public: bool = True  # a project is public unless the caller says otherwise
|
|
|
|
|
|
# Serialized view of a project, including its audit fields.
class ProjectResponse(BaseModel):
    id: UUID
    name: str
    description: Optional[str]  # may be None, but has no default value
    is_public: bool
    created_at: datetime
    updated_at: datetime
    created_by: str

    class Config:
        # Let pydantic populate the model from an object's attributes,
        # not only from a mapping.
        from_attributes = True
|
|
|
|
|
|
# Package format and platform enums
|
|
# Known package format identifiers. "generic" matches the default of
# PackageCreate.format.
# NOTE(review): no model visible in this file validates `format` against this
# list; presumably callers enforce it - confirm.
PACKAGE_FORMATS = [
    "generic",
    "npm",
    "pypi",
    "docker",
    "deb",
    "rpm",
    "maven",
    "nuget",
    "helm",
]
|
|
# Known platform identifiers: bare OS names plus OS-architecture pairs.
# "any" matches the default of PackageCreate.platform.
# NOTE(review): no model visible in this file validates `platform` against
# this list; presumably callers enforce it - confirm.
PACKAGE_PLATFORMS = [
    "any",
    "linux",
    "darwin",
    "windows",
    "linux-amd64",
    "linux-arm64",
    "darwin-amd64",
    "darwin-arm64",
    "windows-amd64",
]
|
|
|
|
|
|
# Package schemas
|
|
# Fields supplied when creating a package; only `name` has no default.
class PackageCreate(BaseModel):
    name: str
    description: Optional[str] = None
    # Plain strings: this model does not itself restrict the values to
    # PACKAGE_FORMATS / PACKAGE_PLATFORMS.
    format: str = "generic"
    platform: str = "any"
|
|
|
|
|
|
# Serialized view of a package, linked to its project by `project_id`.
class PackageResponse(BaseModel):
    id: UUID
    project_id: UUID
    name: str
    description: Optional[str]  # may be None, but has no default value
    format: str
    platform: str
    created_at: datetime
    updated_at: datetime

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class TagSummary(BaseModel):
    """Lightweight tag info for embedding in package responses"""

    # Used as the element type of PackageDetailResponse.recent_tags.
    name: str
    artifact_id: str
    created_at: datetime
|
|
|
|
|
|
class PackageDetailResponse(BaseModel):
    """Package with aggregated metadata"""

    # Same base fields as PackageResponse.
    id: UUID
    project_id: UUID
    name: str
    description: Optional[str]
    format: str
    platform: str
    created_at: datetime
    updated_at: datetime
    # Aggregated fields
    # All of these have defaults, so a bare package still yields a valid model.
    tag_count: int = 0
    artifact_count: int = 0
    total_size: int = 0
    latest_tag: Optional[str] = None
    latest_upload_at: Optional[datetime] = None
    # Recent tags (limit 5)
    # NOTE(review): the limit is not enforced by this model; presumably the
    # producer truncates the list - confirm.
    recent_tags: List[TagSummary] = []

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
# Artifact schemas
|
|
# Serialized view of a stored artifact with its checksums and reference count.
class ArtifactResponse(BaseModel):
    id: str
    sha256: str  # Explicit SHA256 field (same as id)
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    # Secondary checksums and storage ETag are optional and default to None.
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    created_at: datetime
    created_by: str
    ref_count: int
    format_metadata: Optional[Dict[str, Any]] = None

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
# Tag schemas
|
|
# Fields supplied when creating a tag: the tag name and the artifact it
# refers to. Both are required.
class TagCreate(BaseModel):
    name: str
    artifact_id: str
|
|
|
|
|
|
# Serialized view of a tag, linked to its package and artifact by id.
class TagResponse(BaseModel):
    id: UUID
    package_id: UUID
    name: str
    artifact_id: str
    created_at: datetime
    created_by: str

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class TagDetailResponse(BaseModel):
    """Tag with embedded artifact metadata"""

    # Same fields as TagResponse.
    id: UUID
    package_id: UUID
    name: str
    artifact_id: str
    created_at: datetime
    created_by: str
    # Artifact metadata
    # Flattened into `artifact_`-prefixed fields rather than a nested model.
    artifact_size: int
    artifact_content_type: Optional[str]
    artifact_original_name: Optional[str]
    artifact_created_at: datetime
    artifact_format_metadata: Optional[Dict[str, Any]] = None

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class TagHistoryResponse(BaseModel):
    """History entry for tag changes"""

    id: UUID
    tag_id: UUID
    # The previous artifact may be None; the new artifact is always required.
    old_artifact_id: Optional[str]
    new_artifact_id: str
    changed_at: datetime
    changed_by: str

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class ArtifactTagInfo(BaseModel):
    """Tag info for embedding in artifact responses"""

    # Used as the element type of ArtifactDetailResponse.tags.
    id: UUID
    name: str
    package_id: UUID
    package_name: str
    project_name: str
|
|
|
|
|
|
class ArtifactDetailResponse(BaseModel):
    """Artifact with list of tags/packages referencing it"""

    # Same fields as ArtifactResponse, plus `tags`.
    id: str
    sha256: str  # Explicit SHA256 field (same as id)
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    created_at: datetime
    created_by: str
    ref_count: int
    format_metadata: Optional[Dict[str, Any]] = None
    tags: List[ArtifactTagInfo] = []  # empty when nothing references the artifact

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class PackageArtifactResponse(BaseModel):
    """Artifact with tags for package artifact listing"""

    # Unlike ArtifactResponse, this model carries no `ref_count`, and `tags`
    # holds plain tag names rather than ArtifactTagInfo objects.
    id: str
    sha256: str  # Explicit SHA256 field (same as id)
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    created_at: datetime
    created_by: str
    format_metadata: Optional[Dict[str, Any]] = None
    tags: List[str] = []  # Tag names pointing to this artifact

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
# Upload response
|
|
# Result of an upload: the stored artifact, where it was filed
# (project/package/tag), and whether it was deduplicated.
class UploadResponse(BaseModel):
    artifact_id: str
    sha256: str  # Explicit SHA256 field (same as artifact_id)
    size: int
    project: str
    package: str
    tag: Optional[str]  # may be None, but has no default value
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    format_metadata: Optional[Dict[str, Any]] = None
    deduplicated: bool = False
    ref_count: int = 1  # Current reference count after this upload
|
|
|
|
|
|
# Resumable upload schemas
|
|
class ResumableUploadInitRequest(BaseModel):
    """Request to initiate a resumable upload"""

    expected_hash: str  # SHA256 hash of the file (client must compute)
    filename: str
    content_type: Optional[str] = None
    size: int
    tag: Optional[str] = None
|
|
|
|
|
|
class ResumableUploadInitResponse(BaseModel):
    """Response from initiating a resumable upload"""

    # `upload_id` and `artifact_id` are complementary: per the field comments,
    # the first is None and the second is set when the file already exists.
    upload_id: Optional[str]  # None if file already exists
    already_exists: bool
    artifact_id: Optional[str] = None  # Set if already_exists is True
    chunk_size: int  # Recommended chunk size for parts
|
|
|
|
|
|
class ResumableUploadPartResponse(BaseModel):
    """Response from uploading a part"""

    part_number: int
    etag: str
|
|
|
|
|
|
class ResumableUploadCompleteRequest(BaseModel):
    """Request to complete a resumable upload"""

    tag: Optional[str] = None  # the only field; an empty request body is valid
|
|
|
|
|
|
class ResumableUploadCompleteResponse(BaseModel):
    """Response from completing a resumable upload"""

    artifact_id: str
    size: int
    project: str
    package: str
    tag: Optional[str]  # may be None, but has no default value
|
|
|
|
|
|
class ResumableUploadStatusResponse(BaseModel):
    """Status of a resumable upload"""

    upload_id: str
    uploaded_parts: List[int]  # presumably the part numbers received so far - confirm
    total_uploaded_bytes: int
|
|
|
|
|
|
# Consumer schemas
|
|
# Serialized view of a consumer record attached to a package.
# NOTE(review): what a "consumer" represents is not defined in this file;
# the fields suggest a project URL that accesses the package - confirm.
class ConsumerResponse(BaseModel):
    id: UUID
    package_id: UUID
    project_url: str
    last_access: datetime
    created_at: datetime

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
# Global search schemas
|
|
class SearchResultProject(BaseModel):
    """Project result for global search"""

    # A subset of ProjectResponse: the audit fields are omitted.
    id: UUID
    name: str
    description: Optional[str]
    is_public: bool

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class SearchResultPackage(BaseModel):
    """Package result for global search"""

    id: UUID
    project_id: UUID
    project_name: str  # carried alongside project_id on each result
    name: str
    description: Optional[str]
    format: str

    class Config:
        # Let pydantic populate the model from an object's attributes.
        from_attributes = True
|
|
|
|
|
|
class SearchResultArtifact(BaseModel):
    """Artifact/tag result for global search"""

    # Each result combines a tag, the artifact it points to, and the names of
    # the owning package and project.
    tag_id: UUID
    tag_name: str
    artifact_id: str
    package_id: UUID
    package_name: str
    project_name: str
    original_name: Optional[str]
|
|
|
|
|
|
class GlobalSearchResponse(BaseModel):
    """Combined search results across all entity types"""

    query: str
    # One result list per entity type.
    projects: List[SearchResultProject]
    packages: List[SearchResultPackage]
    artifacts: List[SearchResultArtifact]
    counts: Dict[str, int]  # Total counts for each type
|
|
|
|
|
|
# Presigned URL response
|
|
class PresignedUrlResponse(BaseModel):
    """Response containing a presigned URL for direct S3 download"""

    url: str
    expires_at: datetime
    method: str = "GET"  # HTTP method for the URL; defaults to GET
    artifact_id: str
    size: int
    # Optional descriptive metadata and checksums; all default to None.
    content_type: Optional[str] = None
    original_name: Optional[str] = None
    checksum_sha256: Optional[str] = None
    checksum_md5: Optional[str] = None
|
|
|
|
|
|
# Health check
|
|
# Health-check payload: an overall status string plus optional per-dependency
# flags for storage and database.
class HealthResponse(BaseModel):
    status: str
    version: str = "1.0.0"
    # None when no result is supplied for that dependency.
    storage_healthy: Optional[bool] = None
    database_healthy: Optional[bool] = None
|
|
|
|
|
|
# Garbage collection schemas
|
|
class GarbageCollectionResponse(BaseModel):
    """Response from garbage collection operation"""

    artifacts_deleted: int
    bytes_freed: int
    artifact_ids: List[str]
    # NOTE(review): presumably True means nothing was actually deleted and the
    # counts above describe what would be removed - confirm with the endpoint.
    dry_run: bool
|
|
|
|
|
|
class OrphanedArtifactResponse(BaseModel):
    """Information about an orphaned artifact"""

    id: str
    size: int
    created_at: datetime
    created_by: str
    original_name: Optional[str]  # may be None, but has no default value
|
|
|
|
|
|
# Storage statistics schemas
|
|
class StorageStatsResponse(BaseModel):
    """Global storage statistics"""

    total_artifacts: int
    total_size_bytes: int
    unique_artifacts: int  # Artifacts with ref_count > 0
    orphaned_artifacts: int  # Artifacts with ref_count = 0
    orphaned_size_bytes: int
    total_uploads: int
    deduplicated_uploads: int
    # total_uploads / unique_artifacts (if > 1, deduplication is working)
    deduplication_ratio: float
    storage_saved_bytes: int  # Bytes saved through deduplication
|
|
|
|
|
|
class DeduplicationStatsResponse(BaseModel):
    """Deduplication effectiveness statistics"""

    # Sum of all upload sizes (what would be stored without dedup)
    total_logical_bytes: int
    total_physical_bytes: int  # Actual storage used
    bytes_saved: int
    savings_percentage: float
    total_uploads: int
    unique_artifacts: int
    duplicate_uploads: int
    average_ref_count: float
    max_ref_count: int
    # Entries are free-form dicts; their keys are not fixed by this model.
    most_referenced_artifacts: List[Dict[str, Any]]  # Top N most referenced
|
|
|
|
|
|
class ProjectStatsResponse(BaseModel):
    """Per-project statistics"""

    # The project id is a plain string here, whereas ProjectResponse.id is a UUID.
    project_id: str
    project_name: str
    package_count: int
    tag_count: int
    artifact_count: int
    total_size_bytes: int
    upload_count: int
    deduplicated_uploads: int
|