Add storage abstraction, stats endpoints, garbage collection, and test infrastructure
- Add StorageBackend protocol for backend-agnostic storage interface
- Add health check with storage and database connectivity verification
- Add garbage collection endpoints for orphaned artifacts (ref_count=0)
- Add deduplication statistics endpoints (/api/v1/stats, /stats/storage, /stats/deduplication)
- Add per-project statistics endpoint
- Add verify_integrity method for post-upload hash validation
- Set up pytest infrastructure with mock S3 client
- Add unit tests for hash calculation and duplicate detection
This commit is contained in:
@@ -387,3 +387,72 @@ class PresignedUrlResponse(BaseModel):
|
||||
class HealthResponse(BaseModel):
    # Health-check payload. Documented with comments rather than a docstring
    # so the description in the model's generated schema is left unchanged.

    # Overall service status string; the set of allowed values is not
    # visible from this schema.
    status: str

    # Reported service version; falls back to "1.0.0" when not supplied.
    version: str = "1.0.0"

    # Outcome of the storage check. Defaults to None -- presumably meaning
    # "not checked"; confirm against the health endpoint.
    storage_healthy: Optional[bool] = None

    # Outcome of the database check; same None convention as storage_healthy.
    database_healthy: Optional[bool] = None
|
||||
|
||||
|
||||
# Garbage collection schemas
|
||||
class GarbageCollectionResponse(BaseModel):
    """Response from garbage collection operation"""

    # Number of artifacts covered by this run. NOTE(review): on a dry run
    # this is presumably "would be deleted" -- confirm against the endpoint.
    artifacts_deleted: int

    # Total size, in bytes, attributed to those artifacts.
    bytes_freed: int

    # Identifiers of the artifacts counted in artifacts_deleted.
    artifact_ids: List[str]

    # Whether the run was a dry run.
    dry_run: bool
|
||||
|
||||
|
||||
class OrphanedArtifactResponse(BaseModel):
    """Information about an orphaned artifact"""

    # Artifact identifier (presumably its content hash -- confirm).
    id: str

    # Artifact size; presumably bytes, matching the *_bytes fields used by
    # the statistics schemas -- confirm.
    size: int

    # Creation timestamp of the artifact record.
    created_at: datetime

    # Who created the artifact.
    created_by: str

    # Original file name, if known. Nullable, and deliberately left without
    # a default exactly as in the original declaration.
    original_name: Optional[str]
|
||||
|
||||
|
||||
# Storage statistics schemas
|
||||
class StorageStatsResponse(BaseModel):
    """Global storage statistics"""

    # Count of all artifacts.
    total_artifacts: int

    # Combined size of stored artifacts, in bytes.
    total_size_bytes: int

    # Artifacts with ref_count > 0
    unique_artifacts: int

    # Artifacts with ref_count = 0
    orphaned_artifacts: int

    # Combined size of the orphaned artifacts, in bytes.
    orphaned_size_bytes: int

    # Count of all uploads.
    total_uploads: int

    # Count of uploads that were deduplicated.
    deduplicated_uploads: int

    # total_uploads / unique_artifacts (if > 1, deduplication is working)
    deduplication_ratio: float

    # Bytes saved through deduplication
    storage_saved_bytes: int
|
||||
|
||||
|
||||
class DeduplicationStatsResponse(BaseModel):
    """Deduplication effectiveness statistics"""

    # Sum of all upload sizes (what would be stored without dedup)
    total_logical_bytes: int

    # Actual storage used
    total_physical_bytes: int

    # Bytes saved; presumably total_logical_bytes - total_physical_bytes
    # -- confirm against the endpoint that fills this in.
    bytes_saved: int

    # Savings expressed as a percentage. NOTE(review): scale (0-100 vs 0-1)
    # is not visible from the schema -- confirm.
    savings_percentage: float

    # Count of all uploads.
    total_uploads: int

    # Count of distinct artifacts.
    unique_artifacts: int

    # Count of uploads that duplicated an existing artifact.
    duplicate_uploads: int

    # Mean and maximum reference count across artifacts.
    average_ref_count: float
    max_ref_count: int

    # Top N most referenced
    most_referenced_artifacts: List[Dict[str, Any]]
|
||||
|
||||
|
||||
class ProjectStatsResponse(BaseModel):
    """Per-project statistics"""

    # Identity of the project these figures describe.
    project_id: str
    project_name: str

    # Counts of packages, tags and artifacts reported for the project.
    package_count: int
    tag_count: int
    artifact_count: int

    # Combined artifact size for the project, in bytes.
    total_size_bytes: int

    # Count of uploads for the project, and how many were deduplicated.
    upload_count: int
    deduplicated_uploads: int
|
||||
|
||||
Reference in New Issue
Block a user