Add ref_count management for deletions with atomic operations and error handling
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any, Generic, TypeVar
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, field_validator
|
||||
from uuid import UUID
|
||||
|
||||
T = TypeVar("T")
|
||||
@@ -40,8 +40,28 @@ class ProjectResponse(BaseModel):
|
||||
|
||||
|
||||
# Supported package formats and target platforms (plain lists of strings)
|
||||
PACKAGE_FORMATS = ["generic", "npm", "pypi", "docker", "deb", "rpm", "maven", "nuget", "helm"]
|
||||
PACKAGE_PLATFORMS = ["any", "linux", "darwin", "windows", "linux-amd64", "linux-arm64", "darwin-amd64", "darwin-arm64", "windows-amd64"]
|
||||
PACKAGE_FORMATS = [
|
||||
"generic",
|
||||
"npm",
|
||||
"pypi",
|
||||
"docker",
|
||||
"deb",
|
||||
"rpm",
|
||||
"maven",
|
||||
"nuget",
|
||||
"helm",
|
||||
]
|
||||
PACKAGE_PLATFORMS = [
|
||||
"any",
|
||||
"linux",
|
||||
"darwin",
|
||||
"windows",
|
||||
"linux-amd64",
|
||||
"linux-arm64",
|
||||
"darwin-amd64",
|
||||
"darwin-arm64",
|
||||
"windows-amd64",
|
||||
]
|
||||
|
||||
|
||||
# Package schemas
|
||||
@@ -68,6 +88,7 @@ class PackageResponse(BaseModel):
|
||||
|
||||
class TagSummary(BaseModel):
|
||||
"""Lightweight tag info for embedding in package responses"""
|
||||
|
||||
name: str
|
||||
artifact_id: str
|
||||
created_at: datetime
|
||||
@@ -75,6 +96,7 @@ class TagSummary(BaseModel):
|
||||
|
||||
class PackageDetailResponse(BaseModel):
|
||||
"""Package with aggregated metadata"""
|
||||
|
||||
id: UUID
|
||||
project_id: UUID
|
||||
name: str
|
||||
@@ -135,6 +157,7 @@ class TagResponse(BaseModel):
|
||||
|
||||
class TagDetailResponse(BaseModel):
|
||||
"""Tag with embedded artifact metadata"""
|
||||
|
||||
id: UUID
|
||||
package_id: UUID
|
||||
name: str
|
||||
@@ -154,6 +177,7 @@ class TagDetailResponse(BaseModel):
|
||||
|
||||
class TagHistoryResponse(BaseModel):
|
||||
"""History entry for tag changes"""
|
||||
|
||||
id: UUID
|
||||
tag_id: UUID
|
||||
old_artifact_id: Optional[str]
|
||||
@@ -167,6 +191,7 @@ class TagHistoryResponse(BaseModel):
|
||||
|
||||
class ArtifactTagInfo(BaseModel):
|
||||
"""Tag info for embedding in artifact responses"""
|
||||
|
||||
id: UUID
|
||||
name: str
|
||||
package_id: UUID
|
||||
@@ -176,6 +201,7 @@ class ArtifactTagInfo(BaseModel):
|
||||
|
||||
class ArtifactDetailResponse(BaseModel):
|
||||
"""Artifact with list of tags/packages referencing it"""
|
||||
|
||||
id: str
|
||||
sha256: str # Explicit SHA256 field (same as id)
|
||||
size: int
|
||||
@@ -196,6 +222,7 @@ class ArtifactDetailResponse(BaseModel):
|
||||
|
||||
class PackageArtifactResponse(BaseModel):
|
||||
"""Artifact with tags for package artifact listing"""
|
||||
|
||||
id: str
|
||||
sha256: str # Explicit SHA256 field (same as id)
|
||||
size: int
|
||||
@@ -226,20 +253,35 @@ class UploadResponse(BaseModel):
|
||||
s3_etag: Optional[str] = None
|
||||
format_metadata: Optional[Dict[str, Any]] = None
|
||||
deduplicated: bool = False
|
||||
ref_count: int = 1 # Current reference count after this upload
|
||||
|
||||
|
||||
# Resumable upload schemas
|
||||
class ResumableUploadInitRequest(BaseModel):
    """Request to initiate a resumable upload"""

    # SHA256 hash of the file (client must compute). The validator below
    # stores it lowercased, so uppercase input is accepted.
    expected_hash: str
    filename: str
    content_type: Optional[str] = None
    size: int
    tag: Optional[str] = None

    @field_validator("expected_hash")
    @classmethod
    def validate_sha256_hash(cls, v: str) -> str:
        """Validate and normalize ``expected_hash``.

        Accepts a 64-character hexadecimal SHA256 digest in any letter case
        and returns it lowercased.

        Raises:
            ValueError: If the value is not exactly 64 hexadecimal characters.
        """
        import re

        normalized = v.lower()
        # Use fullmatch rather than match with "^...$": "$" also matches just
        # before a trailing newline, which would let "<64 hex chars>\n" pass
        # and hand back a 65-character value.
        if not re.fullmatch(r"[a-f0-9]{64}", normalized):
            raise ValueError(
                "expected_hash must be a valid 64-character lowercase hexadecimal SHA256 hash"
            )
        return normalized
|
||||
|
||||
|
||||
class ResumableUploadInitResponse(BaseModel):
|
||||
"""Response from initiating a resumable upload"""
|
||||
|
||||
upload_id: Optional[str] # None if file already exists
|
||||
already_exists: bool
|
||||
artifact_id: Optional[str] = None # Set if already_exists is True
|
||||
@@ -248,17 +290,20 @@ class ResumableUploadInitResponse(BaseModel):
|
||||
|
||||
class ResumableUploadPartResponse(BaseModel):
    """Response from uploading a part"""

    # Both fields are required.
    # NOTE(review): etag is presumably the storage backend's ETag for this
    # part -- confirm against the upload handler.
    part_number: int
    etag: str
|
||||
|
||||
|
||||
class ResumableUploadCompleteRequest(BaseModel):
    """Request to complete a resumable upload"""

    # Optional; omitted or null means no tag was supplied with the request.
    tag: Optional[str] = None
|
||||
|
||||
|
||||
class ResumableUploadCompleteResponse(BaseModel):
|
||||
"""Response from completing a resumable upload"""
|
||||
|
||||
artifact_id: str
|
||||
size: int
|
||||
project: str
|
||||
@@ -268,6 +313,7 @@ class ResumableUploadCompleteResponse(BaseModel):
|
||||
|
||||
class ResumableUploadStatusResponse(BaseModel):
|
||||
"""Status of a resumable upload"""
|
||||
|
||||
upload_id: str
|
||||
uploaded_parts: List[int]
|
||||
total_uploaded_bytes: int
|
||||
@@ -288,6 +334,7 @@ class ConsumerResponse(BaseModel):
|
||||
# Global search schemas
|
||||
class SearchResultProject(BaseModel):
|
||||
"""Project result for global search"""
|
||||
|
||||
id: UUID
|
||||
name: str
|
||||
description: Optional[str]
|
||||
@@ -299,6 +346,7 @@ class SearchResultProject(BaseModel):
|
||||
|
||||
class SearchResultPackage(BaseModel):
|
||||
"""Package result for global search"""
|
||||
|
||||
id: UUID
|
||||
project_id: UUID
|
||||
project_name: str
|
||||
@@ -312,6 +360,7 @@ class SearchResultPackage(BaseModel):
|
||||
|
||||
class SearchResultArtifact(BaseModel):
|
||||
"""Artifact/tag result for global search"""
|
||||
|
||||
tag_id: UUID
|
||||
tag_name: str
|
||||
artifact_id: str
|
||||
@@ -323,6 +372,7 @@ class SearchResultArtifact(BaseModel):
|
||||
|
||||
class GlobalSearchResponse(BaseModel):
|
||||
"""Combined search results across all entity types"""
|
||||
|
||||
query: str
|
||||
projects: List[SearchResultProject]
|
||||
packages: List[SearchResultPackage]
|
||||
@@ -333,6 +383,7 @@ class GlobalSearchResponse(BaseModel):
|
||||
# Presigned URL response
|
||||
class PresignedUrlResponse(BaseModel):
|
||||
"""Response containing a presigned URL for direct S3 download"""
|
||||
|
||||
url: str
|
||||
expires_at: datetime
|
||||
method: str = "GET"
|
||||
@@ -348,3 +399,131 @@ class PresignedUrlResponse(BaseModel):
|
||||
class HealthResponse(BaseModel):
    # Health-check payload. Only `status` is required.
    status: str
    version: str = "1.0.0"

    # Per-component flags; each defaults to None when not supplied.
    storage_healthy: Optional[bool] = None
    database_healthy: Optional[bool] = None
|
||||
|
||||
|
||||
# Garbage collection schemas
|
||||
class GarbageCollectionResponse(BaseModel):
    """Response from garbage collection operation"""

    # Totals for the run.
    artifacts_deleted: int
    bytes_freed: int

    # IDs of the artifacts the run covered.
    artifact_ids: List[str]

    # NOTE(review): presumably True means nothing was actually removed --
    # confirm against the garbage-collection endpoint.
    dry_run: bool
|
||||
|
||||
|
||||
class OrphanedArtifactResponse(BaseModel):
    """Information about an orphaned artifact"""

    id: str
    size: int
    created_at: datetime
    created_by: str

    # Required field with no default, but its value may be null.
    original_name: Optional[str]
|
||||
|
||||
|
||||
# Storage statistics schemas
|
||||
class StorageStatsResponse(BaseModel):
    """Global storage statistics"""

    total_artifacts: int
    total_size_bytes: int

    # Artifacts with ref_count > 0
    unique_artifacts: int

    # Artifacts with ref_count = 0
    orphaned_artifacts: int
    orphaned_size_bytes: int

    total_uploads: int
    deduplicated_uploads: int

    # total_uploads / unique_artifacts (if > 1, deduplication is working)
    deduplication_ratio: float

    # Bytes saved through deduplication
    storage_saved_bytes: int
|
||||
|
||||
|
||||
class DeduplicationStatsResponse(BaseModel):
    """Deduplication effectiveness statistics"""

    # Sum of all upload sizes (what would be stored without dedup)
    total_logical_bytes: int

    # Actual storage used
    total_physical_bytes: int

    bytes_saved: int
    savings_percentage: float

    total_uploads: int
    unique_artifacts: int
    duplicate_uploads: int

    average_ref_count: float
    max_ref_count: int

    # Top N most referenced
    most_referenced_artifacts: List[Dict[str, Any]]
|
||||
|
||||
|
||||
class ProjectStatsResponse(BaseModel):
    """Per-project statistics"""

    # Identification
    project_id: str
    project_name: str

    # Counts and sizes
    package_count: int
    tag_count: int
    artifact_count: int
    total_size_bytes: int
    upload_count: int
    deduplicated_uploads: int

    # Bytes saved through deduplication
    storage_saved_bytes: int = 0

    # upload_count / artifact_count
    deduplication_ratio: float = 1.0
|
||||
|
||||
|
||||
class PackageStatsResponse(BaseModel):
    """Per-package statistics"""

    # Identification
    package_id: str
    package_name: str
    project_name: str

    # Counts and sizes
    tag_count: int
    artifact_count: int
    total_size_bytes: int
    upload_count: int
    deduplicated_uploads: int

    # Optional deduplication figures; default to 0 bytes saved and ratio 1.0.
    storage_saved_bytes: int = 0
    deduplication_ratio: float = 1.0
|
||||
|
||||
|
||||
class ArtifactStatsResponse(BaseModel):
    """Per-artifact reference statistics"""

    artifact_id: str
    sha256: str
    size: int
    ref_count: int

    # (ref_count - 1) * size
    storage_savings: int

    # Tags referencing this artifact
    tags: List[Dict[str, Any]]

    # Projects using this artifact
    projects: List[str]

    # Packages using this artifact
    packages: List[str]

    # Timestamps are optional and default to None.
    first_uploaded: Optional[datetime] = None
    last_referenced: Optional[datetime] = None
|
||||
|
||||
|
||||
class CrossProjectDeduplicationResponse(BaseModel):
    """Cross-project deduplication statistics"""

    # Artifacts used in multiple projects
    shared_artifacts_count: int

    # Bytes saved by cross-project sharing
    total_cross_project_savings: int

    # Details of shared artifacts
    shared_artifacts: List[Dict[str, Any]]
|
||||
|
||||
|
||||
class TimeBasedStatsResponse(BaseModel):
    """Time-based deduplication statistics"""

    # "daily", "weekly", "monthly"
    period: str

    start_date: datetime
    end_date: datetime

    # List of {date, uploads, unique, duplicated, bytes_saved}
    data_points: List[Dict[str, Any]]
|
||||
|
||||
|
||||
class StatsReportResponse(BaseModel):
    """Summary report in various formats"""

    # "json", "csv", "markdown"
    format: str

    generated_at: datetime

    # The report content
    content: str
|
||||
|
||||
Reference in New Issue
Block a user