"""Pydantic request/response schemas for the artifact registry API.

Covers projects, packages, tags, artifacts, uploads (including resumable
uploads), audit/upload history, provenance, search, health, garbage
collection, and storage/deduplication statistics.
"""

import re
from datetime import datetime
from typing import Any, Dict, Generic, List, Optional, TypeVar
from uuid import UUID

from pydantic import BaseModel, ConfigDict, field_validator

T = TypeVar("T")

# Pre-compiled once at import time; used by ResumableUploadInitRequest.
_SHA256_HEX_RE = re.compile(r"^[a-f0-9]{64}$")


# Pagination schemas
class PaginationMeta(BaseModel):
    page: int
    limit: int
    total: int
    total_pages: int
    has_more: bool = False  # True if there are more pages after current page


class PaginatedResponse(BaseModel, Generic[T]):
    items: List[T]
    pagination: PaginationMeta


# Project schemas
class ProjectCreate(BaseModel):
    name: str
    description: Optional[str] = None
    is_public: bool = True


class ProjectResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    name: str
    description: Optional[str]
    is_public: bool
    created_at: datetime
    updated_at: datetime
    created_by: str


class ProjectUpdate(BaseModel):
    """Schema for updating a project"""

    description: Optional[str] = None
    is_public: Optional[bool] = None


# Package format and platform enums
PACKAGE_FORMATS = [
    "generic",
    "npm",
    "pypi",
    "docker",
    "deb",
    "rpm",
    "maven",
    "nuget",
    "helm",
]

PACKAGE_PLATFORMS = [
    "any",
    "linux",
    "darwin",
    "windows",
    "linux-amd64",
    "linux-arm64",
    "darwin-amd64",
    "darwin-arm64",
    "windows-amd64",
]


# Package schemas
class PackageCreate(BaseModel):
    name: str
    description: Optional[str] = None
    format: str = "generic"
    platform: str = "any"


class PackageResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    project_id: UUID
    name: str
    description: Optional[str]
    format: str
    platform: str
    created_at: datetime
    updated_at: datetime


class PackageUpdate(BaseModel):
    """Schema for updating a package"""

    description: Optional[str] = None
    format: Optional[str] = None
    platform: Optional[str] = None


class TagSummary(BaseModel):
    """Lightweight tag info for embedding in package responses"""

    name: str
    artifact_id: str
    created_at: datetime


class PackageDetailResponse(BaseModel):
    """Package with aggregated metadata"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    project_id: UUID
    name: str
    description: Optional[str]
    format: str
    platform: str
    created_at: datetime
    updated_at: datetime
    # Aggregated fields
    tag_count: int = 0
    artifact_count: int = 0
    total_size: int = 0
    latest_tag: Optional[str] = None
    latest_upload_at: Optional[datetime] = None
    # Recent tags (limit 5)
    recent_tags: List[TagSummary] = []


# Artifact schemas
class ArtifactResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: str
    sha256: str  # Explicit SHA256 field (same as id)
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    created_at: datetime
    created_by: str
    ref_count: int
    format_metadata: Optional[Dict[str, Any]] = None


# Tag schemas
class TagCreate(BaseModel):
    name: str
    artifact_id: str


class TagResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    package_id: UUID
    name: str
    artifact_id: str
    created_at: datetime
    created_by: str


class TagDetailResponse(BaseModel):
    """Tag with embedded artifact metadata"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    package_id: UUID
    name: str
    artifact_id: str
    created_at: datetime
    created_by: str
    # Artifact metadata
    artifact_size: int
    artifact_content_type: Optional[str]
    artifact_original_name: Optional[str]
    artifact_created_at: datetime
    artifact_format_metadata: Optional[Dict[str, Any]] = None


class TagHistoryResponse(BaseModel):
    """History entry for tag changes"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    tag_id: UUID
    old_artifact_id: Optional[str]
    new_artifact_id: str
    changed_at: datetime
    changed_by: str


class TagHistoryDetailResponse(BaseModel):
    """Tag history with artifact metadata for each version"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    tag_id: UUID
    tag_name: str
    old_artifact_id: Optional[str]
    new_artifact_id: str
    changed_at: datetime
    changed_by: str
    # Artifact metadata for new artifact
    artifact_size: int
    artifact_original_name: Optional[str]
    artifact_content_type: Optional[str]


# Audit log schemas
class AuditLogResponse(BaseModel):
    """Audit log entry response"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    action: str
    resource: str
    user_id: str
    details: Optional[Dict[str, Any]]
    timestamp: datetime
    source_ip: Optional[str]


# Upload history schemas
class UploadHistoryResponse(BaseModel):
    """Upload event with artifact details"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    artifact_id: str
    package_id: UUID
    package_name: str
    project_name: str
    original_name: Optional[str]
    tag_name: Optional[str]
    uploaded_at: datetime
    uploaded_by: str
    source_ip: Optional[str]
    deduplicated: bool
    # Artifact metadata
    artifact_size: int
    artifact_content_type: Optional[str]


# Artifact provenance schemas
class ArtifactProvenanceResponse(BaseModel):
    """Full provenance/history of an artifact"""

    model_config = ConfigDict(from_attributes=True)

    artifact_id: str
    sha256: str
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    created_at: datetime
    created_by: str
    ref_count: int
    # First upload info
    first_uploaded_at: datetime
    first_uploaded_by: str
    # Usage statistics
    upload_count: int
    # References
    packages: List[Dict[str, Any]]  # List of {project_name, package_name, tag_names}
    tags: List[
        Dict[str, Any]
    ]  # List of {project_name, package_name, tag_name, created_at}
    # Upload history
    uploads: List[Dict[str, Any]]  # List of upload events


class ArtifactTagInfo(BaseModel):
    """Tag info for embedding in artifact responses"""

    id: UUID
    name: str
    package_id: UUID
    package_name: str
    project_name: str


class ArtifactDetailResponse(BaseModel):
    """Artifact with list of tags/packages referencing it"""

    model_config = ConfigDict(from_attributes=True)

    id: str
    sha256: str  # Explicit SHA256 field (same as id)
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    created_at: datetime
    created_by: str
    ref_count: int
    format_metadata: Optional[Dict[str, Any]] = None
    tags: List[ArtifactTagInfo] = []


class PackageArtifactResponse(BaseModel):
    """Artifact with tags for package artifact listing"""

    model_config = ConfigDict(from_attributes=True)

    id: str
    sha256: str  # Explicit SHA256 field (same as id)
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    created_at: datetime
    created_by: str
    format_metadata: Optional[Dict[str, Any]] = None
    tags: List[str] = []  # Tag names pointing to this artifact


class GlobalArtifactResponse(BaseModel):
    """Artifact with project/package context for global listing"""

    model_config = ConfigDict(from_attributes=True)

    id: str
    sha256: str
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    created_at: datetime
    created_by: str
    format_metadata: Optional[Dict[str, Any]] = None
    ref_count: int = 0
    # Context from tags/packages
    projects: List[str] = []  # List of project names containing this artifact
    packages: List[str] = []  # List of "project/package" paths
    tags: List[str] = []  # List of "project/package:tag" references


class GlobalTagResponse(BaseModel):
    """Tag with project/package context for global listing"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    name: str
    artifact_id: str
    created_at: datetime
    created_by: str
    project_name: str
    package_name: str
    artifact_size: Optional[int] = None
    artifact_content_type: Optional[str] = None


# Upload response
class UploadResponse(BaseModel):
    artifact_id: str
    sha256: str  # Explicit SHA256 field (same as artifact_id)
    size: int
    project: str
    package: str
    tag: Optional[str]
    checksum_md5: Optional[str] = None
    checksum_sha1: Optional[str] = None
    s3_etag: Optional[str] = None
    format_metadata: Optional[Dict[str, Any]] = None
    deduplicated: bool = False
    ref_count: int = 1  # Current reference count after this upload
    # Enhanced metadata (Issue #19)
    upload_id: Optional[UUID] = None  # UUID of the upload record
    content_type: Optional[str] = None
    original_name: Optional[str] = None
    created_at: Optional[datetime] = None


# Resumable upload schemas
class ResumableUploadInitRequest(BaseModel):
    """Request to initiate a resumable upload"""

    expected_hash: str  # SHA256 hash of the file (client must compute)
    filename: str
    content_type: Optional[str] = None
    size: int
    tag: Optional[str] = None

    @field_validator("expected_hash")
    @classmethod
    def validate_sha256_hash(cls, v: str) -> str:
        """Validate that expected_hash is a valid 64-character lowercase hex SHA256 hash."""
        if not _SHA256_HEX_RE.match(v.lower()):
            raise ValueError(
                "expected_hash must be a valid 64-character lowercase hexadecimal SHA256 hash"
            )
        return v.lower()  # Normalize to lowercase


class ResumableUploadInitResponse(BaseModel):
    """Response from initiating a resumable upload"""

    upload_id: Optional[str]  # None if file already exists
    already_exists: bool
    artifact_id: Optional[str] = None  # Set if already_exists is True
    chunk_size: int  # Recommended chunk size for parts


class ResumableUploadPartResponse(BaseModel):
    """Response from uploading a part"""

    part_number: int
    etag: str


class ResumableUploadCompleteRequest(BaseModel):
    """Request to complete a resumable upload"""

    tag: Optional[str] = None


class ResumableUploadCompleteResponse(BaseModel):
    """Response from completing a resumable upload"""

    artifact_id: str
    size: int
    project: str
    package: str
    tag: Optional[str]


class ResumableUploadStatusResponse(BaseModel):
    """Status of a resumable upload"""

    upload_id: str
    uploaded_parts: List[int]
    total_uploaded_bytes: int


# Consumer schemas
class ConsumerResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    package_id: UUID
    project_url: str
    last_access: datetime
    created_at: datetime


# Global search schemas
class SearchResultProject(BaseModel):
    """Project result for global search"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    name: str
    description: Optional[str]
    is_public: bool


class SearchResultPackage(BaseModel):
    """Package result for global search"""

    model_config = ConfigDict(from_attributes=True)

    id: UUID
    project_id: UUID
    project_name: str
    name: str
    description: Optional[str]
    format: str


class SearchResultArtifact(BaseModel):
    """Artifact/tag result for global search"""

    tag_id: UUID
    tag_name: str
    artifact_id: str
    package_id: UUID
    package_name: str
    project_name: str
    original_name: Optional[str]


class GlobalSearchResponse(BaseModel):
    """Combined search results across all entity types"""

    query: str
    projects: List[SearchResultProject]
    packages: List[SearchResultPackage]
    artifacts: List[SearchResultArtifact]
    counts: Dict[str, int]  # Total counts for each type


# Presigned URL response
class PresignedUrlResponse(BaseModel):
    """Response containing a presigned URL for direct S3 download"""

    url: str
    expires_at: datetime
    method: str = "GET"
    artifact_id: str
    size: int
    content_type: Optional[str] = None
    original_name: Optional[str] = None
    checksum_sha256: Optional[str] = None
    checksum_md5: Optional[str] = None


# Health check
class HealthResponse(BaseModel):
    status: str
    version: str = "1.0.0"
    storage_healthy: Optional[bool] = None
    database_healthy: Optional[bool] = None


# Garbage collection schemas
class GarbageCollectionResponse(BaseModel):
    """Response from garbage collection operation"""

    artifacts_deleted: int
    bytes_freed: int
    artifact_ids: List[str]
    dry_run: bool


class OrphanedArtifactResponse(BaseModel):
    """Information about an orphaned artifact"""

    id: str
    size: int
    created_at: datetime
    created_by: str
    original_name: Optional[str]


# Storage statistics schemas
class StorageStatsResponse(BaseModel):
    """Global storage statistics"""

    total_artifacts: int
    total_size_bytes: int
    unique_artifacts: int  # Artifacts with ref_count > 0
    orphaned_artifacts: int  # Artifacts with ref_count = 0
    orphaned_size_bytes: int
    total_uploads: int
    deduplicated_uploads: int
    deduplication_ratio: (
        float  # total_uploads / unique_artifacts (if > 1, deduplication is working)
    )
    storage_saved_bytes: int  # Bytes saved through deduplication


class ConsistencyCheckResponse(BaseModel):
    """Result of S3/Database consistency check"""

    total_artifacts_checked: int
    orphaned_s3_objects: int  # Objects in S3 but not in DB
    missing_s3_objects: int  # Records in DB but not in S3
    size_mismatches: int  # Records where DB size != S3 size
    healthy: bool
    orphaned_s3_keys: List[str] = []  # Limited list of orphaned S3 keys
    missing_s3_keys: List[str] = []  # Limited list of missing S3 keys
    size_mismatch_artifacts: List[Dict[str, Any]] = []  # Limited list of mismatches


class DeduplicationStatsResponse(BaseModel):
    """Deduplication effectiveness statistics"""

    total_logical_bytes: (
        int  # Sum of all upload sizes (what would be stored without dedup)
    )
    total_physical_bytes: int  # Actual storage used
    bytes_saved: int
    savings_percentage: float
    total_uploads: int
    unique_artifacts: int
    duplicate_uploads: int
    average_ref_count: float
    max_ref_count: int
    most_referenced_artifacts: List[Dict[str, Any]]  # Top N most referenced


class ProjectStatsResponse(BaseModel):
    """Per-project statistics"""

    project_id: str
    project_name: str
    package_count: int
    tag_count: int
    artifact_count: int
    total_size_bytes: int
    upload_count: int
    deduplicated_uploads: int
    storage_saved_bytes: int = 0  # Bytes saved through deduplication
    deduplication_ratio: float = 1.0  # upload_count / artifact_count


class PackageStatsResponse(BaseModel):
    """Per-package statistics"""

    package_id: str
    package_name: str
    project_name: str
    tag_count: int
    artifact_count: int
    total_size_bytes: int
    upload_count: int
    deduplicated_uploads: int
    storage_saved_bytes: int = 0
    deduplication_ratio: float = 1.0


class ArtifactStatsResponse(BaseModel):
    """Per-artifact reference statistics"""

    artifact_id: str
    sha256: str
    size: int
    ref_count: int
    storage_savings: int  # (ref_count - 1) * size
    tags: List[Dict[str, Any]]  # Tags referencing this artifact
    projects: List[str]  # Projects using this artifact
    packages: List[str]  # Packages using this artifact
    first_uploaded: Optional[datetime] = None
    last_referenced: Optional[datetime] = None


class CrossProjectDeduplicationResponse(BaseModel):
    """Cross-project deduplication statistics"""

    shared_artifacts_count: int  # Artifacts used in multiple projects
    total_cross_project_savings: int  # Bytes saved by cross-project sharing
    shared_artifacts: List[Dict[str, Any]]  # Details of shared artifacts


class TimeBasedStatsResponse(BaseModel):
    """Time-based deduplication statistics"""

    period: str  # "daily", "weekly", "monthly"
    start_date: datetime
    end_date: datetime
    data_points: List[
        Dict[str, Any]
    ]  # List of {date, uploads, unique, duplicated, bytes_saved}


class StatsReportResponse(BaseModel):
    """Summary report in various formats"""

    format: str  # "json", "csv", "markdown"
    generated_at: datetime
    content: str