Metadata database tracks all uploads with project, package, tag, and timestamp, queryable via API
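The models below make upload history queryable by project, package, tag, and time. As a rough sketch of the intended query pattern (module paths such as app.models and the SessionLocal factory are illustrative, not part of this commit):

    from datetime import datetime, timedelta

    from app.database import SessionLocal  # hypothetical session factory
    from app.models import Package, Project, Upload


    def recent_uploads(project_name: str, package_name: str, days: int = 7):
        """List uploads for a project/package in the last `days`, newest first."""
        since = datetime.utcnow() - timedelta(days=days)
        with SessionLocal() as db:
            return (
                db.query(Upload)
                .join(Package, Upload.package_id == Package.id)
                .join(Project, Package.project_id == Project.id)
                .filter(
                    Project.name == project_name,
                    Package.name == package_name,
                    # Served by idx_uploads_package_uploaded_at from this commit
                    Upload.uploaded_at >= since,
                )
                .order_by(Upload.uploaded_at.desc())
                .all()
            )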
@@ -1,8 +1,16 @@
 from datetime import datetime
 from typing import Optional
 from sqlalchemy import (
-    Column, String, Text, Boolean, Integer, BigInteger,
-    DateTime, ForeignKey, CheckConstraint, Index, JSON
+    Column,
+    String,
+    Text,
+    Boolean,
+    Integer,
+    BigInteger,
+    DateTime,
+    ForeignKey,
+    CheckConstraint,
+    Index,
+    JSON,
 )
 from sqlalchemy.dialects.postgresql import UUID
 from sqlalchemy.orm import relationship, declarative_base
@@ -19,11 +27,17 @@ class Project(Base):
     description = Column(Text)
     is_public = Column(Boolean, default=True)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
-    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
+    updated_at = Column(
+        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
+    )
     created_by = Column(String(255), nullable=False)
 
-    packages = relationship("Package", back_populates="project", cascade="all, delete-orphan")
-    permissions = relationship("AccessPermission", back_populates="project", cascade="all, delete-orphan")
+    packages = relationship(
+        "Package", back_populates="project", cascade="all, delete-orphan"
+    )
+    permissions = relationship(
+        "AccessPermission", back_populates="project", cascade="all, delete-orphan"
+    )
 
     __table_args__ = (
         Index("idx_projects_name", "name"),
@@ -35,32 +49,44 @@ class Package(Base):
     __tablename__ = "packages"
 
     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
+    project_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("projects.id", ondelete="CASCADE"),
+        nullable=False,
+    )
     name = Column(String(255), nullable=False)
     description = Column(Text)
     format = Column(String(50), default="generic", nullable=False)
     platform = Column(String(50), default="any", nullable=False)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
-    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
+    updated_at = Column(
+        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
+    )
 
     project = relationship("Project", back_populates="packages")
     tags = relationship("Tag", back_populates="package", cascade="all, delete-orphan")
-    uploads = relationship("Upload", back_populates="package", cascade="all, delete-orphan")
-    consumers = relationship("Consumer", back_populates="package", cascade="all, delete-orphan")
+    uploads = relationship(
+        "Upload", back_populates="package", cascade="all, delete-orphan"
+    )
+    consumers = relationship(
+        "Consumer", back_populates="package", cascade="all, delete-orphan"
+    )
 
     __table_args__ = (
         Index("idx_packages_project_id", "project_id"),
         Index("idx_packages_name", "name"),
         Index("idx_packages_format", "format"),
         Index("idx_packages_platform", "platform"),
-        Index("idx_packages_project_name", "project_id", "name", unique=True),  # Composite unique index
+        Index(
+            "idx_packages_project_name", "project_id", "name", unique=True
+        ),  # Composite unique index
         CheckConstraint(
             "format IN ('generic', 'npm', 'pypi', 'docker', 'deb', 'rpm', 'maven', 'nuget', 'helm')",
-            name="check_package_format"
+            name="check_package_format",
         ),
         CheckConstraint(
             "platform IN ('any', 'linux', 'darwin', 'windows', 'linux-amd64', 'linux-arm64', 'darwin-amd64', 'darwin-arm64', 'windows-amd64')",
-            name="check_package_platform"
+            name="check_package_platform",
         ),
         {"extend_existing": True},
     )
@@ -76,7 +102,9 @@ class Artifact(Base):
     checksum_md5 = Column(String(32))  # MD5 hash for additional verification
     checksum_sha1 = Column(String(40))  # SHA1 hash for compatibility
     s3_etag = Column(String(64))  # S3 ETag for verification
-    artifact_metadata = Column("metadata", JSON, default=dict)  # Format-specific metadata (column name is 'metadata')
+    artifact_metadata = Column(
+        "metadata", JSON, default=dict
+    )  # Format-specific metadata (column name is 'metadata')
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     created_by = Column(String(255), nullable=False)
     ref_count = Column(Integer, default=1)
@@ -113,22 +141,34 @@ class Tag(Base):
     __tablename__ = "tags"
 
     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False)
+    package_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("packages.id", ondelete="CASCADE"),
+        nullable=False,
+    )
     name = Column(String(255), nullable=False)
     artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
-    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
+    updated_at = Column(
+        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
+    )
     created_by = Column(String(255), nullable=False)
 
     package = relationship("Package", back_populates="tags")
     artifact = relationship("Artifact", back_populates="tags")
-    history = relationship("TagHistory", back_populates="tag", cascade="all, delete-orphan")
+    history = relationship(
+        "TagHistory", back_populates="tag", cascade="all, delete-orphan"
+    )
 
     __table_args__ = (
         Index("idx_tags_package_id", "package_id"),
         Index("idx_tags_artifact_id", "artifact_id"),
-        Index("idx_tags_package_name", "package_id", "name", unique=True),  # Composite unique index
-        Index("idx_tags_package_created_at", "package_id", "created_at"),  # For recent tags queries
+        Index(
+            "idx_tags_package_name", "package_id", "name", unique=True
+        ),  # Composite unique index
+        Index(
+            "idx_tags_package_created_at", "package_id", "created_at"
+        ),  # For recent tags queries
     )
 
 
@@ -136,7 +176,9 @@ class TagHistory(Base):
     __tablename__ = "tag_history"
 
     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
+    tag_id = Column(
+        UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False
+    )
     old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
     new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
     change_type = Column(String(20), nullable=False, default="update")
@@ -148,7 +190,9 @@ class TagHistory(Base):
     __table_args__ = (
         Index("idx_tag_history_tag_id", "tag_id"),
         Index("idx_tag_history_changed_at", "changed_at"),
-        CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"),
+        CheckConstraint(
+            "change_type IN ('create', 'update', 'delete')", name="check_change_type"
+        ),
     )
 
 
@@ -164,6 +208,11 @@ class Upload(Base):
     duration_ms = Column(Integer)  # Upload timing in milliseconds
     deduplicated = Column(Boolean, default=False)  # Whether artifact was deduplicated
     checksum_verified = Column(Boolean, default=True)  # Whether checksum was verified
+    status = Column(
+        String(20), default="completed", nullable=False
+    )  # pending, completed, failed
+    error_message = Column(Text)  # Error details for failed uploads
+    client_checksum = Column(String(64))  # Client-provided SHA256 for verification
     uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow)
     uploaded_by = Column(String(255), nullable=False)
     source_ip = Column(String(45))
@@ -177,6 +226,35 @@ class Upload(Base):
         Index("idx_uploads_uploaded_at", "uploaded_at"),
         Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"),
         Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"),
+        Index("idx_uploads_status", "status"),
+        Index("idx_uploads_status_uploaded_at", "status", "uploaded_at"),
+        CheckConstraint(
+            "status IN ('pending', 'completed', 'failed')", name="check_upload_status"
+        ),
     )
 
 
+class UploadLock(Base):
+    """Track in-progress uploads for conflict detection (409 responses)."""
+
+    __tablename__ = "upload_locks"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    sha256_hash = Column(String(64), nullable=False)
+    package_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("packages.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    locked_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+    locked_by = Column(String(255), nullable=False)
+    expires_at = Column(DateTime(timezone=True), nullable=False)
+
+    __table_args__ = (
+        Index("idx_upload_locks_expires_at", "expires_at"),
+        Index(
+            "idx_upload_locks_hash_package", "sha256_hash", "package_id", unique=True
+        ),
+    )
+
+
@@ -184,7 +262,11 @@ class Consumer(Base):
     __tablename__ = "consumers"
 
     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False)
+    package_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("packages.id", ondelete="CASCADE"),
+        nullable=False,
+    )
     project_url = Column(String(2048), nullable=False)
     last_access = Column(DateTime(timezone=True), default=datetime.utcnow)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
@@ -201,7 +283,11 @@ class AccessPermission(Base):
     __tablename__ = "access_permissions"
 
     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
-    project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
+    project_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("projects.id", ondelete="CASCADE"),
+        nullable=False,
+    )
     user_id = Column(String(255), nullable=False)
     level = Column(String(20), nullable=False)
     created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
@@ -252,3 +338,51 @@ class AuditLog(Base):
         Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"),
         Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"),
     )
+
+
+class ProjectHistory(Base):
+    """Track changes to project metadata over time."""
+
+    __tablename__ = "project_history"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    project_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("projects.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    field_name = Column(String(100), nullable=False)
+    old_value = Column(Text)
+    new_value = Column(Text)
+    changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+    changed_by = Column(String(255), nullable=False)
+
+    __table_args__ = (
+        Index("idx_project_history_project_id", "project_id"),
+        Index("idx_project_history_changed_at", "changed_at"),
+        Index("idx_project_history_project_changed_at", "project_id", "changed_at"),
+    )
+
+
+class PackageHistory(Base):
+    """Track changes to package metadata over time."""
+
+    __tablename__ = "package_history"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    package_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("packages.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    field_name = Column(String(100), nullable=False)
+    old_value = Column(Text)
+    new_value = Column(Text)
+    changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
+    changed_by = Column(String(255), nullable=False)
+
+    __table_args__ = (
+        Index("idx_package_history_package_id", "package_id"),
+        Index("idx_package_history_changed_at", "changed_at"),
+        Index("idx_package_history_package_changed_at", "package_id", "changed_at"),
+    )
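A note on the new UploadLock table: the unique (sha256_hash, package_id) index lets concurrent uploads race safely on a plain insert. A minimal sketch of lock acquisition, assuming the models module path and the helper name (both hypothetical):

    import uuid
    from datetime import datetime, timedelta

    from sqlalchemy.exc import IntegrityError
    from sqlalchemy.orm import Session

    from app.models import UploadLock  # module path is illustrative


    def try_acquire_upload_lock(
        db: Session, sha256_hash: str, package_id, user: str, ttl_seconds: int = 300
    ) -> bool:
        """Try to claim an in-progress upload; False means another upload holds it."""
        lock = UploadLock(
            id=uuid.uuid4(),
            sha256_hash=sha256_hash,
            package_id=package_id,
            locked_by=user,
            expires_at=datetime.utcnow() + timedelta(seconds=ttl_seconds),
        )
        db.add(lock)
        try:
            # idx_upload_locks_hash_package (unique) makes concurrent inserts race-safe.
            db.commit()
            return True
        except IntegrityError:
            db.rollback()
            return False  # caller maps this to an HTTP 409 response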
File diff suppressed because it is too large
@@ -12,6 +12,7 @@ class PaginationMeta(BaseModel):
     limit: int
     total: int
     total_pages: int
+    has_more: bool = False  # True if there are more pages after current page
 
 
 class PaginatedResponse(BaseModel, Generic[T]):
@@ -39,6 +40,13 @@ class ProjectResponse(BaseModel):
         from_attributes = True
 
 
+class ProjectUpdate(BaseModel):
+    """Schema for updating a project"""
+
+    description: Optional[str] = None
+    is_public: Optional[bool] = None
+
+
 # Package format and platform enums
 PACKAGE_FORMATS = [
     "generic",
@@ -86,6 +94,14 @@ class PackageResponse(BaseModel):
         from_attributes = True
 
 
+class PackageUpdate(BaseModel):
+    """Schema for updating a package"""
+
+    description: Optional[str] = None
+    format: Optional[str] = None
+    platform: Optional[str] = None
+
+
 class TagSummary(BaseModel):
     """Lightweight tag info for embedding in package responses"""
 
@@ -189,6 +205,93 @@ class TagHistoryResponse(BaseModel):
         from_attributes = True
 
 
+class TagHistoryDetailResponse(BaseModel):
+    """Tag history with artifact metadata for each version"""
+
+    id: UUID
+    tag_id: UUID
+    tag_name: str
+    old_artifact_id: Optional[str]
+    new_artifact_id: str
+    changed_at: datetime
+    changed_by: str
+    # Artifact metadata for new artifact
+    artifact_size: int
+    artifact_original_name: Optional[str]
+    artifact_content_type: Optional[str]
+
+    class Config:
+        from_attributes = True
+
+
+# Audit log schemas
+class AuditLogResponse(BaseModel):
+    """Audit log entry response"""
+
+    id: UUID
+    action: str
+    resource: str
+    user_id: str
+    details: Optional[Dict[str, Any]]
+    timestamp: datetime
+    source_ip: Optional[str]
+
+    class Config:
+        from_attributes = True
+
+
+# Upload history schemas
+class UploadHistoryResponse(BaseModel):
+    """Upload event with artifact details"""
+
+    id: UUID
+    artifact_id: str
+    package_id: UUID
+    package_name: str
+    project_name: str
+    original_name: Optional[str]
+    tag_name: Optional[str]
+    uploaded_at: datetime
+    uploaded_by: str
+    source_ip: Optional[str]
+    deduplicated: bool
+    # Artifact metadata
+    artifact_size: int
+    artifact_content_type: Optional[str]
+
+    class Config:
+        from_attributes = True
+
+
+# Artifact provenance schemas
+class ArtifactProvenanceResponse(BaseModel):
+    """Full provenance/history of an artifact"""
+
+    artifact_id: str
+    sha256: str
+    size: int
+    content_type: Optional[str]
+    original_name: Optional[str]
+    created_at: datetime
+    created_by: str
+    ref_count: int
+    # First upload info
+    first_uploaded_at: datetime
+    first_uploaded_by: str
+    # Usage statistics
+    upload_count: int
+    # References
+    packages: List[Dict[str, Any]]  # List of {project_name, package_name, tag_names}
+    tags: List[
+        Dict[str, Any]
+    ]  # List of {project_name, package_name, tag_name, created_at}
+    # Upload history
+    uploads: List[Dict[str, Any]]  # List of upload events
+
+    class Config:
+        from_attributes = True
+
+
 class ArtifactTagInfo(BaseModel):
     """Tag info for embedding in artifact responses"""
 
@@ -240,6 +343,44 @@ class PackageArtifactResponse(BaseModel):
         from_attributes = True
 
 
+class GlobalArtifactResponse(BaseModel):
+    """Artifact with project/package context for global listing"""
+
+    id: str
+    sha256: str
+    size: int
+    content_type: Optional[str]
+    original_name: Optional[str]
+    created_at: datetime
+    created_by: str
+    format_metadata: Optional[Dict[str, Any]] = None
+    ref_count: int = 0
+    # Context from tags/packages
+    projects: List[str] = []  # List of project names containing this artifact
+    packages: List[str] = []  # List of "project/package" paths
+    tags: List[str] = []  # List of "project/package:tag" references
+
+    class Config:
+        from_attributes = True
+
+
+class GlobalTagResponse(BaseModel):
+    """Tag with project/package context for global listing"""
+
+    id: UUID
+    name: str
+    artifact_id: str
+    created_at: datetime
+    created_by: str
+    project_name: str
+    package_name: str
+    artifact_size: Optional[int] = None
+    artifact_content_type: Optional[str] = None
+
+    class Config:
+        from_attributes = True
+
+
 # Upload response
 class UploadResponse(BaseModel):
     artifact_id: str
@@ -254,6 +395,11 @@ class UploadResponse(BaseModel):
     format_metadata: Optional[Dict[str, Any]] = None
     deduplicated: bool = False
     ref_count: int = 1  # Current reference count after this upload
+    # Enhanced metadata (Issue #19)
+    upload_id: Optional[UUID] = None  # UUID of the upload record
+    content_type: Optional[str] = None
+    original_name: Optional[str] = None
+    created_at: Optional[datetime] = None
 
 
 # Resumable upload schemas
@@ -440,6 +586,19 @@ class StorageStatsResponse(BaseModel):
     storage_saved_bytes: int  # Bytes saved through deduplication
 
 
+class ConsistencyCheckResponse(BaseModel):
+    """Result of S3/Database consistency check"""
+
+    total_artifacts_checked: int
+    orphaned_s3_objects: int  # Objects in S3 but not in DB
+    missing_s3_objects: int  # Records in DB but not in S3
+    size_mismatches: int  # Records where DB size != S3 size
+    healthy: bool
+    orphaned_s3_keys: List[str] = []  # Limited list of orphaned S3 keys
+    missing_s3_keys: List[str] = []  # Limited list of missing S3 keys
+    size_mismatch_artifacts: List[Dict[str, Any]] = []  # Limited list of mismatches
+
+
 class DeduplicationStatsResponse(BaseModel):
     """Deduplication effectiveness statistics"""
 
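For the new has_more flag above, a handler might compute it along these lines (a sketch: the page field and module path are assumptions; only limit, total, total_pages, and has_more appear in the hunk):

    from math import ceil

    from app.schemas import PaginationMeta  # module path is illustrative


    def build_pagination_meta(page: int, limit: int, total: int) -> PaginationMeta:
        """Compute the new has_more flag alongside the existing counters."""
        total_pages = ceil(total / limit) if limit > 0 else 0
        return PaginationMeta(
            page=page,  # `page` is assumed to sit above `limit` in the model
            limit=limit,
            total=total,
            total_pages=total_pages,
            has_more=page < total_pages,  # more pages exist after the current one
        )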
@@ -6,7 +6,7 @@ from typing import List, Optional, Tuple
 from sqlalchemy.orm import Session
 import logging
 
-from ..models import Artifact, Tag, Upload, Package
+from ..models import Artifact, Tag
 from ..repositories.artifact import ArtifactRepository
 from ..repositories.tag import TagRepository
 from ..storage import S3Storage
@@ -40,10 +40,14 @@ class ArtifactCleanupService:
         artifact = self.artifact_repo.get_by_sha256(artifact_id)
         if artifact:
             artifact = self.artifact_repo.decrement_ref_count(artifact)
-            logger.info(f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}")
+            logger.info(
+                f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}"
+            )
         return artifact
 
-    def on_tag_updated(self, old_artifact_id: str, new_artifact_id: str) -> Tuple[Optional[Artifact], Optional[Artifact]]:
+    def on_tag_updated(
+        self, old_artifact_id: str, new_artifact_id: str
+    ) -> Tuple[Optional[Artifact], Optional[Artifact]]:
         """
         Called when a tag is updated to point to a different artifact.
         Decrements ref_count for old artifact, increments for new (if different).
@@ -58,13 +62,17 @@ class ArtifactCleanupService:
         old_artifact = self.artifact_repo.get_by_sha256(old_artifact_id)
         if old_artifact:
             old_artifact = self.artifact_repo.decrement_ref_count(old_artifact)
-            logger.info(f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}")
+            logger.info(
+                f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}"
+            )
 
         # Increment new artifact ref_count
         new_artifact = self.artifact_repo.get_by_sha256(new_artifact_id)
         if new_artifact:
             new_artifact = self.artifact_repo.increment_ref_count(new_artifact)
-            logger.info(f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}")
+            logger.info(
+                f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}"
+            )
 
         return old_artifact, new_artifact
 
@@ -84,11 +92,15 @@ class ArtifactCleanupService:
             if artifact:
                 self.artifact_repo.decrement_ref_count(artifact)
                 affected_artifacts.append(tag.artifact_id)
-                logger.info(f"Decremented ref_count for artifact {tag.artifact_id} (package delete)")
+                logger.info(
+                    f"Decremented ref_count for artifact {tag.artifact_id} (package delete)"
+                )
 
         return affected_artifacts
 
-    def cleanup_orphaned_artifacts(self, batch_size: int = 100, dry_run: bool = False) -> List[str]:
+    def cleanup_orphaned_artifacts(
+        self, batch_size: int = 100, dry_run: bool = False
+    ) -> List[str]:
         """
         Find and delete artifacts with ref_count = 0.
 
@@ -116,7 +128,9 @@ class ArtifactCleanupService:
                     # Then delete from database
                     self.artifact_repo.delete(artifact)
                     deleted_ids.append(artifact.id)
-                    logger.info(f"Deleted orphaned artifact from database: {artifact.id}")
+                    logger.info(
+                        f"Deleted orphaned artifact from database: {artifact.id}"
+                    )
                 except Exception as e:
                     logger.error(f"Failed to delete artifact {artifact.id}: {e}")
 
@@ -128,10 +142,12 @@ class ArtifactCleanupService:
     def get_orphaned_count(self) -> int:
         """Get count of artifacts with ref_count = 0."""
         from sqlalchemy import func
 
         return (
             self.db.query(func.count(Artifact.id))
             .filter(Artifact.ref_count == 0)
-            .scalar() or 0
+            .scalar()
+            or 0
         )
 
     def verify_ref_counts(self, fix: bool = False) -> List[dict]:
@@ -173,7 +189,9 @@ class ArtifactCleanupService:
 
             if fix:
                 artifact.ref_count = max(actual_count, 1)
-                logger.warning(f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}")
+                logger.warning(
+                    f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}"
+                )
 
         if fix and mismatches:
             self.db.commit()
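The cleanup service above suggests a maintenance entry point along these lines (a sketch; the constructor signature, module paths, and SessionLocal factory are guesses, not shown in this diff):

    from app.database import SessionLocal  # hypothetical session factory
    from app.services.cleanup import ArtifactCleanupService  # path is a guess
    from app.storage import S3Storage


    def run_cleanup(dry_run: bool = True) -> None:
        with SessionLocal() as db:
            # Constructor arguments are assumed; the diff does not show __init__.
            service = ArtifactCleanupService(db, S3Storage())
            deleted = service.cleanup_orphaned_artifacts(batch_size=100, dry_run=dry_run)
            print(f"{'Would delete' if dry_run else 'Deleted'} {len(deleted)} artifacts")
            # Reconcile stored ref_counts against actual references; fix on real runs only.
            mismatches = service.verify_ref_counts(fix=not dry_run)
            print(f"Found {len(mismatches)} ref_count mismatches")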
@@ -202,6 +202,9 @@ class StorageResult(NamedTuple):
     md5: Optional[str] = None
     sha1: Optional[str] = None
     s3_etag: Optional[str] = None
+    already_existed: bool = (
+        False  # True if artifact was deduplicated (S3 object already existed)
+    )
 
 
 class S3StorageUnavailableError(StorageError):
@@ -354,6 +357,7 @@ class S3Storage:
             md5=md5_hash,
             sha1=sha1_hash,
             s3_etag=s3_etag,
+            already_existed=exists,
         )
 
     def _store_multipart(self, file: BinaryIO, content_length: int) -> StorageResult:
@@ -433,6 +437,7 @@ class S3Storage:
                 md5=md5_hash,
                 sha1=sha1_hash,
                 s3_etag=s3_etag,
+                already_existed=True,
             )
 
         # Seek back to start for upload
@@ -486,6 +491,7 @@ class S3Storage:
                 md5=md5_hash,
                 sha1=sha1_hash,
                 s3_etag=s3_etag,
+                already_existed=False,
             )
 
         except Exception as e:
@@ -535,6 +541,7 @@ class S3Storage:
                 md5=md5_hash,
                 sha1=sha1_hash,
                 s3_etag=s3_etag,
+                already_existed=True,
             )
 
         # Upload based on size
@@ -615,6 +622,7 @@ class S3Storage:
             md5=md5_hash,
             sha1=sha1_hash,
             s3_etag=s3_etag,
+            already_existed=False,
         )
 
     def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
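The new already_existed field lets callers map storage-level deduplication onto the Upload.deduplicated flag. A hedged sketch of that hand-off (the store wrapper and sha256 attribute are assumptions; the diff shows only _store_multipart and the StorageResult fields listed above):

    from app.models import Upload  # model from the first file in this commit


    def record_upload(db, storage, package, file_obj, size, user):
        # `store` is a hypothetical public wrapper around the upload paths above.
        result = storage.store(file_obj, size)
        upload = Upload(
            package_id=package.id,
            artifact_id=result.sha256,  # assumes StorageResult also carries sha256
            deduplicated=result.already_existed,  # field added in this commit
            uploaded_by=user,
        )
        db.add(upload)
        db.commit()
        return upload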