Merge branch 'feature/audit-history-api' into 'main'

Metadata database tracks all uploads with project, package, tag, and timestamp, all queryable via the API

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!21
This commit is contained in:
Mondo Diaz
2026-01-07 12:31:46 -06:00
24 changed files with 5044 additions and 2123 deletions

View File

@@ -7,6 +7,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added global artifacts endpoint `GET /api/v1/artifacts` with project/package/tag/size/date filters (#18)
- Added global tags endpoint `GET /api/v1/tags` with project/package/search/date filters (#18)
- Added wildcard pattern matching (`*`) for tag filters across all endpoints (#18)
- Added comma-separated multi-value support for tag filters (#18)
- Added `search` parameter to `/api/v1/uploads` for filename search (#18)
- Added `tag` filter to `/api/v1/uploads` endpoint (#18)
- Added `sort` and `order` parameters to `/api/v1/uploads` endpoint (#18)
- Added `min_size` and `max_size` filters to package artifacts endpoint (#18)
- Added `sort` and `order` parameters to package artifacts endpoint (#18)
- Added `from` and `to` date filters to package tags endpoint (#18)
- Added `GlobalArtifactResponse` and `GlobalTagResponse` schemas (#18)
- Added S3 object verification before database commit during upload (#19)
- Added S3 object cleanup on database commit failure (#19)
- Added upload duration tracking (`duration_ms` field) (#19)
- Added `User-Agent` header capture during uploads (#19)
- Added `X-Checksum-SHA256` header support for client-side checksum verification (#19)
- Added `status`, `error_message`, `client_checksum` columns to uploads table (#19)
- Added `upload_locks` table for future concurrent upload conflict detection (#19)
- Added consistency check endpoint `GET /api/v1/admin/consistency-check` (#19)
- Added `PUT /api/v1/projects/{project}` endpoint for project updates with audit logging (#20)
- Added `PUT /api/v1/project/{project}/packages/{package}` endpoint for package updates with audit logging (#20)
- Added `artifact.download` audit logging to download endpoint (#20)
- Added `ProjectHistory` and `PackageHistory` models with database triggers (#20)
- Added migration `004_history_tables.sql` for project/package history (#20)
- Added migration `005_upload_enhancements.sql` for upload status tracking (#19)
- Added 9 integration tests for global artifacts/tags endpoints (#18)
- Added global uploads query endpoint `GET /api/v1/uploads` with project/package/user/date filters (#18)
- Added project-level uploads endpoint `GET /api/v1/project/{project}/uploads` (#18)
- Added `has_more` field to pagination metadata for easier pagination UI (#18)
- Added `upload_id`, `content_type`, `original_name`, `created_at` fields to upload response (#19)
- Added audit log API endpoints with filtering and pagination (#20)
- `GET /api/v1/audit-logs` - list all audit logs with action/resource/user/date filters
- `GET /api/v1/projects/{project}/audit-logs` - project-scoped audit logs
- `GET /api/v1/project/{project}/{package}/audit-logs` - package-scoped audit logs
- Added upload history API endpoints (#20)
- `GET /api/v1/project/{project}/{package}/uploads` - list upload events for a package
- `GET /api/v1/artifact/{id}/uploads` - list all uploads of a specific artifact
- Added artifact provenance endpoint `GET /api/v1/artifact/{id}/history` (#20)
- Returns full artifact history including packages, tags, and upload events
- Added audit logging for project.create, package.create, tag.create, tag.update, artifact.upload actions (#20)
- Added `AuditLogResponse`, `UploadHistoryResponse`, `ArtifactProvenanceResponse` schemas (#20)
- Added `TagHistoryDetailResponse` schema with artifact metadata (#20)
- Added 31 integration tests for audit log, history, and upload query endpoints (#22)
### Changed
- Standardized audit action naming to `{entity}.{action}` pattern (project.delete, package.delete, tag.delete) (#20)
- Added `StorageBackend` protocol/interface for backend-agnostic storage (#33)
- Added `health_check()` method to storage backend with `/health` endpoint integration (#33)
- Added `verify_integrity()` method for post-upload hash validation (#33)

View File

@@ -1,8 +1,16 @@
from datetime import datetime
from typing import Optional
from sqlalchemy import (
Column, String, Text, Boolean, Integer, BigInteger,
DateTime, ForeignKey, CheckConstraint, Index, JSON
Column,
String,
Text,
Boolean,
Integer,
BigInteger,
DateTime,
ForeignKey,
CheckConstraint,
Index,
JSON,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship, declarative_base
@@ -19,11 +27,17 @@ class Project(Base):
description = Column(Text)
is_public = Column(Boolean, default=True)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
created_by = Column(String(255), nullable=False)
packages = relationship("Package", back_populates="project", cascade="all, delete-orphan")
permissions = relationship("AccessPermission", back_populates="project", cascade="all, delete-orphan")
packages = relationship(
"Package", back_populates="project", cascade="all, delete-orphan"
)
permissions = relationship(
"AccessPermission", back_populates="project", cascade="all, delete-orphan"
)
__table_args__ = (
Index("idx_projects_name", "name"),
@@ -35,32 +49,44 @@ class Package(Base):
__tablename__ = "packages"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
project_id = Column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=False,
)
name = Column(String(255), nullable=False)
description = Column(Text)
format = Column(String(50), default="generic", nullable=False)
platform = Column(String(50), default="any", nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
project = relationship("Project", back_populates="packages")
tags = relationship("Tag", back_populates="package", cascade="all, delete-orphan")
uploads = relationship("Upload", back_populates="package", cascade="all, delete-orphan")
consumers = relationship("Consumer", back_populates="package", cascade="all, delete-orphan")
uploads = relationship(
"Upload", back_populates="package", cascade="all, delete-orphan"
)
consumers = relationship(
"Consumer", back_populates="package", cascade="all, delete-orphan"
)
__table_args__ = (
Index("idx_packages_project_id", "project_id"),
Index("idx_packages_name", "name"),
Index("idx_packages_format", "format"),
Index("idx_packages_platform", "platform"),
Index("idx_packages_project_name", "project_id", "name", unique=True), # Composite unique index
Index(
"idx_packages_project_name", "project_id", "name", unique=True
), # Composite unique index
CheckConstraint(
"format IN ('generic', 'npm', 'pypi', 'docker', 'deb', 'rpm', 'maven', 'nuget', 'helm')",
name="check_package_format"
name="check_package_format",
),
CheckConstraint(
"platform IN ('any', 'linux', 'darwin', 'windows', 'linux-amd64', 'linux-arm64', 'darwin-amd64', 'darwin-arm64', 'windows-amd64')",
name="check_package_platform"
name="check_package_platform",
),
{"extend_existing": True},
)
@@ -76,7 +102,9 @@ class Artifact(Base):
checksum_md5 = Column(String(32)) # MD5 hash for additional verification
checksum_sha1 = Column(String(40)) # SHA1 hash for compatibility
s3_etag = Column(String(64)) # S3 ETag for verification
artifact_metadata = Column("metadata", JSON, default=dict) # Format-specific metadata (column name is 'metadata')
artifact_metadata = Column(
"metadata", JSON, default=dict
) # Format-specific metadata (column name is 'metadata')
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
created_by = Column(String(255), nullable=False)
ref_count = Column(Integer, default=1)
@@ -113,22 +141,34 @@ class Tag(Base):
__tablename__ = "tags"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False)
package_id = Column(
UUID(as_uuid=True),
ForeignKey("packages.id", ondelete="CASCADE"),
nullable=False,
)
name = Column(String(255), nullable=False)
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
updated_at = Column(
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
)
created_by = Column(String(255), nullable=False)
package = relationship("Package", back_populates="tags")
artifact = relationship("Artifact", back_populates="tags")
history = relationship("TagHistory", back_populates="tag", cascade="all, delete-orphan")
history = relationship(
"TagHistory", back_populates="tag", cascade="all, delete-orphan"
)
__table_args__ = (
Index("idx_tags_package_id", "package_id"),
Index("idx_tags_artifact_id", "artifact_id"),
Index("idx_tags_package_name", "package_id", "name", unique=True), # Composite unique index
Index("idx_tags_package_created_at", "package_id", "created_at"), # For recent tags queries
Index(
"idx_tags_package_name", "package_id", "name", unique=True
), # Composite unique index
Index(
"idx_tags_package_created_at", "package_id", "created_at"
), # For recent tags queries
)
@@ -136,7 +176,9 @@ class TagHistory(Base):
__tablename__ = "tag_history"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
tag_id = Column(
UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False
)
old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
change_type = Column(String(20), nullable=False, default="update")
@@ -148,7 +190,9 @@ class TagHistory(Base):
__table_args__ = (
Index("idx_tag_history_tag_id", "tag_id"),
Index("idx_tag_history_changed_at", "changed_at"),
CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"),
CheckConstraint(
"change_type IN ('create', 'update', 'delete')", name="check_change_type"
),
)
@@ -164,6 +208,11 @@ class Upload(Base):
duration_ms = Column(Integer) # Upload timing in milliseconds
deduplicated = Column(Boolean, default=False) # Whether artifact was deduplicated
checksum_verified = Column(Boolean, default=True) # Whether checksum was verified
status = Column(
String(20), default="completed", nullable=False
) # pending, completed, failed
error_message = Column(Text) # Error details for failed uploads
client_checksum = Column(String(64)) # Client-provided SHA256 for verification
uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow)
uploaded_by = Column(String(255), nullable=False)
source_ip = Column(String(45))
@@ -177,6 +226,35 @@ class Upload(Base):
Index("idx_uploads_uploaded_at", "uploaded_at"),
Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"),
Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"),
Index("idx_uploads_status", "status"),
Index("idx_uploads_status_uploaded_at", "status", "uploaded_at"),
CheckConstraint(
"status IN ('pending', 'completed', 'failed')", name="check_upload_status"
),
)
class UploadLock(Base):
    """Track in-progress uploads for conflict detection (409 responses)."""

    __tablename__ = "upload_locks"

    # Surrogate primary key.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # SHA256 of the artifact content being uploaded (hex string).
    sha256_hash = Column(String(64), nullable=False)
    # Package the upload targets; lock rows die with their package.
    package_id = Column(
        UUID(as_uuid=True),
        ForeignKey("packages.id", ondelete="CASCADE"),
        nullable=False,
    )
    # When the lock was taken (naive-UTC default via datetime.utcnow).
    locked_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    # Identity of the client holding the lock.
    locked_by = Column(String(255), nullable=False)
    # Deadline after which the lock is presumably considered stale and
    # reclaimable -- TODO confirm against the upload/cleanup code path.
    expires_at = Column(DateTime(timezone=True), nullable=False)

    __table_args__ = (
        # Supports efficient scans for expired locks.
        Index("idx_upload_locks_expires_at", "expires_at"),
        # At most one active lock per (content hash, package) pair.
        Index(
            "idx_upload_locks_hash_package", "sha256_hash", "package_id", unique=True
        ),
    )
@@ -184,7 +262,11 @@ class Consumer(Base):
__tablename__ = "consumers"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False)
package_id = Column(
UUID(as_uuid=True),
ForeignKey("packages.id", ondelete="CASCADE"),
nullable=False,
)
project_url = Column(String(2048), nullable=False)
last_access = Column(DateTime(timezone=True), default=datetime.utcnow)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
@@ -201,7 +283,11 @@ class AccessPermission(Base):
__tablename__ = "access_permissions"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
project_id = Column(
UUID(as_uuid=True),
ForeignKey("projects.id", ondelete="CASCADE"),
nullable=False,
)
user_id = Column(String(255), nullable=False)
level = Column(String(20), nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
@@ -252,3 +338,51 @@ class AuditLog(Base):
Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"),
Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"),
)
class ProjectHistory(Base):
    """Track changes to project metadata over time.

    One row per changed field: stores the field name with its old and new
    values (serialized as text), plus who made the change and when.
    """

    __tablename__ = "project_history"

    # Surrogate primary key.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Project the change belongs to; history dies with the project.
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Name of the changed project attribute.
    field_name = Column(String(100), nullable=False)
    # Previous value as text; nullable (presumably for initially-unset fields).
    old_value = Column(Text)
    new_value = Column(Text)
    # When the change happened (naive-UTC default via datetime.utcnow).
    changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    changed_by = Column(String(255), nullable=False)

    __table_args__ = (
        Index("idx_project_history_project_id", "project_id"),
        Index("idx_project_history_changed_at", "changed_at"),
        # Composite index for "history of project X ordered by time" queries.
        Index("idx_project_history_project_changed_at", "project_id", "changed_at"),
    )
class PackageHistory(Base):
    """Track changes to package metadata over time.

    Mirrors ProjectHistory at package scope: one row per changed field,
    holding old/new values as text with actor and timestamp.
    """

    __tablename__ = "package_history"

    # Surrogate primary key.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Package the change belongs to; history dies with the package.
    package_id = Column(
        UUID(as_uuid=True),
        ForeignKey("packages.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Name of the changed package attribute.
    field_name = Column(String(100), nullable=False)
    # Previous value as text; nullable (presumably for initially-unset fields).
    old_value = Column(Text)
    new_value = Column(Text)
    # When the change happened (naive-UTC default via datetime.utcnow).
    changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    changed_by = Column(String(255), nullable=False)

    __table_args__ = (
        Index("idx_package_history_package_id", "package_id"),
        Index("idx_package_history_changed_at", "changed_at"),
        # Composite index for "history of package X ordered by time" queries.
        Index("idx_package_history_package_changed_at", "package_id", "changed_at"),
    )

File diff suppressed because it is too large Load Diff

View File

@@ -12,6 +12,7 @@ class PaginationMeta(BaseModel):
limit: int
total: int
total_pages: int
has_more: bool = False # True if there are more pages after current page
class PaginatedResponse(BaseModel, Generic[T]):
@@ -39,6 +40,13 @@ class ProjectResponse(BaseModel):
from_attributes = True
class ProjectUpdate(BaseModel):
    """Schema for updating a project.

    All fields optional: a None (omitted) field presumably means
    "leave unchanged" -- confirm against the PUT /projects handler.
    """

    # Replacement description text.
    description: Optional[str] = None
    # Replacement visibility flag.
    is_public: Optional[bool] = None
# Package format and platform enums
PACKAGE_FORMATS = [
"generic",
@@ -86,6 +94,14 @@ class PackageResponse(BaseModel):
from_attributes = True
class PackageUpdate(BaseModel):
    """Schema for updating a package.

    All fields optional: a None (omitted) field presumably means
    "leave unchanged" -- confirm against the package PUT handler.
    """

    # Replacement description text.
    description: Optional[str] = None
    # Package format; allowed values are presumably constrained elsewhere
    # (the packages table declares a check_package_format constraint).
    format: Optional[str] = None
    # Target platform; presumably constrained like check_package_platform.
    platform: Optional[str] = None
class TagSummary(BaseModel):
"""Lightweight tag info for embedding in package responses"""
@@ -189,6 +205,93 @@ class TagHistoryResponse(BaseModel):
from_attributes = True
class TagHistoryDetailResponse(BaseModel):
    """Tag history with artifact metadata for each version"""

    id: UUID
    tag_id: UUID
    # Name of the tag the history row belongs to.
    tag_name: str
    # Artifact the tag pointed at before the change; presumably None
    # for a 'create' event (old_artifact_id is nullable in the model).
    old_artifact_id: Optional[str]
    new_artifact_id: str
    changed_at: datetime
    changed_by: str
    # Artifact metadata for new artifact
    artifact_size: int
    artifact_original_name: Optional[str]
    artifact_content_type: Optional[str]

    class Config:
        # Allow construction directly from ORM row attributes.
        from_attributes = True
# Audit log schemas
class AuditLogResponse(BaseModel):
    """Audit log entry response"""

    id: UUID
    # Action in "{entity}.{action}" form (e.g. project.create, tag.update).
    action: str
    # Identifier of the affected resource.
    resource: str
    user_id: str
    # Free-form structured context recorded with the action, if any.
    details: Optional[Dict[str, Any]]
    timestamp: datetime
    source_ip: Optional[str]

    class Config:
        # Allow construction directly from ORM row attributes.
        from_attributes = True
# Upload history schemas
class UploadHistoryResponse(BaseModel):
    """Upload event with artifact details"""

    id: UUID
    artifact_id: str
    package_id: UUID
    # Denormalized names so clients need no extra lookups.
    package_name: str
    project_name: str
    original_name: Optional[str]
    # Tag assigned during the upload, if one was.
    tag_name: Optional[str]
    uploaded_at: datetime
    uploaded_by: str
    source_ip: Optional[str]
    # True when the content already existed and was deduplicated.
    deduplicated: bool
    # Artifact metadata
    artifact_size: int
    artifact_content_type: Optional[str]

    class Config:
        # Allow construction directly from ORM row attributes.
        from_attributes = True
# Artifact provenance schemas
class ArtifactProvenanceResponse(BaseModel):
    """Full provenance/history of an artifact"""

    artifact_id: str
    sha256: str
    # Size in bytes.
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    created_at: datetime
    created_by: str
    # Number of live references (tags) to this artifact.
    ref_count: int
    # First upload info
    first_uploaded_at: datetime
    first_uploaded_by: str
    # Usage statistics
    upload_count: int
    # References
    packages: List[Dict[str, Any]]  # List of {project_name, package_name, tag_names}
    tags: List[
        Dict[str, Any]
    ]  # List of {project_name, package_name, tag_name, created_at}
    # Upload history
    uploads: List[Dict[str, Any]]  # List of upload events
    class Config:
        # Allow construction directly from ORM row attributes.
        from_attributes = True
class ArtifactTagInfo(BaseModel):
"""Tag info for embedding in artifact responses"""
@@ -240,6 +343,44 @@ class PackageArtifactResponse(BaseModel):
from_attributes = True
class GlobalArtifactResponse(BaseModel):
    """Artifact with project/package context for global listing"""

    id: str
    sha256: str
    # Size in bytes.
    size: int
    content_type: Optional[str]
    original_name: Optional[str]
    created_at: datetime
    created_by: str
    # Format-specific metadata blob, when present.
    format_metadata: Optional[Dict[str, Any]] = None
    # Number of live references; defaults to 0 when not computed.
    ref_count: int = 0
    # Context from tags/packages
    projects: List[str] = []  # List of project names containing this artifact
    packages: List[str] = []  # List of "project/package" paths
    tags: List[str] = []  # List of "project/package:tag" references
    class Config:
        # Allow construction directly from ORM row attributes.
        from_attributes = True
class GlobalTagResponse(BaseModel):
    """Tag with project/package context for global listing"""

    id: UUID
    name: str
    artifact_id: str
    created_at: datetime
    created_by: str
    # Denormalized context so clients need no extra lookups.
    project_name: str
    package_name: str
    # Optional artifact metadata (bytes / MIME type) when joined in.
    artifact_size: Optional[int] = None
    artifact_content_type: Optional[str] = None

    class Config:
        # Allow construction directly from ORM row attributes.
        from_attributes = True
# Upload response
class UploadResponse(BaseModel):
artifact_id: str
@@ -254,6 +395,11 @@ class UploadResponse(BaseModel):
format_metadata: Optional[Dict[str, Any]] = None
deduplicated: bool = False
ref_count: int = 1 # Current reference count after this upload
# Enhanced metadata (Issue #19)
upload_id: Optional[UUID] = None # UUID of the upload record
content_type: Optional[str] = None
original_name: Optional[str] = None
created_at: Optional[datetime] = None
# Resumable upload schemas
@@ -440,6 +586,19 @@ class StorageStatsResponse(BaseModel):
storage_saved_bytes: int # Bytes saved through deduplication
class ConsistencyCheckResponse(BaseModel):
    """Result of S3/Database consistency check"""

    total_artifacts_checked: int
    orphaned_s3_objects: int  # Objects in S3 but not in DB
    missing_s3_objects: int  # Records in DB but not in S3
    size_mismatches: int  # Records where DB size != S3 size
    # True when all three counters above are presumably zero -- confirm
    # against the consistency-check endpoint implementation.
    healthy: bool
    orphaned_s3_keys: List[str] = []  # Limited list of orphaned S3 keys
    missing_s3_keys: List[str] = []  # Limited list of missing S3 keys
    size_mismatch_artifacts: List[Dict[str, Any]] = []  # Limited list of mismatches
class DeduplicationStatsResponse(BaseModel):
"""Deduplication effectiveness statistics"""

View File

@@ -6,7 +6,7 @@ from typing import List, Optional, Tuple
from sqlalchemy.orm import Session
import logging
from ..models import Artifact, Tag, Upload, Package
from ..models import Artifact, Tag
from ..repositories.artifact import ArtifactRepository
from ..repositories.tag import TagRepository
from ..storage import S3Storage
@@ -40,10 +40,14 @@ class ArtifactCleanupService:
artifact = self.artifact_repo.get_by_sha256(artifact_id)
if artifact:
artifact = self.artifact_repo.decrement_ref_count(artifact)
logger.info(f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}")
logger.info(
f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}"
)
return artifact
def on_tag_updated(self, old_artifact_id: str, new_artifact_id: str) -> Tuple[Optional[Artifact], Optional[Artifact]]:
def on_tag_updated(
self, old_artifact_id: str, new_artifact_id: str
) -> Tuple[Optional[Artifact], Optional[Artifact]]:
"""
Called when a tag is updated to point to a different artifact.
Decrements ref_count for old artifact, increments for new (if different).
@@ -58,13 +62,17 @@ class ArtifactCleanupService:
old_artifact = self.artifact_repo.get_by_sha256(old_artifact_id)
if old_artifact:
old_artifact = self.artifact_repo.decrement_ref_count(old_artifact)
logger.info(f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}")
logger.info(
f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}"
)
# Increment new artifact ref_count
new_artifact = self.artifact_repo.get_by_sha256(new_artifact_id)
if new_artifact:
new_artifact = self.artifact_repo.increment_ref_count(new_artifact)
logger.info(f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}")
logger.info(
f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}"
)
return old_artifact, new_artifact
@@ -84,11 +92,15 @@ class ArtifactCleanupService:
if artifact:
self.artifact_repo.decrement_ref_count(artifact)
affected_artifacts.append(tag.artifact_id)
logger.info(f"Decremented ref_count for artifact {tag.artifact_id} (package delete)")
logger.info(
f"Decremented ref_count for artifact {tag.artifact_id} (package delete)"
)
return affected_artifacts
def cleanup_orphaned_artifacts(self, batch_size: int = 100, dry_run: bool = False) -> List[str]:
def cleanup_orphaned_artifacts(
self, batch_size: int = 100, dry_run: bool = False
) -> List[str]:
"""
Find and delete artifacts with ref_count = 0.
@@ -116,7 +128,9 @@ class ArtifactCleanupService:
# Then delete from database
self.artifact_repo.delete(artifact)
deleted_ids.append(artifact.id)
logger.info(f"Deleted orphaned artifact from database: {artifact.id}")
logger.info(
f"Deleted orphaned artifact from database: {artifact.id}"
)
except Exception as e:
logger.error(f"Failed to delete artifact {artifact.id}: {e}")
@@ -128,10 +142,12 @@ class ArtifactCleanupService:
def get_orphaned_count(self) -> int:
"""Get count of artifacts with ref_count = 0."""
from sqlalchemy import func
return (
self.db.query(func.count(Artifact.id))
.filter(Artifact.ref_count == 0)
.scalar() or 0
.scalar()
or 0
)
def verify_ref_counts(self, fix: bool = False) -> List[dict]:
@@ -173,7 +189,9 @@ class ArtifactCleanupService:
if fix:
artifact.ref_count = max(actual_count, 1)
logger.warning(f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}")
logger.warning(
f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}"
)
if fix and mismatches:
self.db.commit()

View File

@@ -202,6 +202,9 @@ class StorageResult(NamedTuple):
md5: Optional[str] = None
sha1: Optional[str] = None
s3_etag: Optional[str] = None
already_existed: bool = (
False # True if artifact was deduplicated (S3 object already existed)
)
class S3StorageUnavailableError(StorageError):
@@ -354,6 +357,7 @@ class S3Storage:
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
already_existed=exists,
)
def _store_multipart(self, file: BinaryIO, content_length: int) -> StorageResult:
@@ -433,6 +437,7 @@ class S3Storage:
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
already_existed=True,
)
# Seek back to start for upload
@@ -486,6 +491,7 @@ class S3Storage:
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
already_existed=False,
)
except Exception as e:
@@ -535,6 +541,7 @@ class S3Storage:
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
already_existed=True,
)
# Upload based on size
@@ -615,6 +622,7 @@ class S3Storage:
md5=md5_hash,
sha1=sha1_hash,
s3_etag=s3_etag,
already_existed=False,
)
def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:

View File

@@ -4,15 +4,14 @@ Test configuration and fixtures for Orchard backend tests.
This module provides:
- Database fixtures with test isolation
- Mock S3 storage using moto
- Test data factories for common scenarios
- Shared pytest fixtures
"""
import os
import pytest
import hashlib
from typing import Generator, BinaryIO
from unittest.mock import MagicMock, patch
import io
from typing import Generator
from unittest.mock import MagicMock
# Set test environment defaults before importing app modules
# Use setdefault to NOT override existing env vars (from docker-compose)
@@ -26,54 +25,27 @@ os.environ.setdefault("ORCHARD_S3_BUCKET", "test-bucket")
os.environ.setdefault("ORCHARD_S3_ACCESS_KEY_ID", "test")
os.environ.setdefault("ORCHARD_S3_SECRET_ACCESS_KEY", "test")
# =============================================================================
# Test Data Factories
# =============================================================================
def create_test_file(content: bytes = None, size: int = 1024) -> io.BytesIO:
"""
Create a test file with known content.
Args:
content: Specific content to use, or None to generate random-ish content
size: Size of generated content if content is None
Returns:
BytesIO object with the content
"""
if content is None:
content = os.urandom(size)
return io.BytesIO(content)
def compute_sha256(content: bytes) -> str:
"""Compute SHA256 hash of content as lowercase hex string."""
return hashlib.sha256(content).hexdigest()
def compute_md5(content: bytes) -> str:
"""Compute MD5 hash of content as lowercase hex string."""
return hashlib.md5(content).hexdigest()
def compute_sha1(content: bytes) -> str:
"""Compute SHA1 hash of content as lowercase hex string."""
return hashlib.sha1(content).hexdigest()
# Known test data with pre-computed hashes
TEST_CONTENT_HELLO = b"Hello, World!"
TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4"
TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01"
TEST_CONTENT_EMPTY = b""
# Note: Empty content should be rejected by the storage layer
TEST_CONTENT_BINARY = bytes(range(256))
TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY)
# Re-export factory functions for backward compatibility
from tests.factories import (
create_test_file,
compute_sha256,
compute_md5,
compute_sha1,
upload_test_file,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_MD5_HELLO,
TEST_SHA1_HELLO,
TEST_CONTENT_EMPTY,
TEST_CONTENT_BINARY,
TEST_HASH_BINARY,
get_s3_client,
get_s3_bucket,
list_s3_objects_by_hash,
count_s3_objects_by_prefix,
s3_object_exists,
delete_s3_object_by_hash,
)
# =============================================================================
@@ -289,126 +261,3 @@ def test_content():
content = f"test-content-{uuid.uuid4().hex}".encode()
sha256 = compute_sha256(content)
return (content, sha256)
def upload_test_file(
client,
project: str,
package: str,
content: bytes,
filename: str = "test.bin",
tag: str = None,
) -> dict:
"""
Helper function to upload a test file.
Returns the upload response as a dict.
"""
files = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
data = {}
if tag:
data["tag"] = tag
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data=data if data else None,
)
assert response.status_code == 200, f"Upload failed: {response.text}"
return response.json()
# =============================================================================
# S3 Direct Access Helpers (for integration tests)
# =============================================================================
def get_s3_client():
"""
Create a boto3 S3 client for direct S3 access in integration tests.
Uses environment variables for configuration (same as the app).
Note: When running in container, S3 endpoint should be 'minio:9000' not 'localhost:9000'.
"""
import boto3
from botocore.config import Config
config = Config(s3={"addressing_style": "path"})
# Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally)
endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000")
return boto3.client(
"s3",
endpoint_url=endpoint,
region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"),
aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"),
aws_secret_access_key=os.environ.get(
"ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin"
),
config=config,
)
def get_s3_bucket():
"""Get the S3 bucket name from environment."""
return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts")
def list_s3_objects_by_hash(sha256_hash: str) -> list:
"""
List S3 objects that match a specific SHA256 hash.
Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern.
Returns list of matching object keys.
"""
client = get_s3_client()
bucket = get_s3_bucket()
prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
return []
return [obj["Key"] for obj in response["Contents"]]
def count_s3_objects_by_prefix(prefix: str) -> int:
"""
Count S3 objects with a given prefix.
Useful for checking if duplicate uploads created multiple objects.
"""
client = get_s3_client()
bucket = get_s3_bucket()
response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
return 0
return len(response["Contents"])
def s3_object_exists(sha256_hash: str) -> bool:
"""
Check if an S3 object exists for a given SHA256 hash.
"""
objects = list_s3_objects_by_hash(sha256_hash)
return len(objects) > 0
def delete_s3_object_by_hash(sha256_hash: str) -> bool:
"""
Delete an S3 object by its SHA256 hash (for test cleanup).
"""
client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
try:
client.delete_object(Bucket=bucket, Key=s3_key)
return True
except Exception:
return False

288
backend/tests/factories.py Normal file
View File

@@ -0,0 +1,288 @@
"""
Test data factories for Orchard backend tests.
This module provides factory functions for creating test data,
including test files, pre-computed hashes, and helper utilities.
"""
import hashlib
import io
import os
import uuid
from typing import Optional
# =============================================================================
# Hash Computation Utilities
# =============================================================================
def compute_sha256(content: bytes) -> str:
    """Return the SHA256 digest of ``content`` as a lowercase hex string."""
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
def compute_md5(content: bytes) -> str:
    """Return the MD5 digest of ``content`` as a lowercase hex string."""
    hasher = hashlib.md5()
    hasher.update(content)
    return hasher.hexdigest()
def compute_sha1(content: bytes) -> str:
    """Return the SHA1 digest of ``content`` as a lowercase hex string."""
    hasher = hashlib.sha1()
    hasher.update(content)
    return hasher.hexdigest()
# =============================================================================
# Test File Factories
# =============================================================================
def create_test_file(content: Optional[bytes] = None, size: int = 1024) -> io.BytesIO:
    """
    Create a test file with known content.

    Args:
        content: Specific content to use, or None to generate random-ish content
        size: Size of generated content if content is None

    Returns:
        BytesIO object with the content
    """
    payload = os.urandom(size) if content is None else content
    return io.BytesIO(payload)
def create_unique_content(prefix: str = "test-content") -> tuple[bytes, str]:
    """
    Create unique test content with its SHA256 hash.

    Args:
        prefix: Prefix for the content string

    Returns:
        Tuple of (content_bytes, sha256_hash)
    """
    # uuid4 hex suffix guarantees uniqueness across calls within a test run.
    payload = f"{prefix}-{uuid.uuid4().hex}".encode()
    return payload, compute_sha256(payload)
# =============================================================================
# Known Test Data (Pre-computed hashes for deterministic tests)
# =============================================================================
# "Hello, World!" fixture with pre-computed digests so tests can assert
# exact hash values without recomputing them at collection time.
TEST_CONTENT_HELLO = b"Hello, World!"
TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"  # SHA256
TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4"
TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01"
TEST_CONTENT_EMPTY = b""
# Note: Empty content should be rejected by the storage layer
# All 256 single-byte values; exercises binary-safe (non-UTF8) handling.
TEST_CONTENT_BINARY = bytes(range(256))
TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY)
# =============================================================================
# API Test Helpers
# =============================================================================
def upload_test_file(
    client,
    project: str,
    package: str,
    content: bytes,
    filename: str = "test.bin",
    tag: Optional[str] = None,
) -> dict:
    """
    Helper function to upload a test file via the API.

    Args:
        client: HTTP client (httpx or TestClient)
        project: Project name
        package: Package name
        content: File content as bytes
        filename: Original filename
        tag: Optional tag to assign

    Returns:
        The upload response as a dict
    """
    upload = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
    # Only send a form body when a tag was requested; an empty dict would
    # still change the request encoding, so fall back to None.
    form = {"tag": tag} if tag else {}
    response = client.post(
        f"/api/v1/project/{project}/{package}/upload",
        files=upload,
        data=form or None,
    )
    assert response.status_code == 200, f"Upload failed: {response.text}"
    return response.json()
# =============================================================================
# Project/Package Factories
# =============================================================================
def create_test_project(client, unique_id: Optional[str] = None) -> str:
    """
    Create a test project via the API.

    Args:
        client: HTTP client
        unique_id: Unique identifier for the project name

    Returns:
        Project name
    """
    suffix = uuid.uuid4().hex[:8] if unique_id is None else unique_id
    project_name = f"test-project-{suffix}"
    response = client.post(
        "/api/v1/projects",
        json={"name": project_name, "description": "Test project", "is_public": True},
    )
    assert response.status_code == 200, f"Failed to create project: {response.text}"
    return project_name
def create_test_package(client, project: str, unique_id: Optional[str] = None) -> str:
    """
    Create a test package via the API.

    Args:
        client: HTTP client
        project: Project name
        unique_id: Unique identifier for the package name

    Returns:
        Package name
    """
    suffix = uuid.uuid4().hex[:8] if unique_id is None else unique_id
    package_name = f"test-package-{suffix}"
    response = client.post(
        f"/api/v1/project/{project}/packages",
        json={"name": package_name, "description": "Test package"},
    )
    assert response.status_code == 200, f"Failed to create package: {response.text}"
    return package_name
def delete_test_project(client, project: str) -> None:
    """
    Delete a test project (cleanup helper).

    Args:
        client: HTTP client
        project: Project name to delete
    """
    try:
        client.delete(f"/api/v1/projects/{project}")
    except Exception:
        # Teardown is best-effort; a failed delete must not fail the test.
        pass
# =============================================================================
# S3 Test Helpers
# =============================================================================
def get_s3_client():
    """
    Create a boto3 S3 client for direct S3 access in integration tests.

    Uses environment variables for configuration (same as the app).

    Note: When running in container, S3 endpoint should be 'minio:9000'
    not 'localhost:9000'.
    """
    import boto3
    from botocore.config import Config

    env = os.environ.get
    return boto3.client(
        "s3",
        # Same endpoint as the app (minio:9000 in container, localhost:9000 locally)
        endpoint_url=env("ORCHARD_S3_ENDPOINT", "http://minio:9000"),
        region_name=env("ORCHARD_S3_REGION", "us-east-1"),
        aws_access_key_id=env("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"),
        aws_secret_access_key=env("ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin"),
        # MinIO requires path-style addressing (no virtual-hosted buckets).
        config=Config(s3={"addressing_style": "path"}),
    )
def get_s3_bucket() -> str:
    """Get the S3 bucket name from environment (default: orchard-artifacts)."""
    bucket = os.environ.get("ORCHARD_S3_BUCKET")
    return "orchard-artifacts" if bucket is None else bucket
def list_s3_objects_by_hash(sha256_hash: str) -> list:
    """
    List S3 objects that match a specific SHA256 hash.

    Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern.

    Returns list of matching object keys.
    """
    prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
    listing = get_s3_client().list_objects_v2(
        Bucket=get_s3_bucket(), Prefix=prefix
    )
    return [entry["Key"] for entry in listing.get("Contents", [])]
def count_s3_objects_by_prefix(prefix: str) -> int:
    """
    Count S3 objects with a given prefix.

    Useful for checking if duplicate uploads created multiple objects.

    Args:
        prefix: Key prefix to match (e.g. "fruits/ab/cd/").

    Returns:
        Total number of objects whose key starts with ``prefix``.
    """
    client = get_s3_client()
    bucket = get_s3_bucket()
    # list_objects_v2 returns at most 1000 keys per call; paginate so the
    # count stays correct even when a prefix matches more than one page.
    paginator = client.get_paginator("list_objects_v2")
    total = 0
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        total += len(page.get("Contents", []))
    return total
def s3_object_exists(sha256_hash: str) -> bool:
    """
    Check if an S3 object exists for a given SHA256 hash.

    Returns True when at least one key matches the hash-derived prefix.
    """
    return bool(list_s3_objects_by_hash(sha256_hash))
def delete_s3_object_by_hash(sha256_hash: str) -> bool:
    """
    Delete an S3 object by its SHA256 hash (for test cleanup).

    Returns True on success, False if the delete call failed for any reason.
    """
    key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
    try:
        get_s3_client().delete_object(Bucket=get_s3_bucket(), Key=key)
    except Exception:
        # Best-effort cleanup: never fail a test during teardown.
        return False
    return True

View File

View File

@@ -0,0 +1,638 @@
"""
Integration tests for artifact API endpoints.
Tests cover:
- Artifact retrieval by ID
- Artifact stats endpoint
- Artifact provenance/history
- Artifact uploads listing
- Garbage collection endpoints
- Orphaned artifacts management
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestArtifactRetrieval:
    """Tests for artifact retrieval endpoints."""

    @pytest.mark.integration
    def test_get_artifact_by_id(self, integration_client, test_package):
        """Test retrieving an artifact by its SHA256 ID."""
        project_name, package_name = test_package
        content = b"artifact retrieval test"
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project_name, package_name, content, tag="v1"
        )
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        data = response.json()
        # Artifacts are content-addressed: id and sha256 are the same digest.
        assert data["id"] == expected_hash
        assert data["sha256"] == expected_hash
        assert data["size"] == len(content)
        assert "ref_count" in data
        assert "created_at" in data

    @pytest.mark.integration
    def test_get_nonexistent_artifact(self, integration_client):
        """Test getting a non-existent artifact returns 404."""
        fake_hash = "a" * 64  # well-formed SHA256 that matches no artifact
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_includes_tags(self, integration_client, test_package):
        """Test artifact response includes tags pointing to it."""
        project_name, package_name = test_package
        content = b"artifact with tags test"
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project_name, package_name, content, tag="tagged-v1"
        )
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        data = response.json()
        assert "tags" in data
        assert len(data["tags"]) >= 1
        tag = data["tags"][0]
        assert "name" in tag
        assert "package_name" in tag
        assert "project_name" in tag
class TestArtifactStats:
    """Tests for artifact statistics endpoint."""

    @pytest.mark.integration
    def test_artifact_stats_returns_valid_response(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact stats returns expected fields."""
        project, package = test_package
        content = f"artifact stats test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project, package, content, tag=f"art-{unique_test_id}"
        )
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
        assert response.status_code == 200
        data = response.json()
        assert "artifact_id" in data
        assert "sha256" in data
        assert "size" in data
        assert "ref_count" in data
        assert "storage_savings" in data
        assert "tags" in data
        assert "projects" in data
        assert "packages" in data

    @pytest.mark.integration
    def test_artifact_stats_not_found(self, integration_client):
        """Test artifact stats returns 404 for non-existent artifact."""
        fake_hash = "0" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_stats_shows_correct_projects(
        self, integration_client, unique_test_id
    ):
        """Test artifact stats shows all projects using the artifact."""
        content = f"multi-project artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        proj1 = f"art-stats-a-{unique_test_id}"
        proj2 = f"art-stats-b-{unique_test_id}"
        try:
            # Create projects and packages
            integration_client.post(
                "/api/v1/projects",
                json={"name": proj1, "description": "Test", "is_public": True},
            )
            integration_client.post(
                "/api/v1/projects",
                json={"name": proj2, "description": "Test", "is_public": True},
            )
            integration_client.post(
                f"/api/v1/project/{proj1}/packages",
                json={"name": "pkg", "description": "Test"},
            )
            integration_client.post(
                f"/api/v1/project/{proj2}/packages",
                json={"name": "pkg", "description": "Test"},
            )
            # Upload same content to both projects
            upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
            upload_test_file(integration_client, proj2, "pkg", content, tag="v1")
            # Check artifact stats
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
            assert response.status_code == 200
            data = response.json()
            # Deduplicated artifact must be attributed to BOTH projects.
            assert len(data["projects"]) == 2
            assert proj1 in data["projects"]
            assert proj2 in data["projects"]
        finally:
            # Always clean up the ad-hoc projects so they don't leak into other tests.
            integration_client.delete(f"/api/v1/projects/{proj1}")
            integration_client.delete(f"/api/v1/projects/{proj2}")
class TestArtifactProvenance:
    """Tests for artifact provenance/history endpoint."""

    @pytest.mark.integration
    def test_artifact_history_returns_200(self, integration_client, test_package):
        """Test artifact history endpoint returns 200."""
        project_name, package_name = test_package
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"provenance test content",
            "prov.txt",
        )
        # The upload response carries the content-addressed artifact id.
        artifact_id = upload_result["artifact_id"]
        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
        assert response.status_code == 200

    @pytest.mark.integration
    def test_artifact_history_has_required_fields(
        self, integration_client, test_package
    ):
        """Test artifact history has all required fields."""
        project_name, package_name = test_package
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"provenance fields test",
            "fields.txt",
        )
        artifact_id = upload_result["artifact_id"]
        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
        assert response.status_code == 200
        data = response.json()
        assert "artifact_id" in data
        assert "sha256" in data
        assert "size" in data
        assert "created_at" in data
        assert "created_by" in data
        assert "ref_count" in data
        assert "first_uploaded_at" in data
        assert "first_uploaded_by" in data
        assert "upload_count" in data
        assert "packages" in data
        assert "tags" in data
        assert "uploads" in data

    @pytest.mark.integration
    def test_artifact_history_not_found(self, integration_client):
        """Test non-existent artifact returns 404."""
        fake_hash = "b" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/history")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_history_with_tag(self, integration_client, test_package):
        """Test artifact history includes tag information when tagged."""
        project_name, package_name = test_package
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tagged provenance test",
            "tagged.txt",
            tag="v1.0.0",
        )
        artifact_id = upload_result["artifact_id"]
        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
        assert response.status_code == 200
        data = response.json()
        assert len(data["tags"]) >= 1
        tag = data["tags"][0]
        assert "project_name" in tag
        assert "package_name" in tag
        assert "tag_name" in tag
class TestArtifactUploads:
    """Tests for artifact uploads listing endpoint."""

    @pytest.mark.integration
    def test_artifact_uploads_returns_200(self, integration_client, test_package):
        """Test artifact uploads endpoint returns 200."""
        project_name, package_name = test_package
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact upload test",
            "artifact.txt",
        )
        artifact_id = upload_result["artifact_id"]
        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/uploads")
        assert response.status_code == 200
        data = response.json()
        # The endpoint is paginated and must include the upload we just made.
        assert "items" in data
        assert "pagination" in data
        assert len(data["items"]) >= 1

    @pytest.mark.integration
    def test_artifact_uploads_not_found(self, integration_client):
        """Test non-existent artifact returns 404."""
        fake_hash = "a" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/uploads")
        assert response.status_code == 404
class TestOrphanedArtifacts:
    """Tests for orphaned artifacts management."""

    @pytest.mark.integration
    def test_list_orphaned_artifacts_returns_list(self, integration_client):
        """Test orphaned artifacts endpoint returns a list."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts")
        assert response.status_code == 200
        assert isinstance(response.json(), list)

    @pytest.mark.integration
    def test_orphaned_artifact_has_required_fields(self, integration_client):
        """Test orphaned artifact response has required fields."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
        assert response.status_code == 200
        data = response.json()
        # Shape check only runs when some orphan exists in the shared DB.
        if len(data) > 0:
            artifact = data[0]
            assert "id" in artifact
            assert "size" in artifact
            assert "created_at" in artifact
            assert "created_by" in artifact
            assert "original_name" in artifact

    @pytest.mark.integration
    def test_orphaned_artifacts_respects_limit(self, integration_client):
        """Test orphaned artifacts endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
        assert response.status_code == 200
        assert len(response.json()) <= 5

    @pytest.mark.integration
    def test_artifact_becomes_orphaned_when_tag_deleted(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact appears in orphaned list after tag is deleted."""
        project, package = test_package
        content = f"orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload with tag
        upload_test_file(integration_client, project, package, content, tag="temp-tag")
        # Verify not in orphaned list
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        orphaned_ids = [a["id"] for a in response.json()]
        assert expected_hash not in orphaned_ids
        # Delete the tag
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag")
        # Verify now in orphaned list
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        orphaned_ids = [a["id"] for a in response.json()]
        assert expected_hash in orphaned_ids
class TestGarbageCollection:
    """Tests for garbage collection endpoint."""

    @pytest.mark.integration
    def test_garbage_collect_dry_run_returns_response(self, integration_client):
        """Test garbage collection dry run returns valid response."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200
        data = response.json()
        assert "artifacts_deleted" in data
        assert "bytes_freed" in data
        assert "artifact_ids" in data
        assert "dry_run" in data
        assert data["dry_run"] is True

    @pytest.mark.integration
    def test_garbage_collect_dry_run_doesnt_delete(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection dry run doesn't actually delete artifacts."""
        project, package = test_package
        content = f"dry run test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload and delete tag to create orphan
        upload_test_file(integration_client, project, package, content, tag="dry-run")
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")
        # Verify artifact exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        # Run garbage collection in dry-run mode
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200
        # Dry run must REPORT the orphan as collectible...
        assert expected_hash in gc_response.json()["artifact_ids"]
        # ...but must NOT have removed it: verify artifact STILL exists.
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

    @pytest.mark.integration
    def test_garbage_collect_preserves_referenced_artifacts(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection doesn't delete artifacts with ref_count > 0."""
        project, package = test_package
        content = f"preserve test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload with tag (ref_count=1)
        upload_test_file(integration_client, project, package, content, tag="keep-this")
        # Verify artifact exists with ref_count=1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == 1
        # Run garbage collection (dry_run to not affect other tests)
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200
        # Verify artifact was NOT in delete list
        assert expected_hash not in gc_response.json()["artifact_ids"]
        # Verify artifact still exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_garbage_collect_respects_limit(self, integration_client):
        """Test garbage collection respects limit parameter."""
        response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=5"
        )
        assert response.status_code == 200
        assert response.json()["artifacts_deleted"] <= 5

    @pytest.mark.integration
    def test_garbage_collect_returns_bytes_freed(self, integration_client):
        """Test garbage collection returns accurate bytes_freed."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200
        data = response.json()
        assert data["bytes_freed"] >= 0
        assert isinstance(data["bytes_freed"], int)
class TestGlobalUploads:
    """Tests for global uploads endpoint."""

    @pytest.mark.integration
    def test_global_uploads_returns_200(self, integration_client):
        """Test global uploads endpoint returns 200."""
        response = integration_client.get("/api/v1/uploads")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_uploads_pagination(self, integration_client):
        """Test global uploads endpoint respects pagination."""
        response = integration_client.get("/api/v1/uploads?limit=5&page=1")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5
        assert data["pagination"]["page"] == 1

    @pytest.mark.integration
    def test_global_uploads_filter_by_project(self, integration_client, test_package):
        """Test filtering global uploads by project name."""
        project_name, package_name = test_package
        # Upload a file so at least one row matches the filter.
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"global filter test",
            "global.txt",
        )
        response = integration_client.get(f"/api/v1/uploads?project={project_name}")
        assert response.status_code == 200
        data = response.json()
        # Every returned row must belong to the filtered project.
        for item in data["items"]:
            assert item["project_name"] == project_name

    @pytest.mark.integration
    def test_global_uploads_has_more_field(self, integration_client):
        """Test pagination includes has_more field."""
        response = integration_client.get("/api/v1/uploads?limit=1")
        assert response.status_code == 200
        data = response.json()
        assert "has_more" in data["pagination"]
        assert isinstance(data["pagination"]["has_more"], bool)
class TestGlobalArtifacts:
    """Tests for global artifacts endpoint."""

    @pytest.mark.integration
    def test_global_artifacts_returns_200(self, integration_client):
        """Test global artifacts endpoint returns 200."""
        response = integration_client.get("/api/v1/artifacts")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_artifacts_pagination(self, integration_client):
        """Test global artifacts endpoint respects pagination."""
        response = integration_client.get("/api/v1/artifacts?limit=5&page=1")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_artifacts_filter_by_size(self, integration_client):
        """Test filtering global artifacts by size range."""
        response = integration_client.get(
            "/api/v1/artifacts?min_size=1&max_size=1000000"
        )
        assert response.status_code == 200
        data = response.json()
        # Size bounds are inclusive on both ends.
        for item in data["items"]:
            assert 1 <= item["size"] <= 1000000

    @pytest.mark.integration
    def test_global_artifacts_sort_by_size(self, integration_client):
        """Test sorting global artifacts by size."""
        response = integration_client.get("/api/v1/artifacts?sort=size&order=desc")
        assert response.status_code == 200
        data = response.json()
        # Ordering is only observable with at least two rows.
        if len(data["items"]) > 1:
            sizes = [item["size"] for item in data["items"]]
            assert sizes == sorted(sizes, reverse=True)

    @pytest.mark.integration
    def test_global_artifacts_invalid_sort_returns_400(self, integration_client):
        """Test invalid sort field returns 400."""
        response = integration_client.get("/api/v1/artifacts?sort=invalid_field")
        assert response.status_code == 400
class TestGlobalTags:
    """Tests for global tags endpoint."""

    @pytest.mark.integration
    def test_global_tags_returns_200(self, integration_client):
        """Test global tags endpoint returns 200."""
        response = integration_client.get("/api/v1/tags")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_tags_pagination(self, integration_client):
        """Test global tags endpoint respects pagination."""
        response = integration_client.get("/api/v1/tags?limit=5&page=1")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_tags_has_project_context(self, integration_client):
        """Test global tags response includes project/package context."""
        response = integration_client.get("/api/v1/tags?limit=1")
        assert response.status_code == 200
        data = response.json()
        # Context fields only checkable when the shared DB has at least one tag.
        if len(data["items"]) > 0:
            item = data["items"][0]
            assert "project_name" in item
            assert "package_name" in item
            assert "artifact_id" in item

    @pytest.mark.integration
    def test_global_tags_search_with_wildcard(self, integration_client):
        """Test global tags search supports wildcards."""
        response = integration_client.get("/api/v1/tags?search=v*")
        assert response.status_code == 200
        # Just verify it doesn't error; results may vary
class TestAuditLogs:
    """Tests for global audit logs endpoint."""

    @pytest.mark.integration
    def test_list_audit_logs_returns_valid_response(self, integration_client):
        """Test audit logs endpoint returns valid paginated response."""
        response = integration_client.get("/api/v1/audit-logs")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data
        assert isinstance(data["items"], list)
        pagination = data["pagination"]
        assert "page" in pagination
        assert "limit" in pagination
        assert "total" in pagination
        assert "total_pages" in pagination

    @pytest.mark.integration
    def test_audit_logs_respects_pagination(self, integration_client):
        """Test audit logs endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/audit-logs?limit=5")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_audit_logs_filter_by_action(self, integration_client, test_package):
        """Test filtering audit logs by action type."""
        # test_package fixture guarantees at least one project.create event exists.
        project_name, package_name = test_package
        response = integration_client.get("/api/v1/audit-logs?action=project.create")
        assert response.status_code == 200
        data = response.json()
        for item in data["items"]:
            assert item["action"] == "project.create"

    @pytest.mark.integration
    def test_audit_log_entry_has_required_fields(
        self, integration_client, test_project
    ):
        """Test audit log entries have all required fields."""
        response = integration_client.get("/api/v1/audit-logs?limit=10")
        assert response.status_code == 200
        data = response.json()
        if data["items"]:
            item = data["items"][0]
            assert "id" in item
            assert "action" in item
            assert "resource" in item
            assert "user_id" in item
            assert "timestamp" in item

View File

@@ -0,0 +1,345 @@
"""
Integration tests for package API endpoints.
Tests cover:
- Package CRUD operations
- Package listing with pagination, search, filtering
- Package stats endpoint
- Package-level audit logs
- Cascade delete behavior
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestPackageCRUD:
    """Tests for package create, read, update, delete operations."""

    @pytest.mark.integration
    def test_create_package(self, integration_client, test_project, unique_test_id):
        """Test creating a new package."""
        package_name = f"test-create-pkg-{unique_test_id}"
        response = integration_client.post(
            f"/api/v1/project/{test_project}/packages",
            json={
                "name": package_name,
                "description": "Test package",
                "format": "npm",
                "platform": "linux",
            },
        )
        assert response.status_code == 200
        data = response.json()
        # Response must echo back every field that was submitted.
        assert data["name"] == package_name
        assert data["description"] == "Test package"
        assert data["format"] == "npm"
        assert data["platform"] == "linux"

    @pytest.mark.integration
    def test_get_package(self, integration_client, test_package):
        """Test getting a package by name."""
        project_name, package_name = test_package
        response = integration_client.get(
            f"/api/v1/project/{project_name}/packages/{package_name}"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["name"] == package_name

    @pytest.mark.integration
    def test_get_nonexistent_package(self, integration_client, test_project):
        """Test getting a non-existent package returns 404."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages/nonexistent-pkg"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_list_packages(self, integration_client, test_package):
        """Test listing packages includes created package."""
        project_name, package_name = test_package
        response = integration_client.get(f"/api/v1/project/{project_name}/packages")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data
        package_names = [p["name"] for p in data["items"]]
        assert package_name in package_names

    @pytest.mark.integration
    def test_delete_package(self, integration_client, test_project, unique_test_id):
        """Test deleting a package."""
        package_name = f"test-delete-pkg-{unique_test_id}"
        # Create package
        integration_client.post(
            f"/api/v1/project/{test_project}/packages",
            json={"name": package_name, "description": "To be deleted"},
        )
        # Delete package
        response = integration_client.delete(
            f"/api/v1/project/{test_project}/packages/{package_name}"
        )
        assert response.status_code == 204
        # Verify deleted
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages/{package_name}"
        )
        assert response.status_code == 404
class TestPackageListingFilters:
    """Tests for package listing with filters and pagination."""

    @pytest.mark.integration
    def test_packages_pagination(self, integration_client, test_project):
        """Test package listing respects pagination parameters."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages?page=1&limit=5"
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5
        assert data["pagination"]["page"] == 1

    @pytest.mark.integration
    def test_packages_filter_by_format(
        self, integration_client, test_project, unique_test_id
    ):
        """Test package filtering by format."""
        # Create a package with specific format so the filter has a match.
        package_name = f"npm-pkg-{unique_test_id}"
        integration_client.post(
            f"/api/v1/project/{test_project}/packages",
            json={"name": package_name, "format": "npm"},
        )
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages?format=npm"
        )
        assert response.status_code == 200
        data = response.json()
        for pkg in data["items"]:
            assert pkg["format"] == "npm"

    @pytest.mark.integration
    def test_packages_filter_by_platform(
        self, integration_client, test_project, unique_test_id
    ):
        """Test package filtering by platform."""
        # Create a package with specific platform so the filter has a match.
        package_name = f"linux-pkg-{unique_test_id}"
        integration_client.post(
            f"/api/v1/project/{test_project}/packages",
            json={"name": package_name, "platform": "linux"},
        )
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages?platform=linux"
        )
        assert response.status_code == 200
        data = response.json()
        for pkg in data["items"]:
            assert pkg["platform"] == "linux"
class TestPackageStats:
    """Tests for package statistics endpoint."""

    @pytest.mark.integration
    def test_package_stats_returns_valid_response(
        self, integration_client, test_package
    ):
        """Test package stats endpoint returns expected fields."""
        project, package = test_package
        response = integration_client.get(
            f"/api/v1/project/{project}/packages/{package}/stats"
        )
        assert response.status_code == 200
        data = response.json()
        assert "package_id" in data
        assert "package_name" in data
        assert "project_name" in data
        assert "tag_count" in data
        assert "artifact_count" in data
        assert "total_size_bytes" in data
        assert "upload_count" in data
        assert "deduplicated_uploads" in data
        assert "storage_saved_bytes" in data
        assert "deduplication_ratio" in data

    @pytest.mark.integration
    def test_package_stats_not_found(self, integration_client, test_project):
        """Test package stats returns 404 for non-existent package."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
        )
        assert response.status_code == 404
class TestPackageAuditLogs:
    """Tests for package-level audit logs endpoint."""

    @pytest.mark.integration
    def test_package_audit_logs_returns_200(self, integration_client, test_package):
        """Test package audit logs endpoint returns 200."""
        project_name, package_name = test_package
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/audit-logs"
        )
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_package_audit_logs_project_not_found(self, integration_client):
        """Test non-existent project returns 404."""
        response = integration_client.get(
            "/api/v1/project/nonexistent/nonexistent/audit-logs"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_package_audit_logs_package_not_found(
        self, integration_client, test_project
    ):
        """Test non-existent package returns 404."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-package/audit-logs"
        )
        assert response.status_code == 404
class TestPackageCascadeDelete:
    """Tests for cascade delete behavior when deleting packages."""

    @pytest.mark.integration
    def test_ref_count_decrements_on_package_delete(
        self, integration_client, unique_test_id
    ):
        """Deleting a package releases every tag reference it held.

        Uploads identical content under three tags (so the shared artifact's
        ref_count reaches 3), deletes the package, and verifies the artifact's
        ref_count drops back to 0.
        """
        project_name = f"cascade-pkg-{unique_test_id}"
        package_name = f"test-pkg-{unique_test_id}"
        try:
            # Create project
            response = integration_client.post(
                "/api/v1/projects",
                json={
                    "name": project_name,
                    "description": "Test project",
                    "is_public": True,
                },
            )
            assert response.status_code == 200
            # Create package
            response = integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": package_name, "description": "Test package"},
            )
            assert response.status_code == 200
            # Upload the same content under three distinct tags
            content = f"cascade delete test {unique_test_id}".encode()
            expected_hash = compute_sha256(content)
            for tag in ("v1", "v2", "v3"):
                upload_test_file(
                    integration_client, project_name, package_name, content, tag=tag
                )
            # Each tag holds one reference to the deduplicated artifact
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
            assert response.json()["ref_count"] == 3
            # Delete the package
            delete_response = integration_client.delete(
                f"/api/v1/project/{project_name}/packages/{package_name}"
            )
            assert delete_response.status_code == 204
            # All three references should now be released
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
            assert response.json()["ref_count"] == 0
        finally:
            # Previously cleanup only ran on the success path, leaking the
            # project whenever an assertion above failed; try/finally keeps
            # reruns from colliding with leftover state.
            integration_client.delete(f"/api/v1/projects/{project_name}")
class TestPackageUploads:
    """Tests for the package-level uploads endpoint (upload event history)."""
    @pytest.mark.integration
    def test_package_uploads_returns_200(self, integration_client, test_package):
        """An existing package exposes a paginated uploads listing."""
        project_name, package_name = test_package
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/uploads"
        )
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data
    @pytest.mark.integration
    def test_package_uploads_after_upload(self, integration_client, test_package):
        """A file upload produces a record with the expected fields."""
        project_name, package_name = test_package
        # Upload a file
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"test upload content",
            "test.txt",
        )
        assert upload_result["artifact_id"]
        # Check uploads endpoint
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/uploads"
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) >= 1
        # Verify upload record fields
        # NOTE(review): items[0] is assumed to be a representative record; if
        # the endpoint's default sort is not newest-first this may inspect a
        # record from another test sharing the fixture — confirm default order.
        upload = data["items"][0]
        assert "artifact_id" in upload
        assert "package_name" in upload
        assert "project_name" in upload
        assert "uploaded_at" in upload
        assert "uploaded_by" in upload
    @pytest.mark.integration
    def test_package_uploads_project_not_found(self, integration_client):
        """Uploads under an unknown project yield HTTP 404."""
        response = integration_client.get(
            "/api/v1/project/nonexistent/nonexistent/uploads"
        )
        assert response.status_code == 404

View File

@@ -0,0 +1,322 @@
"""
Integration tests for project API endpoints.
Tests cover:
- Project CRUD operations
- Project listing with pagination, search, and sorting
- Project stats endpoint
- Project-level audit logs
- Cascade delete behavior
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestProjectCRUD:
    """Tests for project create, read, update, delete operations."""

    @pytest.mark.integration
    def test_create_project(self, integration_client, unique_test_id):
        """Creating a project echoes its fields plus server-side id/created_at."""
        project_name = f"test-create-{unique_test_id}"
        try:
            resp = integration_client.post(
                "/api/v1/projects",
                json={
                    "name": project_name,
                    "description": "Test project",
                    "is_public": True,
                },
            )
            assert resp.status_code == 200
            body = resp.json()
            assert body["name"] == project_name
            assert body["description"] == "Test project"
            assert body["is_public"] is True
            assert "id" in body
            assert "created_at" in body
        finally:
            # Always remove the project so reruns start from a clean slate.
            integration_client.delete(f"/api/v1/projects/{project_name}")

    @pytest.mark.integration
    def test_get_project(self, integration_client, test_project):
        """Fetching a project by name returns its record."""
        resp = integration_client.get(f"/api/v1/projects/{test_project}")
        assert resp.status_code == 200
        assert resp.json()["name"] == test_project

    @pytest.mark.integration
    def test_get_nonexistent_project(self, integration_client):
        """An unknown project name yields HTTP 404."""
        resp = integration_client.get("/api/v1/projects/nonexistent-project-xyz")
        assert resp.status_code == 404

    @pytest.mark.integration
    def test_list_projects(self, integration_client, test_project):
        """The project listing contains a freshly created project."""
        resp = integration_client.get("/api/v1/projects")
        assert resp.status_code == 200
        body = resp.json()
        assert "items" in body
        assert "pagination" in body
        listed_names = [entry["name"] for entry in body["items"]]
        assert test_project in listed_names

    @pytest.mark.integration
    def test_delete_project(self, integration_client, unique_test_id):
        """Deleting a project returns 204 and subsequent reads return 404."""
        project_name = f"test-delete-{unique_test_id}"
        # Create a throwaway project to delete.
        integration_client.post(
            "/api/v1/projects",
            json={"name": project_name, "description": "To be deleted"},
        )
        delete_resp = integration_client.delete(f"/api/v1/projects/{project_name}")
        assert delete_resp.status_code == 204
        # The record must no longer be retrievable.
        get_resp = integration_client.get(f"/api/v1/projects/{project_name}")
        assert get_resp.status_code == 404
class TestProjectListingFilters:
    """Tests for project listing with filters and pagination."""

    @pytest.mark.integration
    def test_projects_pagination(self, integration_client):
        """page/limit parameters cap results and are echoed in pagination info."""
        resp = integration_client.get("/api/v1/projects?page=1&limit=5")
        assert resp.status_code == 200
        body = resp.json()
        assert len(body["items"]) <= 5
        pagination = body["pagination"]
        assert pagination["limit"] == 5
        assert pagination["page"] == 1
        assert "has_more" in pagination

    @pytest.mark.integration
    def test_projects_search(self, integration_client, test_project):
        """Searching on a name prefix finds the matching project."""
        prefix = test_project[:10]
        resp = integration_client.get(f"/api/v1/projects?search={prefix}")
        assert resp.status_code == 200
        matches = [entry["name"] for entry in resp.json()["items"]]
        assert test_project in matches

    @pytest.mark.integration
    def test_projects_sort_by_name(self, integration_client):
        """Ascending name sort returns items in lexicographic order."""
        resp = integration_client.get("/api/v1/projects?sort=name&order=asc")
        assert resp.status_code == 200
        names = [entry["name"] for entry in resp.json()["items"]]
        assert names == sorted(names)
class TestProjectStats:
    """Tests for the project statistics endpoint."""

    @pytest.mark.integration
    def test_project_stats_returns_valid_response(
        self, integration_client, test_project
    ):
        """Stats for an existing project expose every expected field."""
        resp = integration_client.get(f"/api/v1/projects/{test_project}/stats")
        assert resp.status_code == 200
        payload = resp.json()
        # Same fields, same order, as the individual assertions they replace.
        for field in (
            "project_id",
            "project_name",
            "package_count",
            "tag_count",
            "artifact_count",
            "total_size_bytes",
            "upload_count",
            "deduplicated_uploads",
            "storage_saved_bytes",
            "deduplication_ratio",
        ):
            assert field in payload

    @pytest.mark.integration
    def test_project_stats_not_found(self, integration_client):
        """Stats for a missing project yield HTTP 404."""
        resp = integration_client.get("/api/v1/projects/nonexistent-project/stats")
        assert resp.status_code == 404
class TestProjectAuditLogs:
    """Tests for the project-level audit logs endpoint."""

    @pytest.mark.integration
    def test_project_audit_logs_returns_200(self, integration_client, test_project):
        """An existing project exposes a paginated audit-log listing."""
        resp = integration_client.get(f"/api/v1/projects/{test_project}/audit-logs")
        assert resp.status_code == 200
        body = resp.json()
        assert "items" in body
        assert "pagination" in body

    @pytest.mark.integration
    def test_project_audit_logs_not_found(self, integration_client):
        """Audit logs for an unknown project yield HTTP 404."""
        resp = integration_client.get(
            "/api/v1/projects/nonexistent-project/audit-logs"
        )
        assert resp.status_code == 404
class TestProjectCascadeDelete:
    """Tests for cascade delete behavior when deleting projects."""

    @pytest.mark.integration
    def test_project_delete_cascades_to_packages(
        self, integration_client, unique_test_id
    ):
        """Deleting a project removes it and, with it, its packages."""
        project_name = f"cascade-proj-{unique_test_id}"
        package_name = f"cascade-pkg-{unique_test_id}"
        try:
            # Create project and package
            integration_client.post(
                "/api/v1/projects",
                json={"name": project_name, "description": "Test", "is_public": True},
            )
            integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": package_name, "description": "Test package"},
            )
            # Verify package exists
            response = integration_client.get(
                f"/api/v1/project/{project_name}/packages/{package_name}"
            )
            assert response.status_code == 200
            # Delete project
            integration_client.delete(f"/api/v1/projects/{project_name}")
            # Verify project is deleted (and package with it)
            response = integration_client.get(f"/api/v1/projects/{project_name}")
            assert response.status_code == 404
        finally:
            # try/finally instead of `except Exception: cleanup; raise` — it
            # also covers non-Exception aborts, and the extra delete is a
            # harmless no-op once the test has already removed the project.
            integration_client.delete(f"/api/v1/projects/{project_name}")

    @pytest.mark.integration
    def test_ref_count_decrements_on_project_delete(
        self, integration_client, unique_test_id
    ):
        """Deleting a project releases tag references across all its packages.

        The same content is tagged twice in each of two packages (ref_count
        reaches 4); deleting the project must release all four references.
        """
        project_name = f"cascade-proj-{unique_test_id}"
        package1_name = f"pkg1-{unique_test_id}"
        package2_name = f"pkg2-{unique_test_id}"
        try:
            # Create project
            response = integration_client.post(
                "/api/v1/projects",
                json={
                    "name": project_name,
                    "description": "Test project",
                    "is_public": True,
                },
            )
            assert response.status_code == 200
            # Create two packages
            for pkg_name in (package1_name, package2_name):
                response = integration_client.post(
                    f"/api/v1/project/{project_name}/packages",
                    json={"name": pkg_name, "description": "Test package"},
                )
                assert response.status_code == 200
            # Upload the same content under two tags in each package
            content = f"project cascade test {unique_test_id}".encode()
            expected_hash = compute_sha256(content)
            for pkg_name, tags in (
                (package1_name, ("v1", "v2")),
                (package2_name, ("latest", "stable")),
            ):
                for tag in tags:
                    upload_test_file(
                        integration_client, project_name, pkg_name, content, tag=tag
                    )
            # Verify ref_count is 4 (2 tags in each of 2 packages)
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
            assert response.json()["ref_count"] == 4
            # Delete the project
            delete_response = integration_client.delete(
                f"/api/v1/projects/{project_name}"
            )
            assert delete_response.status_code == 204
            # Verify ref_count is 0
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
            assert response.json()["ref_count"] == 0
        finally:
            # Previously this test had no cleanup path at all: any assertion
            # failure before the in-test delete leaked the project into the
            # shared environment. The finally delete is a no-op on success.
            integration_client.delete(f"/api/v1/projects/{project_name}")
class TestProjectUploads:
    """Tests for the project-level uploads endpoint."""

    @pytest.mark.integration
    def test_project_uploads_returns_200(self, integration_client, test_project):
        """An existing project exposes a paginated uploads listing."""
        resp = integration_client.get(f"/api/v1/project/{test_project}/uploads")
        assert resp.status_code == 200
        body = resp.json()
        assert "items" in body
        assert "pagination" in body

    @pytest.mark.integration
    def test_project_uploads_after_upload(self, integration_client, test_package):
        """An upload appears in the project-level upload history."""
        project_name, package_name = test_package
        # Record one upload against the fixture package.
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"project uploads test",
            "project.txt",
        )
        resp = integration_client.get(f"/api/v1/project/{project_name}/uploads")
        assert resp.status_code == 200
        records = resp.json()["items"]
        assert len(records) >= 1
        # Every record listed under this project must carry its name.
        for record in records:
            assert record["project_name"] == project_name

    @pytest.mark.integration
    def test_project_uploads_not_found(self, integration_client):
        """Uploads for an unknown project yield HTTP 404."""
        resp = integration_client.get("/api/v1/project/nonexistent/uploads")
        assert resp.status_code == 404

View File

@@ -0,0 +1,403 @@
"""
Integration tests for tag API endpoints.
Tests cover:
- Tag CRUD operations
- Tag listing with pagination and search
- Tag history tracking
- ref_count behavior with tag operations
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestTagCRUD:
    """Tests for tag create, read, delete operations.

    Tags can be created two ways: implicitly via the upload endpoint
    (tag= parameter) or explicitly via POST .../tags with an artifact_id.
    """
    @pytest.mark.integration
    def test_create_tag_via_upload(self, integration_client, test_package):
        """Uploading with tag= creates that tag and returns the artifact id."""
        project_name, package_name = test_package
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tag create test",
            tag="v1.0.0",
        )
        assert result["tag"] == "v1.0.0"
        assert result["artifact_id"]
    @pytest.mark.integration
    def test_create_tag_via_post(
        self, integration_client, test_package, unique_test_id
    ):
        """POST .../tags attaches a new tag to an already-uploaded artifact."""
        project_name, package_name = test_package
        # First upload an artifact (no tag) to obtain its artifact_id
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact for tag",
        )
        artifact_id = result["artifact_id"]
        # Create tag via POST, pointing it at the uploaded artifact
        tag_name = f"post-tag-{unique_test_id}"
        response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json={"name": tag_name, "artifact_id": artifact_id},
        )
        assert response.status_code == 200
        data = response.json()
        assert data["name"] == tag_name
        assert data["artifact_id"] == artifact_id
    @pytest.mark.integration
    def test_get_tag(self, integration_client, test_package):
        """Fetching a tag by name returns its artifact metadata fields."""
        project_name, package_name = test_package
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"get tag test",
            tag="get-tag",
        )
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/get-tag"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["name"] == "get-tag"
        assert "artifact_id" in data
        assert "artifact_size" in data
        assert "artifact_content_type" in data
    @pytest.mark.integration
    def test_list_tags(self, integration_client, test_package):
        """The tag listing is paginated and includes a freshly created tag."""
        project_name, package_name = test_package
        # Create some tags
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"list tags test",
            tag="list-v1",
        )
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags"
        )
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data
        tag_names = [t["name"] for t in data["items"]]
        assert "list-v1" in tag_names
    @pytest.mark.integration
    def test_delete_tag(self, integration_client, test_package):
        """Deleting a tag returns 204 and subsequent reads return 404."""
        project_name, package_name = test_package
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"delete tag test",
            tag="to-delete",
        )
        # Delete tag
        response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"
        )
        assert response.status_code == 204
        # Verify deleted
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"
        )
        assert response.status_code == 404
class TestTagListingFilters:
    """Tests for tag listing with filters and search."""

    @pytest.mark.integration
    def test_tags_pagination(self, integration_client, test_package):
        """The limit parameter caps the number of returned tags."""
        project_name, package_name = test_package
        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags?limit=5"
        )
        assert resp.status_code == 200
        body = resp.json()
        assert len(body["items"]) <= 5
        assert body["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_tags_search(self, integration_client, test_package, unique_test_id):
        """Substring search matches a tag created for this test."""
        project_name, package_name = test_package
        tag_name = f"searchable-{unique_test_id}"
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"search test",
            tag=tag_name,
        )
        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags?search=searchable"
        )
        assert resp.status_code == 200
        found = [entry["name"] for entry in resp.json()["items"]]
        assert tag_name in found
class TestTagHistory:
    """Tests for tag history tracking."""

    @pytest.mark.integration
    def test_tag_history_on_create(self, integration_client, test_package):
        """Creating a tag writes at least one history entry."""
        project_name, package_name = test_package
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"history create test",
            tag="history-create",
        )
        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/history-create/history"
        )
        assert resp.status_code == 200
        assert len(resp.json()) >= 1

    @pytest.mark.integration
    def test_tag_history_on_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Re-pointing a tag produces at least two entries (create + update)."""
        project_name, package_name = test_package
        tag_name = f"history-update-{unique_test_id}"
        # Point the same tag at two different artifacts in sequence.
        for payload in (b"first content", b"second content"):
            upload_test_file(
                integration_client,
                project_name,
                package_name,
                payload,
                tag=tag_name,
            )
        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history"
        )
        assert resp.status_code == 200
        assert len(resp.json()) >= 2
class TestTagRefCount:
    """Tests for ref_count behavior with tag operations.

    These assertions establish the ref_count contract: each tag pointing at
    an artifact contributes one reference; deleting or re-pointing a tag
    adjusts the count accordingly.
    """
    @pytest.mark.integration
    def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package):
        """Deleting one of two tags drops ref_count from 2 to 1."""
        project_name, package_name = test_package
        content = b"ref count delete test"
        expected_hash = compute_sha256(content)
        # Upload with two tags
        upload_test_file(
            integration_client, project_name, package_name, content, tag="rc-v1"
        )
        upload_test_file(
            integration_client, project_name, package_name, content, tag="rc-v2"
        )
        # Verify ref_count is 2
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2
        # Delete one tag
        delete_response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/rc-v1"
        )
        assert delete_response.status_code == 204
        # Verify ref_count is now 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
    @pytest.mark.integration
    def test_ref_count_zero_after_all_tags_deleted(
        self, integration_client, test_package
    ):
        """Removing the only tag leaves the artifact with ref_count 0."""
        project_name, package_name = test_package
        content = b"orphan test content"
        expected_hash = compute_sha256(content)
        # Upload with one tag
        upload_test_file(
            integration_client, project_name, package_name, content, tag="only-tag"
        )
        # Delete the tag
        integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/only-tag"
        )
        # Verify ref_count is 0 (artifact record itself still exists)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0
    @pytest.mark.integration
    def test_ref_count_adjusts_on_tag_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Re-pointing a tag decrements the old artifact and increments the new."""
        project_name, package_name = test_package
        # Upload two different artifacts
        content1 = f"artifact one {unique_test_id}".encode()
        content2 = f"artifact two {unique_test_id}".encode()
        hash1 = compute_sha256(content1)
        hash2 = compute_sha256(content2)
        # Upload first artifact with tag "latest"
        upload_test_file(
            integration_client, project_name, package_name, content1, tag="latest"
        )
        # Verify first artifact has ref_count 1
        response = integration_client.get(f"/api/v1/artifact/{hash1}")
        assert response.json()["ref_count"] == 1
        # Upload second artifact with different tag
        upload_test_file(
            integration_client, project_name, package_name, content2, tag="stable"
        )
        # Now update "latest" tag to point to second artifact
        upload_test_file(
            integration_client, project_name, package_name, content2, tag="latest"
        )
        # Verify first artifact ref_count decreased to 0
        response = integration_client.get(f"/api/v1/artifact/{hash1}")
        assert response.json()["ref_count"] == 0
        # Verify second artifact ref_count increased to 2 ("stable" + "latest")
        response = integration_client.get(f"/api/v1/artifact/{hash2}")
        assert response.json()["ref_count"] == 2
    @pytest.mark.integration
    def test_ref_count_unchanged_when_tag_same_artifact(
        self, integration_client, test_package, unique_test_id
    ):
        """Re-uploading the same content under the same tag is a no-op."""
        project_name, package_name = test_package
        content = f"same artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload with tag
        upload_test_file(
            integration_client, project_name, package_name, content, tag="same-v1"
        )
        # Verify ref_count is 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
        # Upload same content with same tag (no-op)
        upload_test_file(
            integration_client, project_name, package_name, content, tag="same-v1"
        )
        # Verify ref_count is still 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
    @pytest.mark.integration
    def test_tag_via_post_endpoint_increments_ref_count(
        self, integration_client, test_package, unique_test_id
    ):
        """Tags created via POST .../tags count as references too."""
        project_name, package_name = test_package
        content = f"tag endpoint test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload artifact without tag
        result = upload_test_file(
            integration_client, project_name, package_name, content, filename="test.bin"
        )
        artifact_id = result["artifact_id"]
        # Verify ref_count is 0 (no tags yet)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0
        # Create tag via POST endpoint
        tag_response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json={"name": "post-v1", "artifact_id": artifact_id},
        )
        assert tag_response.status_code == 200
        # Verify ref_count is now 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
        # Create another tag via POST endpoint
        tag_response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json={"name": "post-latest", "artifact_id": artifact_id},
        )
        assert tag_response.status_code == 200
        # Verify ref_count is now 2
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2

View File

@@ -1,33 +1,109 @@
"""
Integration tests for duplicate uploads and storage verification.
These tests require the full stack to be running (docker-compose.local.yml).
Integration tests for upload and download API endpoints.
Tests cover:
- Duplicate upload scenarios across packages and projects
- Storage verification (single S3 object, single artifact row)
- Upload table tracking
- Content integrity verification
- Upload functionality and deduplication
- Download by tag and artifact ID
- Concurrent upload handling
- Failure cleanup
- File size validation
- Upload failure cleanup
- S3 storage verification
"""
import pytest
import io
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.conftest import (
from tests.factories import (
compute_sha256,
upload_test_file,
list_s3_objects_by_hash,
s3_object_exists,
delete_s3_object_by_hash,
)
class TestDuplicateUploadScenarios:
"""Integration tests for duplicate upload behavior."""
class TestUploadBasics:
    """Tests for basic upload functionality and the upload response schema."""
    @pytest.mark.integration
    def test_upload_returns_artifact_id(self, integration_client, test_package):
        """The returned artifact_id is the SHA256 hex digest of the content."""
        project_name, package_name = test_package
        content = b"basic upload test"
        # Artifact ids are content-addressed: hash computed client-side must match.
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project_name, package_name, content, tag="v1"
        )
        assert result["artifact_id"] == expected_hash
    @pytest.mark.integration
    def test_upload_response_has_upload_id(self, integration_client, test_package):
        """Each upload response carries a non-null upload_id."""
        project_name, package_name = test_package
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"upload id test",
            "uploadid.txt",
        )
        assert "upload_id" in result
        assert result["upload_id"] is not None
    @pytest.mark.integration
    def test_upload_response_has_content_type(self, integration_client, test_package):
        """The upload response includes a content_type field."""
        project_name, package_name = test_package
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"content type test",
            "content.txt",
        )
        assert "content_type" in result
    @pytest.mark.integration
    def test_upload_response_has_original_name(self, integration_client, test_package):
        """original_name echoes the filename supplied with the upload."""
        project_name, package_name = test_package
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"original name test",
            "originalname.txt",
        )
        assert "original_name" in result
        assert result["original_name"] == "originalname.txt"
    @pytest.mark.integration
    def test_upload_response_has_created_at(self, integration_client, test_package):
        """The upload response includes a non-null created_at timestamp."""
        project_name, package_name = test_package
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"created at test",
            "createdat.txt",
        )
        assert "created_at" in result
        assert result["created_at"] is not None
class TestDuplicateUploads:
"""Tests for duplicate upload deduplication behavior."""
@pytest.mark.integration
def test_same_file_twice_returns_same_artifact_id(
@@ -103,62 +179,11 @@ class TestDuplicateUploadScenarios:
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
@pytest.mark.integration
def test_same_file_different_projects_shares_artifact(
self, integration_client, unique_test_id
):
"""Test uploading same file to different projects shares artifact."""
content = f"content shared across projects {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Create two projects with packages
proj1 = f"project-x-{unique_test_id}"
proj2 = f"project-y-{unique_test_id}"
pkg_name = "shared-pkg"
try:
# Create projects and packages
integration_client.post(
"/api/v1/projects",
json={"name": proj1, "description": "Project X", "is_public": True},
)
integration_client.post(
"/api/v1/projects",
json={"name": proj2, "description": "Project Y", "is_public": True},
)
integration_client.post(
f"/api/v1/project/{proj1}/packages",
json={"name": pkg_name, "description": "Package"},
)
integration_client.post(
f"/api/v1/project/{proj2}/packages",
json={"name": pkg_name, "description": "Package"},
)
# Upload to first project
result1 = upload_test_file(
integration_client, proj1, pkg_name, content, tag="v1"
)
assert result1["artifact_id"] == expected_hash
assert result1["deduplicated"] is False
# Upload to second project
result2 = upload_test_file(
integration_client, proj2, pkg_name, content, tag="v1"
)
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
finally:
# Cleanup
integration_client.delete(f"/api/v1/projects/{proj1}")
integration_client.delete(f"/api/v1/projects/{proj2}")
@pytest.mark.integration
def test_same_file_different_filenames_shares_artifact(
self, integration_client, test_package
):
"""Test uploading same file with different original filenames shares artifact."""
"""Test uploading same file with different filenames shares artifact."""
project, package = test_package
content = b"content with different filenames"
expected_hash = compute_sha256(content)
@@ -186,110 +211,68 @@ class TestDuplicateUploadScenarios:
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
@pytest.mark.integration
def test_same_file_different_tags_shares_artifact(
self, integration_client, test_package, unique_test_id
):
"""Test uploading same file with different tags shares artifact."""
project, package = test_package
content = f"content with different tags {unique_test_id}".encode()
expected_hash = compute_sha256(content)
tags = ["latest", "stable", "v1.0.0", "release"]
for i, tag in enumerate(tags):
result = upload_test_file(
integration_client, project, package, content, tag=tag
)
assert result["artifact_id"] == expected_hash
if i == 0:
assert result["deduplicated"] is False
else:
assert result["deduplicated"] is True
class TestStorageVerification:
"""Tests to verify storage behavior after duplicate uploads."""
class TestDownload:
"""Tests for download functionality."""
@pytest.mark.integration
def test_artifact_table_single_row_after_duplicates(
self, integration_client, test_package
):
"""Test artifact table contains only one row after duplicate uploads."""
def test_download_by_tag(self, integration_client, test_package):
"""Test downloading artifact by tag name."""
project, package = test_package
content = b"content for single row test"
expected_hash = compute_sha256(content)
original_content = b"download by tag test"
# Upload same content multiple times with different tags
for tag in ["v1", "v2", "v3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
upload_test_file(
integration_client, project, package, original_content, tag="download-tag"
)
# Query artifact - should exist and be unique
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
artifact = response.json()
assert artifact["id"] == expected_hash
assert artifact["ref_count"] == 3
@pytest.mark.integration
def test_upload_table_multiple_rows_for_duplicates(
self, integration_client, test_package
):
"""Test upload table contains multiple rows for duplicate uploads (event tracking)."""
project, package = test_package
content = b"content for upload tracking test"
# Upload same content 3 times
for tag in ["upload1", "upload2", "upload3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Check package stats - should show 3 uploads but fewer unique artifacts
response = integration_client.get(
f"/api/v1/project/{project}/packages/{package}"
f"/api/v1/project/{project}/{package}/+/download-tag",
params={"mode": "proxy"},
)
assert response.status_code == 200
pkg_info = response.json()
assert pkg_info["tag_count"] == 3
assert response.content == original_content
@pytest.mark.integration
def test_artifact_content_matches_original(self, integration_client, test_package):
"""Test artifact content retrieved matches original content exactly."""
def test_download_by_artifact_id(self, integration_client, test_package):
"""Test downloading artifact by artifact ID."""
project, package = test_package
original_content = b"download by id test"
expected_hash = compute_sha256(original_content)
upload_test_file(integration_client, project, package, original_content)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/artifact:{expected_hash}",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == original_content
@pytest.mark.integration
def test_download_nonexistent_tag(self, integration_client, test_package):
"""Test downloading nonexistent tag returns 404."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
)
assert response.status_code == 404
@pytest.mark.integration
def test_content_matches_original(self, integration_client, test_package):
"""Test downloaded content matches original exactly."""
project, package = test_package
original_content = b"exact content verification test data 12345"
# Upload
result = upload_test_file(
upload_test_file(
integration_client, project, package, original_content, tag="verify"
)
# Download and compare
download_response = integration_client.get(
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/verify", params={"mode": "proxy"}
)
assert download_response.status_code == 200
downloaded_content = download_response.content
assert downloaded_content == original_content
@pytest.mark.integration
def test_storage_stats_reflect_deduplication(
self, integration_client, test_package
):
"""Test total storage size matches single artifact size after duplicates."""
project, package = test_package
content = b"content for storage stats test - should only count once"
content_size = len(content)
# Upload same content 5 times
for tag in ["a", "b", "c", "d", "e"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Check global stats
response = integration_client.get("/api/v1/stats")
assert response.status_code == 200
stats = response.json()
# Deduplication should show savings
assert stats["deduplicated_uploads"] > 0
assert stats["storage_saved_bytes"] > 0
assert response.content == original_content
class TestConcurrentUploads:
@@ -308,7 +291,6 @@ class TestConcurrentUploads:
def upload_worker(tag_suffix):
try:
# Create a new client for this thread
from httpx import Client
base_url = "http://localhost:8080"
@@ -332,13 +314,11 @@ class TestConcurrentUploads:
except Exception as e:
errors.append(str(e))
# Run concurrent uploads
with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
for future in as_completed(futures):
pass # Wait for all to complete
pass
# Verify results
assert len(errors) == 0, f"Errors during concurrent uploads: {errors}"
assert len(results) == num_concurrent
@@ -353,227 +333,27 @@ class TestConcurrentUploads:
assert response.json()["ref_count"] == num_concurrent
class TestDeduplicationAcrossRestarts:
    """Tests for deduplication persistence."""

    @pytest.mark.integration
    def test_deduplication_persists(
        self, integration_client, test_package, unique_test_id
    ):
        """
        Test deduplication works with persisted data.

        Uploads the same payload twice: the first upload stores a new
        artifact, while the second must be recognised as a duplicate
        because the database persists between requests (simulating a
        server restart with durable state).
        """
        project, package = test_package
        payload = f"persisted content for dedup test {unique_test_id}".encode()
        payload_hash = compute_sha256(payload)

        # Initial upload creates the artifact.
        first = upload_test_file(
            integration_client, project, package, payload, tag="persist1"
        )
        assert first["artifact_id"] == payload_hash
        assert first["deduplicated"] is False

        # Re-uploading identical bytes must take the dedup path.
        second = upload_test_file(
            integration_client, project, package, payload, tag="persist2"
        )
        assert second["artifact_id"] == payload_hash
        assert second["deduplicated"] is True

        # Both tags reference the single artifact, so ref_count == 2.
        lookup = integration_client.get(f"/api/v1/artifact/{payload_hash}")
        assert lookup.status_code == 200
        assert lookup.json()["ref_count"] == 2
class TestS3ObjectVerification:
    """Tests to verify S3 storage behavior directly."""

    @pytest.mark.integration
    def test_s3_bucket_single_object_after_duplicates(
        self, integration_client, test_package, unique_test_id
    ):
        """Test S3 bucket contains only one object after duplicate uploads."""
        project, package = test_package
        payload = f"content for s3 object count test {unique_test_id}".encode()
        payload_hash = compute_sha256(payload)

        # Store identical bytes under three distinct tags.
        for tag_name in ("s3test1", "s3test2", "s3test3"):
            upload_test_file(
                integration_client, project, package, payload, tag=tag_name
            )

        # Content-addressable storage must hold exactly one object.
        matching_keys = list_s3_objects_by_hash(payload_hash)
        assert len(matching_keys) == 1, (
            f"Expected 1 S3 object, found {len(matching_keys)}: {matching_keys}"
        )

        # Key layout: fruits/<hash[:2]>/<hash[2:4]>/<full hash>.
        assert matching_keys[0] == (
            f"fruits/{payload_hash[:2]}/{payload_hash[2:4]}/{payload_hash}"
        )
class TestUploadFailureCleanup:
    """Tests for cleanup when uploads fail.

    A failed upload must be atomic: no S3 object and no database row may
    survive a request that is rejected.
    """

    @staticmethod
    def _attempt_upload(client, project, package, content, tag, filename="test.bin"):
        """POST a multipart upload and return the raw response.

        Centralizes the request construction that was previously duplicated
        in every failure-path test in this class.
        """
        files = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
        return client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"tag": tag},
        )

    @pytest.mark.integration
    def test_upload_failure_invalid_project_no_orphaned_s3(
        self, integration_client, unique_test_id
    ):
        """Test upload to non-existent project doesn't leave orphaned S3 objects."""
        content = f"content for orphan s3 test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Attempt upload to a project that does not exist.
        response = self._attempt_upload(
            integration_client,
            f"nonexistent-project-{unique_test_id}",
            "nonexistent-pkg",
            content,
            tag="test",
        )
        # Upload should fail
        assert response.status_code == 404
        # The rejected upload must not have written anything to S3.
        assert not s3_object_exists(expected_hash), (
            "Orphaned S3 object found after failed upload"
        )

    @pytest.mark.integration
    def test_upload_failure_invalid_package_no_orphaned_s3(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to non-existent package doesn't leave orphaned S3 objects."""
        content = f"content for orphan s3 test pkg {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Attempt upload to a package that does not exist.
        response = self._attempt_upload(
            integration_client,
            test_project,
            f"nonexistent-package-{unique_test_id}",
            content,
            tag="test",
        )
        assert response.status_code == 404
        assert not s3_object_exists(expected_hash), (
            "Orphaned S3 object found after failed upload"
        )

    @pytest.mark.integration
    def test_upload_failure_empty_file_no_orphaned_s3(
        self, integration_client, test_package, unique_test_id
    ):
        """Test upload of empty file doesn't leave orphaned S3 objects or DB records."""
        project, package = test_package
        # Empty payloads are rejected by validation before storage.
        response = self._attempt_upload(
            integration_client,
            project,
            package,
            b"",
            tag=f"empty-{unique_test_id}",
            filename="empty.bin",
        )
        # Upload should fail (empty files are rejected)
        assert response.status_code in (400, 422), (
            f"Expected 400/422, got {response.status_code}"
        )

    @pytest.mark.integration
    def test_upload_failure_no_orphaned_database_records(
        self, integration_client, test_project, unique_test_id
    ):
        """Test failed upload doesn't leave orphaned database records."""
        content = f"content for db orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Attempt upload to non-existent package (should fail before DB insert).
        response = self._attempt_upload(
            integration_client,
            test_project,
            f"nonexistent-package-{unique_test_id}",
            content,
            tag="test",
        )
        assert response.status_code == 404
        # No artifact row may exist for the rejected content.
        artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert artifact_response.status_code == 404, (
            "Orphaned artifact record found after failed upload"
        )

    @pytest.mark.integration
    def test_duplicate_tag_upload_handles_gracefully(
        self, integration_client, test_package, unique_test_id
    ):
        """Test uploading with duplicate tag is handled without orphaned data."""
        project, package = test_package
        content1 = f"content version 1 {unique_test_id}".encode()
        content2 = f"content version 2 {unique_test_id}".encode()
        tag = f"duplicate-tag-{unique_test_id}"
        # First upload with tag
        result1 = upload_test_file(
            integration_client, project, package, content1, tag=tag
        )
        hash1 = result1["artifact_id"]
        # Second upload with same tag (should re-point the tag at the new artifact)
        result2 = upload_test_file(
            integration_client, project, package, content2, tag=tag
        )
        hash2 = result2["artifact_id"]
        # Both artifacts should exist
        assert integration_client.get(f"/api/v1/artifact/{hash1}").status_code == 200
        assert integration_client.get(f"/api/v1/artifact/{hash2}").status_code == 200
        # Tag should point to the second artifact
        tag_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags/{tag}"
        )
        assert tag_response.status_code == 200
        assert tag_response.json()["artifact_id"] == hash2
class TestFileSizeValidation:
"""Tests for file size limits and empty file rejection."""
@pytest.mark.integration
def test_empty_file_rejected(self, integration_client, test_package):
"""Test that empty files are rejected with appropriate error."""
"""Test empty files are rejected with appropriate error."""
project, package = test_package
# Try to upload empty content
files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
# Should be rejected (422 from storage layer or validation)
assert response.status_code in [422, 400]
@pytest.mark.integration
def test_small_valid_file_accepted(self, integration_client, test_package):
"""Test that small (1 byte) files are accepted."""
"""Test small (1 byte) files are accepted."""
project, package = test_package
content = b"X" # Single byte
content = b"X"
result = upload_test_file(
integration_client, project, package, content, tag="tiny"
@@ -586,7 +366,7 @@ class TestFileSizeValidation:
def test_file_size_reported_correctly(
self, integration_client, test_package, unique_test_id
):
"""Test that file size is correctly reported in response."""
"""Test file size is correctly reported in response."""
project, package = test_package
content = f"Test content for size check {unique_test_id}".encode()
expected_size = len(content)
@@ -602,3 +382,121 @@ class TestFileSizeValidation:
f"/api/v1/artifact/{result['artifact_id']}"
)
assert artifact_response.json()["size"] == expected_size
class TestUploadFailureCleanup:
    """Tests for cleanup when uploads fail.

    A failed upload must be atomic: no S3 object and no database row may
    survive a request that is rejected.
    """

    @staticmethod
    def _attempt_upload(client, project, package, content, tag):
        """POST a multipart upload and return the raw response.

        Centralizes the request construction that was previously duplicated
        in every failure-path test in this class.
        """
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        return client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"tag": tag},
        )

    @pytest.mark.integration
    def test_upload_failure_invalid_project_no_orphaned_s3(
        self, integration_client, unique_test_id
    ):
        """Test upload to non-existent project doesn't leave orphaned S3 objects."""
        content = f"content for orphan s3 test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Attempt upload to a project that does not exist.
        response = self._attempt_upload(
            integration_client,
            f"nonexistent-project-{unique_test_id}",
            "nonexistent-pkg",
            content,
            tag="test",
        )
        assert response.status_code == 404
        # Verify no S3 object was created
        assert not s3_object_exists(expected_hash), (
            "Orphaned S3 object found after failed upload"
        )

    @pytest.mark.integration
    def test_upload_failure_invalid_package_no_orphaned_s3(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to non-existent package doesn't leave orphaned S3 objects."""
        content = f"content for orphan s3 test pkg {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Attempt upload to a package that does not exist.
        response = self._attempt_upload(
            integration_client,
            test_project,
            f"nonexistent-package-{unique_test_id}",
            content,
            tag="test",
        )
        assert response.status_code == 404
        assert not s3_object_exists(expected_hash), (
            "Orphaned S3 object found after failed upload"
        )

    @pytest.mark.integration
    def test_upload_failure_no_orphaned_database_records(
        self, integration_client, test_project, unique_test_id
    ):
        """Test failed upload doesn't leave orphaned database records."""
        content = f"content for db orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Should fail before any DB insert happens.
        response = self._attempt_upload(
            integration_client,
            test_project,
            f"nonexistent-package-{unique_test_id}",
            content,
            tag="test",
        )
        assert response.status_code == 404
        # No artifact row may exist for the rejected content.
        artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert artifact_response.status_code == 404, (
            "Orphaned artifact record found after failed upload"
        )
class TestS3StorageVerification:
    """Tests to verify S3 storage behavior."""

    @pytest.mark.integration
    def test_s3_single_object_after_duplicates(
        self, integration_client, test_package, unique_test_id
    ):
        """Test S3 bucket contains only one object after duplicate uploads."""
        project, package = test_package
        payload = f"content for s3 object count test {unique_test_id}".encode()
        payload_hash = compute_sha256(payload)
        # Re-upload identical bytes under several tags.
        for tag_name in ("s3test1", "s3test2", "s3test3"):
            upload_test_file(
                integration_client, project, package, payload, tag=tag_name
            )
        # Content addressing implies a single stored object.
        matching_keys = list_s3_objects_by_hash(payload_hash)
        assert len(matching_keys) == 1, (
            f"Expected 1 S3 object, found {len(matching_keys)}: {matching_keys}"
        )
        # Key layout: fruits/<hash[:2]>/<hash[2:4]>/<full hash>.
        assert matching_keys[0] == (
            f"fruits/{payload_hash[:2]}/{payload_hash[2:4]}/{payload_hash}"
        )

    @pytest.mark.integration
    def test_artifact_table_single_row_after_duplicates(
        self, integration_client, test_package
    ):
        """Test artifact table contains only one row after duplicate uploads."""
        project, package = test_package
        payload = b"content for single row test"
        payload_hash = compute_sha256(payload)
        # Three tags, one payload.
        for tag_name in ("v1", "v2", "v3"):
            upload_test_file(
                integration_client, project, package, payload, tag=tag_name
            )
        # A single artifact row should exist, referenced three times.
        lookup = integration_client.get(f"/api/v1/artifact/{payload_hash}")
        assert lookup.status_code == 200
        record = lookup.json()
        assert record["id"] == payload_hash
        assert record["ref_count"] == 3

View File

@@ -1,207 +0,0 @@
"""
Unit tests for duplicate detection and deduplication logic.
Tests cover:
- _exists() method correctly identifies existing S3 keys
- S3 key generation follows expected pattern
- Storage layer skips upload when artifact already exists
- Storage layer performs upload when artifact does not exist
"""
import pytest
import io
from unittest.mock import MagicMock, patch
from tests.conftest import (
compute_sha256,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
)
class TestExistsMethod:
    """Tests for the _exists() method that checks S3 object existence."""

    @pytest.mark.unit
    def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client):
        """Test _exists() returns True when object exists."""
        # Seed the fake bucket with one object before probing.
        seeded_key = "fruits/df/fd/test-hash"
        mock_s3_client.objects[seeded_key] = b"content"
        assert mock_storage._exists(seeded_key) is True

    @pytest.mark.unit
    def test_exists_returns_false_for_nonexistent_key(self, mock_storage):
        """Test _exists() returns False when object doesn't exist."""
        assert mock_storage._exists("fruits/no/ne/nonexistent-key") is False

    @pytest.mark.unit
    def test_exists_handles_404_error(self, mock_storage):
        """Test _exists() handles 404 errors gracefully."""
        # The mock client raises ClientError for unknown keys; _exists must
        # translate that into a plain False rather than propagating it.
        assert mock_storage._exists("fruits/xx/yy/does-not-exist") is False
class TestS3KeyGeneration:
    """Tests for S3 key pattern generation."""

    @pytest.mark.unit
    def test_s3_key_pattern(self):
        """Test S3 key follows pattern: fruits/{hash[:2]}/{hash[2:4]}/{hash}"""
        sample_hash = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"
        derived_key = f"fruits/{sample_hash[:2]}/{sample_hash[2:4]}/{sample_hash}"
        # Expected: fruits/ab/cd/abcdef1234567890...
        assert derived_key == f"fruits/ab/cd/{sample_hash}"

    @pytest.mark.unit
    def test_s3_key_generation_in_storage(self, mock_storage):
        """Test storage layer generates correct S3 key."""
        stored = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert stored.s3_key == (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )

    @pytest.mark.unit
    def test_s3_key_uses_sha256_hash(self, mock_storage):
        """Test S3 key is derived from SHA256 hash."""
        payload = b"unique test content for key test"
        payload_hash = compute_sha256(payload)
        stored = mock_storage._store_simple(io.BytesIO(payload))
        # The full hash must appear inside the generated key.
        assert payload_hash in stored.s3_key
class TestDeduplicationBehavior:
    """Tests for deduplication (skip upload when exists)."""

    @pytest.mark.unit
    def test_skips_upload_when_exists(self, mock_storage, mock_s3_client):
        """Test storage skips S3 upload when artifact already exists."""
        payload = TEST_CONTENT_HELLO
        object_key = (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )
        # Simulate an artifact that is already stored.
        mock_s3_client.objects[object_key] = payload
        # Wrap put_object so invocations can be counted.
        real_put = mock_s3_client.put_object
        put_calls = []

        def counting_put(*args, **kwargs):
            put_calls.append(True)
            return real_put(*args, **kwargs)

        mock_s3_client.put_object = counting_put
        stored = mock_storage._store_simple(io.BytesIO(payload))
        # Deduplication: no new put_object call, same hash reported.
        assert len(put_calls) == 0
        assert stored.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_uploads_when_not_exists(self, mock_storage, mock_s3_client):
        """Test storage uploads to S3 when artifact doesn't exist."""
        payload = b"brand new unique content"
        payload_hash = compute_sha256(payload)
        object_key = f"fruits/{payload_hash[:2]}/{payload_hash[2:4]}/{payload_hash}"
        # Precondition: nothing stored yet under this key.
        assert object_key not in mock_s3_client.objects
        mock_storage._store_simple(io.BytesIO(payload))
        # The object must now be present with the exact payload.
        assert object_key in mock_s3_client.objects
        assert mock_s3_client.objects[object_key] == payload

    @pytest.mark.unit
    def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client):
        """Test storing same content twice returns same hash."""
        payload = b"content to be stored twice"
        first = mock_storage._store_simple(io.BytesIO(payload))
        second = mock_storage._store_simple(io.BytesIO(payload))
        assert first.sha256 == second.sha256
        assert first.s3_key == second.s3_key

    @pytest.mark.unit
    def test_different_content_different_keys(self, mock_storage):
        """Test different content produces different S3 keys."""
        first = mock_storage._store_simple(io.BytesIO(b"first content"))
        second = mock_storage._store_simple(io.BytesIO(b"second content"))
        assert first.sha256 != second.sha256
        assert first.s3_key != second.s3_key
class TestDeduplicationEdgeCases:
    """Edge case tests for deduplication."""

    @pytest.mark.unit
    def test_same_content_different_filenames(self, mock_storage):
        """Test same content with different metadata is deduplicated."""
        content = b"identical content"
        # Store with "filename1"
        file1 = io.BytesIO(content)
        result1 = mock_storage._store_simple(file1)
        # Store with "filename2" (same content)
        file2 = io.BytesIO(content)
        result2 = mock_storage._store_simple(file2)
        # Both should have same hash (content-addressable)
        assert result1.sha256 == result2.sha256

    @pytest.mark.unit
    def test_whitespace_only_difference(self, mock_storage):
        """Test content differing only by whitespace produces different hashes."""
        content1 = b"test content"
        # BUG FIX: content2 was byte-identical to content1 despite the
        # "Extra space" comment, which made the three-distinct-hashes
        # assertion below fail. It now actually contains a double space.
        content2 = b"test  content"  # Extra space
        content3 = b"test content "  # Trailing space
        file1 = io.BytesIO(content1)
        file2 = io.BytesIO(content2)
        file3 = io.BytesIO(content3)
        result1 = mock_storage._store_simple(file1)
        result2 = mock_storage._store_simple(file2)
        result3 = mock_storage._store_simple(file3)
        # All should be different (content-addressable)
        assert len({result1.sha256, result2.sha256, result3.sha256}) == 3

View File

@@ -1,168 +0,0 @@
"""
Integration tests for garbage collection functionality.
Tests cover:
- Listing orphaned artifacts (ref_count=0)
- Garbage collection in dry-run mode
- Garbage collection actual deletion
- Verifying artifacts with refs are not deleted
"""
import pytest
from tests.conftest import (
compute_sha256,
upload_test_file,
)
class TestOrphanedArtifactsEndpoint:
    """Tests for GET /api/v1/admin/orphaned-artifacts endpoint."""

    @pytest.mark.integration
    def test_list_orphaned_artifacts_returns_list(self, integration_client):
        """Test orphaned artifacts endpoint returns a list."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts")
        assert response.status_code == 200
        assert isinstance(response.json(), list)

    @pytest.mark.integration
    def test_orphaned_artifact_has_required_fields(self, integration_client):
        """Test orphaned artifact response has required fields."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
        assert response.status_code == 200
        payload = response.json()
        # The list may legitimately be empty; only check shape when populated.
        if len(payload) > 0:
            first = payload[0]
            for field in ("id", "size", "created_at", "created_by", "original_name"):
                assert field in first

    @pytest.mark.integration
    def test_orphaned_artifacts_respects_limit(self, integration_client):
        """Test orphaned artifacts endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
        assert response.status_code == 200
        assert len(response.json()) <= 5

    @pytest.mark.integration
    def test_artifact_becomes_orphaned_when_tag_deleted(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact appears in orphaned list after tag is deleted."""
        project, package = test_package
        payload = f"orphan test {unique_test_id}".encode()
        payload_hash = compute_sha256(payload)
        # Upload with a tag: ref_count == 1, so it must not be orphaned yet.
        upload_test_file(integration_client, project, package, payload, tag="temp-tag")
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        assert payload_hash not in [a["id"] for a in response.json()]
        # Removing the only tag drops ref_count to 0.
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag")
        # The artifact must now be reported as orphaned.
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        assert payload_hash in [a["id"] for a in response.json()]
class TestGarbageCollectionEndpoint:
    """Tests for POST /api/v1/admin/garbage-collect endpoint."""

    @pytest.mark.integration
    def test_garbage_collect_dry_run_returns_response(self, integration_client):
        """Test garbage collection dry run returns valid response."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200
        report = response.json()
        for field in ("artifacts_deleted", "bytes_freed", "artifact_ids", "dry_run"):
            assert field in report
        assert report["dry_run"] is True

    @pytest.mark.integration
    def test_garbage_collect_dry_run_doesnt_delete(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection dry run doesn't actually delete artifacts."""
        project, package = test_package
        payload = f"dry run test {unique_test_id}".encode()
        payload_hash = compute_sha256(payload)
        # Create an orphan: upload, then delete its only tag.
        upload_test_file(integration_client, project, package, payload, tag="dry-run")
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")
        # Sanity: the artifact row still exists before collection.
        lookup = integration_client.get(f"/api/v1/artifact/{payload_hash}")
        assert lookup.status_code == 200
        # Dry run must report the orphan as collectable...
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200
        assert payload_hash in gc_response.json()["artifact_ids"]
        # ...but must not actually remove it.
        lookup = integration_client.get(f"/api/v1/artifact/{payload_hash}")
        assert lookup.status_code == 200

    @pytest.mark.integration
    def test_garbage_collect_preserves_referenced_artifacts(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection doesn't delete artifacts with ref_count > 0."""
        project, package = test_package
        payload = f"preserve test {unique_test_id}".encode()
        payload_hash = compute_sha256(payload)
        # Upload with a tag so the artifact is referenced (ref_count == 1).
        upload_test_file(integration_client, project, package, payload, tag="keep-this")
        lookup = integration_client.get(f"/api/v1/artifact/{payload_hash}")
        assert lookup.status_code == 200
        assert lookup.json()["ref_count"] == 1
        # Dry run keeps other tests unaffected while exercising selection.
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200
        # A referenced artifact must never be selected for deletion.
        assert payload_hash not in gc_response.json()["artifact_ids"]
        # It must also still be retrievable with its reference intact.
        lookup = integration_client.get(f"/api/v1/artifact/{payload_hash}")
        assert lookup.status_code == 200
        assert lookup.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_garbage_collect_respects_limit(self, integration_client):
        """Test garbage collection respects limit parameter."""
        response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=5"
        )
        assert response.status_code == 200
        assert response.json()["artifacts_deleted"] <= 5

    @pytest.mark.integration
    def test_garbage_collect_returns_bytes_freed(self, integration_client):
        """Test garbage collection returns accurate bytes_freed."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200
        report = response.json()
        assert report["bytes_freed"] >= 0
        assert isinstance(report["bytes_freed"], int)

View File

@@ -1,215 +0,0 @@
"""
Unit tests for SHA256 hash calculation and deduplication logic.
Tests cover:
- Hash computation produces consistent results
- Hash is always 64 character lowercase hexadecimal
- Different content produces different hashes
- Binary content handling
- Large file handling (streaming)
"""
import pytest
import hashlib
import io
from tests.conftest import (
create_test_file,
compute_sha256,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_CONTENT_BINARY,
TEST_HASH_BINARY,
)
class TestHashComputation:
    """Unit tests for hash calculation functionality."""

    @pytest.mark.unit
    def test_sha256_consistent_results(self):
        """Test SHA256 hash produces consistent results for identical content."""
        payload = b"test content for hashing"
        # Hashing is a pure function: repeated calls must agree.
        digests = {compute_sha256(payload) for _ in range(3)}
        assert len(digests) == 1

    @pytest.mark.unit
    def test_sha256_different_content_different_hash(self):
        """Test SHA256 produces different hashes for different content."""
        first = compute_sha256(b"content version 1")
        second = compute_sha256(b"content version 2")
        assert first != second

    @pytest.mark.unit
    def test_sha256_format_64_char_hex(self):
        """Test SHA256 hash is always 64 character lowercase hexadecimal."""
        samples = [
            b"",                # Empty
            b"a",               # Single char
            b"Hello, World!",   # Normal string
            bytes(range(256)),  # All byte values
            b"x" * 10000,       # Larger content
        ]
        for payload in samples:
            digest = compute_sha256(payload)
            assert len(digest) == 64, (
                f"Hash length should be 64, got {len(digest)}"
            )
            assert digest == digest.lower(), "Hash should be lowercase"
            assert all(ch in "0123456789abcdef" for ch in digest), (
                "Hash should be hex"
            )

    @pytest.mark.unit
    def test_sha256_known_value(self):
        """Test SHA256 produces expected hash for known input."""
        assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_sha256_binary_content(self):
        """Test SHA256 handles binary content correctly."""
        assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY
        # Null bytes must be hashed like any other byte value.
        assert len(compute_sha256(b"\x00\x00test\x00\x00")) == 64

    @pytest.mark.unit
    def test_sha256_streaming_computation(self):
        """Test SHA256 can be computed in chunks (streaming)."""
        chunk_size = 8192
        total_size = chunk_size * 10  # 80KB
        payload = b"x" * total_size
        # Chunked updates must match the one-shot digest.
        hasher = hashlib.sha256()
        for offset in range(0, total_size, chunk_size):
            hasher.update(payload[offset : offset + chunk_size])
        assert compute_sha256(payload) == hasher.hexdigest()

    @pytest.mark.unit
    def test_sha256_order_matters(self):
        """Test that content order affects hash (not just content set)."""
        assert compute_sha256(b"AB") != compute_sha256(b"BA")
class TestStorageHashComputation:
    """Tests for hash computation in the storage layer."""

    @pytest.mark.unit
    def test_storage_computes_sha256(self, mock_storage):
        """Test storage layer correctly computes SHA256 hash."""
        stored = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert stored.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_storage_computes_md5(self, mock_storage):
        """Test storage layer also computes MD5 hash."""
        stored = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert stored.md5 == hashlib.md5(TEST_CONTENT_HELLO).hexdigest()

    @pytest.mark.unit
    def test_storage_computes_sha1(self, mock_storage):
        """Test storage layer also computes SHA1 hash."""
        stored = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert stored.sha1 == hashlib.sha1(TEST_CONTENT_HELLO).hexdigest()

    @pytest.mark.unit
    def test_storage_returns_correct_size(self, mock_storage):
        """Test storage layer returns correct file size."""
        payload = b"test content with known size"
        stored = mock_storage._store_simple(io.BytesIO(payload))
        assert stored.size == len(payload)

    @pytest.mark.unit
    def test_storage_generates_correct_s3_key(self, mock_storage):
        """Test storage layer generates correct S3 key pattern."""
        stored = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        # Key shape: fruits/{hash[:2]}/{hash[2:4]}/{hash}
        assert stored.s3_key == (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )
class TestHashEdgeCases:
    """Edge case tests for hash computation."""

    @pytest.mark.unit
    def test_hash_empty_content_rejected(self, mock_storage):
        """Test that empty content is rejected."""
        from app.storage import HashComputationError

        # Zero-byte input must raise rather than store an empty artifact.
        with pytest.raises(HashComputationError):
            mock_storage._store_simple(io.BytesIO(b""))

    @pytest.mark.unit
    def test_hash_large_file_streaming(self, mock_storage):
        """Test hash computation for large files uses streaming."""
        payload = b"x" * (10 * 1024 * 1024)  # 10MB
        stored = mock_storage._store_simple(io.BytesIO(payload))
        assert stored.sha256 == compute_sha256(payload)

    @pytest.mark.unit
    def test_hash_special_bytes(self):
        """Test hash handles all byte values correctly."""
        payload = bytes(range(256))  # every possible byte value
        digest = compute_sha256(payload)
        assert len(digest) == 64
        assert digest == TEST_HASH_BINARY

View File

@@ -1,458 +0,0 @@
"""
Unit and integration tests for reference counting behavior.
Tests cover:
- ref_count is set correctly for new artifacts
- ref_count increments on duplicate uploads
- ref_count query correctly identifies existing artifacts
- Artifact lookup by SHA256 hash works correctly
"""
import pytest
import io
from tests.conftest import (
compute_sha256,
upload_test_file,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
)
class TestRefCountQuery:
    """Tests for ref_count querying and artifact lookup."""

    @pytest.mark.integration
    def test_artifact_lookup_by_sha256(self, integration_client, test_package):
        """Test artifact lookup by SHA256 hash (primary key) works correctly."""
        project, package = test_package
        content = b"unique content for lookup test"
        expected_hash = compute_sha256(content)

        # Upload a file; the returned artifact_id is the content's SHA256.
        upload_result = upload_test_file(
            integration_client, project, package, content, tag="v1"
        )
        assert upload_result["artifact_id"] == expected_hash

        # Look up the artifact by its ID (the SHA256 digest).
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        artifact = response.json()
        assert artifact["id"] == expected_hash
        assert artifact["sha256"] == expected_hash
        assert artifact["size"] == len(content)

    @pytest.mark.integration
    def test_ref_count_query_identifies_existing_artifact(
        self, integration_client, test_package
    ):
        """Test ref_count query correctly identifies existing artifacts by hash."""
        project, package = test_package
        content = b"content for ref count query test"
        expected_hash = compute_sha256(content)

        # Upload a file with a tag (the upload response is not needed here,
        # so the previously-unused local was dropped).
        upload_test_file(integration_client, project, package, content, tag="v1")

        # Query the artifact and check its ref_count.
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        artifact = response.json()
        assert artifact["ref_count"] >= 1  # At least 1 from the tag

    @pytest.mark.integration
    def test_ref_count_set_to_1_for_new_artifact_with_tag(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count is set to 1 for new artifacts when created with a tag."""
        project, package = test_package
        content = f"brand new content for ref count test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload a brand-new file with a tag.
        upload_result = upload_test_file(
            integration_client, project, package, content, tag="initial"
        )
        assert upload_result["artifact_id"] == expected_hash
        assert upload_result["ref_count"] == 1
        assert upload_result["deduplicated"] is False

    @pytest.mark.integration
    def test_ref_count_increments_on_duplicate_upload_with_tag(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count is incremented when duplicate content is uploaded with a new tag."""
        project, package = test_package
        content = f"content that will be uploaded twice {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # First upload with a tag: a brand-new artifact.
        result1 = upload_test_file(
            integration_client, project, package, content, tag="v1"
        )
        assert result1["ref_count"] == 1
        assert result1["deduplicated"] is False

        # Second upload of the same content under a different tag: deduplicated.
        result2 = upload_test_file(
            integration_client, project, package, content, tag="v2"
        )
        assert result2["artifact_id"] == expected_hash
        assert result2["ref_count"] == 2
        assert result2["deduplicated"] is True

    @pytest.mark.integration
    def test_ref_count_after_multiple_tags(self, integration_client, test_package):
        """Test ref_count correctly reflects number of tags pointing to artifact."""
        project, package = test_package
        content = b"content for multiple tag test"
        expected_hash = compute_sha256(content)

        # Upload the same content under several tags; ref_count should track
        # the number of tags created so far.
        tags = ["v1", "v2", "v3", "latest"]
        for i, tag in enumerate(tags):
            result = upload_test_file(
                integration_client, project, package, content, tag=tag
            )
            assert result["artifact_id"] == expected_hash
            assert result["ref_count"] == i + 1

        # Verify the final ref_count via the artifact endpoint.
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == len(tags)
class TestRefCountWithDeletion:
    """Tests for ref_count behavior when tags are deleted."""

    @pytest.mark.integration
    def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package):
        """Test ref_count decrements when a tag is deleted."""
        project, package = test_package
        content = b"content for delete test"
        expected_hash = compute_sha256(content)

        # Upload the same content under two tags.
        upload_test_file(integration_client, project, package, content, tag="v1")
        upload_test_file(integration_client, project, package, content, tag="v2")

        # Verify ref_count is 2 (one per tag).
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2

        # Delete one tag.
        delete_response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/tags/v1"
        )
        assert delete_response.status_code == 204

        # Verify ref_count dropped to 1.
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_ref_count_zero_after_all_tags_deleted(
        self, integration_client, test_package
    ):
        """Test ref_count goes to 0 when all tags are deleted."""
        project, package = test_package
        content = b"content that will be orphaned"
        expected_hash = compute_sha256(content)

        # Upload with a single tag.
        upload_test_file(integration_client, project, package, content, tag="only-tag")

        # Delete the only tag, and assert the delete actually succeeded so a
        # failed delete cannot masquerade as a ref_count bug below.
        delete_response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/tags/only-tag"
        )
        assert delete_response.status_code == 204

        # Verify the artifact is now orphaned (ref_count 0).
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0
class TestRefCountCascadeDelete:
    """Tests for ref_count behavior during cascade deletions."""

    # NOTE: these tests create projects/packages inline rather than via
    # fixtures so that deletion order and cleanup are fully under test control.

    @pytest.mark.integration
    def test_ref_count_decrements_on_package_delete(
        self, integration_client, unique_test_id
    ):
        """Test ref_count decrements for all tags when package is deleted."""
        # Create a project and package manually (not using fixtures to control cleanup)
        project_name = f"cascade-pkg-{unique_test_id}"
        package_name = f"test-pkg-{unique_test_id}"
        # Create project
        response = integration_client.post(
            "/api/v1/projects",
            json={
                "name": project_name,
                "description": "Test project",
                "is_public": True,
            },
        )
        assert response.status_code == 200
        # Create package
        response = integration_client.post(
            f"/api/v1/project/{project_name}/packages",
            json={"name": package_name, "description": "Test package"},
        )
        assert response.status_code == 200
        # Upload content with multiple tags
        content = f"cascade delete test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project_name, package_name, content, tag="v1"
        )
        upload_test_file(
            integration_client, project_name, package_name, content, tag="v2"
        )
        upload_test_file(
            integration_client, project_name, package_name, content, tag="v3"
        )
        # Verify ref_count is 3
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 3
        # Delete the package (should cascade delete all tags and decrement ref_count)
        delete_response = integration_client.delete(
            f"/api/v1/project/{project_name}/packages/{package_name}"
        )
        assert delete_response.status_code == 204
        # Verify ref_count is 0 (all tags were deleted)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0
        # Cleanup: delete the project
        integration_client.delete(f"/api/v1/projects/{project_name}")

    @pytest.mark.integration
    def test_ref_count_decrements_on_project_delete(
        self, integration_client, unique_test_id
    ):
        """Test ref_count decrements for all tags in all packages when project is deleted."""
        # Create a project manually (not using fixtures to control cleanup)
        project_name = f"cascade-proj-{unique_test_id}"
        package1_name = f"pkg1-{unique_test_id}"
        package2_name = f"pkg2-{unique_test_id}"
        # Create project
        response = integration_client.post(
            "/api/v1/projects",
            json={
                "name": project_name,
                "description": "Test project",
                "is_public": True,
            },
        )
        assert response.status_code == 200
        # Create two packages
        for pkg_name in [package1_name, package2_name]:
            response = integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": pkg_name, "description": "Test package"},
            )
            assert response.status_code == 200
        # Upload same content with tags in both packages
        content = f"project cascade test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project_name, package1_name, content, tag="v1"
        )
        upload_test_file(
            integration_client, project_name, package1_name, content, tag="v2"
        )
        upload_test_file(
            integration_client, project_name, package2_name, content, tag="latest"
        )
        upload_test_file(
            integration_client, project_name, package2_name, content, tag="stable"
        )
        # Verify ref_count is 4 (2 tags in each of 2 packages)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 4
        # Delete the project (should cascade delete all packages, tags, and decrement ref_count)
        delete_response = integration_client.delete(f"/api/v1/projects/{project_name}")
        assert delete_response.status_code == 204
        # Verify ref_count is 0
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0

    @pytest.mark.integration
    def test_shared_artifact_ref_count_partial_decrement(
        self, integration_client, unique_test_id
    ):
        """Test ref_count correctly decrements when artifact is shared across packages."""
        # Create project with two packages
        project_name = f"shared-artifact-{unique_test_id}"
        package1_name = f"pkg1-{unique_test_id}"
        package2_name = f"pkg2-{unique_test_id}"
        # Create project
        response = integration_client.post(
            "/api/v1/projects",
            json={
                "name": project_name,
                "description": "Test project",
                "is_public": True,
            },
        )
        assert response.status_code == 200
        # Create two packages
        for pkg_name in [package1_name, package2_name]:
            response = integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": pkg_name, "description": "Test package"},
            )
            assert response.status_code == 200
        # Upload same content to both packages
        content = f"shared artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project_name, package1_name, content, tag="v1"
        )
        upload_test_file(
            integration_client, project_name, package2_name, content, tag="v1"
        )
        # Verify ref_count is 2
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2
        # Delete only package1 (package2 still references the artifact)
        delete_response = integration_client.delete(
            f"/api/v1/project/{project_name}/packages/{package1_name}"
        )
        assert delete_response.status_code == 204
        # Verify ref_count is 1 (only package2's tag remains)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
        # Cleanup
        integration_client.delete(f"/api/v1/projects/{project_name}")
class TestRefCountTagUpdate:
    """Tests for ref_count behavior when tags are updated to point to different artifacts."""

    # NOTE: a tag "update" is performed by re-uploading different content
    # under the existing tag name; the tag moves to the new artifact.

    @pytest.mark.integration
    def test_ref_count_adjusts_on_tag_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count adjusts when a tag is updated to point to a different artifact."""
        project, package = test_package
        # Upload two different artifacts
        content1 = f"artifact one {unique_test_id}".encode()
        content2 = f"artifact two {unique_test_id}".encode()
        hash1 = compute_sha256(content1)
        hash2 = compute_sha256(content2)
        # Upload first artifact with tag "latest"
        upload_test_file(integration_client, project, package, content1, tag="latest")
        # Verify first artifact has ref_count 1
        response = integration_client.get(f"/api/v1/artifact/{hash1}")
        assert response.json()["ref_count"] == 1
        # Upload second artifact with different tag
        upload_test_file(integration_client, project, package, content2, tag="stable")
        # Now update "latest" tag to point to second artifact
        # This is done by uploading the same content with the same tag
        upload_test_file(integration_client, project, package, content2, tag="latest")
        # Verify first artifact ref_count decreased to 0 (tag moved away)
        response = integration_client.get(f"/api/v1/artifact/{hash1}")
        assert response.json()["ref_count"] == 0
        # Verify second artifact ref_count increased to 2 (stable + latest)
        response = integration_client.get(f"/api/v1/artifact/{hash2}")
        assert response.json()["ref_count"] == 2

    @pytest.mark.integration
    def test_ref_count_unchanged_when_tag_same_artifact(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count doesn't change when tag is 'updated' to same artifact."""
        project, package = test_package
        content = f"same artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload with tag
        upload_test_file(integration_client, project, package, content, tag="v1")
        # Verify ref_count is 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
        # Upload same content with same tag (no-op)
        upload_test_file(integration_client, project, package, content, tag="v1")
        # Verify ref_count is still 1 (no double-counting)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_tag_via_post_endpoint_increments_ref_count(
        self, integration_client, test_package, unique_test_id
    ):
        """Test creating tag via POST /tags endpoint increments ref_count."""
        project, package = test_package
        content = f"tag endpoint test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        # Upload artifact without tag
        result = upload_test_file(
            integration_client, project, package, content, filename="test.bin", tag=None
        )
        artifact_id = result["artifact_id"]
        # Verify ref_count is 0 (no tags yet)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0
        # Create tag via POST endpoint
        tag_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/tags",
            json={"name": "v1.0.0", "artifact_id": artifact_id},
        )
        assert tag_response.status_code == 200
        # Verify ref_count is now 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1
        # Create another tag via POST endpoint
        tag_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/tags",
            json={"name": "latest", "artifact_id": artifact_id},
        )
        assert tag_response.status_code == 200
        # Verify ref_count is now 2
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2

View File

@@ -1,488 +0,0 @@
"""
Integration tests for statistics endpoints.
Tests cover:
- Global stats endpoint
- Deduplication stats endpoint
- Cross-project deduplication
- Timeline stats
- Export and report endpoints
- Package and artifact stats
"""
import pytest
from tests.conftest import compute_sha256, upload_test_file
class TestGlobalStats:
    """Tests for GET /api/v1/stats endpoint."""

    @pytest.mark.integration
    def test_stats_returns_valid_response(self, integration_client):
        """The global stats payload must contain every documented field."""
        response = integration_client.get("/api/v1/stats")
        assert response.status_code == 200
        data = response.json()
        for field in (
            "total_artifacts",
            "total_size_bytes",
            "unique_artifacts",
            "orphaned_artifacts",
            "orphaned_size_bytes",
            "total_uploads",
            "deduplicated_uploads",
            "deduplication_ratio",
            "storage_saved_bytes",
        ):
            assert field in data

    @pytest.mark.integration
    def test_stats_values_are_non_negative(self, integration_client):
        """No counter or byte total may ever be negative."""
        response = integration_client.get("/api/v1/stats")
        assert response.status_code == 200
        data = response.json()
        for field in (
            "total_artifacts",
            "total_size_bytes",
            "unique_artifacts",
            "orphaned_artifacts",
            "total_uploads",
            "deduplicated_uploads",
            "deduplication_ratio",
            "storage_saved_bytes",
        ):
            assert data[field] >= 0

    @pytest.mark.integration
    def test_stats_update_after_upload(
        self, integration_client, test_package, unique_test_id
    ):
        """Uploading an artifact must not decrease the global upload count."""
        project, package = test_package

        # Snapshot global stats before the upload.
        before = integration_client.get("/api/v1/stats").json()

        # Upload a file whose content is unique to this test run.
        content = f"stats test content {unique_test_id}".encode()
        upload_test_file(
            integration_client, project, package, content, tag=f"stats-{unique_test_id}"
        )

        # Snapshot again; >= (not ==) because other tests may run concurrently.
        after = integration_client.get("/api/v1/stats").json()
        assert after["total_uploads"] >= before["total_uploads"]
class TestDeduplicationStats:
    """Tests for GET /api/v1/stats/deduplication endpoint."""

    @pytest.mark.integration
    def test_dedup_stats_returns_valid_response(self, integration_client):
        """The dedup stats payload must contain every documented field."""
        response = integration_client.get("/api/v1/stats/deduplication")
        assert response.status_code == 200
        data = response.json()
        for field in (
            "total_logical_bytes",
            "total_physical_bytes",
            "bytes_saved",
            "savings_percentage",
            "total_uploads",
            "unique_artifacts",
            "duplicate_uploads",
            "average_ref_count",
            "max_ref_count",
            "most_referenced_artifacts",
        ):
            assert field in data

    @pytest.mark.integration
    def test_most_referenced_artifacts_format(self, integration_client):
        """Entries in most_referenced_artifacts must carry per-artifact details."""
        response = integration_client.get("/api/v1/stats/deduplication")
        assert response.status_code == 200
        top = response.json()["most_referenced_artifacts"]
        assert isinstance(top, list)
        # Structure is only checkable when at least one artifact exists.
        if top:
            for field in ("artifact_id", "ref_count", "size", "storage_saved"):
                assert field in top[0]

    @pytest.mark.integration
    def test_dedup_stats_with_top_n_param(self, integration_client):
        """The top_n parameter caps the most-referenced list length."""
        response = integration_client.get("/api/v1/stats/deduplication?top_n=3")
        assert response.status_code == 200
        assert len(response.json()["most_referenced_artifacts"]) <= 3

    @pytest.mark.integration
    def test_savings_percentage_valid_range(self, integration_client):
        """Savings percentage is a percentage: between 0 and 100 inclusive."""
        response = integration_client.get("/api/v1/stats/deduplication")
        assert response.status_code == 200
        assert 0 <= response.json()["savings_percentage"] <= 100
class TestCrossProjectStats:
    """Tests for GET /api/v1/stats/cross-project endpoint."""

    @pytest.mark.integration
    def test_cross_project_returns_valid_response(self, integration_client):
        """Test cross-project stats returns expected fields."""
        response = integration_client.get("/api/v1/stats/cross-project")
        assert response.status_code == 200
        data = response.json()
        assert "shared_artifacts_count" in data
        assert "total_cross_project_savings" in data
        assert "shared_artifacts" in data
        assert isinstance(data["shared_artifacts"], list)

    @pytest.mark.integration
    def test_cross_project_respects_limit(self, integration_client):
        """Test cross-project stats respects limit parameter."""
        response = integration_client.get("/api/v1/stats/cross-project?limit=5")
        assert response.status_code == 200
        data = response.json()
        assert len(data["shared_artifacts"]) <= 5

    @pytest.mark.integration
    def test_cross_project_detects_shared_artifacts(
        self, integration_client, unique_test_id
    ):
        """Test cross-project deduplication is detected."""
        content = f"shared across projects {unique_test_id}".encode()
        # Two throwaway projects sharing one artifact.
        proj1 = f"cross-proj-a-{unique_test_id}"
        proj2 = f"cross-proj-b-{unique_test_id}"
        try:
            # Create both projects and a package in each; assert each setup
            # call so a failure surfaces here rather than as a confusing
            # upload error further down.
            for name in (proj1, proj2):
                response = integration_client.post(
                    "/api/v1/projects",
                    json={"name": name, "description": "Test", "is_public": True},
                )
                assert response.status_code == 200
                response = integration_client.post(
                    f"/api/v1/project/{name}/packages",
                    json={"name": "pkg", "description": "Test"},
                )
                assert response.status_code == 200

            # Upload identical content to both projects.
            upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
            upload_test_file(integration_client, proj2, "pkg", content, tag="v1")

            # The shared artifact must be reported by the stats endpoint.
            response = integration_client.get("/api/v1/stats/cross-project")
            assert response.status_code == 200
            data = response.json()
            assert data["shared_artifacts_count"] >= 1
        finally:
            # Cleanup regardless of assertion outcome.
            integration_client.delete(f"/api/v1/projects/{proj1}")
            integration_client.delete(f"/api/v1/projects/{proj2}")
class TestTimelineStats:
    """Tests for GET /api/v1/stats/timeline endpoint."""

    @staticmethod
    def _assert_period(client, period):
        """Request the timeline for `period` and assert it is echoed back."""
        response = client.get(f"/api/v1/stats/timeline?period={period}")
        assert response.status_code == 200
        assert response.json()["period"] == period

    @pytest.mark.integration
    def test_timeline_returns_valid_response(self, integration_client):
        """The default timeline payload must carry all documented fields."""
        response = integration_client.get("/api/v1/stats/timeline")
        assert response.status_code == 200
        data = response.json()
        for field in ("period", "start_date", "end_date", "data_points"):
            assert field in data
        assert isinstance(data["data_points"], list)

    @pytest.mark.integration
    def test_timeline_daily_period(self, integration_client):
        """Timeline honors period=daily."""
        self._assert_period(integration_client, "daily")

    @pytest.mark.integration
    def test_timeline_weekly_period(self, integration_client):
        """Timeline honors period=weekly."""
        self._assert_period(integration_client, "weekly")

    @pytest.mark.integration
    def test_timeline_monthly_period(self, integration_client):
        """Timeline honors period=monthly."""
        self._assert_period(integration_client, "monthly")

    @pytest.mark.integration
    def test_timeline_invalid_period_rejected(self, integration_client):
        """An unknown period value must be rejected with 422."""
        response = integration_client.get("/api/v1/stats/timeline?period=invalid")
        assert response.status_code == 422

    @pytest.mark.integration
    def test_timeline_data_point_structure(self, integration_client):
        """Each data point must expose the documented per-bucket fields."""
        response = integration_client.get("/api/v1/stats/timeline")
        assert response.status_code == 200
        points = response.json()["data_points"]
        # Structure is only checkable when at least one bucket exists.
        if points:
            for field in (
                "date",
                "total_uploads",
                "unique_artifacts",
                "duplicated_uploads",
                "bytes_saved",
            ):
                assert field in points[0]
class TestExportEndpoint:
    """Tests for GET /api/v1/stats/export endpoint."""

    @pytest.mark.integration
    def test_export_json_format(self, integration_client):
        """JSON export returns the stats plus a generation timestamp."""
        response = integration_client.get("/api/v1/stats/export?format=json")
        assert response.status_code == 200
        payload = response.json()
        assert "total_artifacts" in payload
        assert "generated_at" in payload

    @pytest.mark.integration
    def test_export_csv_format(self, integration_client):
        """CSV export sets the content type and carries a header row."""
        response = integration_client.get("/api/v1/stats/export?format=csv")
        assert response.status_code == 200
        assert "text/csv" in response.headers.get("content-type", "")
        body = response.text
        assert "Metric,Value" in body
        assert "total_artifacts" in body

    @pytest.mark.integration
    def test_export_invalid_format_rejected(self, integration_client):
        """An unsupported export format must be rejected with 422."""
        response = integration_client.get("/api/v1/stats/export?format=xml")
        assert response.status_code == 422
class TestReportEndpoint:
    """Tests for GET /api/v1/stats/report endpoint."""

    @pytest.mark.integration
    def test_report_markdown_format(self, integration_client):
        """Markdown report echoes its format and includes the title heading."""
        response = integration_client.get("/api/v1/stats/report?format=markdown")
        assert response.status_code == 200
        payload = response.json()
        assert payload["format"] == "markdown"
        assert "generated_at" in payload
        assert "content" in payload
        assert "# Orchard Storage Report" in payload["content"]

    @pytest.mark.integration
    def test_report_json_format(self, integration_client):
        """JSON report echoes its format and carries content."""
        response = integration_client.get("/api/v1/stats/report?format=json")
        assert response.status_code == 200
        payload = response.json()
        assert payload["format"] == "json"
        assert "content" in payload

    @pytest.mark.integration
    def test_report_contains_sections(self, integration_client):
        """The markdown report must include every top-level section."""
        response = integration_client.get("/api/v1/stats/report?format=markdown")
        assert response.status_code == 200
        body = response.json()["content"]
        for heading in ("## Overview", "## Storage", "## Uploads"):
            assert heading in body
class TestProjectStats:
    """Tests for GET /api/v1/projects/:project/stats endpoint."""

    @pytest.mark.integration
    def test_project_stats_returns_valid_response(
        self, integration_client, test_project
    ):
        """The project stats payload must contain every documented field."""
        response = integration_client.get(f"/api/v1/projects/{test_project}/stats")
        assert response.status_code == 200
        data = response.json()
        for field in (
            "project_id",
            "project_name",
            "package_count",
            "tag_count",
            "artifact_count",
            "total_size_bytes",
            "upload_count",
            "deduplicated_uploads",
            "storage_saved_bytes",
            "deduplication_ratio",
        ):
            assert field in data

    @pytest.mark.integration
    def test_project_stats_not_found(self, integration_client):
        """Stats for a missing project must yield 404."""
        response = integration_client.get("/api/v1/projects/nonexistent-project/stats")
        assert response.status_code == 404
class TestPackageStats:
    """Tests for GET /api/v1/project/:project/packages/:package/stats endpoint."""

    @pytest.mark.integration
    def test_package_stats_returns_valid_response(
        self, integration_client, test_package
    ):
        """The package stats payload must contain every documented field."""
        project, package = test_package
        response = integration_client.get(
            f"/api/v1/project/{project}/packages/{package}/stats"
        )
        assert response.status_code == 200
        data = response.json()
        for field in (
            "package_id",
            "package_name",
            "project_name",
            "tag_count",
            "artifact_count",
            "total_size_bytes",
            "upload_count",
            "deduplicated_uploads",
            "storage_saved_bytes",
            "deduplication_ratio",
        ):
            assert field in data

    @pytest.mark.integration
    def test_package_stats_not_found(self, integration_client, test_project):
        """Stats for a missing package must yield 404."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
        )
        assert response.status_code == 404
class TestArtifactStats:
    """Tests for GET /api/v1/artifact/:id/stats endpoint."""

    @pytest.mark.integration
    def test_artifact_stats_returns_valid_response(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact stats returns expected fields."""
        project, package = test_package
        content = f"artifact stats test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload an artifact so its stats exist.
        upload_test_file(
            integration_client, project, package, content, tag=f"art-{unique_test_id}"
        )

        # Fetch the per-artifact stats and check every documented field.
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
        assert response.status_code == 200
        data = response.json()
        for field in (
            "artifact_id",
            "sha256",
            "size",
            "ref_count",
            "storage_savings",
            "tags",
            "projects",
            "packages",
        ):
            assert field in data

    @pytest.mark.integration
    def test_artifact_stats_not_found(self, integration_client):
        """Test artifact stats returns 404 for non-existent artifact."""
        # Syntactically valid SHA256 that matches no stored artifact.
        fake_hash = "0" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_stats_shows_correct_projects(
        self, integration_client, unique_test_id
    ):
        """Test artifact stats shows all projects using the artifact."""
        content = f"multi-project artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        proj1 = f"art-stats-a-{unique_test_id}"
        proj2 = f"art-stats-b-{unique_test_id}"
        try:
            # Create both projects and a package in each; assert each setup
            # call so failures surface here rather than downstream.
            for name in (proj1, proj2):
                response = integration_client.post(
                    "/api/v1/projects",
                    json={"name": name, "description": "Test", "is_public": True},
                )
                assert response.status_code == 200
                response = integration_client.post(
                    f"/api/v1/project/{name}/packages",
                    json={"name": "pkg", "description": "Test"},
                )
                assert response.status_code == 200

            # Upload identical content to both projects.
            upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
            upload_test_file(integration_client, proj2, "pkg", content, tag="v1")

            # Both projects must be listed against the shared artifact.
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
            assert response.status_code == 200
            data = response.json()
            assert len(data["projects"]) == 2
            assert proj1 in data["projects"]
            assert proj2 in data["projects"]
        finally:
            # Cleanup regardless of assertion outcome.
            integration_client.delete(f"/api/v1/projects/{proj1}")
            integration_client.delete(f"/api/v1/projects/{proj2}")

View File

View File

@@ -0,0 +1,271 @@
"""
Unit tests for SQLAlchemy models.
Tests cover:
- Model instantiation and defaults
- Property aliases (sha256, format_metadata)
- Relationship definitions
- Constraint definitions
"""
import pytest
import uuid
from datetime import datetime
class TestArtifactModel:
    """Tests for the Artifact model."""

    @pytest.mark.unit
    def test_artifact_sha256_property(self):
        """The sha256 property mirrors the primary-key id column."""
        from app.models import Artifact

        digest = "a" * 64
        artifact = Artifact(
            id=digest,
            size=1024,
            created_by="test-user",
            s3_key="fruits/aa/aa/test",
        )
        assert artifact.sha256 == artifact.id
        assert artifact.sha256 == digest

    @pytest.mark.unit
    def test_artifact_format_metadata_alias(self):
        """format_metadata reads through to artifact_metadata."""
        from app.models import Artifact

        meta = {"format": "tarball", "version": "1.0.0"}
        artifact = Artifact(
            id="b" * 64,
            size=2048,
            created_by="test-user",
            s3_key="fruits/bb/bb/test",
            artifact_metadata=meta,
        )
        assert artifact.format_metadata == meta
        assert artifact.format_metadata == artifact.artifact_metadata

    @pytest.mark.unit
    def test_artifact_format_metadata_setter(self):
        """Assigning format_metadata writes through to artifact_metadata."""
        from app.models import Artifact

        artifact = Artifact(
            id="c" * 64,
            size=512,
            created_by="test-user",
            s3_key="fruits/cc/cc/test",
        )
        replacement = {"type": "rpm", "arch": "x86_64"}
        artifact.format_metadata = replacement
        assert artifact.artifact_metadata == replacement
        assert artifact.format_metadata == replacement

    @pytest.mark.unit
    def test_artifact_default_ref_count(self):
        """The ref_count column declares a default of 1."""
        from app.models import Artifact

        # Inspect the column definition rather than relying on a live session.
        column = Artifact.__table__.columns["ref_count"]
        assert column.default is not None
        assert column.default.arg == 1

    @pytest.mark.unit
    def test_artifact_default_metadata_is_dict(self):
        """Unsaved instances carry either no metadata or a dict."""
        from app.models import Artifact

        artifact = Artifact(
            id="e" * 64,
            size=100,
            created_by="test-user",
            s3_key="fruits/ee/ee/test",
        )
        # Column defaults only apply at flush time, so None is acceptable here.
        assert artifact.artifact_metadata is None or isinstance(
            artifact.artifact_metadata, dict
        )
class TestProjectModel:
    """Tests for the Project model."""

    @pytest.mark.unit
    def test_project_default_is_public(self):
        """Test project is_public column has default value of True."""
        from app.models import Project

        # Check the column definition has the right default
        is_public_col = Project.__table__.columns["is_public"]
        assert is_public_col.default is not None
        assert is_public_col.default.arg is True

    @pytest.mark.unit
    def test_project_uuid_generation(self):
        """Test the project id column declares a default generator.

        Bug fix: the previous assertion
        ``project.id is not None or hasattr(Project.id, "default")``
        was effectively vacuous -- SQLAlchemy applies python-side column
        defaults only at flush time, so an unsaved instance legitimately
        has ``id == None``, and the ``hasattr`` fallback let the test pass
        regardless of whether a default was configured. Checking the column
        definition mirrors the other default tests in this module.
        """
        from app.models import Project

        id_col = Project.__table__.columns["id"]
        # A default UUID generator must exist, either python-side
        # (Column default=uuid.uuid4) or server-side (gen_random_uuid()).
        assert id_col.default is not None or id_col.server_default is not None
class TestPackageModel:
    """Unit tests for Package model column defaults."""

    @pytest.mark.unit
    def test_package_default_format(self):
        """The format column must default to 'generic'."""
        from app.models import Package

        # Read the default off the table definition; instance attributes
        # only pick up python-side defaults at flush time.
        column = Package.__table__.columns["format"]
        assert column.default is not None
        assert column.default.arg == "generic"

    @pytest.mark.unit
    def test_package_default_platform(self):
        """The platform column must default to 'any'."""
        from app.models import Package

        column = Package.__table__.columns["platform"]
        assert column.default is not None
        assert column.default.arg == "any"
class TestTagModel:
    """Unit tests for the Tag model."""

    @pytest.mark.unit
    def test_tag_requires_package_id(self):
        """A tag must carry both its owning package and its target artifact."""
        from app.models import Tag

        owner = uuid.uuid4()
        tag = Tag(
            name="v1.0.0",
            package_id=owner,
            artifact_id="f" * 64,
            created_by="test-user",
        )
        assert tag.package_id is not None
        assert tag.artifact_id == "f" * 64
class TestTagHistoryModel:
    """Unit tests for the TagHistory model."""

    @pytest.mark.unit
    def test_tag_history_default_change_type(self):
        """The change_type column must default to 'update'."""
        from app.models import TagHistory

        # Inspect the column definition; python-side defaults are only
        # applied when the row is flushed.
        column = TagHistory.__table__.columns["change_type"]
        assert column.default is not None
        assert column.default.arg == "update"

    @pytest.mark.unit
    def test_tag_history_allows_null_old_artifact(self):
        """Create events have no predecessor, so old_artifact_id may be NULL."""
        from app.models import TagHistory

        entry = TagHistory(
            tag_id=uuid.uuid4(),
            old_artifact_id=None,
            new_artifact_id="h" * 64,
            change_type="create",
            changed_by="test-user",
        )
        assert entry.old_artifact_id is None
class TestUploadModel:
    """Unit tests for Upload model column defaults."""

    @pytest.mark.unit
    def test_upload_default_deduplicated_is_false(self):
        """The deduplicated column must default to False."""
        from app.models import Upload

        # Check the table definition rather than an instance: python-side
        # defaults are applied at flush time, not at construction.
        column = Upload.__table__.columns["deduplicated"]
        assert column.default is not None
        assert column.default.arg is False

    @pytest.mark.unit
    def test_upload_default_checksum_verified_is_true(self):
        """The checksum_verified column must default to True."""
        from app.models import Upload

        column = Upload.__table__.columns["checksum_verified"]
        assert column.default is not None
        assert column.default.arg is True
class TestAccessPermissionModel:
    """Unit tests for the AccessPermission model."""

    @pytest.mark.unit
    def test_access_permission_levels(self):
        """Every level accepted by the table's check constraint is assignable."""
        from app.models import AccessPermission

        # Mirrors the value set enforced by the check constraint.
        for level in ("read", "write", "admin"):
            permission = AccessPermission(
                project_id=uuid.uuid4(),
                user_id="test-user",
                level=level,
            )
            assert permission.level == level
class TestAuditLogModel:
    """Unit tests for the AuditLog model."""

    @pytest.mark.unit
    def test_audit_log_required_fields(self):
        """action, resource and user_id round-trip through the constructor."""
        from app.models import AuditLog

        entry = AuditLog(
            action="project.create",
            resource="/projects/test-project",
            user_id="test-user",
        )
        assert entry.action == "project.create"
        assert entry.resource == "/projects/test-project"
        assert entry.user_id == "test-user"

    @pytest.mark.unit
    def test_audit_log_optional_details(self):
        """The optional details JSON payload is stored exactly as given."""
        from app.models import AuditLog

        payload = {"old_value": "v1", "new_value": "v2"}
        entry = AuditLog(
            action="tag.update",
            resource="/projects/test/packages/pkg/tags/latest",
            user_id="test-user",
            details=payload,
        )
        assert entry.details == payload

View File

@@ -0,0 +1,439 @@
"""
Unit tests for S3 storage layer.
Tests cover:
- SHA256 hash calculation and consistency
- Hash format validation (64-char hex)
- S3 key generation pattern
- Deduplication behavior (_exists method)
- Storage result computation (MD5, SHA1, size)
- Edge cases (empty files, large files, binary content)
"""
import pytest
import hashlib
import io
from tests.factories import (
compute_sha256,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_CONTENT_BINARY,
TEST_HASH_BINARY,
)
# =============================================================================
# Hash Computation Tests
# =============================================================================
class TestHashComputation:
    """Unit tests for SHA256 hash calculation behaviour."""

    @pytest.mark.unit
    def test_sha256_consistent_results(self):
        """Hashing identical bytes repeatedly yields a single digest."""
        payload = b"test content for hashing"
        digests = {compute_sha256(payload) for _ in range(3)}
        assert len(digests) == 1

    @pytest.mark.unit
    def test_sha256_different_content_different_hash(self):
        """Distinct inputs must not produce the same digest."""
        assert compute_sha256(b"content version 1") != compute_sha256(
            b"content version 2"
        )

    @pytest.mark.unit
    def test_sha256_format_64_char_hex(self):
        """Digests are always 64 lowercase hexadecimal characters."""
        samples = [
            b"",                # empty input
            b"a",               # single byte
            b"Hello, World!",   # ordinary text
            bytes(range(256)),  # every byte value
            b"x" * 10000,       # larger payload
        ]
        hex_alphabet = set("0123456789abcdef")
        for payload in samples:
            digest = compute_sha256(payload)
            assert len(digest) == 64, (
                f"Hash length should be 64, got {len(digest)}"
            )
            assert digest == digest.lower(), "Hash should be lowercase"
            assert set(digest) <= hex_alphabet, "Hash should be hex"

    @pytest.mark.unit
    def test_sha256_known_value(self):
        """A fixed input must produce its published digest."""
        assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_sha256_binary_content(self):
        """Binary payloads, including embedded NUL bytes, hash correctly."""
        assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY
        digest = compute_sha256(b"\x00\x00test\x00\x00")
        assert len(digest) == 64

    @pytest.mark.unit
    def test_sha256_streaming_computation(self):
        """Chunked (streaming) hashing matches one-shot hashing."""
        chunk = 8192
        payload = b"x" * (chunk * 10)  # 80KB
        # Feed the hasher incrementally, as a streaming uploader would.
        hasher = hashlib.sha256()
        offset = 0
        while offset < len(payload):
            hasher.update(payload[offset : offset + chunk])
            offset += chunk
        assert compute_sha256(payload) == hasher.hexdigest()

    @pytest.mark.unit
    def test_sha256_order_matters(self):
        """Byte order is significant, not just the multiset of bytes."""
        assert compute_sha256(b"AB") != compute_sha256(b"BA")
# =============================================================================
# Storage Hash Computation Tests
# =============================================================================
class TestStorageHashComputation:
    """Tests for digest computation inside the storage layer."""

    @pytest.mark.unit
    def test_storage_computes_sha256(self, mock_storage):
        """The stored result carries the expected SHA256 digest."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert outcome.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_storage_computes_md5(self, mock_storage):
        """An MD5 digest is computed alongside the SHA256."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert outcome.md5 == hashlib.md5(TEST_CONTENT_HELLO).hexdigest()

    @pytest.mark.unit
    def test_storage_computes_sha1(self, mock_storage):
        """A SHA1 digest is computed alongside the SHA256."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        assert outcome.sha1 == hashlib.sha1(TEST_CONTENT_HELLO).hexdigest()

    @pytest.mark.unit
    def test_storage_returns_correct_size(self, mock_storage):
        """The reported size matches the payload length in bytes."""
        payload = b"test content with known size"
        outcome = mock_storage._store_simple(io.BytesIO(payload))
        assert outcome.size == len(payload)

    @pytest.mark.unit
    def test_storage_generates_correct_s3_key(self, mock_storage):
        """Keys follow the fruits/{hash[:2]}/{hash[2:4]}/{hash} layout."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        shard1, shard2 = TEST_HASH_HELLO[:2], TEST_HASH_HELLO[2:4]
        assert outcome.s3_key == f"fruits/{shard1}/{shard2}/{TEST_HASH_HELLO}"
# =============================================================================
# Hash Edge Cases
# =============================================================================
class TestHashEdgeCases:
    """Edge case tests for hash computation."""

    @pytest.mark.unit
    def test_hash_empty_content_rejected(self, mock_storage):
        """Zero-byte uploads must raise HashComputationError."""
        from app.storage import HashComputationError

        with pytest.raises(HashComputationError):
            mock_storage._store_simple(io.BytesIO(b""))

    @pytest.mark.unit
    def test_hash_large_file_streaming(self, mock_storage):
        """A 10MB payload hashes to the same digest as direct computation."""
        payload = b"x" * (10 * 1024 * 1024)
        outcome = mock_storage._store_simple(io.BytesIO(payload))
        assert outcome.sha256 == compute_sha256(payload)

    @pytest.mark.unit
    def test_hash_special_bytes(self):
        """All 256 byte values hash to the known reference digest."""
        digest = compute_sha256(bytes(range(256)))
        assert len(digest) == 64
        assert digest == TEST_HASH_BINARY
# =============================================================================
# S3 Existence Check Tests
# =============================================================================
class TestExistsMethod:
    """Tests for the _exists() S3 object-existence probe."""

    @pytest.mark.unit
    def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client):
        """A pre-populated key is reported as present."""
        key = "fruits/df/fd/test-hash"
        mock_s3_client.objects[key] = b"content"
        assert mock_storage._exists(key) is True

    @pytest.mark.unit
    def test_exists_returns_false_for_nonexistent_key(self, mock_storage):
        """An absent key is reported as missing."""
        assert mock_storage._exists("fruits/no/ne/nonexistent-key") is False

    @pytest.mark.unit
    def test_exists_handles_404_error(self, mock_storage):
        """The ClientError raised by the mock for unknown keys maps to False."""
        assert mock_storage._exists("fruits/xx/yy/does-not-exist") is False
# =============================================================================
# S3 Key Generation Tests
# =============================================================================
class TestS3KeyGeneration:
    """Tests for the S3 key pattern."""

    @pytest.mark.unit
    def test_s3_key_pattern(self):
        """Keys follow the pattern fruits/{hash[:2]}/{hash[2:4]}/{hash}."""
        digest = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"
        built = f"fruits/{digest[:2]}/{digest[2:4]}/{digest}"
        # First two shard levels come from the leading digest characters.
        assert built == f"fruits/ab/cd/{digest}"

    @pytest.mark.unit
    def test_s3_key_generation_in_storage(self, mock_storage):
        """The storage layer emits the canonical key for known content."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))
        expected = (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )
        assert outcome.s3_key == expected

    @pytest.mark.unit
    def test_s3_key_uses_sha256_hash(self, mock_storage):
        """The SHA256 digest of the payload appears inside the key."""
        payload = b"unique test content for key test"
        outcome = mock_storage._store_simple(io.BytesIO(payload))
        assert compute_sha256(payload) in outcome.s3_key
# =============================================================================
# Deduplication Behavior Tests
# =============================================================================
class TestDeduplicationBehavior:
    """Tests for deduplication (skip the S3 upload when the object exists)."""

    @pytest.mark.unit
    def test_skips_upload_when_exists(self, mock_storage, mock_s3_client):
        """No put_object call is made for content already in the bucket."""
        payload = TEST_CONTENT_HELLO
        key = (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )
        # Simulate an artifact that was uploaded earlier.
        mock_s3_client.objects[key] = payload

        # Wrap put_object so invocations can be counted.
        real_put = mock_s3_client.put_object
        calls = []

        def counting_put(*args, **kwargs):
            calls.append((args, kwargs))
            return real_put(*args, **kwargs)

        mock_s3_client.put_object = counting_put

        outcome = mock_storage._store_simple(io.BytesIO(payload))

        # Deduplication means the upload path was never taken.
        assert len(calls) == 0
        assert outcome.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_uploads_when_not_exists(self, mock_storage, mock_s3_client):
        """New content is written through to the bucket."""
        payload = b"brand new unique content"
        digest = compute_sha256(payload)
        key = f"fruits/{digest[:2]}/{digest[2:4]}/{digest}"
        # Precondition: the object is absent before the store.
        assert key not in mock_s3_client.objects

        mock_storage._store_simple(io.BytesIO(payload))

        assert key in mock_s3_client.objects
        assert mock_s3_client.objects[key] == payload

    @pytest.mark.unit
    def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client):
        """Storing identical content twice yields identical hash and key."""
        payload = b"content to be stored twice"
        first = mock_storage._store_simple(io.BytesIO(payload))
        second = mock_storage._store_simple(io.BytesIO(payload))
        assert first.sha256 == second.sha256
        assert first.s3_key == second.s3_key

    @pytest.mark.unit
    def test_different_content_different_keys(self, mock_storage):
        """Distinct content maps to distinct hashes and S3 keys."""
        first = mock_storage._store_simple(io.BytesIO(b"first content"))
        second = mock_storage._store_simple(io.BytesIO(b"second content"))
        assert first.sha256 != second.sha256
        assert first.s3_key != second.s3_key
# =============================================================================
# Deduplication Edge Cases
# =============================================================================
class TestDeduplicationEdgeCases:
    """Edge case tests for deduplication."""

    @pytest.mark.unit
    def test_same_content_different_filenames(self, mock_storage):
        """Identical content is deduplicated regardless of metadata/filename."""
        content = b"identical content"
        # Store with "filename1"
        file1 = io.BytesIO(content)
        result1 = mock_storage._store_simple(file1)
        # Store with "filename2" (same content)
        file2 = io.BytesIO(content)
        result2 = mock_storage._store_simple(file2)
        # Both should have same hash (content-addressable)
        assert result1.sha256 == result2.sha256

    @pytest.mark.unit
    def test_whitespace_only_difference(self, mock_storage):
        """Content differing only by whitespace produces different hashes.

        Bug fix: the second sample was previously byte-identical to the
        first (``b"test content"``) despite its "# Extra space" comment,
        so the digest set below held only two values and the ``== 3``
        assertion could never pass. The sample now actually contains the
        extra interior space.
        """
        content1 = b"test content"
        content2 = b"test  content"  # Extra (double) interior space
        content3 = b"test content "  # Trailing space
        file1 = io.BytesIO(content1)
        file2 = io.BytesIO(content2)
        file3 = io.BytesIO(content3)
        result1 = mock_storage._store_simple(file1)
        result2 = mock_storage._store_simple(file2)
        result3 = mock_storage._store_simple(file3)
        # All three digests must be distinct (content-addressable).
        assert len({result1.sha256, result2.sha256, result3.sha256}) == 3

View File

@@ -0,0 +1,98 @@
-- Migration 004: Project and Package History Tables
-- Adds history tracking tables for project and package metadata changes

-- ============================================
-- Project History Table
-- ============================================
-- One row per changed field per update; rows are written by the
-- log_project_changes() trigger defined later in this migration.
CREATE TABLE IF NOT EXISTS project_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- History rows are deleted together with their project.
    project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    field_name VARCHAR(100) NOT NULL,
    -- Textual before/after values; NULL old_value means no prior value.
    old_value TEXT,
    new_value TEXT,
    changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    changed_by VARCHAR(255) NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_project_history_project_id ON project_history(project_id);
CREATE INDEX IF NOT EXISTS idx_project_history_changed_at ON project_history(changed_at);
-- NOTE(review): the composite index below covers project_id as its leftmost
-- prefix, making idx_project_history_project_id redundant -- consider
-- dropping the single-column index in a follow-up migration.
CREATE INDEX IF NOT EXISTS idx_project_history_project_changed_at ON project_history(project_id, changed_at);
-- ============================================
-- Package History Table
-- ============================================
-- One row per changed field per update; rows are written by the
-- log_package_changes() trigger defined later in this migration.
CREATE TABLE IF NOT EXISTS package_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- History rows are deleted together with their package.
    package_id UUID NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
    field_name VARCHAR(100) NOT NULL,
    -- Textual before/after values; NULL old_value means no prior value.
    old_value TEXT,
    new_value TEXT,
    changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    changed_by VARCHAR(255) NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_package_history_package_id ON package_history(package_id);
CREATE INDEX IF NOT EXISTS idx_package_history_changed_at ON package_history(changed_at);
-- NOTE(review): the composite index below covers package_id as its leftmost
-- prefix, making idx_package_history_package_id redundant -- consider
-- dropping the single-column index in a follow-up migration.
CREATE INDEX IF NOT EXISTS idx_package_history_package_changed_at ON package_history(package_id, changed_at);
-- ============================================
-- Project Update Trigger
-- ============================================
-- Row-level AFTER UPDATE trigger: writes one project_history row per
-- tracked field (description, is_public) that actually changed.
-- IS DISTINCT FROM is NULL-safe, so NULL <-> value transitions are
-- recorded as well.
CREATE OR REPLACE FUNCTION log_project_changes()
RETURNS TRIGGER AS $$
BEGIN
    -- Log description change
    IF OLD.description IS DISTINCT FROM NEW.description THEN
        INSERT INTO project_history (project_id, field_name, old_value, new_value, changed_by)
        -- current_setting(..., true) yields NULL instead of raising when the
        -- app.current_user session variable is unset; default to 'system'.
        VALUES (NEW.id, 'description', OLD.description, NEW.description, COALESCE(current_setting('app.current_user', true), 'system'));
    END IF;
    -- Log is_public change (boolean rendered as text for the history row)
    IF OLD.is_public IS DISTINCT FROM NEW.is_public THEN
        INSERT INTO project_history (project_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'is_public', OLD.is_public::text, NEW.is_public::text, COALESCE(current_setting('app.current_user', true), 'system'));
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop-then-create keeps the migration idempotent: PostgreSQL's
-- CREATE TRIGGER has no IF NOT EXISTS form.
DROP TRIGGER IF EXISTS project_changes_trigger ON projects;
CREATE TRIGGER project_changes_trigger
    AFTER UPDATE ON projects
    FOR EACH ROW
    EXECUTE FUNCTION log_project_changes();
-- ============================================
-- Package Update Trigger
-- ============================================
-- Row-level AFTER UPDATE trigger: writes one package_history row per
-- tracked field (description, format, platform) that actually changed.
-- IS DISTINCT FROM is NULL-safe, so NULL <-> value transitions are
-- recorded as well.
CREATE OR REPLACE FUNCTION log_package_changes()
RETURNS TRIGGER AS $$
BEGIN
    -- Log description change
    IF OLD.description IS DISTINCT FROM NEW.description THEN
        INSERT INTO package_history (package_id, field_name, old_value, new_value, changed_by)
        -- current_setting(..., true) yields NULL instead of raising when the
        -- app.current_user session variable is unset; default to 'system'.
        VALUES (NEW.id, 'description', OLD.description, NEW.description, COALESCE(current_setting('app.current_user', true), 'system'));
    END IF;
    -- Log format change
    IF OLD.format IS DISTINCT FROM NEW.format THEN
        INSERT INTO package_history (package_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'format', OLD.format, NEW.format, COALESCE(current_setting('app.current_user', true), 'system'));
    END IF;
    -- Log platform change
    IF OLD.platform IS DISTINCT FROM NEW.platform THEN
        INSERT INTO package_history (package_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'platform', OLD.platform, NEW.platform, COALESCE(current_setting('app.current_user', true), 'system'));
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop-then-create keeps the migration idempotent: PostgreSQL's
-- CREATE TRIGGER has no IF NOT EXISTS form.
DROP TRIGGER IF EXISTS package_changes_trigger ON packages;
CREATE TRIGGER package_changes_trigger
    AFTER UPDATE ON packages
    FOR EACH ROW
    EXECUTE FUNCTION log_package_changes();

View File

@@ -0,0 +1,83 @@
-- Migration 005: Upload Workflow Enhancements
-- Adds status tracking and error handling for uploads

-- ============================================
-- Add workflow columns to the uploads table
-- ============================================
-- ALTER TABLE ... ADD COLUMN IF NOT EXISTS (PostgreSQL 9.6+) replaces the
-- previous DO-block probes of information_schema.columns: it is atomic,
-- shorter, and not fooled by a same-named table in another schema (the old
-- probes did not filter on table_schema).

-- Upload lifecycle state; constrained to pending/completed/failed below.
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'completed' NOT NULL;

-- Populated only when an upload fails.
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS error_message TEXT;

-- Client-supplied SHA256 (X-Checksum-SHA256 header) used for verification.
ALTER TABLE uploads ADD COLUMN IF NOT EXISTS client_checksum VARCHAR(64);
-- ============================================
-- Indexes for upload status queries
-- ============================================
-- A single composite index serves both status-only filters (leftmost
-- prefix) and status + uploaded_at queries; a separate single-column
-- idx_uploads_status would be a redundant duplicate of its first column.
CREATE INDEX IF NOT EXISTS idx_uploads_status_uploaded_at ON uploads(status, uploaded_at);

-- ============================================
-- Constraint to validate status values
-- ============================================
-- Probe pg_constraint instead of information_schema.constraint_column_usage:
-- constraint_column_usage only exposes constraints on tables owned by the
-- current role, so the old probe could wrongly report "absent" and the
-- re-added constraint would then fail with a duplicate-name error.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'check_upload_status'
          AND conrelid = 'uploads'::regclass
    ) THEN
        ALTER TABLE uploads ADD CONSTRAINT check_upload_status
            CHECK (status IN ('pending', 'completed', 'failed'));
    END IF;
END $$;
-- ============================================
-- Create table for tracking in-progress uploads (for 409 conflict detection)
-- ============================================
CREATE TABLE IF NOT EXISTS upload_locks (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    sha256_hash VARCHAR(64) NOT NULL,
    -- A lock is scoped to one package and disappears with it.
    package_id UUID NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
    locked_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    locked_by VARCHAR(255) NOT NULL,
    -- Locks are leases: cleanup_expired_upload_locks() removes stale rows.
    expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
    -- Named explicitly so errors and migrations are greppable. Its backing
    -- unique index also serves every (sha256_hash, package_id) lookup, which
    -- is why the previously planned idx_upload_locks_hash_package index is
    -- omitted: it would duplicate this constraint's index exactly.
    CONSTRAINT upload_locks_sha256_hash_package_id_key UNIQUE (sha256_hash, package_id)
);

-- Supports the expiry sweep in cleanup_expired_upload_locks().
CREATE INDEX IF NOT EXISTS idx_upload_locks_expires_at ON upload_locks(expires_at);
-- ============================================
-- Function to clean up expired upload locks
-- ============================================
-- Deletes every lock whose lease has lapsed and returns how many rows
-- were removed. Intended to be invoked periodically (e.g. by a scheduler).
CREATE OR REPLACE FUNCTION cleanup_expired_upload_locks()
RETURNS INTEGER AS $$
DECLARE
    removed_rows INTEGER;
BEGIN
    -- DELETE ... RETURNING inside a CTE lets us count the purged rows
    -- within the same statement.
    WITH purged AS (
        DELETE FROM upload_locks
        WHERE expires_at < NOW()
        RETURNING 1
    )
    SELECT COUNT(*) INTO removed_rows FROM purged;
    RETURN removed_rows;
END;
$$ LANGUAGE plpgsql;