From 2f1891cf0126ec0e7d4c789d872a2cb2dd3a1745 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Wed, 7 Jan 2026 12:31:44 -0600 Subject: [PATCH] Metadata database tracks all uploads with project, package, tag, and timestamp queryable via API --- CHANGELOG.md | 45 + backend/app/models.py | 180 +- backend/app/routes.py | 1503 ++++++++++++++++- backend/app/schemas.py | 159 ++ backend/app/services/artifact_cleanup.py | 38 +- backend/app/storage.py | 8 + backend/tests/conftest.py | 199 +-- backend/tests/factories.py | 288 ++++ backend/tests/integration/__init__.py | 0 .../tests/integration/test_artifacts_api.py | 638 +++++++ .../tests/integration/test_packages_api.py | 345 ++++ .../tests/integration/test_projects_api.py | 322 ++++ backend/tests/integration/test_tags_api.py | 403 +++++ .../test_upload_download_api.py} | 612 +++---- backend/tests/test_duplicate_detection.py | 207 --- backend/tests/test_garbage_collection.py | 168 -- backend/tests/test_hash_calculation.py | 215 --- backend/tests/test_ref_count.py | 458 ----- backend/tests/test_stats_endpoints.py | 488 ------ backend/tests/unit/__init__.py | 0 backend/tests/unit/test_models.py | 271 +++ backend/tests/unit/test_storage.py | 439 +++++ migrations/004_history_tables.sql | 98 ++ migrations/005_upload_enhancements.sql | 83 + 24 files changed, 5044 insertions(+), 2123 deletions(-) create mode 100644 backend/tests/factories.py create mode 100644 backend/tests/integration/__init__.py create mode 100644 backend/tests/integration/test_artifacts_api.py create mode 100644 backend/tests/integration/test_packages_api.py create mode 100644 backend/tests/integration/test_projects_api.py create mode 100644 backend/tests/integration/test_tags_api.py rename backend/tests/{test_integration_uploads.py => integration/test_upload_download_api.py} (56%) delete mode 100644 backend/tests/test_duplicate_detection.py delete mode 100644 backend/tests/test_garbage_collection.py delete mode 100644 backend/tests/test_hash_calculation.py 
delete mode 100644 backend/tests/test_ref_count.py delete mode 100644 backend/tests/test_stats_endpoints.py create mode 100644 backend/tests/unit/__init__.py create mode 100644 backend/tests/unit/test_models.py create mode 100644 backend/tests/unit/test_storage.py create mode 100644 migrations/004_history_tables.sql create mode 100644 migrations/005_upload_enhancements.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index db52574..41a8dec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Added global artifacts endpoint `GET /api/v1/artifacts` with project/package/tag/size/date filters (#18) +- Added global tags endpoint `GET /api/v1/tags` with project/package/search/date filters (#18) +- Added wildcard pattern matching (`*`) for tag filters across all endpoints (#18) +- Added comma-separated multi-value support for tag filters (#18) +- Added `search` parameter to `/api/v1/uploads` for filename search (#18) +- Added `tag` filter to `/api/v1/uploads` endpoint (#18) +- Added `sort` and `order` parameters to `/api/v1/uploads` endpoint (#18) +- Added `min_size` and `max_size` filters to package artifacts endpoint (#18) +- Added `sort` and `order` parameters to package artifacts endpoint (#18) +- Added `from` and `to` date filters to package tags endpoint (#18) +- Added `GlobalArtifactResponse` and `GlobalTagResponse` schemas (#18) +- Added S3 object verification before database commit during upload (#19) +- Added S3 object cleanup on database commit failure (#19) +- Added upload duration tracking (`duration_ms` field) (#19) +- Added `User-Agent` header capture during uploads (#19) +- Added `X-Checksum-SHA256` header support for client-side checksum verification (#19) +- Added `status`, `error_message`, `client_checksum` columns to uploads table (#19) +- Added `upload_locks` table for future concurrent upload conflict detection (#19) +- Added consistency 
check endpoint `GET /api/v1/admin/consistency-check` (#19) +- Added `PUT /api/v1/projects/{project}` endpoint for project updates with audit logging (#20) +- Added `PUT /api/v1/project/{project}/packages/{package}` endpoint for package updates with audit logging (#20) +- Added `artifact.download` audit logging to download endpoint (#20) +- Added `ProjectHistory` and `PackageHistory` models with database triggers (#20) +- Added migration `004_history_tables.sql` for project/package history (#20) +- Added migration `005_upload_enhancements.sql` for upload status tracking (#19) +- Added 9 integration tests for global artifacts/tags endpoints (#18) +- Added global uploads query endpoint `GET /api/v1/uploads` with project/package/user/date filters (#18) +- Added project-level uploads endpoint `GET /api/v1/project/{project}/uploads` (#18) +- Added `has_more` field to pagination metadata for easier pagination UI (#18) +- Added `upload_id`, `content_type`, `original_name`, `created_at` fields to upload response (#19) +- Added audit log API endpoints with filtering and pagination (#20) + - `GET /api/v1/audit-logs` - list all audit logs with action/resource/user/date filters + - `GET /api/v1/projects/{project}/audit-logs` - project-scoped audit logs + - `GET /api/v1/project/{project}/{package}/audit-logs` - package-scoped audit logs +- Added upload history API endpoints (#20) + - `GET /api/v1/project/{project}/{package}/uploads` - list upload events for a package + - `GET /api/v1/artifact/{id}/uploads` - list all uploads of a specific artifact +- Added artifact provenance endpoint `GET /api/v1/artifact/{id}/history` (#20) + - Returns full artifact history including packages, tags, and upload events +- Added audit logging for project.create, package.create, tag.create, tag.update, artifact.upload actions (#20) +- Added `AuditLogResponse`, `UploadHistoryResponse`, `ArtifactProvenanceResponse` schemas (#20) +- Added `TagHistoryDetailResponse` schema with artifact metadata (#20) 
+- Added 31 integration tests for audit log, history, and upload query endpoints (#22) +### Changed +- Standardized audit action naming to `{entity}.{action}` pattern (project.delete, package.delete, tag.delete) (#20) - Added `StorageBackend` protocol/interface for backend-agnostic storage (#33) - Added `health_check()` method to storage backend with `/health` endpoint integration (#33) - Added `verify_integrity()` method for post-upload hash validation (#33) diff --git a/backend/app/models.py b/backend/app/models.py index 6fba3d5..37f23ef 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -1,8 +1,16 @@ from datetime import datetime -from typing import Optional from sqlalchemy import ( - Column, String, Text, Boolean, Integer, BigInteger, - DateTime, ForeignKey, CheckConstraint, Index, JSON + Column, + String, + Text, + Boolean, + Integer, + BigInteger, + DateTime, + ForeignKey, + CheckConstraint, + Index, + JSON, ) from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import relationship, declarative_base @@ -19,11 +27,17 @@ class Project(Base): description = Column(Text) is_public = Column(Boolean, default=True) created_at = Column(DateTime(timezone=True), default=datetime.utcnow) - updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow) + updated_at = Column( + DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow + ) created_by = Column(String(255), nullable=False) - packages = relationship("Package", back_populates="project", cascade="all, delete-orphan") - permissions = relationship("AccessPermission", back_populates="project", cascade="all, delete-orphan") + packages = relationship( + "Package", back_populates="project", cascade="all, delete-orphan" + ) + permissions = relationship( + "AccessPermission", back_populates="project", cascade="all, delete-orphan" + ) __table_args__ = ( Index("idx_projects_name", "name"), @@ -35,32 +49,44 @@ class Package(Base): __tablename__ 
= "packages" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False) + project_id = Column( + UUID(as_uuid=True), + ForeignKey("projects.id", ondelete="CASCADE"), + nullable=False, + ) name = Column(String(255), nullable=False) description = Column(Text) format = Column(String(50), default="generic", nullable=False) platform = Column(String(50), default="any", nullable=False) created_at = Column(DateTime(timezone=True), default=datetime.utcnow) - updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow) + updated_at = Column( + DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow + ) project = relationship("Project", back_populates="packages") tags = relationship("Tag", back_populates="package", cascade="all, delete-orphan") - uploads = relationship("Upload", back_populates="package", cascade="all, delete-orphan") - consumers = relationship("Consumer", back_populates="package", cascade="all, delete-orphan") + uploads = relationship( + "Upload", back_populates="package", cascade="all, delete-orphan" + ) + consumers = relationship( + "Consumer", back_populates="package", cascade="all, delete-orphan" + ) __table_args__ = ( Index("idx_packages_project_id", "project_id"), Index("idx_packages_name", "name"), Index("idx_packages_format", "format"), Index("idx_packages_platform", "platform"), - Index("idx_packages_project_name", "project_id", "name", unique=True), # Composite unique index + Index( + "idx_packages_project_name", "project_id", "name", unique=True + ), # Composite unique index CheckConstraint( "format IN ('generic', 'npm', 'pypi', 'docker', 'deb', 'rpm', 'maven', 'nuget', 'helm')", - name="check_package_format" + name="check_package_format", ), CheckConstraint( "platform IN ('any', 'linux', 'darwin', 'windows', 'linux-amd64', 'linux-arm64', 'darwin-amd64', 'darwin-arm64', 
'windows-amd64')", - name="check_package_platform" + name="check_package_platform", ), {"extend_existing": True}, ) @@ -76,7 +102,9 @@ class Artifact(Base): checksum_md5 = Column(String(32)) # MD5 hash for additional verification checksum_sha1 = Column(String(40)) # SHA1 hash for compatibility s3_etag = Column(String(64)) # S3 ETag for verification - artifact_metadata = Column("metadata", JSON, default=dict) # Format-specific metadata (column name is 'metadata') + artifact_metadata = Column( + "metadata", JSON, default=dict + ) # Format-specific metadata (column name is 'metadata') created_at = Column(DateTime(timezone=True), default=datetime.utcnow) created_by = Column(String(255), nullable=False) ref_count = Column(Integer, default=1) @@ -113,22 +141,34 @@ class Tag(Base): __tablename__ = "tags" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False) + package_id = Column( + UUID(as_uuid=True), + ForeignKey("packages.id", ondelete="CASCADE"), + nullable=False, + ) name = Column(String(255), nullable=False) artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False) created_at = Column(DateTime(timezone=True), default=datetime.utcnow) - updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow) + updated_at = Column( + DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow + ) created_by = Column(String(255), nullable=False) package = relationship("Package", back_populates="tags") artifact = relationship("Artifact", back_populates="tags") - history = relationship("TagHistory", back_populates="tag", cascade="all, delete-orphan") + history = relationship( + "TagHistory", back_populates="tag", cascade="all, delete-orphan" + ) __table_args__ = ( Index("idx_tags_package_id", "package_id"), Index("idx_tags_artifact_id", "artifact_id"), - Index("idx_tags_package_name", 
"package_id", "name", unique=True), # Composite unique index - Index("idx_tags_package_created_at", "package_id", "created_at"), # For recent tags queries + Index( + "idx_tags_package_name", "package_id", "name", unique=True + ), # Composite unique index + Index( + "idx_tags_package_created_at", "package_id", "created_at" + ), # For recent tags queries ) @@ -136,7 +176,9 @@ class TagHistory(Base): __tablename__ = "tag_history" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False) + tag_id = Column( + UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False + ) old_artifact_id = Column(String(64), ForeignKey("artifacts.id")) new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False) change_type = Column(String(20), nullable=False, default="update") @@ -148,7 +190,9 @@ class TagHistory(Base): __table_args__ = ( Index("idx_tag_history_tag_id", "tag_id"), Index("idx_tag_history_changed_at", "changed_at"), - CheckConstraint("change_type IN ('create', 'update', 'delete')", name="check_change_type"), + CheckConstraint( + "change_type IN ('create', 'update', 'delete')", name="check_change_type" + ), ) @@ -164,6 +208,11 @@ class Upload(Base): duration_ms = Column(Integer) # Upload timing in milliseconds deduplicated = Column(Boolean, default=False) # Whether artifact was deduplicated checksum_verified = Column(Boolean, default=True) # Whether checksum was verified + status = Column( + String(20), default="completed", nullable=False + ) # pending, completed, failed + error_message = Column(Text) # Error details for failed uploads + client_checksum = Column(String(64)) # Client-provided SHA256 for verification uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow) uploaded_by = Column(String(255), nullable=False) source_ip = Column(String(45)) @@ -177,6 +226,35 @@ class Upload(Base): 
Index("idx_uploads_uploaded_at", "uploaded_at"), Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"), Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"), + Index("idx_uploads_status", "status"), + Index("idx_uploads_status_uploaded_at", "status", "uploaded_at"), + CheckConstraint( + "status IN ('pending', 'completed', 'failed')", name="check_upload_status" + ), + ) + + +class UploadLock(Base): + """Track in-progress uploads for conflict detection (409 responses).""" + + __tablename__ = "upload_locks" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + sha256_hash = Column(String(64), nullable=False) + package_id = Column( + UUID(as_uuid=True), + ForeignKey("packages.id", ondelete="CASCADE"), + nullable=False, + ) + locked_at = Column(DateTime(timezone=True), default=datetime.utcnow) + locked_by = Column(String(255), nullable=False) + expires_at = Column(DateTime(timezone=True), nullable=False) + + __table_args__ = ( + Index("idx_upload_locks_expires_at", "expires_at"), + Index( + "idx_upload_locks_hash_package", "sha256_hash", "package_id", unique=True + ), ) @@ -184,7 +262,11 @@ class Consumer(Base): __tablename__ = "consumers" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False) + package_id = Column( + UUID(as_uuid=True), + ForeignKey("packages.id", ondelete="CASCADE"), + nullable=False, + ) project_url = Column(String(2048), nullable=False) last_access = Column(DateTime(timezone=True), default=datetime.utcnow) created_at = Column(DateTime(timezone=True), default=datetime.utcnow) @@ -201,7 +283,11 @@ class AccessPermission(Base): __tablename__ = "access_permissions" id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False) + project_id = Column( + UUID(as_uuid=True), + 
ForeignKey("projects.id", ondelete="CASCADE"), + nullable=False, + ) user_id = Column(String(255), nullable=False) level = Column(String(20), nullable=False) created_at = Column(DateTime(timezone=True), default=datetime.utcnow) @@ -252,3 +338,51 @@ class AuditLog(Base): Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"), Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"), ) + + +class ProjectHistory(Base): + """Track changes to project metadata over time.""" + + __tablename__ = "project_history" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + project_id = Column( + UUID(as_uuid=True), + ForeignKey("projects.id", ondelete="CASCADE"), + nullable=False, + ) + field_name = Column(String(100), nullable=False) + old_value = Column(Text) + new_value = Column(Text) + changed_at = Column(DateTime(timezone=True), default=datetime.utcnow) + changed_by = Column(String(255), nullable=False) + + __table_args__ = ( + Index("idx_project_history_project_id", "project_id"), + Index("idx_project_history_changed_at", "changed_at"), + Index("idx_project_history_project_changed_at", "project_id", "changed_at"), + ) + + +class PackageHistory(Base): + """Track changes to package metadata over time.""" + + __tablename__ = "package_history" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + package_id = Column( + UUID(as_uuid=True), + ForeignKey("packages.id", ondelete="CASCADE"), + nullable=False, + ) + field_name = Column(String(100), nullable=False) + old_value = Column(Text) + new_value = Column(Text) + changed_at = Column(DateTime(timezone=True), default=datetime.utcnow) + changed_by = Column(String(255), nullable=False) + + __table_args__ = ( + Index("idx_package_history_package_id", "package_id"), + Index("idx_package_history_changed_at", "changed_at"), + Index("idx_package_history_package_changed_at", "package_id", "changed_at"), + ) diff --git a/backend/app/routes.py b/backend/app/routes.py index 
c06c394..7975746 100644 --- a/backend/app/routes.py +++ b/backend/app/routes.py @@ -39,13 +39,16 @@ from .models import ( Tag, TagHistory, Upload, + UploadLock, Consumer, AuditLog, ) from .schemas import ( ProjectCreate, + ProjectUpdate, ProjectResponse, PackageCreate, + PackageUpdate, PackageResponse, PackageDetailResponse, TagSummary, @@ -58,6 +61,10 @@ from .schemas import ( TagResponse, TagDetailResponse, TagHistoryResponse, + TagHistoryDetailResponse, + AuditLogResponse, + UploadHistoryResponse, + ArtifactProvenanceResponse, UploadResponse, ConsumerResponse, HealthResponse, @@ -76,6 +83,7 @@ from .schemas import ( PresignedUrlResponse, GarbageCollectionResponse, OrphanedArtifactResponse, + ConsistencyCheckResponse, StorageStatsResponse, DeduplicationStatsResponse, ProjectStatsResponse, @@ -84,6 +92,8 @@ from .schemas import ( CrossProjectDeduplicationResponse, TimeBasedStatsResponse, StatsReportResponse, + GlobalArtifactResponse, + GlobalTagResponse, ) from .metadata import extract_metadata from .config import get_settings @@ -91,6 +101,18 @@ from .config import get_settings router = APIRouter() +def sanitize_filename(filename: str) -> str: + """Sanitize filename for use in Content-Disposition header. 
+ + Removes characters that could enable header injection attacks: + - Double quotes (") - could break out of quoted filename + - Carriage return (\\r) and newline (\\n) - could inject headers + """ + import re + + return re.sub(r'[\r\n"]', "", filename) + + def get_user_id(request: Request) -> str: """Extract user ID from request (simplified for now)""" api_key = request.headers.get("X-Orchard-API-Key") @@ -103,6 +125,7 @@ def get_user_id(request: Request) -> str: import logging +import time logger = logging.getLogger(__name__) @@ -478,6 +501,7 @@ def list_projects( limit=limit, total=total, total_pages=total_pages, + has_more=page < total_pages, ), ) @@ -499,6 +523,17 @@ def create_project( created_by=user_id, ) db.add(db_project) + + # Audit log + _log_audit( + db=db, + action="project.create", + resource=f"project/{project.name}", + user_id=user_id, + source_ip=request.client.host if request.client else None, + details={"is_public": project.is_public}, + ) + db.commit() db.refresh(db_project) return db_project @@ -512,6 +547,60 @@ def get_project(project_name: str, db: Session = Depends(get_db)): return project +@router.put("/api/v1/projects/{project_name}", response_model=ProjectResponse) +def update_project( + project_name: str, + project_update: ProjectUpdate, + request: Request, + db: Session = Depends(get_db), +): + """Update a project's metadata.""" + user_id = get_user_id(request) + + project = db.query(Project).filter(Project.name == project_name).first() + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # Track changes for audit log + changes = {} + if ( + project_update.description is not None + and project_update.description != project.description + ): + changes["description"] = { + "old": project.description, + "new": project_update.description, + } + project.description = project_update.description + if ( + project_update.is_public is not None + and project_update.is_public != project.is_public + ): + 
changes["is_public"] = { + "old": project.is_public, + "new": project_update.is_public, + } + project.is_public = project_update.is_public + + if not changes: + # No changes, return current project + return project + + # Audit log + _log_audit( + db=db, + action="project.update", + resource=f"project/{project_name}", + user_id=user_id, + source_ip=request.client.host if request.client else None, + details={"changes": changes}, + ) + + db.commit() + db.refresh(project) + return project + + @router.delete("/api/v1/projects/{project_name}", status_code=204) def delete_project( project_name: str, @@ -555,7 +644,7 @@ def delete_project( # Audit log (after commit) _log_audit( db, - action="delete_project", + action="project.delete", resource=f"project/{project_name}", user_id=user_id, source_ip=request.client.host if request.client else None, @@ -740,6 +829,7 @@ def list_packages( limit=limit, total=total, total_pages=total_pages, + has_more=page < total_pages, ), ) @@ -835,7 +925,10 @@ def get_package( @router.post("/api/v1/project/{project_name}/packages", response_model=PackageResponse) def create_package( - project_name: str, package: PackageCreate, db: Session = Depends(get_db) + project_name: str, + package: PackageCreate, + request: Request, + db: Session = Depends(get_db), ): project = db.query(Project).filter(Project.name == project_name).first() if not project: @@ -873,11 +966,106 @@ def create_package( platform=package.platform, ) db.add(db_package) + + # Audit log + _log_audit( + db=db, + action="package.create", + resource=f"project/{project_name}/{package.name}", + user_id=get_user_id(request), + source_ip=request.client.host if request.client else None, + details={"format": package.format, "platform": package.platform}, + ) + db.commit() db.refresh(db_package) return db_package +@router.put( + "/api/v1/project/{project_name}/packages/{package_name}", + response_model=PackageResponse, +) +def update_package( + project_name: str, + package_name: str, + 
package_update: PackageUpdate, + request: Request, + db: Session = Depends(get_db), +): + """Update a package's metadata.""" + user_id = get_user_id(request) + + project = db.query(Project).filter(Project.name == project_name).first() + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + package = ( + db.query(Package) + .filter(Package.project_id == project.id, Package.name == package_name) + .first() + ) + if not package: + raise HTTPException(status_code=404, detail="Package not found") + + # Validate format and platform if provided + if ( + package_update.format is not None + and package_update.format not in PACKAGE_FORMATS + ): + raise HTTPException( + status_code=400, + detail=f"Invalid format. Must be one of: {', '.join(PACKAGE_FORMATS)}", + ) + if ( + package_update.platform is not None + and package_update.platform not in PACKAGE_PLATFORMS + ): + raise HTTPException( + status_code=400, + detail=f"Invalid platform. Must be one of: {', '.join(PACKAGE_PLATFORMS)}", + ) + + # Track changes for audit log + changes = {} + if ( + package_update.description is not None + and package_update.description != package.description + ): + changes["description"] = { + "old": package.description, + "new": package_update.description, + } + package.description = package_update.description + if package_update.format is not None and package_update.format != package.format: + changes["format"] = {"old": package.format, "new": package_update.format} + package.format = package_update.format + if ( + package_update.platform is not None + and package_update.platform != package.platform + ): + changes["platform"] = {"old": package.platform, "new": package_update.platform} + package.platform = package_update.platform + + if not changes: + # No changes, return current package + return package + + # Audit log + _log_audit( + db=db, + action="package.update", + resource=f"project/{project_name}/{package_name}", + user_id=user_id, + 
source_ip=request.client.host if request.client else None, + details={"changes": changes}, + ) + + db.commit() + db.refresh(package) + return package + + @router.delete( "/api/v1/project/{project_name}/packages/{package_name}", status_code=204, @@ -927,7 +1115,7 @@ def delete_package( # Audit log (after commit) _log_audit( db, - action="delete_package", + action="package.delete", resource=f"project/{project_name}/{package_name}", user_id=user_id, source_ip=request.client.host if request.client else None, @@ -955,8 +1143,20 @@ def upload_artifact( db: Session = Depends(get_db), storage: S3Storage = Depends(get_storage), content_length: Optional[int] = Header(None, alias="Content-Length"), + user_agent: Optional[str] = Header(None, alias="User-Agent"), + client_checksum: Optional[str] = Header(None, alias="X-Checksum-SHA256"), ): + """ + Upload an artifact to a package. + + Headers: + - X-Checksum-SHA256: Optional client-provided SHA256 for verification + - User-Agent: Captured for audit purposes + """ + start_time = time.time() user_id = get_user_id(request) + settings = get_settings() + storage_result = None # Get project and package project = db.query(Project).filter(Project.name == project_name).first() @@ -972,7 +1172,6 @@ def upload_artifact( raise HTTPException(status_code=404, detail="Package not found") # Validate file size - settings = get_settings() if content_length is not None: if content_length > settings.max_file_size: raise HTTPException( @@ -985,6 +1184,17 @@ def upload_artifact( detail="Empty files are not allowed", ) + # Validate client checksum format if provided + if client_checksum: + client_checksum = client_checksum.lower().strip() + if len(client_checksum) != 64 or not all( + c in "0123456789abcdef" for c in client_checksum + ): + raise HTTPException( + status_code=400, + detail="Invalid X-Checksum-SHA256 header. 
Must be 64 hex characters.", + ) + # Extract format-specific metadata before storing file_metadata = {} if file.filename: @@ -1041,6 +1251,55 @@ def upload_artifact( logger.error(f"Storage error during upload: {e}") raise HTTPException(status_code=500, detail="Internal storage error") + # Verify client-provided checksum if present + checksum_verified = True + if client_checksum and client_checksum != storage_result.sha256: + # Checksum mismatch - clean up S3 object if it was newly uploaded + logger.warning( + f"Client checksum mismatch: expected {client_checksum}, got {storage_result.sha256}" + ) + # Attempt cleanup of the uploaded object + try: + if not storage_result.already_existed: + storage.delete(storage_result.s3_key) + logger.info( + f"Cleaned up S3 object after checksum mismatch: {storage_result.s3_key}" + ) + except Exception as cleanup_error: + logger.error( + f"Failed to clean up S3 object after checksum mismatch: {cleanup_error}" + ) + raise HTTPException( + status_code=422, + detail=f"Checksum verification failed. 
Expected {client_checksum}, got {storage_result.sha256}", + ) + + # Verify S3 object exists and size matches before proceeding + try: + s3_info = storage.get_object_info(storage_result.s3_key) + if s3_info is None: + raise HTTPException( + status_code=500, + detail="Failed to verify uploaded object in storage", + ) + if s3_info.get("size") != storage_result.size: + logger.error( + f"Size mismatch after upload: expected {storage_result.size}, " + f"got {s3_info.get('size')}" + ) + raise HTTPException( + status_code=500, + detail="Upload verification failed: size mismatch", + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to verify S3 object: {e}") + raise HTTPException( + status_code=500, + detail="Failed to verify uploaded object", + ) + # Check if this is a deduplicated upload deduplicated = False saved_bytes = 0 @@ -1092,16 +1351,26 @@ def upload_artifact( ) db.add(artifact) - # Record upload + # Calculate upload duration + duration_ms = int((time.time() - start_time) * 1000) + + # Record upload with enhanced metadata upload = Upload( artifact_id=storage_result.sha256, package_id=package.id, original_name=file.filename, + tag_name=tag, + user_agent=user_agent[:512] if user_agent else None, # Truncate if too long + duration_ms=duration_ms, + deduplicated=deduplicated, + checksum_verified=checksum_verified, + client_checksum=client_checksum, + status="completed", uploaded_by=user_id, source_ip=request.client.host if request.client else None, - deduplicated=deduplicated, ) db.add(upload) + db.flush() # Flush to get upload ID # Create or update tag if provided (with ref_count management and history) if tag: @@ -1117,7 +1386,7 @@ def upload_artifact( # Audit log _log_audit( db, - action="upload", + action="artifact.upload", resource=f"project/{project_name}/{package_name}/artifact/{storage_result.sha256[:12]}", user_id=user_id, source_ip=request.client.host if request.client else None, @@ -1127,10 +1396,32 @@ def upload_artifact( 
"deduplicated": deduplicated, "saved_bytes": saved_bytes, "tag": tag, + "duration_ms": duration_ms, + "client_checksum_provided": client_checksum is not None, }, ) - db.commit() + # Commit with cleanup on failure + try: + db.commit() + except Exception as commit_error: + logger.error(f"Database commit failed after upload: {commit_error}") + db.rollback() + # Attempt to clean up newly uploaded S3 object + if storage_result and not storage_result.already_existed: + try: + storage.delete(storage_result.s3_key) + logger.info( + f"Cleaned up S3 object after commit failure: {storage_result.s3_key}" + ) + except Exception as cleanup_error: + logger.error( + f"Failed to clean up S3 object after commit failure: {cleanup_error}" + ) + raise HTTPException( + status_code=500, + detail="Failed to save upload record. Please retry.", + ) return UploadResponse( artifact_id=storage_result.sha256, @@ -1145,6 +1436,10 @@ def upload_artifact( format_metadata=artifact.artifact_metadata, deduplicated=deduplicated, ref_count=artifact.ref_count, + upload_id=upload.id, + content_type=artifact.content_type, + original_name=artifact.original_name, + created_at=artifact.created_at, ) @@ -1231,7 +1526,7 @@ def init_resumable_upload( # Audit log _log_audit( db, - action="upload", + action="artifact.upload", resource=f"project/{project_name}/{package_name}/artifact/{init_request.expected_hash[:12]}", user_id=user_id, source_ip=request.client.host if request.client else None, @@ -1517,7 +1812,24 @@ def download_artifact( if not artifact: raise HTTPException(status_code=404, detail="Artifact not found") - filename = artifact.original_name or f"{artifact.id}" + filename = sanitize_filename(artifact.original_name or f"{artifact.id}") + + # Audit log download + user_id = get_user_id(request) + _log_audit( + db=db, + action="artifact.download", + resource=f"project/{project_name}/{package_name}/artifact/{artifact.id[:12]}", + user_id=user_id, + source_ip=request.client.host if request.client else 
None, + details={ + "artifact_id": artifact.id, + "ref": ref, + "size": artifact.size, + "original_name": artifact.original_name, + }, + ) + db.commit() # Determine download mode (query param overrides server default) download_mode = mode or settings.download_mode @@ -1630,7 +1942,7 @@ def get_artifact_url( if not artifact: raise HTTPException(status_code=404, detail="Artifact not found") - filename = artifact.original_name or f"{artifact.id}" + filename = sanitize_filename(artifact.original_name or f"{artifact.id}") url_expiry = expiry or settings.presigned_url_expiry presigned_url = storage.generate_presigned_url( @@ -1681,7 +1993,7 @@ def head_artifact( if not artifact: raise HTTPException(status_code=404, detail="Artifact not found") - filename = artifact.original_name or f"{artifact.id}" + filename = sanitize_filename(artifact.original_name or f"{artifact.id}") return Response( content=b"", @@ -1724,6 +2036,12 @@ def list_tags( search: Optional[str] = Query(default=None, description="Search by tag name"), sort: str = Query(default="name", description="Sort field (name, created_at)"), order: str = Query(default="asc", description="Sort order (asc, desc)"), + from_date: Optional[datetime] = Query( + default=None, alias="from", description="Filter tags created after this date" + ), + to_date: Optional[datetime] = Query( + default=None, alias="to", description="Filter tags created before this date" + ), db: Session = Depends(get_db), ): project = db.query(Project).filter(Project.name == project_name).first() @@ -1769,6 +2087,12 @@ def list_tags( ) ) + # Apply date range filters + if from_date: + query = query.filter(Tag.created_at >= from_date) + if to_date: + query = query.filter(Tag.created_at <= to_date) + # Get total count before pagination total = query.count() @@ -1812,6 +2136,7 @@ def list_tags( limit=limit, total=total, total_pages=total_pages, + has_more=page < total_pages, ), ) @@ -1850,8 +2175,23 @@ def create_tag( db.query(Tag).filter(Tag.package_id == 
package.id, Tag.name == tag.name).first() ) if existing: + old_artifact_id = existing.artifact_id existing.artifact_id = tag.artifact_id existing.created_by = user_id + + # Audit log for tag update + _log_audit( + db=db, + action="tag.update", + resource=f"project/{project_name}/{package_name}/tag/{tag.name}", + user_id=user_id, + source_ip=request.client.host if request.client else None, + details={ + "old_artifact_id": old_artifact_id, + "new_artifact_id": tag.artifact_id, + }, + ) + db.commit() db.refresh(existing) return existing @@ -1863,6 +2203,17 @@ def create_tag( created_by=user_id, ) db.add(db_tag) + + # Audit log for tag create + _log_audit( + db=db, + action="tag.create", + resource=f"project/{project_name}/{package_name}/tag/{tag.name}", + user_id=user_id, + source_ip=request.client.host if request.client else None, + details={"artifact_id": tag.artifact_id}, + ) + db.commit() db.refresh(db_tag) return db_tag @@ -1919,15 +2270,17 @@ def get_tag( @router.get( "/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history", - response_model=List[TagHistoryResponse], + response_model=PaginatedResponse[TagHistoryDetailResponse], ) def get_tag_history( project_name: str, package_name: str, tag_name: str, + page: int = Query(default=1, ge=1), + limit: int = Query(default=20, ge=1, le=100), db: Session = Depends(get_db), ): - """Get the history of artifact assignments for a tag""" + """Get the history of artifact assignments for a tag with artifact metadata""" project = db.query(Project).filter(Project.name == project_name).first() if not project: raise HTTPException(status_code=404, detail="Project not found") @@ -1946,13 +2299,53 @@ def get_tag_history( if not tag: raise HTTPException(status_code=404, detail="Tag not found") - history = ( - db.query(TagHistory) + # Get total count + total = ( + db.query(func.count(TagHistory.id)).filter(TagHistory.tag_id == tag.id).scalar() + or 0 + ) + + # Get paginated history with artifact metadata + offset = 
(page - 1) * limit + history_items = ( + db.query(TagHistory, Artifact) + .outerjoin(Artifact, TagHistory.new_artifact_id == Artifact.id) .filter(TagHistory.tag_id == tag.id) .order_by(TagHistory.changed_at.desc()) + .offset(offset) + .limit(limit) .all() ) - return history + + # Build response with artifact metadata + items = [] + for history, artifact in history_items: + items.append( + TagHistoryDetailResponse( + id=history.id, + tag_id=history.tag_id, + tag_name=tag.name, + old_artifact_id=history.old_artifact_id, + new_artifact_id=history.new_artifact_id, + changed_at=history.changed_at, + changed_by=history.changed_by, + artifact_size=artifact.size if artifact else 0, + artifact_original_name=artifact.original_name if artifact else None, + artifact_content_type=artifact.content_type if artifact else None, + ) + ) + + total_pages = math.ceil(total / limit) if limit > 0 else 0 + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page < total_pages, + ), + ) @router.delete( @@ -2016,7 +2409,7 @@ def delete_tag( artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first() _log_audit( db, - action="delete_tag", + action="tag.delete", resource=f"project/{project_name}/{package_name}/tag/{tag_name}", user_id=user_id, source_ip=request.client.host if request.client else None, @@ -2076,9 +2469,19 @@ def list_package_artifacts( created_before: Optional[datetime] = Query( default=None, description="Filter artifacts created before this date" ), + min_size: Optional[int] = Query( + default=None, ge=0, description="Minimum artifact size in bytes" + ), + max_size: Optional[int] = Query( + default=None, ge=0, description="Maximum artifact size in bytes" + ), + sort: Optional[str] = Query( + default=None, description="Sort field: created_at, size, original_name" + ), + order: Optional[str] = Query(default="desc", description="Sort order: asc or desc"), db: Session = 
Depends(get_db), ): - """List all unique artifacts uploaded to a package""" + """List all unique artifacts uploaded to a package with filtering and sorting.""" project = db.query(Project).filter(Project.name == project_name).first() if not project: raise HTTPException(status_code=404, detail="Project not found") @@ -2110,14 +2513,38 @@ def list_package_artifacts( if created_before: query = query.filter(Artifact.created_at <= created_before) + # Apply size range filters + if min_size is not None: + query = query.filter(Artifact.size >= min_size) + if max_size is not None: + query = query.filter(Artifact.size <= max_size) + + # Validate and apply sorting + valid_sort_fields = { + "created_at": Artifact.created_at, + "size": Artifact.size, + "original_name": Artifact.original_name, + } + if sort and sort not in valid_sort_fields: + raise HTTPException( + status_code=400, + detail=f"Invalid sort field. Valid options: {', '.join(valid_sort_fields.keys())}", + ) + sort_column = valid_sort_fields.get(sort, Artifact.created_at) + if order and order.lower() not in ("asc", "desc"): + raise HTTPException( + status_code=400, detail="Invalid order. 
Valid options: asc, desc" + ) + sort_order = ( + sort_column.asc() if order and order.lower() == "asc" else sort_column.desc() + ) + # Get total count before pagination total = query.count() # Apply pagination offset = (page - 1) * limit - artifacts = ( - query.order_by(Artifact.created_at.desc()).offset(offset).limit(limit).all() - ) + artifacts = query.order_by(sort_order).offset(offset).limit(limit).all() # Calculate total pages total_pages = math.ceil(total / limit) if total > 0 else 1 @@ -2153,6 +2580,267 @@ def list_package_artifacts( limit=limit, total=total, total_pages=total_pages, + has_more=page < total_pages, + ), + ) + + +# Global artifacts listing +@router.get( + "/api/v1/artifacts", + response_model=PaginatedResponse[GlobalArtifactResponse], +) +def list_all_artifacts( + project: Optional[str] = Query(None, description="Filter by project name"), + package: Optional[str] = Query(None, description="Filter by package name"), + tag: Optional[str] = Query( + None, + description="Filter by tag name. Supports wildcards (*) and comma-separated values", + ), + content_type: Optional[str] = Query(None, description="Filter by content type"), + min_size: Optional[int] = Query(None, ge=0, description="Minimum size in bytes"), + max_size: Optional[int] = Query(None, ge=0, description="Maximum size in bytes"), + from_date: Optional[datetime] = Query( + None, alias="from", description="Created after" + ), + to_date: Optional[datetime] = Query(None, alias="to", description="Created before"), + sort: Optional[str] = Query(None, description="Sort field: created_at, size"), + order: Optional[str] = Query("desc", description="Sort order: asc or desc"), + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + """ + List all artifacts globally with filtering by project, package, tag, etc. + + Returns artifacts with context about which projects/packages/tags reference them. 
+ """ + # Start with base query + query = db.query(Artifact) + + # If filtering by project/package/tag, need to join through tags + if project or package or tag: + # Subquery to get artifact IDs that match the filters + tag_query = ( + db.query(Tag.artifact_id) + .join(Package, Tag.package_id == Package.id) + .join(Project, Package.project_id == Project.id) + ) + if project: + tag_query = tag_query.filter(Project.name == project) + if package: + tag_query = tag_query.filter(Package.name == package) + if tag: + # Support multiple values (comma-separated) and wildcards (*) + tag_values = [t.strip() for t in tag.split(",") if t.strip()] + if len(tag_values) == 1: + tag_val = tag_values[0] + if "*" in tag_val: + # Wildcard: convert * to SQL LIKE % + tag_query = tag_query.filter( + Tag.name.ilike(tag_val.replace("*", "%")) + ) + else: + tag_query = tag_query.filter(Tag.name == tag_val) + else: + # Multiple values: check if any match (with wildcard support) + tag_conditions = [] + for tag_val in tag_values: + if "*" in tag_val: + tag_conditions.append(Tag.name.ilike(tag_val.replace("*", "%"))) + else: + tag_conditions.append(Tag.name == tag_val) + tag_query = tag_query.filter(or_(*tag_conditions)) + artifact_ids = tag_query.distinct().subquery() + query = query.filter(Artifact.id.in_(artifact_ids)) + + # Apply content type filter + if content_type: + query = query.filter(Artifact.content_type == content_type) + + # Apply size filters + if min_size is not None: + query = query.filter(Artifact.size >= min_size) + if max_size is not None: + query = query.filter(Artifact.size <= max_size) + + # Apply date filters + if from_date: + query = query.filter(Artifact.created_at >= from_date) + if to_date: + query = query.filter(Artifact.created_at <= to_date) + + # Validate and apply sorting + valid_sort_fields = {"created_at": Artifact.created_at, "size": Artifact.size} + if sort and sort not in valid_sort_fields: + raise HTTPException( + status_code=400, + detail=f"Invalid sort 
field. Valid options: {', '.join(valid_sort_fields.keys())}", + ) + sort_column = valid_sort_fields.get(sort, Artifact.created_at) + if order and order.lower() not in ("asc", "desc"): + raise HTTPException( + status_code=400, detail="Invalid order. Valid options: asc, desc" + ) + sort_order = ( + sort_column.asc() if order and order.lower() == "asc" else sort_column.desc() + ) + + total = query.count() + total_pages = math.ceil(total / limit) if total > 0 else 1 + + artifacts = query.order_by(sort_order).offset((page - 1) * limit).limit(limit).all() + + # Build responses with context + items = [] + for artifact in artifacts: + # Get all tags referencing this artifact with project/package info + tags_info = ( + db.query(Tag, Package, Project) + .join(Package, Tag.package_id == Package.id) + .join(Project, Package.project_id == Project.id) + .filter(Tag.artifact_id == artifact.id) + .all() + ) + + projects = list(set(proj.name for _, _, proj in tags_info)) + packages = list(set(f"{proj.name}/{pkg.name}" for _, pkg, proj in tags_info)) + tags = [f"{proj.name}/{pkg.name}:{t.name}" for t, pkg, proj in tags_info] + + items.append( + GlobalArtifactResponse( + id=artifact.id, + sha256=artifact.id, + size=artifact.size, + content_type=artifact.content_type, + original_name=artifact.original_name, + created_at=artifact.created_at, + created_by=artifact.created_by, + format_metadata=artifact.artifact_metadata, + ref_count=artifact.ref_count, + projects=projects, + packages=packages, + tags=tags, + ) + ) + + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page < total_pages, + ), + ) + + +# Global tags listing +@router.get( + "/api/v1/tags", + response_model=PaginatedResponse[GlobalTagResponse], +) +def list_all_tags( + project: Optional[str] = Query(None, description="Filter by project name"), + package: Optional[str] = Query(None, description="Filter by package name"), + 
search: Optional[str] = Query( + None, + description="Search by tag name. Supports wildcards (*) and comma-separated values", + ), + from_date: Optional[datetime] = Query( + None, alias="from", description="Created after" + ), + to_date: Optional[datetime] = Query(None, alias="to", description="Created before"), + sort: Optional[str] = Query(None, description="Sort field: name, created_at"), + order: Optional[str] = Query("desc", description="Sort order: asc or desc"), + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + """ + List all tags globally with filtering by project, package, name, etc. + """ + query = ( + db.query(Tag, Package, Project, Artifact) + .join(Package, Tag.package_id == Package.id) + .join(Project, Package.project_id == Project.id) + .join(Artifact, Tag.artifact_id == Artifact.id) + ) + + # Apply filters + if project: + query = query.filter(Project.name == project) + if package: + query = query.filter(Package.name == package) + if search: + # Support multiple values (comma-separated) and wildcards (*) + search_values = [s.strip() for s in search.split(",") if s.strip()] + if len(search_values) == 1: + search_val = search_values[0] + if "*" in search_val: + query = query.filter(Tag.name.ilike(search_val.replace("*", "%"))) + else: + query = query.filter(Tag.name.ilike(f"%{search_val}%")) + else: + search_conditions = [] + for search_val in search_values: + if "*" in search_val: + search_conditions.append( + Tag.name.ilike(search_val.replace("*", "%")) + ) + else: + search_conditions.append(Tag.name.ilike(f"%{search_val}%")) + query = query.filter(or_(*search_conditions)) + if from_date: + query = query.filter(Tag.created_at >= from_date) + if to_date: + query = query.filter(Tag.created_at <= to_date) + + # Validate and apply sorting + valid_sort_fields = {"name": Tag.name, "created_at": Tag.created_at} + if sort and sort not in valid_sort_fields: + raise HTTPException( + status_code=400, + 
detail=f"Invalid sort field. Valid options: {', '.join(valid_sort_fields.keys())}", + ) + sort_column = valid_sort_fields.get(sort, Tag.created_at) + if order and order.lower() not in ("asc", "desc"): + raise HTTPException( + status_code=400, detail="Invalid order. Valid options: asc, desc" + ) + sort_order = ( + sort_column.asc() if order and order.lower() == "asc" else sort_column.desc() + ) + + total = query.count() + total_pages = math.ceil(total / limit) if total > 0 else 1 + + results = query.order_by(sort_order).offset((page - 1) * limit).limit(limit).all() + + items = [ + GlobalTagResponse( + id=tag.id, + name=tag.name, + artifact_id=tag.artifact_id, + created_at=tag.created_at, + created_by=tag.created_by, + project_name=proj.name, + package_name=pkg.name, + artifact_size=artifact.size, + artifact_content_type=artifact.content_type, + ) + for tag, pkg, proj, artifact in results + ] + + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page < total_pages, ), ) @@ -2322,6 +3010,109 @@ def garbage_collect( ) +@router.get( + "/api/v1/admin/consistency-check", + response_model=ConsistencyCheckResponse, +) +def check_consistency( + limit: int = Query( + default=100, ge=1, le=1000, description="Max items to report per category" + ), + db: Session = Depends(get_db), + storage: S3Storage = Depends(get_storage), +): + """ + Check consistency between database records and S3 storage. + + Reports: + - Orphaned S3 objects (in S3 but not in database) + - Missing S3 objects (in database but not in S3) + - Size mismatches (database size != S3 size) + + This is a read-only operation. Use garbage-collect to clean up issues. 
+ """ + orphaned_s3_keys = [] + missing_s3_keys = [] + size_mismatches = [] + + # Get all artifacts from database + artifacts = db.query(Artifact).all() + total_checked = len(artifacts) + + # Check each artifact exists in S3 and sizes match + for artifact in artifacts: + try: + s3_info = storage.get_object_info(artifact.s3_key) + if s3_info is None: + if len(missing_s3_keys) < limit: + missing_s3_keys.append(artifact.s3_key) + else: + s3_size = s3_info.get("size", 0) + if s3_size != artifact.size: + if len(size_mismatches) < limit: + size_mismatches.append( + { + "artifact_id": artifact.id, + "s3_key": artifact.s3_key, + "db_size": artifact.size, + "s3_size": s3_size, + } + ) + except Exception as e: + logger.error(f"Error checking S3 object {artifact.s3_key}: {e}") + if len(missing_s3_keys) < limit: + missing_s3_keys.append(artifact.s3_key) + + # Check for orphaned S3 objects (objects in S3 bucket but not in database) + # Note: This is expensive for large buckets, so we limit the scan + try: + # List objects in the fruits/ prefix (where artifacts are stored) + paginator = storage.client.get_paginator("list_objects_v2") + artifact_ids_in_db = {a.id for a in artifacts} + + objects_checked = 0 + for page in paginator.paginate( + Bucket=storage.bucket, Prefix="fruits/", MaxKeys=1000 + ): + if "Contents" not in page: + break + for obj in page["Contents"]: + objects_checked += 1 + # Extract hash from key: fruits/ab/cd/abcdef... 
+ key = obj["Key"] + parts = key.split("/") + if len(parts) == 4 and parts[0] == "fruits": + sha256_hash = parts[3] + if sha256_hash not in artifact_ids_in_db: + if len(orphaned_s3_keys) < limit: + orphaned_s3_keys.append(key) + + # Limit total objects checked + if objects_checked >= 10000: + break + if objects_checked >= 10000: + break + except Exception as e: + logger.error(f"Error listing S3 objects for consistency check: {e}") + + healthy = ( + len(orphaned_s3_keys) == 0 + and len(missing_s3_keys) == 0 + and len(size_mismatches) == 0 + ) + + return ConsistencyCheckResponse( + total_artifacts_checked=total_checked, + orphaned_s3_objects=len(orphaned_s3_keys), + missing_s3_objects=len(missing_s3_keys), + size_mismatches=len(size_mismatches), + healthy=healthy, + orphaned_s3_keys=orphaned_s3_keys, + missing_s3_keys=missing_s3_keys, + size_mismatch_artifacts=size_mismatches, + ) + + # ============================================================================= # Statistics Endpoints (ISSUE 34) # ============================================================================= @@ -3069,3 +3860,671 @@ Generated: {generated_at.strftime("%Y-%m-%d %H:%M:%S UTC")} indent=2, ), ) + + +# ============================================================================= +# Audit Log Endpoints +# ============================================================================= + + +@router.get("/api/v1/audit-logs", response_model=PaginatedResponse[AuditLogResponse]) +def list_audit_logs( + action: Optional[str] = Query(None, description="Filter by action type"), + resource: Optional[str] = Query(None, description="Filter by resource pattern"), + user_id: Optional[str] = Query(None, description="Filter by user"), + from_date: Optional[datetime] = Query(None, alias="from", description="Start date"), + to_date: Optional[datetime] = Query(None, alias="to", description="End date"), + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + 
"""
+    List audit logs with filtering and pagination.
+
+    Filters:
+    - action: Filter by action type (e.g., 'project.create', 'artifact.upload')
+    - resource: Filter by resource pattern (partial match)
+    - user_id: Filter by user ID
+    - from/to: Filter by timestamp range
+    """
+    query = db.query(AuditLog)
+
+    if action:
+        query = query.filter(AuditLog.action == action)
+    if resource:
+        query = query.filter(AuditLog.resource.ilike(f"%{resource}%"))
+    if user_id:
+        query = query.filter(AuditLog.user_id == user_id)
+    if from_date:
+        query = query.filter(AuditLog.timestamp >= from_date)
+    if to_date:
+        query = query.filter(AuditLog.timestamp <= to_date)
+
+    total = query.count()
+    total_pages = math.ceil(total / limit) if total > 0 else 1
+
+    logs = (
+        query.order_by(AuditLog.timestamp.desc())
+        .offset((page - 1) * limit)
+        .limit(limit)
+        .all()
+    )
+
+    return PaginatedResponse(
+        items=logs,
+        pagination=PaginationMeta(
+            page=page,
+            limit=limit,
+            total=total,
+            total_pages=total_pages,
+            has_more=page < total_pages,
+        ),
+    )
+
+
+@router.get(
+    "/api/v1/projects/{project_name}/audit-logs",
+    response_model=PaginatedResponse[AuditLogResponse],
+)
+def list_project_audit_logs(
+    project_name: str,
+    action: Optional[str] = Query(None, description="Filter by action type"),
+    from_date: Optional[datetime] = Query(None, alias="from", description="Start date"),
+    to_date: Optional[datetime] = Query(None, alias="to", description="End date"),
+    page: int = Query(1, ge=1),
+    limit: int = Query(20, ge=1, le=100),
+    db: Session = Depends(get_db),
+):
+    """List audit logs for a specific project."""
+    project = db.query(Project).filter(Project.name == project_name).first()
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    # Match resources under this project (audit resources use the "project/" prefix)
+    resource_pattern = f"project/{project_name}%"
+    query = db.query(AuditLog).filter(AuditLog.resource.like(resource_pattern))
+
+    if action:
+        query = query.filter(AuditLog.action
== action)
+    if from_date:
+        query = query.filter(AuditLog.timestamp >= from_date)
+    if to_date:
+        query = query.filter(AuditLog.timestamp <= to_date)
+
+    total = query.count()
+    total_pages = math.ceil(total / limit) if total > 0 else 1
+
+    logs = (
+        query.order_by(AuditLog.timestamp.desc())
+        .offset((page - 1) * limit)
+        .limit(limit)
+        .all()
+    )
+
+    return PaginatedResponse(
+        items=logs,
+        pagination=PaginationMeta(
+            page=page,
+            limit=limit,
+            total=total,
+            total_pages=total_pages,
+            has_more=page < total_pages,
+        ),
+    )
+
+
+@router.get(
+    "/api/v1/project/{project_name}/{package_name}/audit-logs",
+    response_model=PaginatedResponse[AuditLogResponse],
+)
+def list_package_audit_logs(
+    project_name: str,
+    package_name: str,
+    action: Optional[str] = Query(None, description="Filter by action type"),
+    from_date: Optional[datetime] = Query(None, alias="from", description="Start date"),
+    to_date: Optional[datetime] = Query(None, alias="to", description="End date"),
+    page: int = Query(1, ge=1),
+    limit: int = Query(20, ge=1, le=100),
+    db: Session = Depends(get_db),
+):
+    """List audit logs for a specific package."""
+    project = db.query(Project).filter(Project.name == project_name).first()
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    package = (
+        db.query(Package)
+        .filter(Package.project_id == project.id, Package.name == package_name)
+        .first()
+    )
+    if not package:
+        raise HTTPException(status_code=404, detail="Package not found")
+
+    # Match resources under this package (audit resources use the "project/" prefix)
+    resource_pattern = f"project/{project_name}/{package_name}%"
+    query = db.query(AuditLog).filter(AuditLog.resource.like(resource_pattern))
+
+    if action:
+        query = query.filter(AuditLog.action == action)
+    if from_date:
+        query = query.filter(AuditLog.timestamp >= from_date)
+    if to_date:
+        query = query.filter(AuditLog.timestamp <= to_date)
+
+    total = query.count()
+    total_pages = math.ceil(total / limit) if total > 0 else 1
+
+    logs
= ( + query.order_by(AuditLog.timestamp.desc()) + .offset((page - 1) * limit) + .limit(limit) + .all() + ) + + return PaginatedResponse( + items=logs, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page < total_pages, + ), + ) + + +# ============================================================================= +# Upload History Endpoints +# ============================================================================= + + +@router.get( + "/api/v1/uploads", + response_model=PaginatedResponse[UploadHistoryResponse], +) +def list_all_uploads( + request: Request, + project: Optional[str] = Query(None, description="Filter by project name"), + package: Optional[str] = Query(None, description="Filter by package name"), + uploaded_by: Optional[str] = Query(None, description="Filter by uploader"), + from_date: Optional[datetime] = Query(None, alias="from", description="Start date"), + to_date: Optional[datetime] = Query(None, alias="to", description="End date"), + deduplicated: Optional[bool] = Query( + None, description="Filter by deduplication status" + ), + search: Optional[str] = Query(None, description="Search by original filename"), + tag: Optional[str] = Query( + None, + description="Filter by tag name. Supports wildcards (*) and comma-separated values", + ), + sort: Optional[str] = Query( + None, description="Sort field: uploaded_at, original_name, size" + ), + order: Optional[str] = Query("desc", description="Sort order: asc or desc"), + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + """ + List all upload events globally (admin endpoint). 
+ + Supports filtering by: + - project: Filter by project name + - package: Filter by package name (requires project) + - uploaded_by: Filter by user ID + - from/to: Filter by timestamp range + - deduplicated: Filter by deduplication status + - search: Search by original filename (case-insensitive) + - tag: Filter by tag name + """ + query = ( + db.query(Upload, Package, Project, Artifact) + .join(Package, Upload.package_id == Package.id) + .join(Project, Package.project_id == Project.id) + .join(Artifact, Upload.artifact_id == Artifact.id) + ) + + # Apply filters + if project: + query = query.filter(Project.name == project) + if package: + query = query.filter(Package.name == package) + if uploaded_by: + query = query.filter(Upload.uploaded_by == uploaded_by) + if from_date: + query = query.filter(Upload.uploaded_at >= from_date) + if to_date: + query = query.filter(Upload.uploaded_at <= to_date) + if deduplicated is not None: + query = query.filter(Upload.deduplicated == deduplicated) + if search: + query = query.filter(Upload.original_name.ilike(f"%{search}%")) + if tag: + # Support multiple values (comma-separated) and wildcards (*) + tag_values = [t.strip() for t in tag.split(",") if t.strip()] + if len(tag_values) == 1: + tag_val = tag_values[0] + if "*" in tag_val: + query = query.filter(Upload.tag_name.ilike(tag_val.replace("*", "%"))) + else: + query = query.filter(Upload.tag_name == tag_val) + else: + tag_conditions = [] + for tag_val in tag_values: + if "*" in tag_val: + tag_conditions.append( + Upload.tag_name.ilike(tag_val.replace("*", "%")) + ) + else: + tag_conditions.append(Upload.tag_name == tag_val) + query = query.filter(or_(*tag_conditions)) + + # Validate and apply sorting + valid_sort_fields = { + "uploaded_at": Upload.uploaded_at, + "original_name": Upload.original_name, + "size": Artifact.size, + } + if sort and sort not in valid_sort_fields: + raise HTTPException( + status_code=400, + detail=f"Invalid sort field. 
Valid options: {', '.join(valid_sort_fields.keys())}", + ) + sort_column = valid_sort_fields.get(sort, Upload.uploaded_at) + if order and order.lower() not in ("asc", "desc"): + raise HTTPException( + status_code=400, detail="Invalid order. Valid options: asc, desc" + ) + sort_order = ( + sort_column.asc() if order and order.lower() == "asc" else sort_column.desc() + ) + + total = query.count() + total_pages = math.ceil(total / limit) if total > 0 else 1 + + results = query.order_by(sort_order).offset((page - 1) * limit).limit(limit).all() + + items = [ + UploadHistoryResponse( + id=upload.id, + artifact_id=upload.artifact_id, + package_id=upload.package_id, + package_name=pkg.name, + project_name=proj.name, + original_name=upload.original_name, + tag_name=upload.tag_name, + uploaded_at=upload.uploaded_at, + uploaded_by=upload.uploaded_by, + source_ip=upload.source_ip, + deduplicated=upload.deduplicated or False, + artifact_size=artifact.size, + artifact_content_type=artifact.content_type, + ) + for upload, pkg, proj, artifact in results + ] + + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page < total_pages, + ), + ) + + +@router.get( + "/api/v1/project/{project_name}/uploads", + response_model=PaginatedResponse[UploadHistoryResponse], +) +def list_project_uploads( + project_name: str, + package: Optional[str] = Query(None, description="Filter by package name"), + uploaded_by: Optional[str] = Query(None, description="Filter by uploader"), + from_date: Optional[datetime] = Query(None, alias="from", description="Start date"), + to_date: Optional[datetime] = Query(None, alias="to", description="End date"), + deduplicated: Optional[bool] = Query( + None, description="Filter by deduplication status" + ), + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + """ + List upload events for a specific project. 
+ + Supports filtering by: + - package: Filter by package name within the project + - uploaded_by: Filter by user ID + - from/to: Filter by timestamp range + - deduplicated: Filter by deduplication status + """ + project = db.query(Project).filter(Project.name == project_name).first() + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + # Get all package IDs for this project + package_ids_query = db.query(Package.id).filter(Package.project_id == project.id) + + if package: + package_ids_query = package_ids_query.filter(Package.name == package) + + package_ids = package_ids_query.subquery() + + query = ( + db.query(Upload, Package, Artifact) + .join(Package, Upload.package_id == Package.id) + .join(Artifact, Upload.artifact_id == Artifact.id) + .filter(Upload.package_id.in_(package_ids)) + ) + + if uploaded_by: + query = query.filter(Upload.uploaded_by == uploaded_by) + if from_date: + query = query.filter(Upload.uploaded_at >= from_date) + if to_date: + query = query.filter(Upload.uploaded_at <= to_date) + if deduplicated is not None: + query = query.filter(Upload.deduplicated == deduplicated) + + total = query.count() + total_pages = math.ceil(total / limit) if total > 0 else 1 + + results = ( + query.order_by(Upload.uploaded_at.desc()) + .offset((page - 1) * limit) + .limit(limit) + .all() + ) + + items = [ + UploadHistoryResponse( + id=upload.id, + artifact_id=upload.artifact_id, + package_id=upload.package_id, + package_name=pkg.name, + project_name=project_name, + original_name=upload.original_name, + tag_name=upload.tag_name, + uploaded_at=upload.uploaded_at, + uploaded_by=upload.uploaded_by, + source_ip=upload.source_ip, + deduplicated=upload.deduplicated or False, + artifact_size=artifact.size, + artifact_content_type=artifact.content_type, + ) + for upload, pkg, artifact in results + ] + + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + 
total_pages=total_pages, + has_more=page < total_pages, + ), + ) + + +@router.get( + "/api/v1/project/{project_name}/{package_name}/uploads", + response_model=PaginatedResponse[UploadHistoryResponse], +) +def list_package_uploads( + project_name: str, + package_name: str, + from_date: Optional[datetime] = Query(None, alias="from", description="Start date"), + to_date: Optional[datetime] = Query(None, alias="to", description="End date"), + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + """List upload events for a specific package.""" + project = db.query(Project).filter(Project.name == project_name).first() + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + package = ( + db.query(Package) + .filter(Package.project_id == project.id, Package.name == package_name) + .first() + ) + if not package: + raise HTTPException(status_code=404, detail="Package not found") + + query = db.query(Upload).filter(Upload.package_id == package.id) + + if from_date: + query = query.filter(Upload.uploaded_at >= from_date) + if to_date: + query = query.filter(Upload.uploaded_at <= to_date) + + total = query.count() + total_pages = math.ceil(total / limit) if total > 0 else 1 + + uploads = ( + query.order_by(Upload.uploaded_at.desc()) + .offset((page - 1) * limit) + .limit(limit) + .all() + ) + + # Build response with artifact metadata + items = [] + for upload in uploads: + artifact = db.query(Artifact).filter(Artifact.id == upload.artifact_id).first() + items.append( + UploadHistoryResponse( + id=upload.id, + artifact_id=upload.artifact_id, + package_id=upload.package_id, + package_name=package_name, + project_name=project_name, + original_name=upload.original_name, + tag_name=upload.tag_name, + uploaded_at=upload.uploaded_at, + uploaded_by=upload.uploaded_by, + source_ip=upload.source_ip, + deduplicated=upload.deduplicated or False, + artifact_size=artifact.size if artifact else 0, + 
artifact_content_type=artifact.content_type if artifact else None, + ) + ) + + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page < total_pages, + ), + ) + + +@router.get( + "/api/v1/artifact/{artifact_id}/uploads", + response_model=PaginatedResponse[UploadHistoryResponse], +) +def list_artifact_uploads( + artifact_id: str, + page: int = Query(1, ge=1), + limit: int = Query(20, ge=1, le=100), + db: Session = Depends(get_db), +): + """List all upload events for a specific artifact.""" + artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first() + if not artifact: + raise HTTPException(status_code=404, detail="Artifact not found") + + query = db.query(Upload).filter(Upload.artifact_id == artifact_id) + + total = query.count() + total_pages = math.ceil(total / limit) if total > 0 else 1 + + uploads = ( + query.order_by(Upload.uploaded_at.desc()) + .offset((page - 1) * limit) + .limit(limit) + .all() + ) + + # Build response with package/project metadata + items = [] + for upload in uploads: + package = db.query(Package).filter(Package.id == upload.package_id).first() + project = ( + db.query(Project).filter(Project.id == package.project_id).first() + if package + else None + ) + items.append( + UploadHistoryResponse( + id=upload.id, + artifact_id=upload.artifact_id, + package_id=upload.package_id, + package_name=package.name if package else "unknown", + project_name=project.name if project else "unknown", + original_name=upload.original_name, + tag_name=upload.tag_name, + uploaded_at=upload.uploaded_at, + uploaded_by=upload.uploaded_by, + source_ip=upload.source_ip, + deduplicated=upload.deduplicated or False, + artifact_size=artifact.size, + artifact_content_type=artifact.content_type, + ) + ) + + return PaginatedResponse( + items=items, + pagination=PaginationMeta( + page=page, + limit=limit, + total=total, + total_pages=total_pages, + has_more=page 
< total_pages, + ), + ) + + +# ============================================================================= +# Artifact Provenance/History Endpoint +# ============================================================================= + + +@router.get( + "/api/v1/artifact/{artifact_id}/history", response_model=ArtifactProvenanceResponse +) +def get_artifact_provenance( + artifact_id: str, + db: Session = Depends(get_db), +): + """ + Get full provenance/history of an artifact. + + Returns: + - Artifact metadata + - First upload information + - All packages/tags referencing the artifact + - Complete upload history + """ + artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first() + if not artifact: + raise HTTPException(status_code=404, detail="Artifact not found") + + # Get all uploads for this artifact + uploads = ( + db.query(Upload) + .filter(Upload.artifact_id == artifact_id) + .order_by(Upload.uploaded_at.asc()) + .all() + ) + + # Get first upload info + first_upload = uploads[0] if uploads else None + + # Get all tags referencing this artifact + tags = db.query(Tag).filter(Tag.artifact_id == artifact_id).all() + + # Build package list with tags + package_map = {} # package_id -> {project_name, package_name, tag_names} + tag_list = [] + + for tag in tags: + package = db.query(Package).filter(Package.id == tag.package_id).first() + if package: + project = db.query(Project).filter(Project.id == package.project_id).first() + project_name = project.name if project else "unknown" + + # Add to package map + pkg_key = str(package.id) + if pkg_key not in package_map: + package_map[pkg_key] = { + "project_name": project_name, + "package_name": package.name, + "tag_names": [], + } + package_map[pkg_key]["tag_names"].append(tag.name) + + # Add to tag list + tag_list.append( + { + "project_name": project_name, + "package_name": package.name, + "tag_name": tag.name, + "created_at": tag.created_at.isoformat() + if tag.created_at + else None, + } + ) + + # Build 
upload history + upload_history = [] + for upload in uploads: + package = db.query(Package).filter(Package.id == upload.package_id).first() + project = ( + db.query(Project).filter(Project.id == package.project_id).first() + if package + else None + ) + upload_history.append( + { + "upload_id": str(upload.id), + "project_name": project.name if project else "unknown", + "package_name": package.name if package else "unknown", + "original_name": upload.original_name, + "tag_name": upload.tag_name, + "uploaded_at": upload.uploaded_at.isoformat() + if upload.uploaded_at + else None, + "uploaded_by": upload.uploaded_by, + "deduplicated": upload.deduplicated or False, + } + ) + + return ArtifactProvenanceResponse( + artifact_id=artifact.id, + sha256=artifact.id, + size=artifact.size, + content_type=artifact.content_type, + original_name=artifact.original_name, + created_at=artifact.created_at, + created_by=artifact.created_by, + ref_count=artifact.ref_count, + first_uploaded_at=first_upload.uploaded_at + if first_upload + else artifact.created_at, + first_uploaded_by=first_upload.uploaded_by + if first_upload + else artifact.created_by, + upload_count=len(uploads), + packages=list(package_map.values()), + tags=tag_list, + uploads=upload_history, + ) diff --git a/backend/app/schemas.py b/backend/app/schemas.py index 4c7db29..9bd3701 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -12,6 +12,7 @@ class PaginationMeta(BaseModel): limit: int total: int total_pages: int + has_more: bool = False # True if there are more pages after current page class PaginatedResponse(BaseModel, Generic[T]): @@ -39,6 +40,13 @@ class ProjectResponse(BaseModel): from_attributes = True +class ProjectUpdate(BaseModel): + """Schema for updating a project""" + + description: Optional[str] = None + is_public: Optional[bool] = None + + # Package format and platform enums PACKAGE_FORMATS = [ "generic", @@ -86,6 +94,14 @@ class PackageResponse(BaseModel): from_attributes = True +class 
PackageUpdate(BaseModel): + """Schema for updating a package""" + + description: Optional[str] = None + format: Optional[str] = None + platform: Optional[str] = None + + class TagSummary(BaseModel): """Lightweight tag info for embedding in package responses""" @@ -189,6 +205,93 @@ class TagHistoryResponse(BaseModel): from_attributes = True +class TagHistoryDetailResponse(BaseModel): + """Tag history with artifact metadata for each version""" + + id: UUID + tag_id: UUID + tag_name: str + old_artifact_id: Optional[str] + new_artifact_id: str + changed_at: datetime + changed_by: str + # Artifact metadata for new artifact + artifact_size: int + artifact_original_name: Optional[str] + artifact_content_type: Optional[str] + + class Config: + from_attributes = True + + +# Audit log schemas +class AuditLogResponse(BaseModel): + """Audit log entry response""" + + id: UUID + action: str + resource: str + user_id: str + details: Optional[Dict[str, Any]] + timestamp: datetime + source_ip: Optional[str] + + class Config: + from_attributes = True + + +# Upload history schemas +class UploadHistoryResponse(BaseModel): + """Upload event with artifact details""" + + id: UUID + artifact_id: str + package_id: UUID + package_name: str + project_name: str + original_name: Optional[str] + tag_name: Optional[str] + uploaded_at: datetime + uploaded_by: str + source_ip: Optional[str] + deduplicated: bool + # Artifact metadata + artifact_size: int + artifact_content_type: Optional[str] + + class Config: + from_attributes = True + + +# Artifact provenance schemas +class ArtifactProvenanceResponse(BaseModel): + """Full provenance/history of an artifact""" + + artifact_id: str + sha256: str + size: int + content_type: Optional[str] + original_name: Optional[str] + created_at: datetime + created_by: str + ref_count: int + # First upload info + first_uploaded_at: datetime + first_uploaded_by: str + # Usage statistics + upload_count: int + # References + packages: List[Dict[str, Any]] # List of 
{project_name, package_name, tag_names} + tags: List[ + Dict[str, Any] + ] # List of {project_name, package_name, tag_name, created_at} + # Upload history + uploads: List[Dict[str, Any]] # List of upload events + + class Config: + from_attributes = True + + class ArtifactTagInfo(BaseModel): """Tag info for embedding in artifact responses""" @@ -240,6 +343,44 @@ class PackageArtifactResponse(BaseModel): from_attributes = True +class GlobalArtifactResponse(BaseModel): + """Artifact with project/package context for global listing""" + + id: str + sha256: str + size: int + content_type: Optional[str] + original_name: Optional[str] + created_at: datetime + created_by: str + format_metadata: Optional[Dict[str, Any]] = None + ref_count: int = 0 + # Context from tags/packages + projects: List[str] = [] # List of project names containing this artifact + packages: List[str] = [] # List of "project/package" paths + tags: List[str] = [] # List of "project/package:tag" references + + class Config: + from_attributes = True + + +class GlobalTagResponse(BaseModel): + """Tag with project/package context for global listing""" + + id: UUID + name: str + artifact_id: str + created_at: datetime + created_by: str + project_name: str + package_name: str + artifact_size: Optional[int] = None + artifact_content_type: Optional[str] = None + + class Config: + from_attributes = True + + # Upload response class UploadResponse(BaseModel): artifact_id: str @@ -254,6 +395,11 @@ class UploadResponse(BaseModel): format_metadata: Optional[Dict[str, Any]] = None deduplicated: bool = False ref_count: int = 1 # Current reference count after this upload + # Enhanced metadata (Issue #19) + upload_id: Optional[UUID] = None # UUID of the upload record + content_type: Optional[str] = None + original_name: Optional[str] = None + created_at: Optional[datetime] = None # Resumable upload schemas @@ -440,6 +586,19 @@ class StorageStatsResponse(BaseModel): storage_saved_bytes: int # Bytes saved through 
deduplication +class ConsistencyCheckResponse(BaseModel): + """Result of S3/Database consistency check""" + + total_artifacts_checked: int + orphaned_s3_objects: int # Objects in S3 but not in DB + missing_s3_objects: int # Records in DB but not in S3 + size_mismatches: int # Records where DB size != S3 size + healthy: bool + orphaned_s3_keys: List[str] = [] # Limited list of orphaned S3 keys + missing_s3_keys: List[str] = [] # Limited list of missing S3 keys + size_mismatch_artifacts: List[Dict[str, Any]] = [] # Limited list of mismatches + + class DeduplicationStatsResponse(BaseModel): """Deduplication effectiveness statistics""" diff --git a/backend/app/services/artifact_cleanup.py b/backend/app/services/artifact_cleanup.py index d1e807d..0857155 100644 --- a/backend/app/services/artifact_cleanup.py +++ b/backend/app/services/artifact_cleanup.py @@ -6,7 +6,7 @@ from typing import List, Optional, Tuple from sqlalchemy.orm import Session import logging -from ..models import Artifact, Tag, Upload, Package +from ..models import Artifact, Tag from ..repositories.artifact import ArtifactRepository from ..repositories.tag import TagRepository from ..storage import S3Storage @@ -40,10 +40,14 @@ class ArtifactCleanupService: artifact = self.artifact_repo.get_by_sha256(artifact_id) if artifact: artifact = self.artifact_repo.decrement_ref_count(artifact) - logger.info(f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}") + logger.info( + f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}" + ) return artifact - def on_tag_updated(self, old_artifact_id: str, new_artifact_id: str) -> Tuple[Optional[Artifact], Optional[Artifact]]: + def on_tag_updated( + self, old_artifact_id: str, new_artifact_id: str + ) -> Tuple[Optional[Artifact], Optional[Artifact]]: """ Called when a tag is updated to point to a different artifact. Decrements ref_count for old artifact, increments for new (if different). 
@@ -58,13 +62,17 @@ class ArtifactCleanupService: old_artifact = self.artifact_repo.get_by_sha256(old_artifact_id) if old_artifact: old_artifact = self.artifact_repo.decrement_ref_count(old_artifact) - logger.info(f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}") + logger.info( + f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}" + ) # Increment new artifact ref_count new_artifact = self.artifact_repo.get_by_sha256(new_artifact_id) if new_artifact: new_artifact = self.artifact_repo.increment_ref_count(new_artifact) - logger.info(f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}") + logger.info( + f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}" + ) return old_artifact, new_artifact @@ -84,11 +92,15 @@ class ArtifactCleanupService: if artifact: self.artifact_repo.decrement_ref_count(artifact) affected_artifacts.append(tag.artifact_id) - logger.info(f"Decremented ref_count for artifact {tag.artifact_id} (package delete)") + logger.info( + f"Decremented ref_count for artifact {tag.artifact_id} (package delete)" + ) return affected_artifacts - def cleanup_orphaned_artifacts(self, batch_size: int = 100, dry_run: bool = False) -> List[str]: + def cleanup_orphaned_artifacts( + self, batch_size: int = 100, dry_run: bool = False + ) -> List[str]: """ Find and delete artifacts with ref_count = 0. 
@@ -116,7 +128,9 @@ class ArtifactCleanupService: # Then delete from database self.artifact_repo.delete(artifact) deleted_ids.append(artifact.id) - logger.info(f"Deleted orphaned artifact from database: {artifact.id}") + logger.info( + f"Deleted orphaned artifact from database: {artifact.id}" + ) except Exception as e: logger.error(f"Failed to delete artifact {artifact.id}: {e}") @@ -128,10 +142,12 @@ class ArtifactCleanupService: def get_orphaned_count(self) -> int: """Get count of artifacts with ref_count = 0.""" from sqlalchemy import func + return ( self.db.query(func.count(Artifact.id)) .filter(Artifact.ref_count == 0) - .scalar() or 0 + .scalar() + or 0 ) def verify_ref_counts(self, fix: bool = False) -> List[dict]: @@ -173,7 +189,9 @@ class ArtifactCleanupService: if fix: artifact.ref_count = max(actual_count, 1) - logger.warning(f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}") + logger.warning( + f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}" + ) if fix and mismatches: self.db.commit() diff --git a/backend/app/storage.py b/backend/app/storage.py index 99b4783..440dbaf 100644 --- a/backend/app/storage.py +++ b/backend/app/storage.py @@ -202,6 +202,9 @@ class StorageResult(NamedTuple): md5: Optional[str] = None sha1: Optional[str] = None s3_etag: Optional[str] = None + already_existed: bool = ( + False # True if artifact was deduplicated (S3 object already existed) + ) class S3StorageUnavailableError(StorageError): @@ -354,6 +357,7 @@ class S3Storage: md5=md5_hash, sha1=sha1_hash, s3_etag=s3_etag, + already_existed=exists, ) def _store_multipart(self, file: BinaryIO, content_length: int) -> StorageResult: @@ -433,6 +437,7 @@ class S3Storage: md5=md5_hash, sha1=sha1_hash, s3_etag=s3_etag, + already_existed=True, ) # Seek back to start for upload @@ -486,6 +491,7 @@ class S3Storage: md5=md5_hash, sha1=sha1_hash, s3_etag=s3_etag, + already_existed=False, 
) except Exception as e: @@ -535,6 +541,7 @@ class S3Storage: md5=md5_hash, sha1=sha1_hash, s3_etag=s3_etag, + already_existed=True, ) # Upload based on size @@ -615,6 +622,7 @@ class S3Storage: md5=md5_hash, sha1=sha1_hash, s3_etag=s3_etag, + already_existed=False, ) def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]: diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 605dfe3..34111d8 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -4,15 +4,14 @@ Test configuration and fixtures for Orchard backend tests. This module provides: - Database fixtures with test isolation - Mock S3 storage using moto -- Test data factories for common scenarios +- Shared pytest fixtures """ import os import pytest -import hashlib -from typing import Generator, BinaryIO -from unittest.mock import MagicMock, patch import io +from typing import Generator +from unittest.mock import MagicMock # Set test environment defaults before importing app modules # Use setdefault to NOT override existing env vars (from docker-compose) @@ -26,54 +25,27 @@ os.environ.setdefault("ORCHARD_S3_BUCKET", "test-bucket") os.environ.setdefault("ORCHARD_S3_ACCESS_KEY_ID", "test") os.environ.setdefault("ORCHARD_S3_SECRET_ACCESS_KEY", "test") - -# ============================================================================= -# Test Data Factories -# ============================================================================= - - -def create_test_file(content: bytes = None, size: int = 1024) -> io.BytesIO: - """ - Create a test file with known content. 
- - Args: - content: Specific content to use, or None to generate random-ish content - size: Size of generated content if content is None - - Returns: - BytesIO object with the content - """ - if content is None: - content = os.urandom(size) - return io.BytesIO(content) - - -def compute_sha256(content: bytes) -> str: - """Compute SHA256 hash of content as lowercase hex string.""" - return hashlib.sha256(content).hexdigest() - - -def compute_md5(content: bytes) -> str: - """Compute MD5 hash of content as lowercase hex string.""" - return hashlib.md5(content).hexdigest() - - -def compute_sha1(content: bytes) -> str: - """Compute SHA1 hash of content as lowercase hex string.""" - return hashlib.sha1(content).hexdigest() - - -# Known test data with pre-computed hashes -TEST_CONTENT_HELLO = b"Hello, World!" -TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" -TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4" -TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01" - -TEST_CONTENT_EMPTY = b"" -# Note: Empty content should be rejected by the storage layer - -TEST_CONTENT_BINARY = bytes(range(256)) -TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY) +# Re-export factory functions for backward compatibility +from tests.factories import ( + create_test_file, + compute_sha256, + compute_md5, + compute_sha1, + upload_test_file, + TEST_CONTENT_HELLO, + TEST_HASH_HELLO, + TEST_MD5_HELLO, + TEST_SHA1_HELLO, + TEST_CONTENT_EMPTY, + TEST_CONTENT_BINARY, + TEST_HASH_BINARY, + get_s3_client, + get_s3_bucket, + list_s3_objects_by_hash, + count_s3_objects_by_prefix, + s3_object_exists, + delete_s3_object_by_hash, +) # ============================================================================= @@ -289,126 +261,3 @@ def test_content(): content = f"test-content-{uuid.uuid4().hex}".encode() sha256 = compute_sha256(content) return (content, sha256) - - -def upload_test_file( - client, - project: str, - package: str, - content: bytes, - 
filename: str = "test.bin", - tag: str = None, -) -> dict: - """ - Helper function to upload a test file. - - Returns the upload response as a dict. - """ - files = {"file": (filename, io.BytesIO(content), "application/octet-stream")} - data = {} - if tag: - data["tag"] = tag - - response = client.post( - f"/api/v1/project/{project}/{package}/upload", - files=files, - data=data if data else None, - ) - assert response.status_code == 200, f"Upload failed: {response.text}" - return response.json() - - -# ============================================================================= -# S3 Direct Access Helpers (for integration tests) -# ============================================================================= - - -def get_s3_client(): - """ - Create a boto3 S3 client for direct S3 access in integration tests. - - Uses environment variables for configuration (same as the app). - Note: When running in container, S3 endpoint should be 'minio:9000' not 'localhost:9000'. - """ - import boto3 - from botocore.config import Config - - config = Config(s3={"addressing_style": "path"}) - - # Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally) - endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000") - - return boto3.client( - "s3", - endpoint_url=endpoint, - region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"), - aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"), - aws_secret_access_key=os.environ.get( - "ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin" - ), - config=config, - ) - - -def get_s3_bucket(): - """Get the S3 bucket name from environment.""" - return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts") - - -def list_s3_objects_by_hash(sha256_hash: str) -> list: - """ - List S3 objects that match a specific SHA256 hash. - - Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern. - Returns list of matching object keys. 
- """ - client = get_s3_client() - bucket = get_s3_bucket() - prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}" - - response = client.list_objects_v2(Bucket=bucket, Prefix=prefix) - - if "Contents" not in response: - return [] - - return [obj["Key"] for obj in response["Contents"]] - - -def count_s3_objects_by_prefix(prefix: str) -> int: - """ - Count S3 objects with a given prefix. - - Useful for checking if duplicate uploads created multiple objects. - """ - client = get_s3_client() - bucket = get_s3_bucket() - - response = client.list_objects_v2(Bucket=bucket, Prefix=prefix) - - if "Contents" not in response: - return 0 - - return len(response["Contents"]) - - -def s3_object_exists(sha256_hash: str) -> bool: - """ - Check if an S3 object exists for a given SHA256 hash. - """ - objects = list_s3_objects_by_hash(sha256_hash) - return len(objects) > 0 - - -def delete_s3_object_by_hash(sha256_hash: str) -> bool: - """ - Delete an S3 object by its SHA256 hash (for test cleanup). - """ - client = get_s3_client() - bucket = get_s3_bucket() - s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}" - - try: - client.delete_object(Bucket=bucket, Key=s3_key) - return True - except Exception: - return False diff --git a/backend/tests/factories.py b/backend/tests/factories.py new file mode 100644 index 0000000..cd58f2a --- /dev/null +++ b/backend/tests/factories.py @@ -0,0 +1,288 @@ +""" +Test data factories for Orchard backend tests. + +This module provides factory functions for creating test data, +including test files, pre-computed hashes, and helper utilities. 
+""" + +import hashlib +import io +import os +import uuid +from typing import Optional + + +# ============================================================================= +# Hash Computation Utilities +# ============================================================================= + + +def compute_sha256(content: bytes) -> str: + """Compute SHA256 hash of content as lowercase hex string.""" + return hashlib.sha256(content).hexdigest() + + +def compute_md5(content: bytes) -> str: + """Compute MD5 hash of content as lowercase hex string.""" + return hashlib.md5(content).hexdigest() + + +def compute_sha1(content: bytes) -> str: + """Compute SHA1 hash of content as lowercase hex string.""" + return hashlib.sha1(content).hexdigest() + + +# ============================================================================= +# Test File Factories +# ============================================================================= + + +def create_test_file(content: Optional[bytes] = None, size: int = 1024) -> io.BytesIO: + """ + Create a test file with known content. + + Args: + content: Specific content to use, or None to generate random-ish content + size: Size of generated content if content is None + + Returns: + BytesIO object with the content + """ + if content is None: + content = os.urandom(size) + return io.BytesIO(content) + + +def create_unique_content(prefix: str = "test-content") -> tuple[bytes, str]: + """ + Create unique test content with its SHA256 hash. + + Args: + prefix: Prefix for the content string + + Returns: + Tuple of (content_bytes, sha256_hash) + """ + content = f"{prefix}-{uuid.uuid4().hex}".encode() + sha256 = compute_sha256(content) + return content, sha256 + + +# ============================================================================= +# Known Test Data (Pre-computed hashes for deterministic tests) +# ============================================================================= + + +TEST_CONTENT_HELLO = b"Hello, World!" 
+TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" +TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4" +TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01" + +TEST_CONTENT_EMPTY = b"" +# Note: Empty content should be rejected by the storage layer + +TEST_CONTENT_BINARY = bytes(range(256)) +TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY) + + +# ============================================================================= +# API Test Helpers +# ============================================================================= + + +def upload_test_file( + client, + project: str, + package: str, + content: bytes, + filename: str = "test.bin", + tag: Optional[str] = None, +) -> dict: + """ + Helper function to upload a test file via the API. + + Args: + client: HTTP client (httpx or TestClient) + project: Project name + package: Package name + content: File content as bytes + filename: Original filename + tag: Optional tag to assign + + Returns: + The upload response as a dict + """ + files = {"file": (filename, io.BytesIO(content), "application/octet-stream")} + data = {} + if tag: + data["tag"] = tag + + response = client.post( + f"/api/v1/project/{project}/{package}/upload", + files=files, + data=data if data else None, + ) + assert response.status_code == 200, f"Upload failed: {response.text}" + return response.json() + + +# ============================================================================= +# Project/Package Factories +# ============================================================================= + + +def create_test_project(client, unique_id: Optional[str] = None) -> str: + """ + Create a test project via the API. 
+ + Args: + client: HTTP client + unique_id: Unique identifier for the project name + + Returns: + Project name + """ + if unique_id is None: + unique_id = uuid.uuid4().hex[:8] + + project_name = f"test-project-{unique_id}" + response = client.post( + "/api/v1/projects", + json={"name": project_name, "description": "Test project", "is_public": True}, + ) + assert response.status_code == 200, f"Failed to create project: {response.text}" + return project_name + + +def create_test_package(client, project: str, unique_id: Optional[str] = None) -> str: + """ + Create a test package via the API. + + Args: + client: HTTP client + project: Project name + unique_id: Unique identifier for the package name + + Returns: + Package name + """ + if unique_id is None: + unique_id = uuid.uuid4().hex[:8] + + package_name = f"test-package-{unique_id}" + response = client.post( + f"/api/v1/project/{project}/packages", + json={"name": package_name, "description": "Test package"}, + ) + assert response.status_code == 200, f"Failed to create package: {response.text}" + return package_name + + +def delete_test_project(client, project: str) -> None: + """ + Delete a test project (cleanup helper). + + Args: + client: HTTP client + project: Project name to delete + """ + try: + client.delete(f"/api/v1/projects/{project}") + except Exception: + pass # Ignore cleanup errors + + +# ============================================================================= +# S3 Test Helpers +# ============================================================================= + + +def get_s3_client(): + """ + Create a boto3 S3 client for direct S3 access in integration tests. + + Uses environment variables for configuration (same as the app). + Note: When running in container, S3 endpoint should be 'minio:9000' not 'localhost:9000'. 
+ """ + import boto3 + from botocore.config import Config + + config = Config(s3={"addressing_style": "path"}) + + # Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally) + endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000") + + return boto3.client( + "s3", + endpoint_url=endpoint, + region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"), + aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"), + aws_secret_access_key=os.environ.get( + "ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin" + ), + config=config, + ) + + +def get_s3_bucket() -> str: + """Get the S3 bucket name from environment.""" + return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts") + + +def list_s3_objects_by_hash(sha256_hash: str) -> list: + """ + List S3 objects that match a specific SHA256 hash. + + Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern. + Returns list of matching object keys. + """ + client = get_s3_client() + bucket = get_s3_bucket() + prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}" + + response = client.list_objects_v2(Bucket=bucket, Prefix=prefix) + + if "Contents" not in response: + return [] + + return [obj["Key"] for obj in response["Contents"]] + + +def count_s3_objects_by_prefix(prefix: str) -> int: + """ + Count S3 objects with a given prefix. + + Useful for checking if duplicate uploads created multiple objects. + """ + client = get_s3_client() + bucket = get_s3_bucket() + + response = client.list_objects_v2(Bucket=bucket, Prefix=prefix) + + if "Contents" not in response: + return 0 + + return len(response["Contents"]) + + +def s3_object_exists(sha256_hash: str) -> bool: + """ + Check if an S3 object exists for a given SHA256 hash. + """ + objects = list_s3_objects_by_hash(sha256_hash) + return len(objects) > 0 + + +def delete_s3_object_by_hash(sha256_hash: str) -> bool: + """ + Delete an S3 object by its SHA256 hash (for test cleanup). 
+ """ + client = get_s3_client() + bucket = get_s3_bucket() + s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}" + + try: + client.delete_object(Bucket=bucket, Key=s3_key) + return True + except Exception: + return False diff --git a/backend/tests/integration/__init__.py b/backend/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/integration/test_artifacts_api.py b/backend/tests/integration/test_artifacts_api.py new file mode 100644 index 0000000..f9b0841 --- /dev/null +++ b/backend/tests/integration/test_artifacts_api.py @@ -0,0 +1,638 @@ +""" +Integration tests for artifact API endpoints. + +Tests cover: +- Artifact retrieval by ID +- Artifact stats endpoint +- Artifact provenance/history +- Artifact uploads listing +- Garbage collection endpoints +- Orphaned artifacts management +""" + +import pytest +from tests.factories import compute_sha256, upload_test_file + + +class TestArtifactRetrieval: + """Tests for artifact retrieval endpoints.""" + + @pytest.mark.integration + def test_get_artifact_by_id(self, integration_client, test_package): + """Test retrieving an artifact by its SHA256 ID.""" + project_name, package_name = test_package + content = b"artifact retrieval test" + expected_hash = compute_sha256(content) + + upload_test_file( + integration_client, project_name, package_name, content, tag="v1" + ) + + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + + data = response.json() + assert data["id"] == expected_hash + assert data["sha256"] == expected_hash + assert data["size"] == len(content) + assert "ref_count" in data + assert "created_at" in data + + @pytest.mark.integration + def test_get_nonexistent_artifact(self, integration_client): + """Test getting a non-existent artifact returns 404.""" + fake_hash = "a" * 64 + response = integration_client.get(f"/api/v1/artifact/{fake_hash}") + assert response.status_code == 404 + + 
@pytest.mark.integration + def test_artifact_includes_tags(self, integration_client, test_package): + """Test artifact response includes tags pointing to it.""" + project_name, package_name = test_package + content = b"artifact with tags test" + expected_hash = compute_sha256(content) + + upload_test_file( + integration_client, project_name, package_name, content, tag="tagged-v1" + ) + + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + + data = response.json() + assert "tags" in data + assert len(data["tags"]) >= 1 + + tag = data["tags"][0] + assert "name" in tag + assert "package_name" in tag + assert "project_name" in tag + + +class TestArtifactStats: + """Tests for artifact statistics endpoint.""" + + @pytest.mark.integration + def test_artifact_stats_returns_valid_response( + self, integration_client, test_package, unique_test_id + ): + """Test artifact stats returns expected fields.""" + project, package = test_package + content = f"artifact stats test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + upload_test_file( + integration_client, project, package, content, tag=f"art-{unique_test_id}" + ) + + response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats") + assert response.status_code == 200 + + data = response.json() + assert "artifact_id" in data + assert "sha256" in data + assert "size" in data + assert "ref_count" in data + assert "storage_savings" in data + assert "tags" in data + assert "projects" in data + assert "packages" in data + + @pytest.mark.integration + def test_artifact_stats_not_found(self, integration_client): + """Test artifact stats returns 404 for non-existent artifact.""" + fake_hash = "0" * 64 + response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats") + assert response.status_code == 404 + + @pytest.mark.integration + def test_artifact_stats_shows_correct_projects( + self, integration_client, unique_test_id + ): + 
"""Test artifact stats shows all projects using the artifact.""" + content = f"multi-project artifact {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + proj1 = f"art-stats-a-{unique_test_id}" + proj2 = f"art-stats-b-{unique_test_id}" + + try: + # Create projects and packages + integration_client.post( + "/api/v1/projects", + json={"name": proj1, "description": "Test", "is_public": True}, + ) + integration_client.post( + "/api/v1/projects", + json={"name": proj2, "description": "Test", "is_public": True}, + ) + integration_client.post( + f"/api/v1/project/{proj1}/packages", + json={"name": "pkg", "description": "Test"}, + ) + integration_client.post( + f"/api/v1/project/{proj2}/packages", + json={"name": "pkg", "description": "Test"}, + ) + + # Upload same content to both projects + upload_test_file(integration_client, proj1, "pkg", content, tag="v1") + upload_test_file(integration_client, proj2, "pkg", content, tag="v1") + + # Check artifact stats + response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats") + assert response.status_code == 200 + + data = response.json() + assert len(data["projects"]) == 2 + assert proj1 in data["projects"] + assert proj2 in data["projects"] + + finally: + integration_client.delete(f"/api/v1/projects/{proj1}") + integration_client.delete(f"/api/v1/projects/{proj2}") + + +class TestArtifactProvenance: + """Tests for artifact provenance/history endpoint.""" + + @pytest.mark.integration + def test_artifact_history_returns_200(self, integration_client, test_package): + """Test artifact history endpoint returns 200.""" + project_name, package_name = test_package + + upload_result = upload_test_file( + integration_client, + project_name, + package_name, + b"provenance test content", + "prov.txt", + ) + artifact_id = upload_result["artifact_id"] + + response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history") + assert response.status_code == 200 + + @pytest.mark.integration + def 
test_artifact_history_has_required_fields( + self, integration_client, test_package + ): + """Test artifact history has all required fields.""" + project_name, package_name = test_package + + upload_result = upload_test_file( + integration_client, + project_name, + package_name, + b"provenance fields test", + "fields.txt", + ) + artifact_id = upload_result["artifact_id"] + + response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history") + assert response.status_code == 200 + + data = response.json() + assert "artifact_id" in data + assert "sha256" in data + assert "size" in data + assert "created_at" in data + assert "created_by" in data + assert "ref_count" in data + assert "first_uploaded_at" in data + assert "first_uploaded_by" in data + assert "upload_count" in data + assert "packages" in data + assert "tags" in data + assert "uploads" in data + + @pytest.mark.integration + def test_artifact_history_not_found(self, integration_client): + """Test non-existent artifact returns 404.""" + fake_hash = "b" * 64 + response = integration_client.get(f"/api/v1/artifact/{fake_hash}/history") + assert response.status_code == 404 + + @pytest.mark.integration + def test_artifact_history_with_tag(self, integration_client, test_package): + """Test artifact history includes tag information when tagged.""" + project_name, package_name = test_package + + upload_result = upload_test_file( + integration_client, + project_name, + package_name, + b"tagged provenance test", + "tagged.txt", + tag="v1.0.0", + ) + artifact_id = upload_result["artifact_id"] + + response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history") + assert response.status_code == 200 + + data = response.json() + assert len(data["tags"]) >= 1 + + tag = data["tags"][0] + assert "project_name" in tag + assert "package_name" in tag + assert "tag_name" in tag + + +class TestArtifactUploads: + """Tests for artifact uploads listing endpoint.""" + + @pytest.mark.integration + def 
test_artifact_uploads_returns_200(self, integration_client, test_package): + """Test artifact uploads endpoint returns 200.""" + project_name, package_name = test_package + + upload_result = upload_test_file( + integration_client, + project_name, + package_name, + b"artifact upload test", + "artifact.txt", + ) + artifact_id = upload_result["artifact_id"] + + response = integration_client.get(f"/api/v1/artifact/{artifact_id}/uploads") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + assert len(data["items"]) >= 1 + + @pytest.mark.integration + def test_artifact_uploads_not_found(self, integration_client): + """Test non-existent artifact returns 404.""" + fake_hash = "a" * 64 + response = integration_client.get(f"/api/v1/artifact/{fake_hash}/uploads") + assert response.status_code == 404 + + +class TestOrphanedArtifacts: + """Tests for orphaned artifacts management.""" + + @pytest.mark.integration + def test_list_orphaned_artifacts_returns_list(self, integration_client): + """Test orphaned artifacts endpoint returns a list.""" + response = integration_client.get("/api/v1/admin/orphaned-artifacts") + assert response.status_code == 200 + assert isinstance(response.json(), list) + + @pytest.mark.integration + def test_orphaned_artifact_has_required_fields(self, integration_client): + """Test orphaned artifact response has required fields.""" + response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1") + assert response.status_code == 200 + + data = response.json() + if len(data) > 0: + artifact = data[0] + assert "id" in artifact + assert "size" in artifact + assert "created_at" in artifact + assert "created_by" in artifact + assert "original_name" in artifact + + @pytest.mark.integration + def test_orphaned_artifacts_respects_limit(self, integration_client): + """Test orphaned artifacts endpoint respects limit parameter.""" + response = 
integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5") + assert response.status_code == 200 + assert len(response.json()) <= 5 + + @pytest.mark.integration + def test_artifact_becomes_orphaned_when_tag_deleted( + self, integration_client, test_package, unique_test_id + ): + """Test artifact appears in orphaned list after tag is deleted.""" + project, package = test_package + content = f"orphan test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + # Upload with tag + upload_test_file(integration_client, project, package, content, tag="temp-tag") + + # Verify not in orphaned list + response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000") + orphaned_ids = [a["id"] for a in response.json()] + assert expected_hash not in orphaned_ids + + # Delete the tag + integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag") + + # Verify now in orphaned list + response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000") + orphaned_ids = [a["id"] for a in response.json()] + assert expected_hash in orphaned_ids + + +class TestGarbageCollection: + """Tests for garbage collection endpoint.""" + + @pytest.mark.integration + def test_garbage_collect_dry_run_returns_response(self, integration_client): + """Test garbage collection dry run returns valid response.""" + response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true") + assert response.status_code == 200 + + data = response.json() + assert "artifacts_deleted" in data + assert "bytes_freed" in data + assert "artifact_ids" in data + assert "dry_run" in data + assert data["dry_run"] is True + + @pytest.mark.integration + def test_garbage_collect_dry_run_doesnt_delete( + self, integration_client, test_package, unique_test_id + ): + """Test garbage collection dry run doesn't actually delete artifacts.""" + project, package = test_package + content = f"dry run test {unique_test_id}".encode() + expected_hash = 
compute_sha256(content) + + # Upload and delete tag to create orphan + upload_test_file(integration_client, project, package, content, tag="dry-run") + integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run") + + # Verify artifact exists + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + + # Run garbage collection in dry-run mode + gc_response = integration_client.post( + "/api/v1/admin/garbage-collect?dry_run=true&limit=1000" + ) + assert gc_response.status_code == 200 + assert expected_hash in gc_response.json()["artifact_ids"] + + # Verify artifact STILL exists + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + + @pytest.mark.integration + def test_garbage_collect_preserves_referenced_artifacts( + self, integration_client, test_package, unique_test_id + ): + """Test garbage collection doesn't delete artifacts with ref_count > 0.""" + project, package = test_package + content = f"preserve test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + # Upload with tag (ref_count=1) + upload_test_file(integration_client, project, package, content, tag="keep-this") + + # Verify artifact exists with ref_count=1 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + assert response.json()["ref_count"] == 1 + + # Run garbage collection (dry_run to not affect other tests) + gc_response = integration_client.post( + "/api/v1/admin/garbage-collect?dry_run=true&limit=1000" + ) + assert gc_response.status_code == 200 + + # Verify artifact was NOT in delete list + assert expected_hash not in gc_response.json()["artifact_ids"] + + # Verify artifact still exists + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + assert response.json()["ref_count"] == 1 + + @pytest.mark.integration + def 
test_garbage_collect_respects_limit(self, integration_client): + """Test garbage collection respects limit parameter.""" + response = integration_client.post( + "/api/v1/admin/garbage-collect?dry_run=true&limit=5" + ) + assert response.status_code == 200 + assert response.json()["artifacts_deleted"] <= 5 + + @pytest.mark.integration + def test_garbage_collect_returns_bytes_freed(self, integration_client): + """Test garbage collection returns accurate bytes_freed.""" + response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true") + assert response.status_code == 200 + + data = response.json() + assert data["bytes_freed"] >= 0 + assert isinstance(data["bytes_freed"], int) + + +class TestGlobalUploads: + """Tests for global uploads endpoint.""" + + @pytest.mark.integration + def test_global_uploads_returns_200(self, integration_client): + """Test global uploads endpoint returns 200.""" + response = integration_client.get("/api/v1/uploads") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + @pytest.mark.integration + def test_global_uploads_pagination(self, integration_client): + """Test global uploads endpoint respects pagination.""" + response = integration_client.get("/api/v1/uploads?limit=5&page=1") + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + assert data["pagination"]["page"] == 1 + + @pytest.mark.integration + def test_global_uploads_filter_by_project(self, integration_client, test_package): + """Test filtering global uploads by project name.""" + project_name, package_name = test_package + + # Upload a file + upload_test_file( + integration_client, + project_name, + package_name, + b"global filter test", + "global.txt", + ) + + response = integration_client.get(f"/api/v1/uploads?project={project_name}") + assert response.status_code == 200 + + data = response.json() + for item 
in data["items"]: + assert item["project_name"] == project_name + + @pytest.mark.integration + def test_global_uploads_has_more_field(self, integration_client): + """Test pagination includes has_more field.""" + response = integration_client.get("/api/v1/uploads?limit=1") + assert response.status_code == 200 + + data = response.json() + assert "has_more" in data["pagination"] + assert isinstance(data["pagination"]["has_more"], bool) + + +class TestGlobalArtifacts: + """Tests for global artifacts endpoint.""" + + @pytest.mark.integration + def test_global_artifacts_returns_200(self, integration_client): + """Test global artifacts endpoint returns 200.""" + response = integration_client.get("/api/v1/artifacts") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + @pytest.mark.integration + def test_global_artifacts_pagination(self, integration_client): + """Test global artifacts endpoint respects pagination.""" + response = integration_client.get("/api/v1/artifacts?limit=5&page=1") + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + + @pytest.mark.integration + def test_global_artifacts_filter_by_size(self, integration_client): + """Test filtering global artifacts by size range.""" + response = integration_client.get( + "/api/v1/artifacts?min_size=1&max_size=1000000" + ) + assert response.status_code == 200 + + data = response.json() + for item in data["items"]: + assert 1 <= item["size"] <= 1000000 + + @pytest.mark.integration + def test_global_artifacts_sort_by_size(self, integration_client): + """Test sorting global artifacts by size.""" + response = integration_client.get("/api/v1/artifacts?sort=size&order=desc") + assert response.status_code == 200 + data = response.json() + if len(data["items"]) > 1: + sizes = [item["size"] for item in data["items"]] + assert sizes == sorted(sizes, reverse=True) + + 
@pytest.mark.integration + def test_global_artifacts_invalid_sort_returns_400(self, integration_client): + """Test invalid sort field returns 400.""" + response = integration_client.get("/api/v1/artifacts?sort=invalid_field") + assert response.status_code == 400 + + +class TestGlobalTags: + """Tests for global tags endpoint.""" + + @pytest.mark.integration + def test_global_tags_returns_200(self, integration_client): + """Test global tags endpoint returns 200.""" + response = integration_client.get("/api/v1/tags") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + @pytest.mark.integration + def test_global_tags_pagination(self, integration_client): + """Test global tags endpoint respects pagination.""" + response = integration_client.get("/api/v1/tags?limit=5&page=1") + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + + @pytest.mark.integration + def test_global_tags_has_project_context(self, integration_client): + """Test global tags response includes project/package context.""" + response = integration_client.get("/api/v1/tags?limit=1") + assert response.status_code == 200 + + data = response.json() + if len(data["items"]) > 0: + item = data["items"][0] + assert "project_name" in item + assert "package_name" in item + assert "artifact_id" in item + + @pytest.mark.integration + def test_global_tags_search_with_wildcard(self, integration_client): + """Test global tags search supports wildcards.""" + response = integration_client.get("/api/v1/tags?search=v*") + assert response.status_code == 200 + # Just verify it doesn't error; results may vary + + +class TestAuditLogs: + """Tests for global audit logs endpoint.""" + + @pytest.mark.integration + def test_list_audit_logs_returns_valid_response(self, integration_client): + """Test audit logs endpoint returns valid paginated response.""" + response = 
integration_client.get("/api/v1/audit-logs") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + assert isinstance(data["items"], list) + + pagination = data["pagination"] + assert "page" in pagination + assert "limit" in pagination + assert "total" in pagination + assert "total_pages" in pagination + + @pytest.mark.integration + def test_audit_logs_respects_pagination(self, integration_client): + """Test audit logs endpoint respects limit parameter.""" + response = integration_client.get("/api/v1/audit-logs?limit=5") + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + + @pytest.mark.integration + def test_audit_logs_filter_by_action(self, integration_client, test_package): + """Test filtering audit logs by action type.""" + project_name, package_name = test_package + + response = integration_client.get("/api/v1/audit-logs?action=project.create") + assert response.status_code == 200 + + data = response.json() + for item in data["items"]: + assert item["action"] == "project.create" + + @pytest.mark.integration + def test_audit_log_entry_has_required_fields( + self, integration_client, test_project + ): + """Test audit log entries have all required fields.""" + response = integration_client.get("/api/v1/audit-logs?limit=10") + assert response.status_code == 200 + + data = response.json() + if data["items"]: + item = data["items"][0] + assert "id" in item + assert "action" in item + assert "resource" in item + assert "user_id" in item + assert "timestamp" in item diff --git a/backend/tests/integration/test_packages_api.py b/backend/tests/integration/test_packages_api.py new file mode 100644 index 0000000..60af55a --- /dev/null +++ b/backend/tests/integration/test_packages_api.py @@ -0,0 +1,345 @@ +""" +Integration tests for package API endpoints. 
+ +Tests cover: +- Package CRUD operations +- Package listing with pagination, search, filtering +- Package stats endpoint +- Package-level audit logs +- Cascade delete behavior +""" + +import pytest +from tests.factories import compute_sha256, upload_test_file + + +class TestPackageCRUD: + """Tests for package create, read, update, delete operations.""" + + @pytest.mark.integration + def test_create_package(self, integration_client, test_project, unique_test_id): + """Test creating a new package.""" + package_name = f"test-create-pkg-{unique_test_id}" + + response = integration_client.post( + f"/api/v1/project/{test_project}/packages", + json={ + "name": package_name, + "description": "Test package", + "format": "npm", + "platform": "linux", + }, + ) + assert response.status_code == 200 + + data = response.json() + assert data["name"] == package_name + assert data["description"] == "Test package" + assert data["format"] == "npm" + assert data["platform"] == "linux" + + @pytest.mark.integration + def test_get_package(self, integration_client, test_package): + """Test getting a package by name.""" + project_name, package_name = test_package + + response = integration_client.get( + f"/api/v1/project/{project_name}/packages/{package_name}" + ) + assert response.status_code == 200 + + data = response.json() + assert data["name"] == package_name + + @pytest.mark.integration + def test_get_nonexistent_package(self, integration_client, test_project): + """Test getting a non-existent package returns 404.""" + response = integration_client.get( + f"/api/v1/project/{test_project}/packages/nonexistent-pkg" + ) + assert response.status_code == 404 + + @pytest.mark.integration + def test_list_packages(self, integration_client, test_package): + """Test listing packages includes created package.""" + project_name, package_name = test_package + + response = integration_client.get(f"/api/v1/project/{project_name}/packages") + assert response.status_code == 200 + + data = 
response.json() + assert "items" in data + assert "pagination" in data + + package_names = [p["name"] for p in data["items"]] + assert package_name in package_names + + @pytest.mark.integration + def test_delete_package(self, integration_client, test_project, unique_test_id): + """Test deleting a package.""" + package_name = f"test-delete-pkg-{unique_test_id}" + + # Create package + integration_client.post( + f"/api/v1/project/{test_project}/packages", + json={"name": package_name, "description": "To be deleted"}, + ) + + # Delete package + response = integration_client.delete( + f"/api/v1/project/{test_project}/packages/{package_name}" + ) + assert response.status_code == 204 + + # Verify deleted + response = integration_client.get( + f"/api/v1/project/{test_project}/packages/{package_name}" + ) + assert response.status_code == 404 + + +class TestPackageListingFilters: + """Tests for package listing with filters and pagination.""" + + @pytest.mark.integration + def test_packages_pagination(self, integration_client, test_project): + """Test package listing respects pagination parameters.""" + response = integration_client.get( + f"/api/v1/project/{test_project}/packages?page=1&limit=5" + ) + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + assert data["pagination"]["page"] == 1 + + @pytest.mark.integration + def test_packages_filter_by_format( + self, integration_client, test_project, unique_test_id + ): + """Test package filtering by format.""" + # Create a package with specific format + package_name = f"npm-pkg-{unique_test_id}" + integration_client.post( + f"/api/v1/project/{test_project}/packages", + json={"name": package_name, "format": "npm"}, + ) + + response = integration_client.get( + f"/api/v1/project/{test_project}/packages?format=npm" + ) + assert response.status_code == 200 + + data = response.json() + for pkg in data["items"]: + assert pkg["format"] == "npm" + + 
@pytest.mark.integration + def test_packages_filter_by_platform( + self, integration_client, test_project, unique_test_id + ): + """Test package filtering by platform.""" + # Create a package with specific platform + package_name = f"linux-pkg-{unique_test_id}" + integration_client.post( + f"/api/v1/project/{test_project}/packages", + json={"name": package_name, "platform": "linux"}, + ) + + response = integration_client.get( + f"/api/v1/project/{test_project}/packages?platform=linux" + ) + assert response.status_code == 200 + + data = response.json() + for pkg in data["items"]: + assert pkg["platform"] == "linux" + + +class TestPackageStats: + """Tests for package statistics endpoint.""" + + @pytest.mark.integration + def test_package_stats_returns_valid_response( + self, integration_client, test_package + ): + """Test package stats endpoint returns expected fields.""" + project, package = test_package + response = integration_client.get( + f"/api/v1/project/{project}/packages/{package}/stats" + ) + assert response.status_code == 200 + + data = response.json() + assert "package_id" in data + assert "package_name" in data + assert "project_name" in data + assert "tag_count" in data + assert "artifact_count" in data + assert "total_size_bytes" in data + assert "upload_count" in data + assert "deduplicated_uploads" in data + assert "storage_saved_bytes" in data + assert "deduplication_ratio" in data + + @pytest.mark.integration + def test_package_stats_not_found(self, integration_client, test_project): + """Test package stats returns 404 for non-existent package.""" + response = integration_client.get( + f"/api/v1/project/{test_project}/packages/nonexistent-package/stats" + ) + assert response.status_code == 404 + + +class TestPackageAuditLogs: + """Tests for package-level audit logs endpoint.""" + + @pytest.mark.integration + def test_package_audit_logs_returns_200(self, integration_client, test_package): + """Test package audit logs endpoint returns 200.""" + 
project_name, package_name = test_package + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/audit-logs" + ) + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + @pytest.mark.integration + def test_package_audit_logs_project_not_found(self, integration_client): + """Test non-existent project returns 404.""" + response = integration_client.get( + "/api/v1/project/nonexistent/nonexistent/audit-logs" + ) + assert response.status_code == 404 + + @pytest.mark.integration + def test_package_audit_logs_package_not_found( + self, integration_client, test_project + ): + """Test non-existent package returns 404.""" + response = integration_client.get( + f"/api/v1/project/{test_project}/nonexistent-package/audit-logs" + ) + assert response.status_code == 404 + + +class TestPackageCascadeDelete: + """Tests for cascade delete behavior when deleting packages.""" + + @pytest.mark.integration + def test_ref_count_decrements_on_package_delete( + self, integration_client, unique_test_id + ): + """Test ref_count decrements for all tags when package is deleted.""" + project_name = f"cascade-pkg-{unique_test_id}" + package_name = f"test-pkg-{unique_test_id}" + + # Create project + response = integration_client.post( + "/api/v1/projects", + json={ + "name": project_name, + "description": "Test project", + "is_public": True, + }, + ) + assert response.status_code == 200 + + # Create package + response = integration_client.post( + f"/api/v1/project/{project_name}/packages", + json={"name": package_name, "description": "Test package"}, + ) + assert response.status_code == 200 + + # Upload content with multiple tags + content = f"cascade delete test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + upload_test_file( + integration_client, project_name, package_name, content, tag="v1" + ) + upload_test_file( + integration_client, project_name, package_name, content, 
tag="v2" + ) + upload_test_file( + integration_client, project_name, package_name, content, tag="v3" + ) + + # Verify ref_count is 3 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 3 + + # Delete the package + delete_response = integration_client.delete( + f"/api/v1/project/{project_name}/packages/{package_name}" + ) + assert delete_response.status_code == 204 + + # Verify ref_count is 0 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 0 + + # Cleanup + integration_client.delete(f"/api/v1/projects/{project_name}") + + +class TestPackageUploads: + """Tests for package-level uploads endpoint.""" + + @pytest.mark.integration + def test_package_uploads_returns_200(self, integration_client, test_package): + """Test package uploads endpoint returns 200.""" + project_name, package_name = test_package + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/uploads" + ) + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + @pytest.mark.integration + def test_package_uploads_after_upload(self, integration_client, test_package): + """Test uploads are recorded after file upload.""" + project_name, package_name = test_package + + # Upload a file + upload_result = upload_test_file( + integration_client, + project_name, + package_name, + b"test upload content", + "test.txt", + ) + assert upload_result["artifact_id"] + + # Check uploads endpoint + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/uploads" + ) + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) >= 1 + + # Verify upload record fields + upload = data["items"][0] + assert "artifact_id" in upload + assert "package_name" in upload + assert "project_name" in upload + assert "uploaded_at" in upload + assert "uploaded_by" 
in upload + + @pytest.mark.integration + def test_package_uploads_project_not_found(self, integration_client): + """Test non-existent project returns 404.""" + response = integration_client.get( + "/api/v1/project/nonexistent/nonexistent/uploads" + ) + assert response.status_code == 404 diff --git a/backend/tests/integration/test_projects_api.py b/backend/tests/integration/test_projects_api.py new file mode 100644 index 0000000..0de9554 --- /dev/null +++ b/backend/tests/integration/test_projects_api.py @@ -0,0 +1,322 @@ +""" +Integration tests for project API endpoints. + +Tests cover: +- Project CRUD operations +- Project listing with pagination, search, and sorting +- Project stats endpoint +- Project-level audit logs +- Cascade delete behavior +""" + +import pytest +from tests.factories import compute_sha256, upload_test_file + + +class TestProjectCRUD: + """Tests for project create, read, update, delete operations.""" + + @pytest.mark.integration + def test_create_project(self, integration_client, unique_test_id): + """Test creating a new project.""" + project_name = f"test-create-{unique_test_id}" + + try: + response = integration_client.post( + "/api/v1/projects", + json={ + "name": project_name, + "description": "Test project", + "is_public": True, + }, + ) + assert response.status_code == 200 + + data = response.json() + assert data["name"] == project_name + assert data["description"] == "Test project" + assert data["is_public"] is True + assert "id" in data + assert "created_at" in data + finally: + integration_client.delete(f"/api/v1/projects/{project_name}") + + @pytest.mark.integration + def test_get_project(self, integration_client, test_project): + """Test getting a project by name.""" + response = integration_client.get(f"/api/v1/projects/{test_project}") + assert response.status_code == 200 + + data = response.json() + assert data["name"] == test_project + + @pytest.mark.integration + def test_get_nonexistent_project(self, integration_client): + 
"""Test getting a non-existent project returns 404.""" + response = integration_client.get("/api/v1/projects/nonexistent-project-xyz") + assert response.status_code == 404 + + @pytest.mark.integration + def test_list_projects(self, integration_client, test_project): + """Test listing projects includes created project.""" + response = integration_client.get("/api/v1/projects") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + project_names = [p["name"] for p in data["items"]] + assert test_project in project_names + + @pytest.mark.integration + def test_delete_project(self, integration_client, unique_test_id): + """Test deleting a project.""" + project_name = f"test-delete-{unique_test_id}" + + # Create project + integration_client.post( + "/api/v1/projects", + json={"name": project_name, "description": "To be deleted"}, + ) + + # Delete project + response = integration_client.delete(f"/api/v1/projects/{project_name}") + assert response.status_code == 204 + + # Verify deleted + response = integration_client.get(f"/api/v1/projects/{project_name}") + assert response.status_code == 404 + + +class TestProjectListingFilters: + """Tests for project listing with filters and pagination.""" + + @pytest.mark.integration + def test_projects_pagination(self, integration_client): + """Test project listing respects pagination parameters.""" + response = integration_client.get("/api/v1/projects?page=1&limit=5") + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + assert data["pagination"]["page"] == 1 + assert "has_more" in data["pagination"] + + @pytest.mark.integration + def test_projects_search(self, integration_client, test_project): + """Test project search by name.""" + # Search for our test project + response = integration_client.get( + f"/api/v1/projects?search={test_project[:10]}" + ) + assert response.status_code 
== 200 + + data = response.json() + # Our project should be in results + project_names = [p["name"] for p in data["items"]] + assert test_project in project_names + + @pytest.mark.integration + def test_projects_sort_by_name(self, integration_client): + """Test project sorting by name.""" + response = integration_client.get("/api/v1/projects?sort=name&order=asc") + assert response.status_code == 200 + + data = response.json() + names = [p["name"] for p in data["items"]] + assert names == sorted(names) + + +class TestProjectStats: + """Tests for project statistics endpoint.""" + + @pytest.mark.integration + def test_project_stats_returns_valid_response( + self, integration_client, test_project + ): + """Test project stats endpoint returns expected fields.""" + response = integration_client.get(f"/api/v1/projects/{test_project}/stats") + assert response.status_code == 200 + + data = response.json() + assert "project_id" in data + assert "project_name" in data + assert "package_count" in data + assert "tag_count" in data + assert "artifact_count" in data + assert "total_size_bytes" in data + assert "upload_count" in data + assert "deduplicated_uploads" in data + assert "storage_saved_bytes" in data + assert "deduplication_ratio" in data + + @pytest.mark.integration + def test_project_stats_not_found(self, integration_client): + """Test project stats returns 404 for non-existent project.""" + response = integration_client.get("/api/v1/projects/nonexistent-project/stats") + assert response.status_code == 404 + + +class TestProjectAuditLogs: + """Tests for project-level audit logs endpoint.""" + + @pytest.mark.integration + def test_project_audit_logs_returns_200(self, integration_client, test_project): + """Test project audit logs endpoint returns 200.""" + response = integration_client.get(f"/api/v1/projects/{test_project}/audit-logs") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + 
@pytest.mark.integration + def test_project_audit_logs_not_found(self, integration_client): + """Test non-existent project returns 404.""" + response = integration_client.get( + "/api/v1/projects/nonexistent-project/audit-logs" + ) + assert response.status_code == 404 + + +class TestProjectCascadeDelete: + """Tests for cascade delete behavior when deleting projects.""" + + @pytest.mark.integration + def test_project_delete_cascades_to_packages( + self, integration_client, unique_test_id + ): + """Test deleting project cascades to packages.""" + project_name = f"cascade-proj-{unique_test_id}" + package_name = f"cascade-pkg-{unique_test_id}" + + try: + # Create project and package + integration_client.post( + "/api/v1/projects", + json={"name": project_name, "description": "Test", "is_public": True}, + ) + integration_client.post( + f"/api/v1/project/{project_name}/packages", + json={"name": package_name, "description": "Test package"}, + ) + + # Verify package exists + response = integration_client.get( + f"/api/v1/project/{project_name}/packages/{package_name}" + ) + assert response.status_code == 200 + + # Delete project + integration_client.delete(f"/api/v1/projects/{project_name}") + + # Verify project is deleted (and package with it) + response = integration_client.get(f"/api/v1/projects/{project_name}") + assert response.status_code == 404 + except Exception: + # Cleanup if test fails + integration_client.delete(f"/api/v1/projects/{project_name}") + raise + + @pytest.mark.integration + def test_ref_count_decrements_on_project_delete( + self, integration_client, unique_test_id + ): + """Test ref_count decrements for all tags when project is deleted.""" + project_name = f"cascade-proj-{unique_test_id}" + package1_name = f"pkg1-{unique_test_id}" + package2_name = f"pkg2-{unique_test_id}" + + # Create project + response = integration_client.post( + "/api/v1/projects", + json={ + "name": project_name, + "description": "Test project", + "is_public": True, + }, + ) + 
assert response.status_code == 200 + + # Create two packages + for pkg_name in [package1_name, package2_name]: + response = integration_client.post( + f"/api/v1/project/{project_name}/packages", + json={"name": pkg_name, "description": "Test package"}, + ) + assert response.status_code == 200 + + # Upload same content with tags in both packages + content = f"project cascade test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + upload_test_file( + integration_client, project_name, package1_name, content, tag="v1" + ) + upload_test_file( + integration_client, project_name, package1_name, content, tag="v2" + ) + upload_test_file( + integration_client, project_name, package2_name, content, tag="latest" + ) + upload_test_file( + integration_client, project_name, package2_name, content, tag="stable" + ) + + # Verify ref_count is 4 (2 tags in each of 2 packages) + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 4 + + # Delete the project + delete_response = integration_client.delete(f"/api/v1/projects/{project_name}") + assert delete_response.status_code == 204 + + # Verify ref_count is 0 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 0 + + +class TestProjectUploads: + """Tests for project-level uploads endpoint.""" + + @pytest.mark.integration + def test_project_uploads_returns_200(self, integration_client, test_project): + """Test project uploads endpoint returns 200.""" + response = integration_client.get(f"/api/v1/project/{test_project}/uploads") + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + @pytest.mark.integration + def test_project_uploads_after_upload(self, integration_client, test_package): + """Test uploads are recorded in project uploads.""" + project_name, package_name = test_package + + # Upload a file + upload_test_file( + 
integration_client, + project_name, + package_name, + b"project uploads test", + "project.txt", + ) + + response = integration_client.get(f"/api/v1/project/{project_name}/uploads") + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) >= 1 + + # Verify project name matches + for item in data["items"]: + assert item["project_name"] == project_name + + @pytest.mark.integration + def test_project_uploads_not_found(self, integration_client): + """Test non-existent project returns 404.""" + response = integration_client.get("/api/v1/project/nonexistent/uploads") + assert response.status_code == 404 diff --git a/backend/tests/integration/test_tags_api.py b/backend/tests/integration/test_tags_api.py new file mode 100644 index 0000000..2b8db6e --- /dev/null +++ b/backend/tests/integration/test_tags_api.py @@ -0,0 +1,403 @@ +""" +Integration tests for tag API endpoints. + +Tests cover: +- Tag CRUD operations +- Tag listing with pagination and search +- Tag history tracking +- ref_count behavior with tag operations +""" + +import pytest +from tests.factories import compute_sha256, upload_test_file + + +class TestTagCRUD: + """Tests for tag create, read, delete operations.""" + + @pytest.mark.integration + def test_create_tag_via_upload(self, integration_client, test_package): + """Test creating a tag via upload endpoint.""" + project_name, package_name = test_package + + result = upload_test_file( + integration_client, + project_name, + package_name, + b"tag create test", + tag="v1.0.0", + ) + + assert result["tag"] == "v1.0.0" + assert result["artifact_id"] + + @pytest.mark.integration + def test_create_tag_via_post( + self, integration_client, test_package, unique_test_id + ): + """Test creating a tag via POST /tags endpoint.""" + project_name, package_name = test_package + + # First upload an artifact + result = upload_test_file( + integration_client, + project_name, + package_name, + b"artifact for tag", + ) + artifact_id = 
result["artifact_id"] + + # Create tag via POST + tag_name = f"post-tag-{unique_test_id}" + response = integration_client.post( + f"/api/v1/project/{project_name}/{package_name}/tags", + json={"name": tag_name, "artifact_id": artifact_id}, + ) + assert response.status_code == 200 + + data = response.json() + assert data["name"] == tag_name + assert data["artifact_id"] == artifact_id + + @pytest.mark.integration + def test_get_tag(self, integration_client, test_package): + """Test getting a tag by name.""" + project_name, package_name = test_package + + upload_test_file( + integration_client, + project_name, + package_name, + b"get tag test", + tag="get-tag", + ) + + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags/get-tag" + ) + assert response.status_code == 200 + + data = response.json() + assert data["name"] == "get-tag" + assert "artifact_id" in data + assert "artifact_size" in data + assert "artifact_content_type" in data + + @pytest.mark.integration + def test_list_tags(self, integration_client, test_package): + """Test listing tags for a package.""" + project_name, package_name = test_package + + # Create some tags + upload_test_file( + integration_client, + project_name, + package_name, + b"list tags test", + tag="list-v1", + ) + + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags" + ) + assert response.status_code == 200 + + data = response.json() + assert "items" in data + assert "pagination" in data + + tag_names = [t["name"] for t in data["items"]] + assert "list-v1" in tag_names + + @pytest.mark.integration + def test_delete_tag(self, integration_client, test_package): + """Test deleting a tag.""" + project_name, package_name = test_package + + upload_test_file( + integration_client, + project_name, + package_name, + b"delete tag test", + tag="to-delete", + ) + + # Delete tag + response = integration_client.delete( + 
f"/api/v1/project/{project_name}/{package_name}/tags/to-delete" + ) + assert response.status_code == 204 + + # Verify deleted + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags/to-delete" + ) + assert response.status_code == 404 + + +class TestTagListingFilters: + """Tests for tag listing with filters and search.""" + + @pytest.mark.integration + def test_tags_pagination(self, integration_client, test_package): + """Test tag listing respects pagination.""" + project_name, package_name = test_package + + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags?limit=5" + ) + assert response.status_code == 200 + + data = response.json() + assert len(data["items"]) <= 5 + assert data["pagination"]["limit"] == 5 + + @pytest.mark.integration + def test_tags_search(self, integration_client, test_package, unique_test_id): + """Test tag search by name.""" + project_name, package_name = test_package + + tag_name = f"searchable-{unique_test_id}" + upload_test_file( + integration_client, + project_name, + package_name, + b"search test", + tag=tag_name, + ) + + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags?search=searchable" + ) + assert response.status_code == 200 + + data = response.json() + tag_names = [t["name"] for t in data["items"]] + assert tag_name in tag_names + + +class TestTagHistory: + """Tests for tag history tracking.""" + + @pytest.mark.integration + def test_tag_history_on_create(self, integration_client, test_package): + """Test tag history is created when tag is created.""" + project_name, package_name = test_package + + upload_test_file( + integration_client, + project_name, + package_name, + b"history create test", + tag="history-create", + ) + + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags/history-create/history" + ) + assert response.status_code == 200 + + data = response.json() + assert 
len(data) >= 1 + + @pytest.mark.integration + def test_tag_history_on_update( + self, integration_client, test_package, unique_test_id + ): + """Test tag history is created when tag is updated.""" + project_name, package_name = test_package + + tag_name = f"history-update-{unique_test_id}" + + # Create tag with first artifact + upload_test_file( + integration_client, + project_name, + package_name, + b"first content", + tag=tag_name, + ) + + # Update tag with second artifact + upload_test_file( + integration_client, + project_name, + package_name, + b"second content", + tag=tag_name, + ) + + response = integration_client.get( + f"/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history" + ) + assert response.status_code == 200 + + data = response.json() + # Should have at least 2 history entries (create + update) + assert len(data) >= 2 + + +class TestTagRefCount: + """Tests for ref_count behavior with tag operations.""" + + @pytest.mark.integration + def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package): + """Test ref_count decrements when a tag is deleted.""" + project_name, package_name = test_package + content = b"ref count delete test" + expected_hash = compute_sha256(content) + + # Upload with two tags + upload_test_file( + integration_client, project_name, package_name, content, tag="rc-v1" + ) + upload_test_file( + integration_client, project_name, package_name, content, tag="rc-v2" + ) + + # Verify ref_count is 2 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 2 + + # Delete one tag + delete_response = integration_client.delete( + f"/api/v1/project/{project_name}/{package_name}/tags/rc-v1" + ) + assert delete_response.status_code == 204 + + # Verify ref_count is now 1 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 1 + + @pytest.mark.integration + def 
test_ref_count_zero_after_all_tags_deleted( + self, integration_client, test_package + ): + """Test ref_count goes to 0 when all tags are deleted.""" + project_name, package_name = test_package + content = b"orphan test content" + expected_hash = compute_sha256(content) + + # Upload with one tag + upload_test_file( + integration_client, project_name, package_name, content, tag="only-tag" + ) + + # Delete the tag + integration_client.delete( + f"/api/v1/project/{project_name}/{package_name}/tags/only-tag" + ) + + # Verify ref_count is 0 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 0 + + @pytest.mark.integration + def test_ref_count_adjusts_on_tag_update( + self, integration_client, test_package, unique_test_id + ): + """Test ref_count adjusts when a tag is updated to point to different artifact.""" + project_name, package_name = test_package + + # Upload two different artifacts + content1 = f"artifact one {unique_test_id}".encode() + content2 = f"artifact two {unique_test_id}".encode() + hash1 = compute_sha256(content1) + hash2 = compute_sha256(content2) + + # Upload first artifact with tag "latest" + upload_test_file( + integration_client, project_name, package_name, content1, tag="latest" + ) + + # Verify first artifact has ref_count 1 + response = integration_client.get(f"/api/v1/artifact/{hash1}") + assert response.json()["ref_count"] == 1 + + # Upload second artifact with different tag + upload_test_file( + integration_client, project_name, package_name, content2, tag="stable" + ) + + # Now update "latest" tag to point to second artifact + upload_test_file( + integration_client, project_name, package_name, content2, tag="latest" + ) + + # Verify first artifact ref_count decreased to 0 + response = integration_client.get(f"/api/v1/artifact/{hash1}") + assert response.json()["ref_count"] == 0 + + # Verify second artifact ref_count increased to 2 + response = 
integration_client.get(f"/api/v1/artifact/{hash2}") + assert response.json()["ref_count"] == 2 + + @pytest.mark.integration + def test_ref_count_unchanged_when_tag_same_artifact( + self, integration_client, test_package, unique_test_id + ): + """Test ref_count doesn't change when tag is 'updated' to same artifact.""" + project_name, package_name = test_package + + content = f"same artifact {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + # Upload with tag + upload_test_file( + integration_client, project_name, package_name, content, tag="same-v1" + ) + + # Verify ref_count is 1 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 1 + + # Upload same content with same tag (no-op) + upload_test_file( + integration_client, project_name, package_name, content, tag="same-v1" + ) + + # Verify ref_count is still 1 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 1 + + @pytest.mark.integration + def test_tag_via_post_endpoint_increments_ref_count( + self, integration_client, test_package, unique_test_id + ): + """Test creating tag via POST /tags endpoint increments ref_count.""" + project_name, package_name = test_package + + content = f"tag endpoint test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + # Upload artifact without tag + result = upload_test_file( + integration_client, project_name, package_name, content, filename="test.bin" + ) + artifact_id = result["artifact_id"] + + # Verify ref_count is 0 (no tags yet) + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 0 + + # Create tag via POST endpoint + tag_response = integration_client.post( + f"/api/v1/project/{project_name}/{package_name}/tags", + json={"name": "post-v1", "artifact_id": artifact_id}, + ) + assert tag_response.status_code == 200 + + # Verify ref_count is now 1 + 
response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 1 + + # Create another tag via POST endpoint + tag_response = integration_client.post( + f"/api/v1/project/{project_name}/{package_name}/tags", + json={"name": "post-latest", "artifact_id": artifact_id}, + ) + assert tag_response.status_code == 200 + + # Verify ref_count is now 2 + response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.json()["ref_count"] == 2 diff --git a/backend/tests/test_integration_uploads.py b/backend/tests/integration/test_upload_download_api.py similarity index 56% rename from backend/tests/test_integration_uploads.py rename to backend/tests/integration/test_upload_download_api.py index d354390..dfa25f9 100644 --- a/backend/tests/test_integration_uploads.py +++ b/backend/tests/integration/test_upload_download_api.py @@ -1,33 +1,109 @@ """ -Integration tests for duplicate uploads and storage verification. - -These tests require the full stack to be running (docker-compose.local.yml). +Integration tests for upload and download API endpoints. 
Tests cover: -- Duplicate upload scenarios across packages and projects -- Storage verification (single S3 object, single artifact row) -- Upload table tracking -- Content integrity verification +- Upload functionality and deduplication +- Download by tag and artifact ID - Concurrent upload handling -- Failure cleanup +- File size validation +- Upload failure cleanup +- S3 storage verification """ import pytest import io import threading -import time from concurrent.futures import ThreadPoolExecutor, as_completed -from tests.conftest import ( +from tests.factories import ( compute_sha256, upload_test_file, list_s3_objects_by_hash, s3_object_exists, - delete_s3_object_by_hash, ) -class TestDuplicateUploadScenarios: - """Integration tests for duplicate upload behavior.""" +class TestUploadBasics: + """Tests for basic upload functionality.""" + + @pytest.mark.integration + def test_upload_returns_artifact_id(self, integration_client, test_package): + """Test upload returns the artifact ID (SHA256 hash).""" + project_name, package_name = test_package + content = b"basic upload test" + expected_hash = compute_sha256(content) + + result = upload_test_file( + integration_client, project_name, package_name, content, tag="v1" + ) + + assert result["artifact_id"] == expected_hash + + @pytest.mark.integration + def test_upload_response_has_upload_id(self, integration_client, test_package): + """Test upload response includes upload_id.""" + project_name, package_name = test_package + + result = upload_test_file( + integration_client, + project_name, + package_name, + b"upload id test", + "uploadid.txt", + ) + + assert "upload_id" in result + assert result["upload_id"] is not None + + @pytest.mark.integration + def test_upload_response_has_content_type(self, integration_client, test_package): + """Test upload response includes content_type.""" + project_name, package_name = test_package + + result = upload_test_file( + integration_client, + project_name, + package_name, + 
b"content type test", + "content.txt", + ) + + assert "content_type" in result + + @pytest.mark.integration + def test_upload_response_has_original_name(self, integration_client, test_package): + """Test upload response includes original_name.""" + project_name, package_name = test_package + + result = upload_test_file( + integration_client, + project_name, + package_name, + b"original name test", + "originalname.txt", + ) + + assert "original_name" in result + assert result["original_name"] == "originalname.txt" + + @pytest.mark.integration + def test_upload_response_has_created_at(self, integration_client, test_package): + """Test upload response includes created_at.""" + project_name, package_name = test_package + + result = upload_test_file( + integration_client, + project_name, + package_name, + b"created at test", + "createdat.txt", + ) + + assert "created_at" in result + assert result["created_at"] is not None + + +class TestDuplicateUploads: + """Tests for duplicate upload deduplication behavior.""" @pytest.mark.integration def test_same_file_twice_returns_same_artifact_id( @@ -103,62 +179,11 @@ class TestDuplicateUploadScenarios: assert result2["artifact_id"] == expected_hash assert result2["deduplicated"] is True - @pytest.mark.integration - def test_same_file_different_projects_shares_artifact( - self, integration_client, unique_test_id - ): - """Test uploading same file to different projects shares artifact.""" - content = f"content shared across projects {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Create two projects with packages - proj1 = f"project-x-{unique_test_id}" - proj2 = f"project-y-{unique_test_id}" - pkg_name = "shared-pkg" - - try: - # Create projects and packages - integration_client.post( - "/api/v1/projects", - json={"name": proj1, "description": "Project X", "is_public": True}, - ) - integration_client.post( - "/api/v1/projects", - json={"name": proj2, "description": "Project Y", "is_public": True}, - ) - 
integration_client.post( - f"/api/v1/project/{proj1}/packages", - json={"name": pkg_name, "description": "Package"}, - ) - integration_client.post( - f"/api/v1/project/{proj2}/packages", - json={"name": pkg_name, "description": "Package"}, - ) - - # Upload to first project - result1 = upload_test_file( - integration_client, proj1, pkg_name, content, tag="v1" - ) - assert result1["artifact_id"] == expected_hash - assert result1["deduplicated"] is False - - # Upload to second project - result2 = upload_test_file( - integration_client, proj2, pkg_name, content, tag="v1" - ) - assert result2["artifact_id"] == expected_hash - assert result2["deduplicated"] is True - - finally: - # Cleanup - integration_client.delete(f"/api/v1/projects/{proj1}") - integration_client.delete(f"/api/v1/projects/{proj2}") - @pytest.mark.integration def test_same_file_different_filenames_shares_artifact( self, integration_client, test_package ): - """Test uploading same file with different original filenames shares artifact.""" + """Test uploading same file with different filenames shares artifact.""" project, package = test_package content = b"content with different filenames" expected_hash = compute_sha256(content) @@ -186,110 +211,68 @@ class TestDuplicateUploadScenarios: assert result2["artifact_id"] == expected_hash assert result2["deduplicated"] is True - @pytest.mark.integration - def test_same_file_different_tags_shares_artifact( - self, integration_client, test_package, unique_test_id - ): - """Test uploading same file with different tags shares artifact.""" - project, package = test_package - content = f"content with different tags {unique_test_id}".encode() - expected_hash = compute_sha256(content) - tags = ["latest", "stable", "v1.0.0", "release"] - for i, tag in enumerate(tags): - result = upload_test_file( - integration_client, project, package, content, tag=tag - ) - assert result["artifact_id"] == expected_hash - if i == 0: - assert result["deduplicated"] is False - else: - 
assert result["deduplicated"] is True - - -class TestStorageVerification: - """Tests to verify storage behavior after duplicate uploads.""" +class TestDownload: + """Tests for download functionality.""" @pytest.mark.integration - def test_artifact_table_single_row_after_duplicates( - self, integration_client, test_package - ): - """Test artifact table contains only one row after duplicate uploads.""" + def test_download_by_tag(self, integration_client, test_package): + """Test downloading artifact by tag name.""" project, package = test_package - content = b"content for single row test" - expected_hash = compute_sha256(content) + original_content = b"download by tag test" - # Upload same content multiple times with different tags - for tag in ["v1", "v2", "v3"]: - upload_test_file(integration_client, project, package, content, tag=tag) + upload_test_file( + integration_client, project, package, original_content, tag="download-tag" + ) - # Query artifact - should exist and be unique - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - artifact = response.json() - assert artifact["id"] == expected_hash - assert artifact["ref_count"] == 3 - - @pytest.mark.integration - def test_upload_table_multiple_rows_for_duplicates( - self, integration_client, test_package - ): - """Test upload table contains multiple rows for duplicate uploads (event tracking).""" - project, package = test_package - content = b"content for upload tracking test" - - # Upload same content 3 times - for tag in ["upload1", "upload2", "upload3"]: - upload_test_file(integration_client, project, package, content, tag=tag) - - # Check package stats - should show 3 uploads but fewer unique artifacts response = integration_client.get( - f"/api/v1/project/{project}/packages/{package}" + f"/api/v1/project/{project}/{package}/+/download-tag", + params={"mode": "proxy"}, ) assert response.status_code == 200 - pkg_info = response.json() - assert 
pkg_info["tag_count"] == 3 + assert response.content == original_content @pytest.mark.integration - def test_artifact_content_matches_original(self, integration_client, test_package): - """Test artifact content retrieved matches original content exactly.""" + def test_download_by_artifact_id(self, integration_client, test_package): + """Test downloading artifact by artifact ID.""" + project, package = test_package + original_content = b"download by id test" + expected_hash = compute_sha256(original_content) + + upload_test_file(integration_client, project, package, original_content) + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/artifact:{expected_hash}", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert response.content == original_content + + @pytest.mark.integration + def test_download_nonexistent_tag(self, integration_client, test_package): + """Test downloading nonexistent tag returns 404.""" + project, package = test_package + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/nonexistent-tag" + ) + assert response.status_code == 404 + + @pytest.mark.integration + def test_content_matches_original(self, integration_client, test_package): + """Test downloaded content matches original exactly.""" project, package = test_package original_content = b"exact content verification test data 12345" - # Upload - result = upload_test_file( + upload_test_file( integration_client, project, package, original_content, tag="verify" ) - # Download and compare - download_response = integration_client.get( + response = integration_client.get( f"/api/v1/project/{project}/{package}/+/verify", params={"mode": "proxy"} ) - assert download_response.status_code == 200 - downloaded_content = download_response.content - assert downloaded_content == original_content - - @pytest.mark.integration - def test_storage_stats_reflect_deduplication( - self, integration_client, test_package - ): - """Test 
total storage size matches single artifact size after duplicates.""" - project, package = test_package - content = b"content for storage stats test - should only count once" - content_size = len(content) - - # Upload same content 5 times - for tag in ["a", "b", "c", "d", "e"]: - upload_test_file(integration_client, project, package, content, tag=tag) - - # Check global stats - response = integration_client.get("/api/v1/stats") assert response.status_code == 200 - stats = response.json() - - # Deduplication should show savings - assert stats["deduplicated_uploads"] > 0 - assert stats["storage_saved_bytes"] > 0 + assert response.content == original_content class TestConcurrentUploads: @@ -308,7 +291,6 @@ class TestConcurrentUploads: def upload_worker(tag_suffix): try: - # Create a new client for this thread from httpx import Client base_url = "http://localhost:8080" @@ -332,13 +314,11 @@ class TestConcurrentUploads: except Exception as e: errors.append(str(e)) - # Run concurrent uploads with ThreadPoolExecutor(max_workers=num_concurrent) as executor: futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)] for future in as_completed(futures): - pass # Wait for all to complete + pass - # Verify results assert len(errors) == 0, f"Errors during concurrent uploads: {errors}" assert len(results) == num_concurrent @@ -353,227 +333,27 @@ class TestConcurrentUploads: assert response.json()["ref_count"] == num_concurrent -class TestDeduplicationAcrossRestarts: - """Tests for deduplication persistence.""" - - @pytest.mark.integration - def test_deduplication_persists( - self, integration_client, test_package, unique_test_id - ): - """ - Test deduplication works with persisted data. - - This test uploads content, then uploads the same content again. - Since the database persists, the second upload should detect - the existing artifact even without server restart. 
- """ - project, package = test_package - content = f"persisted content for dedup test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # First upload - result1 = upload_test_file( - integration_client, project, package, content, tag="persist1" - ) - assert result1["artifact_id"] == expected_hash - assert result1["deduplicated"] is False - - # Second upload (simulating after restart - data is persisted) - result2 = upload_test_file( - integration_client, project, package, content, tag="persist2" - ) - assert result2["artifact_id"] == expected_hash - assert result2["deduplicated"] is True - - # Verify artifact exists with correct ref_count - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - assert response.json()["ref_count"] == 2 - - -class TestS3ObjectVerification: - """Tests to verify S3 storage behavior directly.""" - - @pytest.mark.integration - def test_s3_bucket_single_object_after_duplicates( - self, integration_client, test_package, unique_test_id - ): - """Test S3 bucket contains only one object after duplicate uploads.""" - project, package = test_package - content = f"content for s3 object count test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload same content multiple times with different tags - for tag in ["s3test1", "s3test2", "s3test3"]: - upload_test_file(integration_client, project, package, content, tag=tag) - - # Verify only one S3 object exists for this hash - s3_objects = list_s3_objects_by_hash(expected_hash) - assert len(s3_objects) == 1, ( - f"Expected 1 S3 object, found {len(s3_objects)}: {s3_objects}" - ) - - # Verify the object key follows expected pattern - expected_key = ( - f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}" - ) - assert s3_objects[0] == expected_key - - -class TestUploadFailureCleanup: - """Tests for cleanup when uploads fail.""" - - @pytest.mark.integration - def 
test_upload_failure_invalid_project_no_orphaned_s3( - self, integration_client, unique_test_id - ): - """Test upload to non-existent project doesn't leave orphaned S3 objects.""" - content = f"content for orphan s3 test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Attempt upload to non-existent project - files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")} - response = integration_client.post( - f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload", - files=files, - data={"tag": "test"}, - ) - - # Upload should fail - assert response.status_code == 404 - - # Verify no S3 object was created - assert not s3_object_exists(expected_hash), ( - "Orphaned S3 object found after failed upload" - ) - - @pytest.mark.integration - def test_upload_failure_invalid_package_no_orphaned_s3( - self, integration_client, test_project, unique_test_id - ): - """Test upload to non-existent package doesn't leave orphaned S3 objects.""" - content = f"content for orphan s3 test pkg {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Attempt upload to non-existent package - files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")} - response = integration_client.post( - f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload", - files=files, - data={"tag": "test"}, - ) - - # Upload should fail - assert response.status_code == 404 - - # Verify no S3 object was created - assert not s3_object_exists(expected_hash), ( - "Orphaned S3 object found after failed upload" - ) - - @pytest.mark.integration - def test_upload_failure_empty_file_no_orphaned_s3( - self, integration_client, test_package, unique_test_id - ): - """Test upload of empty file doesn't leave orphaned S3 objects or DB records.""" - project, package = test_package - content = b"" # Empty content - - # Attempt upload of empty file - files = {"file": ("empty.bin", io.BytesIO(content), 
"application/octet-stream")} - response = integration_client.post( - f"/api/v1/project/{project}/{package}/upload", - files=files, - data={"tag": f"empty-{unique_test_id}"}, - ) - - # Upload should fail (empty files are rejected) - assert response.status_code in (400, 422), ( - f"Expected 400/422, got {response.status_code}" - ) - - @pytest.mark.integration - def test_upload_failure_no_orphaned_database_records( - self, integration_client, test_project, unique_test_id - ): - """Test failed upload doesn't leave orphaned database records.""" - content = f"content for db orphan test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Attempt upload to non-existent package (should fail before DB insert) - files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")} - response = integration_client.post( - f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload", - files=files, - data={"tag": "test"}, - ) - - # Upload should fail - assert response.status_code == 404 - - # Verify no artifact record was created - artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert artifact_response.status_code == 404, ( - "Orphaned artifact record found after failed upload" - ) - - @pytest.mark.integration - def test_duplicate_tag_upload_handles_gracefully( - self, integration_client, test_package, unique_test_id - ): - """Test uploading with duplicate tag is handled without orphaned data.""" - project, package = test_package - content1 = f"content version 1 {unique_test_id}".encode() - content2 = f"content version 2 {unique_test_id}".encode() - tag = f"duplicate-tag-{unique_test_id}" - - # First upload with tag - result1 = upload_test_file( - integration_client, project, package, content1, tag=tag - ) - hash1 = result1["artifact_id"] - - # Second upload with same tag (should update the tag to point to new artifact) - result2 = upload_test_file( - integration_client, project, package, 
content2, tag=tag - ) - hash2 = result2["artifact_id"] - - # Both artifacts should exist - assert integration_client.get(f"/api/v1/artifact/{hash1}").status_code == 200 - assert integration_client.get(f"/api/v1/artifact/{hash2}").status_code == 200 - - # Tag should point to the second artifact - tag_response = integration_client.get( - f"/api/v1/project/{project}/{package}/tags/{tag}" - ) - assert tag_response.status_code == 200 - assert tag_response.json()["artifact_id"] == hash2 - - class TestFileSizeValidation: """Tests for file size limits and empty file rejection.""" @pytest.mark.integration def test_empty_file_rejected(self, integration_client, test_package): - """Test that empty files are rejected with appropriate error.""" + """Test empty files are rejected with appropriate error.""" project, package = test_package - # Try to upload empty content files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")} response = integration_client.post( f"/api/v1/project/{project}/{package}/upload", files=files, ) - # Should be rejected (422 from storage layer or validation) assert response.status_code in [422, 400] @pytest.mark.integration def test_small_valid_file_accepted(self, integration_client, test_package): - """Test that small (1 byte) files are accepted.""" + """Test small (1 byte) files are accepted.""" project, package = test_package - content = b"X" # Single byte + content = b"X" result = upload_test_file( integration_client, project, package, content, tag="tiny" @@ -586,7 +366,7 @@ class TestFileSizeValidation: def test_file_size_reported_correctly( self, integration_client, test_package, unique_test_id ): - """Test that file size is correctly reported in response.""" + """Test file size is correctly reported in response.""" project, package = test_package content = f"Test content for size check {unique_test_id}".encode() expected_size = len(content) @@ -602,3 +382,121 @@ class TestFileSizeValidation: 
f"/api/v1/artifact/{result['artifact_id']}" ) assert artifact_response.json()["size"] == expected_size + + +class TestUploadFailureCleanup: + """Tests for cleanup when uploads fail.""" + + @pytest.mark.integration + def test_upload_failure_invalid_project_no_orphaned_s3( + self, integration_client, unique_test_id + ): + """Test upload to non-existent project doesn't leave orphaned S3 objects.""" + content = f"content for orphan s3 test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")} + response = integration_client.post( + f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload", + files=files, + data={"tag": "test"}, + ) + + assert response.status_code == 404 + + # Verify no S3 object was created + assert not s3_object_exists(expected_hash), ( + "Orphaned S3 object found after failed upload" + ) + + @pytest.mark.integration + def test_upload_failure_invalid_package_no_orphaned_s3( + self, integration_client, test_project, unique_test_id + ): + """Test upload to non-existent package doesn't leave orphaned S3 objects.""" + content = f"content for orphan s3 test pkg {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")} + response = integration_client.post( + f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload", + files=files, + data={"tag": "test"}, + ) + + assert response.status_code == 404 + + assert not s3_object_exists(expected_hash), ( + "Orphaned S3 object found after failed upload" + ) + + @pytest.mark.integration + def test_upload_failure_no_orphaned_database_records( + self, integration_client, test_project, unique_test_id + ): + """Test failed upload doesn't leave orphaned database records.""" + content = f"content for db orphan test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + files 
= {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")} + response = integration_client.post( + f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload", + files=files, + data={"tag": "test"}, + ) + + assert response.status_code == 404 + + artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert artifact_response.status_code == 404, ( + "Orphaned artifact record found after failed upload" + ) + + +class TestS3StorageVerification: + """Tests to verify S3 storage behavior.""" + + @pytest.mark.integration + def test_s3_single_object_after_duplicates( + self, integration_client, test_package, unique_test_id + ): + """Test S3 bucket contains only one object after duplicate uploads.""" + project, package = test_package + content = f"content for s3 object count test {unique_test_id}".encode() + expected_hash = compute_sha256(content) + + # Upload same content multiple times + for tag in ["s3test1", "s3test2", "s3test3"]: + upload_test_file(integration_client, project, package, content, tag=tag) + + # Verify only one S3 object exists + s3_objects = list_s3_objects_by_hash(expected_hash) + assert len(s3_objects) == 1, ( + f"Expected 1 S3 object, found {len(s3_objects)}: {s3_objects}" + ) + + # Verify object key follows expected pattern + expected_key = ( + f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}" + ) + assert s3_objects[0] == expected_key + + @pytest.mark.integration + def test_artifact_table_single_row_after_duplicates( + self, integration_client, test_package + ): + """Test artifact table contains only one row after duplicate uploads.""" + project, package = test_package + content = b"content for single row test" + expected_hash = compute_sha256(content) + + # Upload same content multiple times + for tag in ["v1", "v2", "v3"]: + upload_test_file(integration_client, project, package, content, tag=tag) + + # Query artifact + response = 
integration_client.get(f"/api/v1/artifact/{expected_hash}") + assert response.status_code == 200 + artifact = response.json() + assert artifact["id"] == expected_hash + assert artifact["ref_count"] == 3 diff --git a/backend/tests/test_duplicate_detection.py b/backend/tests/test_duplicate_detection.py deleted file mode 100644 index b2284b3..0000000 --- a/backend/tests/test_duplicate_detection.py +++ /dev/null @@ -1,207 +0,0 @@ -""" -Unit tests for duplicate detection and deduplication logic. - -Tests cover: -- _exists() method correctly identifies existing S3 keys -- S3 key generation follows expected pattern -- Storage layer skips upload when artifact already exists -- Storage layer performs upload when artifact does not exist -""" - -import pytest -import io -from unittest.mock import MagicMock, patch -from tests.conftest import ( - compute_sha256, - TEST_CONTENT_HELLO, - TEST_HASH_HELLO, -) - - -class TestExistsMethod: - """Tests for the _exists() method that checks S3 object existence.""" - - @pytest.mark.unit - def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client): - """Test _exists() returns True when object exists.""" - # Pre-populate the mock storage - test_key = "fruits/df/fd/test-hash" - mock_s3_client.objects[test_key] = b"content" - - result = mock_storage._exists(test_key) - - assert result is True - - @pytest.mark.unit - def test_exists_returns_false_for_nonexistent_key(self, mock_storage): - """Test _exists() returns False when object doesn't exist.""" - result = mock_storage._exists("fruits/no/ne/nonexistent-key") - - assert result is False - - @pytest.mark.unit - def test_exists_handles_404_error(self, mock_storage): - """Test _exists() handles 404 errors gracefully.""" - # The mock client raises ClientError for nonexistent keys - result = mock_storage._exists("fruits/xx/yy/does-not-exist") - - assert result is False - - -class TestS3KeyGeneration: - """Tests for S3 key pattern generation.""" - - @pytest.mark.unit - def 
test_s3_key_pattern(self): - """Test S3 key follows pattern: fruits/{hash[:2]}/{hash[2:4]}/{hash}""" - test_hash = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890" - - expected_key = f"fruits/{test_hash[:2]}/{test_hash[2:4]}/{test_hash}" - # Expected: fruits/ab/cd/abcdef1234567890... - - assert expected_key == f"fruits/ab/cd/{test_hash}" - - @pytest.mark.unit - def test_s3_key_generation_in_storage(self, mock_storage): - """Test storage layer generates correct S3 key.""" - content = TEST_CONTENT_HELLO - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - - expected_key = ( - f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}" - ) - assert result.s3_key == expected_key - - @pytest.mark.unit - def test_s3_key_uses_sha256_hash(self, mock_storage): - """Test S3 key is derived from SHA256 hash.""" - content = b"unique test content for key test" - file_obj = io.BytesIO(content) - expected_hash = compute_sha256(content) - - result = mock_storage._store_simple(file_obj) - - # Key should contain the hash - assert expected_hash in result.s3_key - - -class TestDeduplicationBehavior: - """Tests for deduplication (skip upload when exists).""" - - @pytest.mark.unit - def test_skips_upload_when_exists(self, mock_storage, mock_s3_client): - """Test storage skips S3 upload when artifact already exists.""" - content = TEST_CONTENT_HELLO - s3_key = ( - f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}" - ) - - # Pre-populate storage (simulate existing artifact) - mock_s3_client.objects[s3_key] = content - - # Track put_object calls - original_put = mock_s3_client.put_object - put_called = [] - - def tracked_put(*args, **kwargs): - put_called.append(True) - return original_put(*args, **kwargs) - - mock_s3_client.put_object = tracked_put - - # Store the same content - file_obj = io.BytesIO(content) - result = mock_storage._store_simple(file_obj) - - # put_object should NOT have been called 
(deduplication) - assert len(put_called) == 0 - assert result.sha256 == TEST_HASH_HELLO - - @pytest.mark.unit - def test_uploads_when_not_exists(self, mock_storage, mock_s3_client): - """Test storage uploads to S3 when artifact doesn't exist.""" - content = b"brand new unique content" - content_hash = compute_sha256(content) - s3_key = f"fruits/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}" - - # Ensure object doesn't exist - assert s3_key not in mock_s3_client.objects - - # Store the content - file_obj = io.BytesIO(content) - result = mock_storage._store_simple(file_obj) - - # Object should now exist in mock storage - assert s3_key in mock_s3_client.objects - assert mock_s3_client.objects[s3_key] == content - - @pytest.mark.unit - def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client): - """Test storing same content twice returns same hash.""" - content = b"content to be stored twice" - - # First store - file1 = io.BytesIO(content) - result1 = mock_storage._store_simple(file1) - - # Second store (duplicate) - file2 = io.BytesIO(content) - result2 = mock_storage._store_simple(file2) - - assert result1.sha256 == result2.sha256 - assert result1.s3_key == result2.s3_key - - @pytest.mark.unit - def test_different_content_different_keys(self, mock_storage): - """Test different content produces different S3 keys.""" - content1 = b"first content" - content2 = b"second content" - - file1 = io.BytesIO(content1) - result1 = mock_storage._store_simple(file1) - - file2 = io.BytesIO(content2) - result2 = mock_storage._store_simple(file2) - - assert result1.sha256 != result2.sha256 - assert result1.s3_key != result2.s3_key - - -class TestDeduplicationEdgeCases: - """Edge case tests for deduplication.""" - - @pytest.mark.unit - def test_same_content_different_filenames(self, mock_storage): - """Test same content with different metadata is deduplicated.""" - content = b"identical content" - - # Store with "filename1" - file1 = io.BytesIO(content) - 
result1 = mock_storage._store_simple(file1) - - # Store with "filename2" (same content) - file2 = io.BytesIO(content) - result2 = mock_storage._store_simple(file2) - - # Both should have same hash (content-addressable) - assert result1.sha256 == result2.sha256 - - @pytest.mark.unit - def test_whitespace_only_difference(self, mock_storage): - """Test content differing only by whitespace produces different hashes.""" - content1 = b"test content" - content2 = b"test content" # Extra space - content3 = b"test content " # Trailing space - - file1 = io.BytesIO(content1) - file2 = io.BytesIO(content2) - file3 = io.BytesIO(content3) - - result1 = mock_storage._store_simple(file1) - result2 = mock_storage._store_simple(file2) - result3 = mock_storage._store_simple(file3) - - # All should be different (content-addressable) - assert len({result1.sha256, result2.sha256, result3.sha256}) == 3 diff --git a/backend/tests/test_garbage_collection.py b/backend/tests/test_garbage_collection.py deleted file mode 100644 index 698f98b..0000000 --- a/backend/tests/test_garbage_collection.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Integration tests for garbage collection functionality. 
- -Tests cover: -- Listing orphaned artifacts (ref_count=0) -- Garbage collection in dry-run mode -- Garbage collection actual deletion -- Verifying artifacts with refs are not deleted -""" - -import pytest -from tests.conftest import ( - compute_sha256, - upload_test_file, -) - - -class TestOrphanedArtifactsEndpoint: - """Tests for GET /api/v1/admin/orphaned-artifacts endpoint.""" - - @pytest.mark.integration - def test_list_orphaned_artifacts_returns_list(self, integration_client): - """Test orphaned artifacts endpoint returns a list.""" - response = integration_client.get("/api/v1/admin/orphaned-artifacts") - assert response.status_code == 200 - assert isinstance(response.json(), list) - - @pytest.mark.integration - def test_orphaned_artifact_has_required_fields(self, integration_client): - """Test orphaned artifact response has required fields.""" - response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1") - assert response.status_code == 200 - - data = response.json() - if len(data) > 0: - artifact = data[0] - assert "id" in artifact - assert "size" in artifact - assert "created_at" in artifact - assert "created_by" in artifact - assert "original_name" in artifact - - @pytest.mark.integration - def test_orphaned_artifacts_respects_limit(self, integration_client): - """Test orphaned artifacts endpoint respects limit parameter.""" - response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5") - assert response.status_code == 200 - assert len(response.json()) <= 5 - - @pytest.mark.integration - def test_artifact_becomes_orphaned_when_tag_deleted( - self, integration_client, test_package, unique_test_id - ): - """Test artifact appears in orphaned list after tag is deleted.""" - project, package = test_package - content = f"orphan test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload with tag - upload_test_file(integration_client, project, package, content, tag="temp-tag") - - # Verify not in 
orphaned list (has ref_count=1) - response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000") - orphaned_ids = [a["id"] for a in response.json()] - assert expected_hash not in orphaned_ids - - # Delete the tag - integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag") - - # Verify now in orphaned list (ref_count=0) - response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000") - orphaned_ids = [a["id"] for a in response.json()] - assert expected_hash in orphaned_ids - - -class TestGarbageCollectionEndpoint: - """Tests for POST /api/v1/admin/garbage-collect endpoint.""" - - @pytest.mark.integration - def test_garbage_collect_dry_run_returns_response(self, integration_client): - """Test garbage collection dry run returns valid response.""" - response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true") - assert response.status_code == 200 - - data = response.json() - assert "artifacts_deleted" in data - assert "bytes_freed" in data - assert "artifact_ids" in data - assert "dry_run" in data - assert data["dry_run"] is True - - @pytest.mark.integration - def test_garbage_collect_dry_run_doesnt_delete( - self, integration_client, test_package, unique_test_id - ): - """Test garbage collection dry run doesn't actually delete artifacts.""" - project, package = test_package - content = f"dry run test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload and delete tag to create orphan - upload_test_file(integration_client, project, package, content, tag="dry-run") - integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run") - - # Verify artifact exists - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - - # Run garbage collection in dry-run mode - gc_response = integration_client.post( - "/api/v1/admin/garbage-collect?dry_run=true&limit=1000" - ) - assert gc_response.status_code == 200 
- assert expected_hash in gc_response.json()["artifact_ids"] - - # Verify artifact STILL exists (dry run didn't delete) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - - @pytest.mark.integration - def test_garbage_collect_preserves_referenced_artifacts( - self, integration_client, test_package, unique_test_id - ): - """Test garbage collection doesn't delete artifacts with ref_count > 0.""" - project, package = test_package - content = f"preserve test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload with tag (ref_count=1) - upload_test_file(integration_client, project, package, content, tag="keep-this") - - # Verify artifact exists with ref_count=1 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - assert response.json()["ref_count"] == 1 - - # Run garbage collection (dry_run to not affect other tests) - gc_response = integration_client.post( - "/api/v1/admin/garbage-collect?dry_run=true&limit=1000" - ) - assert gc_response.status_code == 200 - - # Verify artifact was NOT in delete list (has ref_count > 0) - assert expected_hash not in gc_response.json()["artifact_ids"] - - # Verify artifact still exists - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - assert response.json()["ref_count"] == 1 - - @pytest.mark.integration - def test_garbage_collect_respects_limit(self, integration_client): - """Test garbage collection respects limit parameter.""" - response = integration_client.post( - "/api/v1/admin/garbage-collect?dry_run=true&limit=5" - ) - assert response.status_code == 200 - assert response.json()["artifacts_deleted"] <= 5 - - @pytest.mark.integration - def test_garbage_collect_returns_bytes_freed(self, integration_client): - """Test garbage collection returns accurate bytes_freed.""" - response = 
integration_client.post("/api/v1/admin/garbage-collect?dry_run=true") - assert response.status_code == 200 - - data = response.json() - assert data["bytes_freed"] >= 0 - assert isinstance(data["bytes_freed"], int) diff --git a/backend/tests/test_hash_calculation.py b/backend/tests/test_hash_calculation.py deleted file mode 100644 index 309065e..0000000 --- a/backend/tests/test_hash_calculation.py +++ /dev/null @@ -1,215 +0,0 @@ -""" -Unit tests for SHA256 hash calculation and deduplication logic. - -Tests cover: -- Hash computation produces consistent results -- Hash is always 64 character lowercase hexadecimal -- Different content produces different hashes -- Binary content handling -- Large file handling (streaming) -""" - -import pytest -import hashlib -import io -from tests.conftest import ( - create_test_file, - compute_sha256, - TEST_CONTENT_HELLO, - TEST_HASH_HELLO, - TEST_CONTENT_BINARY, - TEST_HASH_BINARY, -) - - -class TestHashComputation: - """Unit tests for hash calculation functionality.""" - - @pytest.mark.unit - def test_sha256_consistent_results(self): - """Test SHA256 hash produces consistent results for identical content.""" - content = b"test content for hashing" - - # Compute hash multiple times - hash1 = compute_sha256(content) - hash2 = compute_sha256(content) - hash3 = compute_sha256(content) - - assert hash1 == hash2 == hash3 - - @pytest.mark.unit - def test_sha256_different_content_different_hash(self): - """Test SHA256 produces different hashes for different content.""" - content1 = b"content version 1" - content2 = b"content version 2" - - hash1 = compute_sha256(content1) - hash2 = compute_sha256(content2) - - assert hash1 != hash2 - - @pytest.mark.unit - def test_sha256_format_64_char_hex(self): - """Test SHA256 hash is always 64 character lowercase hexadecimal.""" - test_cases = [ - b"", # Empty - b"a", # Single char - b"Hello, World!", # Normal string - bytes(range(256)), # All byte values - b"x" * 10000, # Larger content - ] - - for 
content in test_cases: - hash_value = compute_sha256(content) - - # Check length - assert len(hash_value) == 64, ( - f"Hash length should be 64, got {len(hash_value)}" - ) - - # Check lowercase - assert hash_value == hash_value.lower(), "Hash should be lowercase" - - # Check hexadecimal - assert all(c in "0123456789abcdef" for c in hash_value), ( - "Hash should be hex" - ) - - @pytest.mark.unit - def test_sha256_known_value(self): - """Test SHA256 produces expected hash for known input.""" - assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO - - @pytest.mark.unit - def test_sha256_binary_content(self): - """Test SHA256 handles binary content correctly.""" - assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY - - # Test with null bytes - content_with_nulls = b"\x00\x00test\x00\x00" - hash_value = compute_sha256(content_with_nulls) - assert len(hash_value) == 64 - - @pytest.mark.unit - def test_sha256_streaming_computation(self): - """Test SHA256 can be computed in chunks (streaming).""" - # Large content - chunk_size = 8192 - total_size = chunk_size * 10 # 80KB - content = b"x" * total_size - - # Direct computation - direct_hash = compute_sha256(content) - - # Streaming computation - hasher = hashlib.sha256() - for i in range(0, total_size, chunk_size): - hasher.update(content[i : i + chunk_size]) - streaming_hash = hasher.hexdigest() - - assert direct_hash == streaming_hash - - @pytest.mark.unit - def test_sha256_order_matters(self): - """Test that content order affects hash (not just content set).""" - content1 = b"AB" - content2 = b"BA" - - assert compute_sha256(content1) != compute_sha256(content2) - - -class TestStorageHashComputation: - """Tests for hash computation in the storage layer.""" - - @pytest.mark.unit - def test_storage_computes_sha256(self, mock_storage): - """Test storage layer correctly computes SHA256 hash.""" - content = TEST_CONTENT_HELLO - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - 
- assert result.sha256 == TEST_HASH_HELLO - - @pytest.mark.unit - def test_storage_computes_md5(self, mock_storage): - """Test storage layer also computes MD5 hash.""" - content = TEST_CONTENT_HELLO - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - - expected_md5 = hashlib.md5(content).hexdigest() - assert result.md5 == expected_md5 - - @pytest.mark.unit - def test_storage_computes_sha1(self, mock_storage): - """Test storage layer also computes SHA1 hash.""" - content = TEST_CONTENT_HELLO - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - - expected_sha1 = hashlib.sha1(content).hexdigest() - assert result.sha1 == expected_sha1 - - @pytest.mark.unit - def test_storage_returns_correct_size(self, mock_storage): - """Test storage layer returns correct file size.""" - content = b"test content with known size" - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - - assert result.size == len(content) - - @pytest.mark.unit - def test_storage_generates_correct_s3_key(self, mock_storage): - """Test storage layer generates correct S3 key pattern.""" - content = TEST_CONTENT_HELLO - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - - # Key should be: fruits/{hash[:2]}/{hash[2:4]}/{hash} - expected_key = ( - f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}" - ) - assert result.s3_key == expected_key - - -class TestHashEdgeCases: - """Edge case tests for hash computation.""" - - @pytest.mark.unit - def test_hash_empty_content_rejected(self, mock_storage): - """Test that empty content is rejected.""" - from app.storage import HashComputationError - - file_obj = io.BytesIO(b"") - - with pytest.raises(HashComputationError): - mock_storage._store_simple(file_obj) - - @pytest.mark.unit - def test_hash_large_file_streaming(self, mock_storage): - """Test hash computation for large files uses streaming.""" - # Create a 10MB file - 
size = 10 * 1024 * 1024 - content = b"x" * size - file_obj = io.BytesIO(content) - - result = mock_storage._store_simple(file_obj) - - expected_hash = compute_sha256(content) - assert result.sha256 == expected_hash - - @pytest.mark.unit - def test_hash_special_bytes(self): - """Test hash handles all byte values correctly.""" - # All possible byte values - content = bytes(range(256)) - hash_value = compute_sha256(content) - - assert len(hash_value) == 64 - assert hash_value == TEST_HASH_BINARY diff --git a/backend/tests/test_ref_count.py b/backend/tests/test_ref_count.py deleted file mode 100644 index 6a59995..0000000 --- a/backend/tests/test_ref_count.py +++ /dev/null @@ -1,458 +0,0 @@ -""" -Unit and integration tests for reference counting behavior. - -Tests cover: -- ref_count is set correctly for new artifacts -- ref_count increments on duplicate uploads -- ref_count query correctly identifies existing artifacts -- Artifact lookup by SHA256 hash works correctly -""" - -import pytest -import io -from tests.conftest import ( - compute_sha256, - upload_test_file, - TEST_CONTENT_HELLO, - TEST_HASH_HELLO, -) - - -class TestRefCountQuery: - """Tests for ref_count querying and artifact lookup.""" - - @pytest.mark.integration - def test_artifact_lookup_by_sha256(self, integration_client, test_package): - """Test artifact lookup by SHA256 hash (primary key) works correctly.""" - project, package = test_package - content = b"unique content for lookup test" - expected_hash = compute_sha256(content) - - # Upload a file - upload_result = upload_test_file( - integration_client, project, package, content, tag="v1" - ) - assert upload_result["artifact_id"] == expected_hash - - # Look up artifact by ID (SHA256) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - - artifact = response.json() - assert artifact["id"] == expected_hash - assert artifact["sha256"] == expected_hash - assert artifact["size"] == len(content) - - 
@pytest.mark.integration - def test_ref_count_query_identifies_existing_artifact( - self, integration_client, test_package - ): - """Test ref_count query correctly identifies existing artifacts by hash.""" - project, package = test_package - content = b"content for ref count query test" - expected_hash = compute_sha256(content) - - # Upload a file with a tag - upload_result = upload_test_file( - integration_client, project, package, content, tag="v1" - ) - - # Query artifact and check ref_count - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - - artifact = response.json() - assert artifact["ref_count"] >= 1 # At least 1 from the tag - - @pytest.mark.integration - def test_ref_count_set_to_1_for_new_artifact_with_tag( - self, integration_client, test_package, unique_test_id - ): - """Test ref_count is set to 1 for new artifacts when created with a tag.""" - project, package = test_package - content = f"brand new content for ref count test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload a new file with a tag - upload_result = upload_test_file( - integration_client, project, package, content, tag="initial" - ) - - assert upload_result["artifact_id"] == expected_hash - assert upload_result["ref_count"] == 1 - assert upload_result["deduplicated"] is False - - @pytest.mark.integration - def test_ref_count_increments_on_duplicate_upload_with_tag( - self, integration_client, test_package, unique_test_id - ): - """Test ref_count is incremented when duplicate content is uploaded with a new tag.""" - project, package = test_package - content = f"content that will be uploaded twice {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # First upload with tag - result1 = upload_test_file( - integration_client, project, package, content, tag="v1" - ) - assert result1["ref_count"] == 1 - assert result1["deduplicated"] is False - - # Second upload with different tag 
(same content) - result2 = upload_test_file( - integration_client, project, package, content, tag="v2" - ) - assert result2["artifact_id"] == expected_hash - assert result2["ref_count"] == 2 - assert result2["deduplicated"] is True - - @pytest.mark.integration - def test_ref_count_after_multiple_tags(self, integration_client, test_package): - """Test ref_count correctly reflects number of tags pointing to artifact.""" - project, package = test_package - content = b"content for multiple tag test" - expected_hash = compute_sha256(content) - - # Upload with multiple tags - tags = ["v1", "v2", "v3", "latest"] - for i, tag in enumerate(tags): - result = upload_test_file( - integration_client, project, package, content, tag=tag - ) - assert result["artifact_id"] == expected_hash - assert result["ref_count"] == i + 1 - - # Verify final ref_count via artifact endpoint - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.status_code == 200 - assert response.json()["ref_count"] == len(tags) - - -class TestRefCountWithDeletion: - """Tests for ref_count behavior when tags are deleted.""" - - @pytest.mark.integration - def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package): - """Test ref_count decrements when a tag is deleted.""" - project, package = test_package - content = b"content for delete test" - expected_hash = compute_sha256(content) - - # Upload with two tags - upload_test_file(integration_client, project, package, content, tag="v1") - upload_test_file(integration_client, project, package, content, tag="v2") - - # Verify ref_count is 2 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 2 - - # Delete one tag - delete_response = integration_client.delete( - f"/api/v1/project/{project}/{package}/tags/v1" - ) - assert delete_response.status_code == 204 - - # Verify ref_count is now 1 - response = 
integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 1 - - @pytest.mark.integration - def test_ref_count_zero_after_all_tags_deleted( - self, integration_client, test_package - ): - """Test ref_count goes to 0 when all tags are deleted.""" - project, package = test_package - content = b"content that will be orphaned" - expected_hash = compute_sha256(content) - - # Upload with one tag - upload_test_file(integration_client, project, package, content, tag="only-tag") - - # Delete the tag - integration_client.delete(f"/api/v1/project/{project}/{package}/tags/only-tag") - - # Verify ref_count is 0 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 0 - - -class TestRefCountCascadeDelete: - """Tests for ref_count behavior during cascade deletions.""" - - @pytest.mark.integration - def test_ref_count_decrements_on_package_delete( - self, integration_client, unique_test_id - ): - """Test ref_count decrements for all tags when package is deleted.""" - # Create a project and package manually (not using fixtures to control cleanup) - project_name = f"cascade-pkg-{unique_test_id}" - package_name = f"test-pkg-{unique_test_id}" - - # Create project - response = integration_client.post( - "/api/v1/projects", - json={ - "name": project_name, - "description": "Test project", - "is_public": True, - }, - ) - assert response.status_code == 200 - - # Create package - response = integration_client.post( - f"/api/v1/project/{project_name}/packages", - json={"name": package_name, "description": "Test package"}, - ) - assert response.status_code == 200 - - # Upload content with multiple tags - content = f"cascade delete test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - upload_test_file( - integration_client, project_name, package_name, content, tag="v1" - ) - upload_test_file( - integration_client, project_name, package_name, content, tag="v2" - ) - 
upload_test_file( - integration_client, project_name, package_name, content, tag="v3" - ) - - # Verify ref_count is 3 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 3 - - # Delete the package (should cascade delete all tags and decrement ref_count) - delete_response = integration_client.delete( - f"/api/v1/project/{project_name}/packages/{package_name}" - ) - assert delete_response.status_code == 204 - - # Verify ref_count is 0 (all tags were deleted) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 0 - - # Cleanup: delete the project - integration_client.delete(f"/api/v1/projects/{project_name}") - - @pytest.mark.integration - def test_ref_count_decrements_on_project_delete( - self, integration_client, unique_test_id - ): - """Test ref_count decrements for all tags in all packages when project is deleted.""" - # Create a project manually (not using fixtures to control cleanup) - project_name = f"cascade-proj-{unique_test_id}" - package1_name = f"pkg1-{unique_test_id}" - package2_name = f"pkg2-{unique_test_id}" - - # Create project - response = integration_client.post( - "/api/v1/projects", - json={ - "name": project_name, - "description": "Test project", - "is_public": True, - }, - ) - assert response.status_code == 200 - - # Create two packages - for pkg_name in [package1_name, package2_name]: - response = integration_client.post( - f"/api/v1/project/{project_name}/packages", - json={"name": pkg_name, "description": "Test package"}, - ) - assert response.status_code == 200 - - # Upload same content with tags in both packages - content = f"project cascade test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - upload_test_file( - integration_client, project_name, package1_name, content, tag="v1" - ) - upload_test_file( - integration_client, project_name, package1_name, content, tag="v2" - ) - upload_test_file( - 
integration_client, project_name, package2_name, content, tag="latest" - ) - upload_test_file( - integration_client, project_name, package2_name, content, tag="stable" - ) - - # Verify ref_count is 4 (2 tags in each of 2 packages) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 4 - - # Delete the project (should cascade delete all packages, tags, and decrement ref_count) - delete_response = integration_client.delete(f"/api/v1/projects/{project_name}") - assert delete_response.status_code == 204 - - # Verify ref_count is 0 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 0 - - @pytest.mark.integration - def test_shared_artifact_ref_count_partial_decrement( - self, integration_client, unique_test_id - ): - """Test ref_count correctly decrements when artifact is shared across packages.""" - # Create project with two packages - project_name = f"shared-artifact-{unique_test_id}" - package1_name = f"pkg1-{unique_test_id}" - package2_name = f"pkg2-{unique_test_id}" - - # Create project - response = integration_client.post( - "/api/v1/projects", - json={ - "name": project_name, - "description": "Test project", - "is_public": True, - }, - ) - assert response.status_code == 200 - - # Create two packages - for pkg_name in [package1_name, package2_name]: - response = integration_client.post( - f"/api/v1/project/{project_name}/packages", - json={"name": pkg_name, "description": "Test package"}, - ) - assert response.status_code == 200 - - # Upload same content to both packages - content = f"shared artifact {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - upload_test_file( - integration_client, project_name, package1_name, content, tag="v1" - ) - upload_test_file( - integration_client, project_name, package2_name, content, tag="v1" - ) - - # Verify ref_count is 2 - response = 
integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 2 - - # Delete only package1 (package2 still references the artifact) - delete_response = integration_client.delete( - f"/api/v1/project/{project_name}/packages/{package1_name}" - ) - assert delete_response.status_code == 204 - - # Verify ref_count is 1 (only package2's tag remains) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 1 - - # Cleanup - integration_client.delete(f"/api/v1/projects/{project_name}") - - -class TestRefCountTagUpdate: - """Tests for ref_count behavior when tags are updated to point to different artifacts.""" - - @pytest.mark.integration - def test_ref_count_adjusts_on_tag_update( - self, integration_client, test_package, unique_test_id - ): - """Test ref_count adjusts when a tag is updated to point to a different artifact.""" - project, package = test_package - - # Upload two different artifacts - content1 = f"artifact one {unique_test_id}".encode() - content2 = f"artifact two {unique_test_id}".encode() - hash1 = compute_sha256(content1) - hash2 = compute_sha256(content2) - - # Upload first artifact with tag "latest" - upload_test_file(integration_client, project, package, content1, tag="latest") - - # Verify first artifact has ref_count 1 - response = integration_client.get(f"/api/v1/artifact/{hash1}") - assert response.json()["ref_count"] == 1 - - # Upload second artifact with different tag - upload_test_file(integration_client, project, package, content2, tag="stable") - - # Now update "latest" tag to point to second artifact - # This is done by uploading the same content with the same tag - upload_test_file(integration_client, project, package, content2, tag="latest") - - # Verify first artifact ref_count decreased to 0 (tag moved away) - response = integration_client.get(f"/api/v1/artifact/{hash1}") - assert response.json()["ref_count"] == 0 - - # Verify second artifact 
ref_count increased to 2 (stable + latest) - response = integration_client.get(f"/api/v1/artifact/{hash2}") - assert response.json()["ref_count"] == 2 - - @pytest.mark.integration - def test_ref_count_unchanged_when_tag_same_artifact( - self, integration_client, test_package, unique_test_id - ): - """Test ref_count doesn't change when tag is 'updated' to same artifact.""" - project, package = test_package - - content = f"same artifact {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload with tag - upload_test_file(integration_client, project, package, content, tag="v1") - - # Verify ref_count is 1 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 1 - - # Upload same content with same tag (no-op) - upload_test_file(integration_client, project, package, content, tag="v1") - - # Verify ref_count is still 1 (no double-counting) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 1 - - @pytest.mark.integration - def test_tag_via_post_endpoint_increments_ref_count( - self, integration_client, test_package, unique_test_id - ): - """Test creating tag via POST /tags endpoint increments ref_count.""" - project, package = test_package - - content = f"tag endpoint test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload artifact without tag - result = upload_test_file( - integration_client, project, package, content, filename="test.bin", tag=None - ) - artifact_id = result["artifact_id"] - - # Verify ref_count is 0 (no tags yet) - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 0 - - # Create tag via POST endpoint - tag_response = integration_client.post( - f"/api/v1/project/{project}/{package}/tags", - json={"name": "v1.0.0", "artifact_id": artifact_id}, - ) - assert tag_response.status_code == 200 - - # Verify ref_count is now 1 - 
response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 1 - - # Create another tag via POST endpoint - tag_response = integration_client.post( - f"/api/v1/project/{project}/{package}/tags", - json={"name": "latest", "artifact_id": artifact_id}, - ) - assert tag_response.status_code == 200 - - # Verify ref_count is now 2 - response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 2 diff --git a/backend/tests/test_stats_endpoints.py b/backend/tests/test_stats_endpoints.py deleted file mode 100644 index ce4da69..0000000 --- a/backend/tests/test_stats_endpoints.py +++ /dev/null @@ -1,488 +0,0 @@ -""" -Integration tests for statistics endpoints. - -Tests cover: -- Global stats endpoint -- Deduplication stats endpoint -- Cross-project deduplication -- Timeline stats -- Export and report endpoints -- Package and artifact stats -""" - -import pytest -from tests.conftest import compute_sha256, upload_test_file - - -class TestGlobalStats: - """Tests for GET /api/v1/stats endpoint.""" - - @pytest.mark.integration - def test_stats_returns_valid_response(self, integration_client): - """Test stats endpoint returns expected fields.""" - response = integration_client.get("/api/v1/stats") - assert response.status_code == 200 - - data = response.json() - # Check all required fields exist - assert "total_artifacts" in data - assert "total_size_bytes" in data - assert "unique_artifacts" in data - assert "orphaned_artifacts" in data - assert "orphaned_size_bytes" in data - assert "total_uploads" in data - assert "deduplicated_uploads" in data - assert "deduplication_ratio" in data - assert "storage_saved_bytes" in data - - @pytest.mark.integration - def test_stats_values_are_non_negative(self, integration_client): - """Test all stat values are non-negative.""" - response = integration_client.get("/api/v1/stats") - assert response.status_code == 200 - - data = response.json() - 
assert data["total_artifacts"] >= 0 - assert data["total_size_bytes"] >= 0 - assert data["unique_artifacts"] >= 0 - assert data["orphaned_artifacts"] >= 0 - assert data["total_uploads"] >= 0 - assert data["deduplicated_uploads"] >= 0 - assert data["deduplication_ratio"] >= 0 - assert data["storage_saved_bytes"] >= 0 - - @pytest.mark.integration - def test_stats_update_after_upload( - self, integration_client, test_package, unique_test_id - ): - """Test stats update after uploading an artifact.""" - project, package = test_package - - # Get initial stats - initial_response = integration_client.get("/api/v1/stats") - initial_stats = initial_response.json() - - # Upload a new file - content = f"stats test content {unique_test_id}".encode() - upload_test_file( - integration_client, project, package, content, tag=f"stats-{unique_test_id}" - ) - - # Get updated stats - updated_response = integration_client.get("/api/v1/stats") - updated_stats = updated_response.json() - - # Verify stats increased - assert updated_stats["total_uploads"] >= initial_stats["total_uploads"] - - -class TestDeduplicationStats: - """Tests for GET /api/v1/stats/deduplication endpoint.""" - - @pytest.mark.integration - def test_dedup_stats_returns_valid_response(self, integration_client): - """Test deduplication stats returns expected fields.""" - response = integration_client.get("/api/v1/stats/deduplication") - assert response.status_code == 200 - - data = response.json() - assert "total_logical_bytes" in data - assert "total_physical_bytes" in data - assert "bytes_saved" in data - assert "savings_percentage" in data - assert "total_uploads" in data - assert "unique_artifacts" in data - assert "duplicate_uploads" in data - assert "average_ref_count" in data - assert "max_ref_count" in data - assert "most_referenced_artifacts" in data - - @pytest.mark.integration - def test_most_referenced_artifacts_format(self, integration_client): - """Test most_referenced_artifacts has correct structure.""" - 
response = integration_client.get("/api/v1/stats/deduplication") - assert response.status_code == 200 - - data = response.json() - artifacts = data["most_referenced_artifacts"] - assert isinstance(artifacts, list) - - if len(artifacts) > 0: - artifact = artifacts[0] - assert "artifact_id" in artifact - assert "ref_count" in artifact - assert "size" in artifact - assert "storage_saved" in artifact - - @pytest.mark.integration - def test_dedup_stats_with_top_n_param(self, integration_client): - """Test deduplication stats respects top_n parameter.""" - response = integration_client.get("/api/v1/stats/deduplication?top_n=3") - assert response.status_code == 200 - - data = response.json() - assert len(data["most_referenced_artifacts"]) <= 3 - - @pytest.mark.integration - def test_savings_percentage_valid_range(self, integration_client): - """Test savings percentage is between 0 and 100.""" - response = integration_client.get("/api/v1/stats/deduplication") - assert response.status_code == 200 - - data = response.json() - assert 0 <= data["savings_percentage"] <= 100 - - -class TestCrossProjectStats: - """Tests for GET /api/v1/stats/cross-project endpoint.""" - - @pytest.mark.integration - def test_cross_project_returns_valid_response(self, integration_client): - """Test cross-project stats returns expected fields.""" - response = integration_client.get("/api/v1/stats/cross-project") - assert response.status_code == 200 - - data = response.json() - assert "shared_artifacts_count" in data - assert "total_cross_project_savings" in data - assert "shared_artifacts" in data - assert isinstance(data["shared_artifacts"], list) - - @pytest.mark.integration - def test_cross_project_respects_limit(self, integration_client): - """Test cross-project stats respects limit parameter.""" - response = integration_client.get("/api/v1/stats/cross-project?limit=5") - assert response.status_code == 200 - - data = response.json() - assert len(data["shared_artifacts"]) <= 5 - - 
@pytest.mark.integration - def test_cross_project_detects_shared_artifacts( - self, integration_client, unique_test_id - ): - """Test cross-project deduplication is detected.""" - content = f"shared across projects {unique_test_id}".encode() - - # Create two projects - proj1 = f"cross-proj-a-{unique_test_id}" - proj2 = f"cross-proj-b-{unique_test_id}" - - try: - # Create projects and packages - integration_client.post( - "/api/v1/projects", - json={"name": proj1, "description": "Test", "is_public": True}, - ) - integration_client.post( - "/api/v1/projects", - json={"name": proj2, "description": "Test", "is_public": True}, - ) - integration_client.post( - f"/api/v1/project/{proj1}/packages", - json={"name": "pkg", "description": "Test"}, - ) - integration_client.post( - f"/api/v1/project/{proj2}/packages", - json={"name": "pkg", "description": "Test"}, - ) - - # Upload same content to both projects - upload_test_file(integration_client, proj1, "pkg", content, tag="v1") - upload_test_file(integration_client, proj2, "pkg", content, tag="v1") - - # Check cross-project stats - response = integration_client.get("/api/v1/stats/cross-project") - assert response.status_code == 200 - - data = response.json() - assert data["shared_artifacts_count"] >= 1 - - finally: - # Cleanup - integration_client.delete(f"/api/v1/projects/{proj1}") - integration_client.delete(f"/api/v1/projects/{proj2}") - - -class TestTimelineStats: - """Tests for GET /api/v1/stats/timeline endpoint.""" - - @pytest.mark.integration - def test_timeline_returns_valid_response(self, integration_client): - """Test timeline stats returns expected fields.""" - response = integration_client.get("/api/v1/stats/timeline") - assert response.status_code == 200 - - data = response.json() - assert "period" in data - assert "start_date" in data - assert "end_date" in data - assert "data_points" in data - assert isinstance(data["data_points"], list) - - @pytest.mark.integration - def test_timeline_daily_period(self, 
integration_client): - """Test timeline with daily period.""" - response = integration_client.get("/api/v1/stats/timeline?period=daily") - assert response.status_code == 200 - - data = response.json() - assert data["period"] == "daily" - - @pytest.mark.integration - def test_timeline_weekly_period(self, integration_client): - """Test timeline with weekly period.""" - response = integration_client.get("/api/v1/stats/timeline?period=weekly") - assert response.status_code == 200 - - data = response.json() - assert data["period"] == "weekly" - - @pytest.mark.integration - def test_timeline_monthly_period(self, integration_client): - """Test timeline with monthly period.""" - response = integration_client.get("/api/v1/stats/timeline?period=monthly") - assert response.status_code == 200 - - data = response.json() - assert data["period"] == "monthly" - - @pytest.mark.integration - def test_timeline_invalid_period_rejected(self, integration_client): - """Test timeline rejects invalid period.""" - response = integration_client.get("/api/v1/stats/timeline?period=invalid") - assert response.status_code == 422 - - @pytest.mark.integration - def test_timeline_data_point_structure(self, integration_client): - """Test timeline data points have correct structure.""" - response = integration_client.get("/api/v1/stats/timeline") - assert response.status_code == 200 - - data = response.json() - if len(data["data_points"]) > 0: - point = data["data_points"][0] - assert "date" in point - assert "total_uploads" in point - assert "unique_artifacts" in point - assert "duplicated_uploads" in point - assert "bytes_saved" in point - - -class TestExportEndpoint: - """Tests for GET /api/v1/stats/export endpoint.""" - - @pytest.mark.integration - def test_export_json_format(self, integration_client): - """Test export with JSON format.""" - response = integration_client.get("/api/v1/stats/export?format=json") - assert response.status_code == 200 - - data = response.json() - assert 
"total_artifacts" in data - assert "generated_at" in data - - @pytest.mark.integration - def test_export_csv_format(self, integration_client): - """Test export with CSV format.""" - response = integration_client.get("/api/v1/stats/export?format=csv") - assert response.status_code == 200 - assert "text/csv" in response.headers.get("content-type", "") - - content = response.text - assert "Metric,Value" in content - assert "total_artifacts" in content - - @pytest.mark.integration - def test_export_invalid_format_rejected(self, integration_client): - """Test export rejects invalid format.""" - response = integration_client.get("/api/v1/stats/export?format=xml") - assert response.status_code == 422 - - -class TestReportEndpoint: - """Tests for GET /api/v1/stats/report endpoint.""" - - @pytest.mark.integration - def test_report_markdown_format(self, integration_client): - """Test report with markdown format.""" - response = integration_client.get("/api/v1/stats/report?format=markdown") - assert response.status_code == 200 - - data = response.json() - assert data["format"] == "markdown" - assert "generated_at" in data - assert "content" in data - assert "# Orchard Storage Report" in data["content"] - - @pytest.mark.integration - def test_report_json_format(self, integration_client): - """Test report with JSON format.""" - response = integration_client.get("/api/v1/stats/report?format=json") - assert response.status_code == 200 - - data = response.json() - assert data["format"] == "json" - assert "content" in data - - @pytest.mark.integration - def test_report_contains_sections(self, integration_client): - """Test markdown report contains expected sections.""" - response = integration_client.get("/api/v1/stats/report?format=markdown") - assert response.status_code == 200 - - content = response.json()["content"] - assert "## Overview" in content - assert "## Storage" in content - assert "## Uploads" in content - - -class TestProjectStats: - """Tests for GET 
/api/v1/projects/:project/stats endpoint.""" - - @pytest.mark.integration - def test_project_stats_returns_valid_response( - self, integration_client, test_project - ): - """Test project stats returns expected fields.""" - response = integration_client.get(f"/api/v1/projects/{test_project}/stats") - assert response.status_code == 200 - - data = response.json() - assert "project_id" in data - assert "project_name" in data - assert "package_count" in data - assert "tag_count" in data - assert "artifact_count" in data - assert "total_size_bytes" in data - assert "upload_count" in data - assert "deduplicated_uploads" in data - assert "storage_saved_bytes" in data - assert "deduplication_ratio" in data - - @pytest.mark.integration - def test_project_stats_not_found(self, integration_client): - """Test project stats returns 404 for non-existent project.""" - response = integration_client.get("/api/v1/projects/nonexistent-project/stats") - assert response.status_code == 404 - - -class TestPackageStats: - """Tests for GET /api/v1/project/:project/packages/:package/stats endpoint.""" - - @pytest.mark.integration - def test_package_stats_returns_valid_response( - self, integration_client, test_package - ): - """Test package stats returns expected fields.""" - project, package = test_package - response = integration_client.get( - f"/api/v1/project/{project}/packages/{package}/stats" - ) - assert response.status_code == 200 - - data = response.json() - assert "package_id" in data - assert "package_name" in data - assert "project_name" in data - assert "tag_count" in data - assert "artifact_count" in data - assert "total_size_bytes" in data - assert "upload_count" in data - assert "deduplicated_uploads" in data - assert "storage_saved_bytes" in data - assert "deduplication_ratio" in data - - @pytest.mark.integration - def test_package_stats_not_found(self, integration_client, test_project): - """Test package stats returns 404 for non-existent package.""" - response = 
integration_client.get( - f"/api/v1/project/{test_project}/packages/nonexistent-package/stats" - ) - assert response.status_code == 404 - - -class TestArtifactStats: - """Tests for GET /api/v1/artifact/:id/stats endpoint.""" - - @pytest.mark.integration - def test_artifact_stats_returns_valid_response( - self, integration_client, test_package, unique_test_id - ): - """Test artifact stats returns expected fields.""" - project, package = test_package - content = f"artifact stats test {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - # Upload artifact - upload_test_file( - integration_client, project, package, content, tag=f"art-{unique_test_id}" - ) - - # Get artifact stats - response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats") - assert response.status_code == 200 - - data = response.json() - assert "artifact_id" in data - assert "sha256" in data - assert "size" in data - assert "ref_count" in data - assert "storage_savings" in data - assert "tags" in data - assert "projects" in data - assert "packages" in data - - @pytest.mark.integration - def test_artifact_stats_not_found(self, integration_client): - """Test artifact stats returns 404 for non-existent artifact.""" - fake_hash = "0" * 64 - response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats") - assert response.status_code == 404 - - @pytest.mark.integration - def test_artifact_stats_shows_correct_projects( - self, integration_client, unique_test_id - ): - """Test artifact stats shows all projects using the artifact.""" - content = f"multi-project artifact {unique_test_id}".encode() - expected_hash = compute_sha256(content) - - proj1 = f"art-stats-a-{unique_test_id}" - proj2 = f"art-stats-b-{unique_test_id}" - - try: - # Create projects and packages - integration_client.post( - "/api/v1/projects", - json={"name": proj1, "description": "Test", "is_public": True}, - ) - integration_client.post( - "/api/v1/projects", - json={"name": proj2, "description": 
"Test", "is_public": True}, - ) - integration_client.post( - f"/api/v1/project/{proj1}/packages", - json={"name": "pkg", "description": "Test"}, - ) - integration_client.post( - f"/api/v1/project/{proj2}/packages", - json={"name": "pkg", "description": "Test"}, - ) - - # Upload same content to both projects - upload_test_file(integration_client, proj1, "pkg", content, tag="v1") - upload_test_file(integration_client, proj2, "pkg", content, tag="v1") - - # Check artifact stats - response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats") - assert response.status_code == 200 - - data = response.json() - assert len(data["projects"]) == 2 - assert proj1 in data["projects"] - assert proj2 in data["projects"] - - finally: - integration_client.delete(f"/api/v1/projects/{proj1}") - integration_client.delete(f"/api/v1/projects/{proj2}") diff --git a/backend/tests/unit/__init__.py b/backend/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/unit/test_models.py b/backend/tests/unit/test_models.py new file mode 100644 index 0000000..ae85605 --- /dev/null +++ b/backend/tests/unit/test_models.py @@ -0,0 +1,271 @@ +""" +Unit tests for SQLAlchemy models. 
+ +Tests cover: +- Model instantiation and defaults +- Property aliases (sha256, format_metadata) +- Relationship definitions +- Constraint definitions +""" + +import pytest +import uuid +from datetime import datetime + + +class TestArtifactModel: + """Tests for the Artifact model.""" + + @pytest.mark.unit + def test_artifact_sha256_property(self): + """Test sha256 property is an alias for id.""" + from app.models import Artifact + + artifact = Artifact( + id="a" * 64, + size=1024, + created_by="test-user", + s3_key="fruits/aa/aa/test", + ) + + assert artifact.sha256 == artifact.id + assert artifact.sha256 == "a" * 64 + + @pytest.mark.unit + def test_artifact_format_metadata_alias(self): + """Test format_metadata is an alias for artifact_metadata.""" + from app.models import Artifact + + test_metadata = {"format": "tarball", "version": "1.0.0"} + artifact = Artifact( + id="b" * 64, + size=2048, + created_by="test-user", + s3_key="fruits/bb/bb/test", + artifact_metadata=test_metadata, + ) + + assert artifact.format_metadata == test_metadata + assert artifact.format_metadata == artifact.artifact_metadata + + @pytest.mark.unit + def test_artifact_format_metadata_setter(self): + """Test format_metadata setter updates artifact_metadata.""" + from app.models import Artifact + + artifact = Artifact( + id="c" * 64, + size=512, + created_by="test-user", + s3_key="fruits/cc/cc/test", + ) + + new_metadata = {"type": "rpm", "arch": "x86_64"} + artifact.format_metadata = new_metadata + + assert artifact.artifact_metadata == new_metadata + assert artifact.format_metadata == new_metadata + + @pytest.mark.unit + def test_artifact_default_ref_count(self): + """Test artifact ref_count column has default value of 1.""" + from app.models import Artifact + + # Check the column definition has the right default + ref_count_col = Artifact.__table__.columns["ref_count"] + assert ref_count_col.default is not None + assert ref_count_col.default.arg == 1 + + @pytest.mark.unit + def 
test_artifact_default_metadata_is_dict(self): + """Test artifact default metadata is an empty dict.""" + from app.models import Artifact + + artifact = Artifact( + id="e" * 64, + size=100, + created_by="test-user", + s3_key="fruits/ee/ee/test", + ) + + # Default might be None until saved, but the column default is dict + assert artifact.artifact_metadata is None or isinstance( + artifact.artifact_metadata, dict + ) + + +class TestProjectModel: + """Tests for the Project model.""" + + @pytest.mark.unit + def test_project_default_is_public(self): + """Test project is_public column has default value of True.""" + from app.models import Project + + # Check the column definition has the right default + is_public_col = Project.__table__.columns["is_public"] + assert is_public_col.default is not None + assert is_public_col.default.arg is True + + @pytest.mark.unit + def test_project_uuid_generation(self): + """Test project generates UUID by default.""" + from app.models import Project + + project = Project( + name="uuid-test-project", + created_by="test-user", + ) + + # UUID should be set by default function + assert project.id is not None or hasattr(Project.id, "default") + + +class TestPackageModel: + """Tests for the Package model.""" + + @pytest.mark.unit + def test_package_default_format(self): + """Test package format column has default value of 'generic'.""" + from app.models import Package + + # Check the column definition has the right default + format_col = Package.__table__.columns["format"] + assert format_col.default is not None + assert format_col.default.arg == "generic" + + @pytest.mark.unit + def test_package_default_platform(self): + """Test package platform column has default value of 'any'.""" + from app.models import Package + + # Check the column definition has the right default + platform_col = Package.__table__.columns["platform"] + assert platform_col.default is not None + assert platform_col.default.arg == "any" + + +class TestTagModel: + 
"""Tests for the Tag model.""" + + @pytest.mark.unit + def test_tag_requires_package_id(self): + """Test tag requires package_id.""" + from app.models import Tag + + tag = Tag( + name="v1.0.0", + package_id=uuid.uuid4(), + artifact_id="f" * 64, + created_by="test-user", + ) + + assert tag.package_id is not None + assert tag.artifact_id == "f" * 64 + + +class TestTagHistoryModel: + """Tests for the TagHistory model.""" + + @pytest.mark.unit + def test_tag_history_default_change_type(self): + """Test tag history change_type column has default value of 'update'.""" + from app.models import TagHistory + + # Check the column definition has the right default + change_type_col = TagHistory.__table__.columns["change_type"] + assert change_type_col.default is not None + assert change_type_col.default.arg == "update" + + @pytest.mark.unit + def test_tag_history_allows_null_old_artifact(self): + """Test tag history allows null old_artifact_id (for create events).""" + from app.models import TagHistory + + history = TagHistory( + tag_id=uuid.uuid4(), + old_artifact_id=None, + new_artifact_id="h" * 64, + change_type="create", + changed_by="test-user", + ) + + assert history.old_artifact_id is None + + +class TestUploadModel: + """Tests for the Upload model.""" + + @pytest.mark.unit + def test_upload_default_deduplicated_is_false(self): + """Test upload deduplicated column has default value of False.""" + from app.models import Upload + + # Check the column definition has the right default + deduplicated_col = Upload.__table__.columns["deduplicated"] + assert deduplicated_col.default is not None + assert deduplicated_col.default.arg is False + + @pytest.mark.unit + def test_upload_default_checksum_verified_is_true(self): + """Test upload checksum_verified column has default value of True.""" + from app.models import Upload + + # Check the column definition has the right default + checksum_verified_col = Upload.__table__.columns["checksum_verified"] + assert 
checksum_verified_col.default is not None + assert checksum_verified_col.default.arg is True + + +class TestAccessPermissionModel: + """Tests for the AccessPermission model.""" + + @pytest.mark.unit + def test_access_permission_levels(self): + """Test valid access permission levels.""" + from app.models import AccessPermission + + # This tests the check constraint values + valid_levels = ["read", "write", "admin"] + + for level in valid_levels: + permission = AccessPermission( + project_id=uuid.uuid4(), + user_id="test-user", + level=level, + ) + assert permission.level == level + + +class TestAuditLogModel: + """Tests for the AuditLog model.""" + + @pytest.mark.unit + def test_audit_log_required_fields(self): + """Test audit log has all required fields.""" + from app.models import AuditLog + + log = AuditLog( + action="project.create", + resource="/projects/test-project", + user_id="test-user", + ) + + assert log.action == "project.create" + assert log.resource == "/projects/test-project" + assert log.user_id == "test-user" + + @pytest.mark.unit + def test_audit_log_optional_details(self): + """Test audit log can have optional details JSON.""" + from app.models import AuditLog + + details = {"old_value": "v1", "new_value": "v2"} + log = AuditLog( + action="tag.update", + resource="/projects/test/packages/pkg/tags/latest", + user_id="test-user", + details=details, + ) + + assert log.details == details diff --git a/backend/tests/unit/test_storage.py b/backend/tests/unit/test_storage.py new file mode 100644 index 0000000..3fbe6eb --- /dev/null +++ b/backend/tests/unit/test_storage.py @@ -0,0 +1,439 @@ +""" +Unit tests for S3 storage layer. 
"""
Unit tests for S3 storage layer.

Tests cover:
- SHA256 hash calculation and consistency
- Hash format validation (64-char hex)
- S3 key generation pattern
- Deduplication behavior (_exists method)
- Storage result computation (MD5, SHA1, size)
- Edge cases (empty files, large files, binary content)
"""

import pytest
import hashlib
import io
from tests.factories import (
    compute_sha256,
    TEST_CONTENT_HELLO,
    TEST_HASH_HELLO,
    TEST_CONTENT_BINARY,
    TEST_HASH_BINARY,
)


def _expected_key(digest: str) -> str:
    """Return the content-addressed S3 key for a SHA256 digest."""
    return f"fruits/{digest[:2]}/{digest[2:4]}/{digest}"


# =============================================================================
# Hash Computation Tests
# =============================================================================


class TestHashComputation:
    """Unit tests for hash calculation functionality."""

    @pytest.mark.unit
    def test_sha256_consistent_results(self):
        """Hashing identical bytes repeatedly yields a single unique digest."""
        payload = b"test content for hashing"

        digests = {compute_sha256(payload) for _ in range(3)}

        assert len(digests) == 1

    @pytest.mark.unit
    def test_sha256_different_content_different_hash(self):
        """Distinct byte strings hash to distinct digests."""
        first = compute_sha256(b"content version 1")
        second = compute_sha256(b"content version 2")

        assert first != second

    @pytest.mark.unit
    def test_sha256_format_64_char_hex(self):
        """Every digest is 64 lowercase hexadecimal characters."""
        samples = [
            b"",  # Empty
            b"a",  # Single char
            b"Hello, World!",  # Normal string
            bytes(range(256)),  # All byte values
            b"x" * 10000,  # Larger content
        ]
        hex_alphabet = set("0123456789abcdef")

        for sample in samples:
            digest = compute_sha256(sample)

            assert len(digest) == 64, (
                f"Hash length should be 64, got {len(digest)}"
            )
            assert digest == digest.lower(), "Hash should be lowercase"
            assert set(digest) <= hex_alphabet, "Hash should be hex"

    @pytest.mark.unit
    def test_sha256_known_value(self):
        """A known input produces its published digest."""
        assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_sha256_binary_content(self):
        """Binary content (including NUL bytes) hashes correctly."""
        assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY

        nul_laden = b"\x00\x00test\x00\x00"
        assert len(compute_sha256(nul_laden)) == 64

    @pytest.mark.unit
    def test_sha256_streaming_computation(self):
        """Chunked (streaming) hashing equals one-shot hashing."""
        block = 8192
        payload = b"x" * (block * 10)  # 80KB

        one_shot = compute_sha256(payload)

        hasher = hashlib.sha256()
        reader = io.BytesIO(payload)
        while chunk := reader.read(block):
            hasher.update(chunk)

        assert hasher.hexdigest() == one_shot

    @pytest.mark.unit
    def test_sha256_order_matters(self):
        """Byte order changes the digest (not just the byte multiset)."""
        assert compute_sha256(b"AB") != compute_sha256(b"BA")


# =============================================================================
# Storage Hash Computation Tests
# =============================================================================


class TestStorageHashComputation:
    """Tests for hash computation in the storage layer."""

    @pytest.mark.unit
    def test_storage_computes_sha256(self, mock_storage):
        """Storage layer reports the correct SHA256 digest."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))

        assert outcome.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_storage_computes_md5(self, mock_storage):
        """Storage layer also reports an MD5 digest."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))

        assert outcome.md5 == hashlib.md5(TEST_CONTENT_HELLO).hexdigest()

    @pytest.mark.unit
    def test_storage_computes_sha1(self, mock_storage):
        """Storage layer also reports a SHA1 digest."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))

        assert outcome.sha1 == hashlib.sha1(TEST_CONTENT_HELLO).hexdigest()

    @pytest.mark.unit
    def test_storage_returns_correct_size(self, mock_storage):
        """Storage layer reports the exact byte count."""
        payload = b"test content with known size"

        outcome = mock_storage._store_simple(io.BytesIO(payload))

        assert outcome.size == len(payload)

    @pytest.mark.unit
    def test_storage_generates_correct_s3_key(self, mock_storage):
        """Storage layer derives fruits/{h[:2]}/{h[2:4]}/{h} from the digest."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))

        assert outcome.s3_key == _expected_key(TEST_HASH_HELLO)


# =============================================================================
# Hash Edge Cases
# =============================================================================


class TestHashEdgeCases:
    """Edge case tests for hash computation."""

    @pytest.mark.unit
    def test_hash_empty_content_rejected(self, mock_storage):
        """Empty content is refused with HashComputationError."""
        from app.storage import HashComputationError

        with pytest.raises(HashComputationError):
            mock_storage._store_simple(io.BytesIO(b""))

    @pytest.mark.unit
    def test_hash_large_file_streaming(self, mock_storage):
        """A 10MB file is hashed correctly end to end."""
        payload = b"x" * (10 * 1024 * 1024)

        outcome = mock_storage._store_simple(io.BytesIO(payload))

        assert outcome.sha256 == compute_sha256(payload)

    @pytest.mark.unit
    def test_hash_special_bytes(self):
        """All 256 byte values hash to the known reference digest."""
        digest = compute_sha256(bytes(range(256)))

        assert len(digest) == 64
        assert digest == TEST_HASH_BINARY


# =============================================================================
# S3 Existence Check Tests
# =============================================================================


class TestExistsMethod:
    """Tests for the _exists() method that checks S3 object existence."""

    @pytest.mark.unit
    def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client):
        """_exists() is True for a pre-populated key."""
        known_key = "fruits/df/fd/test-hash"
        mock_s3_client.objects[known_key] = b"content"

        assert mock_storage._exists(known_key) is True

    @pytest.mark.unit
    def test_exists_returns_false_for_nonexistent_key(self, mock_storage):
        """_exists() is False when the object is absent."""
        assert mock_storage._exists("fruits/no/ne/nonexistent-key") is False

    @pytest.mark.unit
    def test_exists_handles_404_error(self, mock_storage):
        """_exists() converts the mock client's 404 ClientError into False."""
        assert mock_storage._exists("fruits/xx/yy/does-not-exist") is False


# =============================================================================
# S3 Key Generation Tests
# =============================================================================


class TestS3KeyGeneration:
    """Tests for S3 key pattern generation."""

    @pytest.mark.unit
    def test_s3_key_pattern(self):
        """Key pattern is fruits/{hash[:2]}/{hash[2:4]}/{hash}."""
        sample_digest = "abcdef1234567890" * 4  # 64 hex chars

        assert _expected_key(sample_digest) == f"fruits/ab/cd/{sample_digest}"

    @pytest.mark.unit
    def test_s3_key_generation_in_storage(self, mock_storage):
        """Storage layer emits the expected key for a known digest."""
        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))

        assert outcome.s3_key == _expected_key(TEST_HASH_HELLO)

    @pytest.mark.unit
    def test_s3_key_uses_sha256_hash(self, mock_storage):
        """The key embeds the SHA256 digest of the content."""
        payload = b"unique test content for key test"
        digest = compute_sha256(payload)

        outcome = mock_storage._store_simple(io.BytesIO(payload))

        assert digest in outcome.s3_key


# =============================================================================
# Deduplication Behavior Tests
# =============================================================================


class TestDeduplicationBehavior:
    """Tests for deduplication (skip upload when exists)."""

    @pytest.mark.unit
    def test_skips_upload_when_exists(self, mock_storage, mock_s3_client):
        """No put_object call is made when the artifact already exists."""
        existing_key = _expected_key(TEST_HASH_HELLO)
        mock_s3_client.objects[existing_key] = TEST_CONTENT_HELLO

        # Wrap put_object so every invocation is recorded.
        real_put = mock_s3_client.put_object
        recorded_calls = []

        def counting_put(*args, **kwargs):
            recorded_calls.append(True)
            return real_put(*args, **kwargs)

        mock_s3_client.put_object = counting_put

        outcome = mock_storage._store_simple(io.BytesIO(TEST_CONTENT_HELLO))

        # Deduplication: the upload must have been skipped entirely.
        assert len(recorded_calls) == 0
        assert outcome.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_uploads_when_not_exists(self, mock_storage, mock_s3_client):
        """A genuinely new artifact is written to S3."""
        payload = b"brand new unique content"
        target_key = _expected_key(compute_sha256(payload))

        # Precondition: nothing stored under this key yet.
        assert target_key not in mock_s3_client.objects

        mock_storage._store_simple(io.BytesIO(payload))

        # The object must now exist with exactly the uploaded bytes.
        assert mock_s3_client.objects.get(target_key) == payload

    @pytest.mark.unit
    def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client):
        """Storing the same bytes twice yields identical hash and key."""
        payload = b"content to be stored twice"

        first = mock_storage._store_simple(io.BytesIO(payload))
        second = mock_storage._store_simple(io.BytesIO(payload))

        assert (first.sha256, first.s3_key) == (second.sha256, second.s3_key)

    @pytest.mark.unit
    def test_different_content_different_keys(self, mock_storage):
        """Different bytes produce different hashes and keys."""
        first = mock_storage._store_simple(io.BytesIO(b"first content"))
        second = mock_storage._store_simple(io.BytesIO(b"second content"))

        assert first.sha256 != second.sha256
        assert first.s3_key != second.s3_key


# =============================================================================
# Deduplication Edge Cases
# =============================================================================


class TestDeduplicationEdgeCases:
    """Edge case tests for deduplication."""

    @pytest.mark.unit
    def test_same_content_different_filenames(self, mock_storage):
        """Identical content dedupes regardless of associated filename."""
        payload = b"identical content"

        first = mock_storage._store_simple(io.BytesIO(payload))
        second = mock_storage._store_simple(io.BytesIO(payload))

        # Content-addressable: same bytes, same hash.
        assert first.sha256 == second.sha256

    @pytest.mark.unit
    def test_whitespace_only_difference(self, mock_storage):
        """Whitespace-only differences still yield distinct hashes."""
        variants = (
            b"test content",
            b"test  content",  # Extra space
            b"test content ",  # Trailing space
        )

        digests = {
            mock_storage._store_simple(io.BytesIO(v)).sha256 for v in variants
        }

        # All three must differ (content-addressable).
        assert len(digests) == 3
-- Migration 004: Project and Package History Tables
-- Adds history tracking tables for project and package metadata changes.
--
-- Note on changed_by: current_setting('app.current_user', true) returns NULL
-- only when the custom GUC has never been defined; if the application set it
-- to '' (or it was reset to an empty string), current_setting returns '' and
-- a plain COALESCE would record an empty author.  NULLIF(..., '') normalizes
-- that case to the 'system' fallback.

-- ============================================
-- Project History Table
-- ============================================
CREATE TABLE IF NOT EXISTS project_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    field_name VARCHAR(100) NOT NULL,
    old_value TEXT,
    new_value TEXT,
    changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    changed_by VARCHAR(255) NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_project_history_project_id ON project_history(project_id);
CREATE INDEX IF NOT EXISTS idx_project_history_changed_at ON project_history(changed_at);
CREATE INDEX IF NOT EXISTS idx_project_history_project_changed_at ON project_history(project_id, changed_at);

-- ============================================
-- Package History Table
-- ============================================
CREATE TABLE IF NOT EXISTS package_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    package_id UUID NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
    field_name VARCHAR(100) NOT NULL,
    old_value TEXT,
    new_value TEXT,
    changed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    changed_by VARCHAR(255) NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_package_history_package_id ON package_history(package_id);
CREATE INDEX IF NOT EXISTS idx_package_history_changed_at ON package_history(changed_at);
CREATE INDEX IF NOT EXISTS idx_package_history_package_changed_at ON package_history(package_id, changed_at);

-- ============================================
-- Project Update Trigger
-- ============================================
CREATE OR REPLACE FUNCTION log_project_changes()
RETURNS TRIGGER AS $$
BEGIN
    -- Log description change
    IF OLD.description IS DISTINCT FROM NEW.description THEN
        INSERT INTO project_history (project_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'description', OLD.description, NEW.description,
                COALESCE(NULLIF(current_setting('app.current_user', true), ''), 'system'));
    END IF;

    -- Log is_public change
    IF OLD.is_public IS DISTINCT FROM NEW.is_public THEN
        INSERT INTO project_history (project_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'is_public', OLD.is_public::text, NEW.is_public::text,
                COALESCE(NULLIF(current_setting('app.current_user', true), ''), 'system'));
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS project_changes_trigger ON projects;
CREATE TRIGGER project_changes_trigger
    AFTER UPDATE ON projects
    FOR EACH ROW
    EXECUTE FUNCTION log_project_changes();

-- ============================================
-- Package Update Trigger
-- ============================================
CREATE OR REPLACE FUNCTION log_package_changes()
RETURNS TRIGGER AS $$
BEGIN
    -- Log description change
    IF OLD.description IS DISTINCT FROM NEW.description THEN
        INSERT INTO package_history (package_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'description', OLD.description, NEW.description,
                COALESCE(NULLIF(current_setting('app.current_user', true), ''), 'system'));
    END IF;

    -- Log format change
    IF OLD.format IS DISTINCT FROM NEW.format THEN
        INSERT INTO package_history (package_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'format', OLD.format, NEW.format,
                COALESCE(NULLIF(current_setting('app.current_user', true), ''), 'system'));
    END IF;

    -- Log platform change
    IF OLD.platform IS DISTINCT FROM NEW.platform THEN
        INSERT INTO package_history (package_id, field_name, old_value, new_value, changed_by)
        VALUES (NEW.id, 'platform', OLD.platform, NEW.platform,
                COALESCE(NULLIF(current_setting('app.current_user', true), ''), 'system'));
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS package_changes_trigger ON packages;
CREATE TRIGGER package_changes_trigger
    AFTER UPDATE ON packages
    FOR EACH ROW
    EXECUTE FUNCTION log_package_changes();
-- Migration 005: Upload Workflow Enhancements
-- Adds status tracking and error handling for uploads

-- ============================================
-- Add status column to uploads table
-- ============================================
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'uploads' AND column_name = 'status') THEN
        ALTER TABLE uploads ADD COLUMN status VARCHAR(20) DEFAULT 'completed' NOT NULL;
    END IF;
END $$;

-- ============================================
-- Add error_message column for failed uploads
-- ============================================
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'uploads' AND column_name = 'error_message') THEN
        ALTER TABLE uploads ADD COLUMN error_message TEXT;
    END IF;
END $$;

-- ============================================
-- Add client_checksum column for verification
-- ============================================
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'uploads' AND column_name = 'client_checksum') THEN
        ALTER TABLE uploads ADD COLUMN client_checksum VARCHAR(64);
    END IF;
END $$;

-- ============================================
-- Add indexes for upload status queries
-- ============================================
CREATE INDEX IF NOT EXISTS idx_uploads_status ON uploads(status);
CREATE INDEX IF NOT EXISTS idx_uploads_status_uploaded_at ON uploads(status, uploaded_at);

-- ============================================
-- Add constraint to validate status values
-- ============================================
-- Probe pg_constraint (scoped to the uploads table via conrelid) instead of
-- information_schema.constraint_column_usage: that view only contains rows
-- for tables owned by the current role, so the previous guard could miss an
-- existing constraint and make this migration fail on re-run.
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM pg_constraint
                   WHERE conname = 'check_upload_status'
                     AND conrelid = 'uploads'::regclass) THEN
        ALTER TABLE uploads ADD CONSTRAINT check_upload_status
            CHECK (status IN ('pending', 'completed', 'failed'));
    END IF;
END $$;

-- ============================================
-- Create table for tracking in-progress uploads (for 409 conflict detection)
-- ============================================
CREATE TABLE IF NOT EXISTS upload_locks (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    sha256_hash VARCHAR(64) NOT NULL,
    package_id UUID NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
    locked_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    locked_by VARCHAR(255) NOT NULL,
    expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
    UNIQUE(sha256_hash, package_id)
);

CREATE INDEX IF NOT EXISTS idx_upload_locks_expires_at ON upload_locks(expires_at);
CREATE INDEX IF NOT EXISTS idx_upload_locks_hash_package ON upload_locks(sha256_hash, package_id);

-- ============================================
-- Function to clean up expired upload locks
-- ============================================
CREATE OR REPLACE FUNCTION cleanup_expired_upload_locks()
RETURNS INTEGER AS $$
DECLARE
    deleted_count INTEGER;
BEGIN
    DELETE FROM upload_locks WHERE expires_at < NOW();
    GET DIAGNOSTICS deleted_count = ROW_COUNT;
    RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;