Metadata database tracks all uploads with project, package, tag, and timestamp queryable via API

This commit is contained in:
Mondo Diaz
2026-01-07 12:31:44 -06:00
parent 81458b3bcb
commit 2f1891cf01
24 changed files with 5044 additions and 2123 deletions

View File

@@ -4,15 +4,14 @@ Test configuration and fixtures for Orchard backend tests.
This module provides:
- Database fixtures with test isolation
- Mock S3 storage using moto
- Test data factories for common scenarios
- Shared pytest fixtures
"""
import os
import pytest
import hashlib
from typing import Generator, BinaryIO
from unittest.mock import MagicMock, patch
import io
from typing import Generator
from unittest.mock import MagicMock
# Set test environment defaults before importing app modules
# Use setdefault to NOT override existing env vars (from docker-compose)
@@ -26,54 +25,27 @@ os.environ.setdefault("ORCHARD_S3_BUCKET", "test-bucket")
os.environ.setdefault("ORCHARD_S3_ACCESS_KEY_ID", "test")
os.environ.setdefault("ORCHARD_S3_SECRET_ACCESS_KEY", "test")
# =============================================================================
# Test Data Factories
# =============================================================================
def create_test_file(content: bytes = None, size: int = 1024) -> io.BytesIO:
"""
Create a test file with known content.
Args:
content: Specific content to use, or None to generate random-ish content
size: Size of generated content if content is None
Returns:
BytesIO object with the content
"""
if content is None:
content = os.urandom(size)
return io.BytesIO(content)
def compute_sha256(content: bytes) -> str:
"""Compute SHA256 hash of content as lowercase hex string."""
return hashlib.sha256(content).hexdigest()
def compute_md5(content: bytes) -> str:
"""Compute MD5 hash of content as lowercase hex string."""
return hashlib.md5(content).hexdigest()
def compute_sha1(content: bytes) -> str:
"""Compute SHA1 hash of content as lowercase hex string."""
return hashlib.sha1(content).hexdigest()
# Known test data with pre-computed hashes
TEST_CONTENT_HELLO = b"Hello, World!"
TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4"
TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01"
TEST_CONTENT_EMPTY = b""
# Note: Empty content should be rejected by the storage layer
TEST_CONTENT_BINARY = bytes(range(256))
TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY)
# Re-export factory functions for backward compatibility
from tests.factories import (
create_test_file,
compute_sha256,
compute_md5,
compute_sha1,
upload_test_file,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_MD5_HELLO,
TEST_SHA1_HELLO,
TEST_CONTENT_EMPTY,
TEST_CONTENT_BINARY,
TEST_HASH_BINARY,
get_s3_client,
get_s3_bucket,
list_s3_objects_by_hash,
count_s3_objects_by_prefix,
s3_object_exists,
delete_s3_object_by_hash,
)
# =============================================================================
@@ -289,126 +261,3 @@ def test_content():
content = f"test-content-{uuid.uuid4().hex}".encode()
sha256 = compute_sha256(content)
return (content, sha256)
def upload_test_file(
client,
project: str,
package: str,
content: bytes,
filename: str = "test.bin",
tag: str = None,
) -> dict:
"""
Helper function to upload a test file.
Returns the upload response as a dict.
"""
files = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
data = {}
if tag:
data["tag"] = tag
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data=data if data else None,
)
assert response.status_code == 200, f"Upload failed: {response.text}"
return response.json()
# =============================================================================
# S3 Direct Access Helpers (for integration tests)
# =============================================================================
def get_s3_client():
"""
Create a boto3 S3 client for direct S3 access in integration tests.
Uses environment variables for configuration (same as the app).
Note: When running in container, S3 endpoint should be 'minio:9000' not 'localhost:9000'.
"""
import boto3
from botocore.config import Config
config = Config(s3={"addressing_style": "path"})
# Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally)
endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000")
return boto3.client(
"s3",
endpoint_url=endpoint,
region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"),
aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"),
aws_secret_access_key=os.environ.get(
"ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin"
),
config=config,
)
def get_s3_bucket():
"""Get the S3 bucket name from environment."""
return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts")
def list_s3_objects_by_hash(sha256_hash: str) -> list:
"""
List S3 objects that match a specific SHA256 hash.
Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern.
Returns list of matching object keys.
"""
client = get_s3_client()
bucket = get_s3_bucket()
prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
return []
return [obj["Key"] for obj in response["Contents"]]
def count_s3_objects_by_prefix(prefix: str) -> int:
"""
Count S3 objects with a given prefix.
Useful for checking if duplicate uploads created multiple objects.
"""
client = get_s3_client()
bucket = get_s3_bucket()
response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
return 0
return len(response["Contents"])
def s3_object_exists(sha256_hash: str) -> bool:
"""
Check if an S3 object exists for a given SHA256 hash.
"""
objects = list_s3_objects_by_hash(sha256_hash)
return len(objects) > 0
def delete_s3_object_by_hash(sha256_hash: str) -> bool:
"""
Delete an S3 object by its SHA256 hash (for test cleanup).
"""
client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
try:
client.delete_object(Bucket=bucket, Key=s3_key)
return True
except Exception:
return False

288
backend/tests/factories.py Normal file
View File

@@ -0,0 +1,288 @@
"""
Test data factories for Orchard backend tests.
This module provides factory functions for creating test data,
including test files, pre-computed hashes, and helper utilities.
"""
import hashlib
import io
import os
import uuid
from typing import Optional
# =============================================================================
# Hash Computation Utilities
# =============================================================================
def compute_sha256(content: bytes) -> str:
"""Compute SHA256 hash of content as lowercase hex string."""
return hashlib.sha256(content).hexdigest()
def compute_md5(content: bytes) -> str:
"""Compute MD5 hash of content as lowercase hex string."""
return hashlib.md5(content).hexdigest()
def compute_sha1(content: bytes) -> str:
"""Compute SHA1 hash of content as lowercase hex string."""
return hashlib.sha1(content).hexdigest()
# =============================================================================
# Test File Factories
# =============================================================================
def create_test_file(content: Optional[bytes] = None, size: int = 1024) -> io.BytesIO:
"""
Create a test file with known content.
Args:
content: Specific content to use, or None to generate random-ish content
size: Size of generated content if content is None
Returns:
BytesIO object with the content
"""
if content is None:
content = os.urandom(size)
return io.BytesIO(content)
def create_unique_content(prefix: str = "test-content") -> tuple[bytes, str]:
"""
Create unique test content with its SHA256 hash.
Args:
prefix: Prefix for the content string
Returns:
Tuple of (content_bytes, sha256_hash)
"""
content = f"{prefix}-{uuid.uuid4().hex}".encode()
sha256 = compute_sha256(content)
return content, sha256
# =============================================================================
# Known Test Data (Pre-computed hashes for deterministic tests)
# =============================================================================
TEST_CONTENT_HELLO = b"Hello, World!"
TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4"
TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01"
TEST_CONTENT_EMPTY = b""
# Note: Empty content should be rejected by the storage layer
TEST_CONTENT_BINARY = bytes(range(256))
TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY)
# =============================================================================
# API Test Helpers
# =============================================================================
def upload_test_file(
client,
project: str,
package: str,
content: bytes,
filename: str = "test.bin",
tag: Optional[str] = None,
) -> dict:
"""
Helper function to upload a test file via the API.
Args:
client: HTTP client (httpx or TestClient)
project: Project name
package: Package name
content: File content as bytes
filename: Original filename
tag: Optional tag to assign
Returns:
The upload response as a dict
"""
files = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
data = {}
if tag:
data["tag"] = tag
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data=data if data else None,
)
assert response.status_code == 200, f"Upload failed: {response.text}"
return response.json()
# =============================================================================
# Project/Package Factories
# =============================================================================
def create_test_project(client, unique_id: Optional[str] = None) -> str:
"""
Create a test project via the API.
Args:
client: HTTP client
unique_id: Unique identifier for the project name
Returns:
Project name
"""
if unique_id is None:
unique_id = uuid.uuid4().hex[:8]
project_name = f"test-project-{unique_id}"
response = client.post(
"/api/v1/projects",
json={"name": project_name, "description": "Test project", "is_public": True},
)
assert response.status_code == 200, f"Failed to create project: {response.text}"
return project_name
def create_test_package(client, project: str, unique_id: Optional[str] = None) -> str:
"""
Create a test package via the API.
Args:
client: HTTP client
project: Project name
unique_id: Unique identifier for the package name
Returns:
Package name
"""
if unique_id is None:
unique_id = uuid.uuid4().hex[:8]
package_name = f"test-package-{unique_id}"
response = client.post(
f"/api/v1/project/{project}/packages",
json={"name": package_name, "description": "Test package"},
)
assert response.status_code == 200, f"Failed to create package: {response.text}"
return package_name
def delete_test_project(client, project: str) -> None:
"""
Delete a test project (cleanup helper).
Args:
client: HTTP client
project: Project name to delete
"""
try:
client.delete(f"/api/v1/projects/{project}")
except Exception:
pass # Ignore cleanup errors
# =============================================================================
# S3 Test Helpers
# =============================================================================
def get_s3_client():
"""
Create a boto3 S3 client for direct S3 access in integration tests.
Uses environment variables for configuration (same as the app).
Note: When running in container, S3 endpoint should be 'minio:9000' not 'localhost:9000'.
"""
import boto3
from botocore.config import Config
config = Config(s3={"addressing_style": "path"})
# Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally)
endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000")
return boto3.client(
"s3",
endpoint_url=endpoint,
region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"),
aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"),
aws_secret_access_key=os.environ.get(
"ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin"
),
config=config,
)
def get_s3_bucket() -> str:
"""Get the S3 bucket name from environment."""
return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts")
def list_s3_objects_by_hash(sha256_hash: str) -> list:
"""
List S3 objects that match a specific SHA256 hash.
Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern.
Returns list of matching object keys.
"""
client = get_s3_client()
bucket = get_s3_bucket()
prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
return []
return [obj["Key"] for obj in response["Contents"]]
def count_s3_objects_by_prefix(prefix: str) -> int:
"""
Count S3 objects with a given prefix.
Useful for checking if duplicate uploads created multiple objects.
"""
client = get_s3_client()
bucket = get_s3_bucket()
response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
if "Contents" not in response:
return 0
return len(response["Contents"])
def s3_object_exists(sha256_hash: str) -> bool:
"""
Check if an S3 object exists for a given SHA256 hash.
"""
objects = list_s3_objects_by_hash(sha256_hash)
return len(objects) > 0
def delete_s3_object_by_hash(sha256_hash: str) -> bool:
"""
Delete an S3 object by its SHA256 hash (for test cleanup).
"""
client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"
try:
client.delete_object(Bucket=bucket, Key=s3_key)
return True
except Exception:
return False

View File

View File

@@ -0,0 +1,638 @@
"""
Integration tests for artifact API endpoints.
Tests cover:
- Artifact retrieval by ID
- Artifact stats endpoint
- Artifact provenance/history
- Artifact uploads listing
- Garbage collection endpoints
- Orphaned artifacts management
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestArtifactRetrieval:
"""Tests for artifact retrieval endpoints."""
@pytest.mark.integration
def test_get_artifact_by_id(self, integration_client, test_package):
"""Test retrieving an artifact by its SHA256 ID."""
project_name, package_name = test_package
content = b"artifact retrieval test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package_name, content, tag="v1"
)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
data = response.json()
assert data["id"] == expected_hash
assert data["sha256"] == expected_hash
assert data["size"] == len(content)
assert "ref_count" in data
assert "created_at" in data
@pytest.mark.integration
def test_get_nonexistent_artifact(self, integration_client):
"""Test getting a non-existent artifact returns 404."""
fake_hash = "a" * 64
response = integration_client.get(f"/api/v1/artifact/{fake_hash}")
assert response.status_code == 404
@pytest.mark.integration
def test_artifact_includes_tags(self, integration_client, test_package):
"""Test artifact response includes tags pointing to it."""
project_name, package_name = test_package
content = b"artifact with tags test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package_name, content, tag="tagged-v1"
)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
data = response.json()
assert "tags" in data
assert len(data["tags"]) >= 1
tag = data["tags"][0]
assert "name" in tag
assert "package_name" in tag
assert "project_name" in tag
class TestArtifactStats:
"""Tests for artifact statistics endpoint."""
@pytest.mark.integration
def test_artifact_stats_returns_valid_response(
self, integration_client, test_package, unique_test_id
):
"""Test artifact stats returns expected fields."""
project, package = test_package
content = f"artifact stats test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag=f"art-{unique_test_id}"
)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
assert response.status_code == 200
data = response.json()
assert "artifact_id" in data
assert "sha256" in data
assert "size" in data
assert "ref_count" in data
assert "storage_savings" in data
assert "tags" in data
assert "projects" in data
assert "packages" in data
@pytest.mark.integration
def test_artifact_stats_not_found(self, integration_client):
"""Test artifact stats returns 404 for non-existent artifact."""
fake_hash = "0" * 64
response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
assert response.status_code == 404
@pytest.mark.integration
def test_artifact_stats_shows_correct_projects(
self, integration_client, unique_test_id
):
"""Test artifact stats shows all projects using the artifact."""
content = f"multi-project artifact {unique_test_id}".encode()
expected_hash = compute_sha256(content)
proj1 = f"art-stats-a-{unique_test_id}"
proj2 = f"art-stats-b-{unique_test_id}"
try:
# Create projects and packages
integration_client.post(
"/api/v1/projects",
json={"name": proj1, "description": "Test", "is_public": True},
)
integration_client.post(
"/api/v1/projects",
json={"name": proj2, "description": "Test", "is_public": True},
)
integration_client.post(
f"/api/v1/project/{proj1}/packages",
json={"name": "pkg", "description": "Test"},
)
integration_client.post(
f"/api/v1/project/{proj2}/packages",
json={"name": "pkg", "description": "Test"},
)
# Upload same content to both projects
upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
upload_test_file(integration_client, proj2, "pkg", content, tag="v1")
# Check artifact stats
response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
assert response.status_code == 200
data = response.json()
assert len(data["projects"]) == 2
assert proj1 in data["projects"]
assert proj2 in data["projects"]
finally:
integration_client.delete(f"/api/v1/projects/{proj1}")
integration_client.delete(f"/api/v1/projects/{proj2}")
class TestArtifactProvenance:
"""Tests for artifact provenance/history endpoint."""
@pytest.mark.integration
def test_artifact_history_returns_200(self, integration_client, test_package):
"""Test artifact history endpoint returns 200."""
project_name, package_name = test_package
upload_result = upload_test_file(
integration_client,
project_name,
package_name,
b"provenance test content",
"prov.txt",
)
artifact_id = upload_result["artifact_id"]
response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
assert response.status_code == 200
@pytest.mark.integration
def test_artifact_history_has_required_fields(
self, integration_client, test_package
):
"""Test artifact history has all required fields."""
project_name, package_name = test_package
upload_result = upload_test_file(
integration_client,
project_name,
package_name,
b"provenance fields test",
"fields.txt",
)
artifact_id = upload_result["artifact_id"]
response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
assert response.status_code == 200
data = response.json()
assert "artifact_id" in data
assert "sha256" in data
assert "size" in data
assert "created_at" in data
assert "created_by" in data
assert "ref_count" in data
assert "first_uploaded_at" in data
assert "first_uploaded_by" in data
assert "upload_count" in data
assert "packages" in data
assert "tags" in data
assert "uploads" in data
@pytest.mark.integration
def test_artifact_history_not_found(self, integration_client):
"""Test non-existent artifact returns 404."""
fake_hash = "b" * 64
response = integration_client.get(f"/api/v1/artifact/{fake_hash}/history")
assert response.status_code == 404
@pytest.mark.integration
def test_artifact_history_with_tag(self, integration_client, test_package):
"""Test artifact history includes tag information when tagged."""
project_name, package_name = test_package
upload_result = upload_test_file(
integration_client,
project_name,
package_name,
b"tagged provenance test",
"tagged.txt",
tag="v1.0.0",
)
artifact_id = upload_result["artifact_id"]
response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
assert response.status_code == 200
data = response.json()
assert len(data["tags"]) >= 1
tag = data["tags"][0]
assert "project_name" in tag
assert "package_name" in tag
assert "tag_name" in tag
class TestArtifactUploads:
"""Tests for artifact uploads listing endpoint."""
@pytest.mark.integration
def test_artifact_uploads_returns_200(self, integration_client, test_package):
"""Test artifact uploads endpoint returns 200."""
project_name, package_name = test_package
upload_result = upload_test_file(
integration_client,
project_name,
package_name,
b"artifact upload test",
"artifact.txt",
)
artifact_id = upload_result["artifact_id"]
response = integration_client.get(f"/api/v1/artifact/{artifact_id}/uploads")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
assert len(data["items"]) >= 1
@pytest.mark.integration
def test_artifact_uploads_not_found(self, integration_client):
"""Test non-existent artifact returns 404."""
fake_hash = "a" * 64
response = integration_client.get(f"/api/v1/artifact/{fake_hash}/uploads")
assert response.status_code == 404
class TestOrphanedArtifacts:
"""Tests for orphaned artifacts management."""
@pytest.mark.integration
def test_list_orphaned_artifacts_returns_list(self, integration_client):
"""Test orphaned artifacts endpoint returns a list."""
response = integration_client.get("/api/v1/admin/orphaned-artifacts")
assert response.status_code == 200
assert isinstance(response.json(), list)
@pytest.mark.integration
def test_orphaned_artifact_has_required_fields(self, integration_client):
"""Test orphaned artifact response has required fields."""
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
assert response.status_code == 200
data = response.json()
if len(data) > 0:
artifact = data[0]
assert "id" in artifact
assert "size" in artifact
assert "created_at" in artifact
assert "created_by" in artifact
assert "original_name" in artifact
@pytest.mark.integration
def test_orphaned_artifacts_respects_limit(self, integration_client):
"""Test orphaned artifacts endpoint respects limit parameter."""
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
assert response.status_code == 200
assert len(response.json()) <= 5
@pytest.mark.integration
def test_artifact_becomes_orphaned_when_tag_deleted(
self, integration_client, test_package, unique_test_id
):
"""Test artifact appears in orphaned list after tag is deleted."""
project, package = test_package
content = f"orphan test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload with tag
upload_test_file(integration_client, project, package, content, tag="temp-tag")
# Verify not in orphaned list
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
orphaned_ids = [a["id"] for a in response.json()]
assert expected_hash not in orphaned_ids
# Delete the tag
integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag")
# Verify now in orphaned list
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
orphaned_ids = [a["id"] for a in response.json()]
assert expected_hash in orphaned_ids
class TestGarbageCollection:
"""Tests for garbage collection endpoint."""
@pytest.mark.integration
def test_garbage_collect_dry_run_returns_response(self, integration_client):
"""Test garbage collection dry run returns valid response."""
response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
assert response.status_code == 200
data = response.json()
assert "artifacts_deleted" in data
assert "bytes_freed" in data
assert "artifact_ids" in data
assert "dry_run" in data
assert data["dry_run"] is True
@pytest.mark.integration
def test_garbage_collect_dry_run_doesnt_delete(
self, integration_client, test_package, unique_test_id
):
"""Test garbage collection dry run doesn't actually delete artifacts."""
project, package = test_package
content = f"dry run test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload and delete tag to create orphan
upload_test_file(integration_client, project, package, content, tag="dry-run")
integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")
# Verify artifact exists
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
# Run garbage collection in dry-run mode
gc_response = integration_client.post(
"/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
)
assert gc_response.status_code == 200
assert expected_hash in gc_response.json()["artifact_ids"]
# Verify artifact STILL exists
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
@pytest.mark.integration
def test_garbage_collect_preserves_referenced_artifacts(
self, integration_client, test_package, unique_test_id
):
"""Test garbage collection doesn't delete artifacts with ref_count > 0."""
project, package = test_package
content = f"preserve test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload with tag (ref_count=1)
upload_test_file(integration_client, project, package, content, tag="keep-this")
# Verify artifact exists with ref_count=1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == 1
# Run garbage collection (dry_run to not affect other tests)
gc_response = integration_client.post(
"/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
)
assert gc_response.status_code == 200
# Verify artifact was NOT in delete list
assert expected_hash not in gc_response.json()["artifact_ids"]
# Verify artifact still exists
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == 1
@pytest.mark.integration
def test_garbage_collect_respects_limit(self, integration_client):
"""Test garbage collection respects limit parameter."""
response = integration_client.post(
"/api/v1/admin/garbage-collect?dry_run=true&limit=5"
)
assert response.status_code == 200
assert response.json()["artifacts_deleted"] <= 5
@pytest.mark.integration
def test_garbage_collect_returns_bytes_freed(self, integration_client):
"""Test garbage collection returns accurate bytes_freed."""
response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
assert response.status_code == 200
data = response.json()
assert data["bytes_freed"] >= 0
assert isinstance(data["bytes_freed"], int)
class TestGlobalUploads:
"""Tests for global uploads endpoint."""
@pytest.mark.integration
def test_global_uploads_returns_200(self, integration_client):
"""Test global uploads endpoint returns 200."""
response = integration_client.get("/api/v1/uploads")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_global_uploads_pagination(self, integration_client):
"""Test global uploads endpoint respects pagination."""
response = integration_client.get("/api/v1/uploads?limit=5&page=1")
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
assert data["pagination"]["page"] == 1
@pytest.mark.integration
def test_global_uploads_filter_by_project(self, integration_client, test_package):
"""Test filtering global uploads by project name."""
project_name, package_name = test_package
# Upload a file
upload_test_file(
integration_client,
project_name,
package_name,
b"global filter test",
"global.txt",
)
response = integration_client.get(f"/api/v1/uploads?project={project_name}")
assert response.status_code == 200
data = response.json()
for item in data["items"]:
assert item["project_name"] == project_name
@pytest.mark.integration
def test_global_uploads_has_more_field(self, integration_client):
"""Test pagination includes has_more field."""
response = integration_client.get("/api/v1/uploads?limit=1")
assert response.status_code == 200
data = response.json()
assert "has_more" in data["pagination"]
assert isinstance(data["pagination"]["has_more"], bool)
class TestGlobalArtifacts:
"""Tests for global artifacts endpoint."""
@pytest.mark.integration
def test_global_artifacts_returns_200(self, integration_client):
"""Test global artifacts endpoint returns 200."""
response = integration_client.get("/api/v1/artifacts")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_global_artifacts_pagination(self, integration_client):
"""Test global artifacts endpoint respects pagination."""
response = integration_client.get("/api/v1/artifacts?limit=5&page=1")
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
@pytest.mark.integration
def test_global_artifacts_filter_by_size(self, integration_client):
"""Test filtering global artifacts by size range."""
response = integration_client.get(
"/api/v1/artifacts?min_size=1&max_size=1000000"
)
assert response.status_code == 200
data = response.json()
for item in data["items"]:
assert 1 <= item["size"] <= 1000000
@pytest.mark.integration
def test_global_artifacts_sort_by_size(self, integration_client):
"""Test sorting global artifacts by size."""
response = integration_client.get("/api/v1/artifacts?sort=size&order=desc")
assert response.status_code == 200
data = response.json()
if len(data["items"]) > 1:
sizes = [item["size"] for item in data["items"]]
assert sizes == sorted(sizes, reverse=True)
@pytest.mark.integration
def test_global_artifacts_invalid_sort_returns_400(self, integration_client):
"""Test invalid sort field returns 400."""
response = integration_client.get("/api/v1/artifacts?sort=invalid_field")
assert response.status_code == 400
class TestGlobalTags:
"""Tests for global tags endpoint."""
@pytest.mark.integration
def test_global_tags_returns_200(self, integration_client):
"""Test global tags endpoint returns 200."""
response = integration_client.get("/api/v1/tags")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_global_tags_pagination(self, integration_client):
"""Test global tags endpoint respects pagination."""
response = integration_client.get("/api/v1/tags?limit=5&page=1")
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
@pytest.mark.integration
def test_global_tags_has_project_context(self, integration_client):
"""Test global tags response includes project/package context."""
response = integration_client.get("/api/v1/tags?limit=1")
assert response.status_code == 200
data = response.json()
if len(data["items"]) > 0:
item = data["items"][0]
assert "project_name" in item
assert "package_name" in item
assert "artifact_id" in item
@pytest.mark.integration
def test_global_tags_search_with_wildcard(self, integration_client):
"""Test global tags search supports wildcards."""
response = integration_client.get("/api/v1/tags?search=v*")
assert response.status_code == 200
# Just verify it doesn't error; results may vary
class TestAuditLogs:
"""Tests for global audit logs endpoint."""
@pytest.mark.integration
def test_list_audit_logs_returns_valid_response(self, integration_client):
"""Test audit logs endpoint returns valid paginated response."""
response = integration_client.get("/api/v1/audit-logs")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
assert isinstance(data["items"], list)
pagination = data["pagination"]
assert "page" in pagination
assert "limit" in pagination
assert "total" in pagination
assert "total_pages" in pagination
@pytest.mark.integration
def test_audit_logs_respects_pagination(self, integration_client):
"""Test audit logs endpoint respects limit parameter."""
response = integration_client.get("/api/v1/audit-logs?limit=5")
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
@pytest.mark.integration
def test_audit_logs_filter_by_action(self, integration_client, test_package):
"""Test filtering audit logs by action type."""
project_name, package_name = test_package
response = integration_client.get("/api/v1/audit-logs?action=project.create")
assert response.status_code == 200
data = response.json()
for item in data["items"]:
assert item["action"] == "project.create"
@pytest.mark.integration
def test_audit_log_entry_has_required_fields(
self, integration_client, test_project
):
"""Test audit log entries have all required fields."""
response = integration_client.get("/api/v1/audit-logs?limit=10")
assert response.status_code == 200
data = response.json()
if data["items"]:
item = data["items"][0]
assert "id" in item
assert "action" in item
assert "resource" in item
assert "user_id" in item
assert "timestamp" in item

View File

@@ -0,0 +1,345 @@
"""
Integration tests for package API endpoints.
Tests cover:
- Package CRUD operations
- Package listing with pagination, search, filtering
- Package stats endpoint
- Package-level audit logs
- Cascade delete behavior
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestPackageCRUD:
"""Tests for package create, read, update, delete operations."""
@pytest.mark.integration
def test_create_package(self, integration_client, test_project, unique_test_id):
"""Test creating a new package."""
package_name = f"test-create-pkg-{unique_test_id}"
response = integration_client.post(
f"/api/v1/project/{test_project}/packages",
json={
"name": package_name,
"description": "Test package",
"format": "npm",
"platform": "linux",
},
)
assert response.status_code == 200
data = response.json()
assert data["name"] == package_name
assert data["description"] == "Test package"
assert data["format"] == "npm"
assert data["platform"] == "linux"
@pytest.mark.integration
def test_get_package(self, integration_client, test_package):
"""Test getting a package by name."""
project_name, package_name = test_package
response = integration_client.get(
f"/api/v1/project/{project_name}/packages/{package_name}"
)
assert response.status_code == 200
data = response.json()
assert data["name"] == package_name
@pytest.mark.integration
def test_get_nonexistent_package(self, integration_client, test_project):
"""Test getting a non-existent package returns 404."""
response = integration_client.get(
f"/api/v1/project/{test_project}/packages/nonexistent-pkg"
)
assert response.status_code == 404
@pytest.mark.integration
def test_list_packages(self, integration_client, test_package):
"""Test listing packages includes created package."""
project_name, package_name = test_package
response = integration_client.get(f"/api/v1/project/{project_name}/packages")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
package_names = [p["name"] for p in data["items"]]
assert package_name in package_names
@pytest.mark.integration
def test_delete_package(self, integration_client, test_project, unique_test_id):
"""Test deleting a package."""
package_name = f"test-delete-pkg-{unique_test_id}"
# Create package
integration_client.post(
f"/api/v1/project/{test_project}/packages",
json={"name": package_name, "description": "To be deleted"},
)
# Delete package
response = integration_client.delete(
f"/api/v1/project/{test_project}/packages/{package_name}"
)
assert response.status_code == 204
# Verify deleted
response = integration_client.get(
f"/api/v1/project/{test_project}/packages/{package_name}"
)
assert response.status_code == 404
class TestPackageListingFilters:
"""Tests for package listing with filters and pagination."""
@pytest.mark.integration
def test_packages_pagination(self, integration_client, test_project):
"""Test package listing respects pagination parameters."""
response = integration_client.get(
f"/api/v1/project/{test_project}/packages?page=1&limit=5"
)
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
assert data["pagination"]["page"] == 1
@pytest.mark.integration
def test_packages_filter_by_format(
self, integration_client, test_project, unique_test_id
):
"""Test package filtering by format."""
# Create a package with specific format
package_name = f"npm-pkg-{unique_test_id}"
integration_client.post(
f"/api/v1/project/{test_project}/packages",
json={"name": package_name, "format": "npm"},
)
response = integration_client.get(
f"/api/v1/project/{test_project}/packages?format=npm"
)
assert response.status_code == 200
data = response.json()
for pkg in data["items"]:
assert pkg["format"] == "npm"
@pytest.mark.integration
def test_packages_filter_by_platform(
self, integration_client, test_project, unique_test_id
):
"""Test package filtering by platform."""
# Create a package with specific platform
package_name = f"linux-pkg-{unique_test_id}"
integration_client.post(
f"/api/v1/project/{test_project}/packages",
json={"name": package_name, "platform": "linux"},
)
response = integration_client.get(
f"/api/v1/project/{test_project}/packages?platform=linux"
)
assert response.status_code == 200
data = response.json()
for pkg in data["items"]:
assert pkg["platform"] == "linux"
class TestPackageStats:
"""Tests for package statistics endpoint."""
@pytest.mark.integration
def test_package_stats_returns_valid_response(
self, integration_client, test_package
):
"""Test package stats endpoint returns expected fields."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/packages/{package}/stats"
)
assert response.status_code == 200
data = response.json()
assert "package_id" in data
assert "package_name" in data
assert "project_name" in data
assert "tag_count" in data
assert "artifact_count" in data
assert "total_size_bytes" in data
assert "upload_count" in data
assert "deduplicated_uploads" in data
assert "storage_saved_bytes" in data
assert "deduplication_ratio" in data
@pytest.mark.integration
def test_package_stats_not_found(self, integration_client, test_project):
"""Test package stats returns 404 for non-existent package."""
response = integration_client.get(
f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
)
assert response.status_code == 404
class TestPackageAuditLogs:
"""Tests for package-level audit logs endpoint."""
@pytest.mark.integration
def test_package_audit_logs_returns_200(self, integration_client, test_package):
"""Test package audit logs endpoint returns 200."""
project_name, package_name = test_package
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/audit-logs"
)
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_package_audit_logs_project_not_found(self, integration_client):
"""Test non-existent project returns 404."""
response = integration_client.get(
"/api/v1/project/nonexistent/nonexistent/audit-logs"
)
assert response.status_code == 404
@pytest.mark.integration
def test_package_audit_logs_package_not_found(
self, integration_client, test_project
):
"""Test non-existent package returns 404."""
response = integration_client.get(
f"/api/v1/project/{test_project}/nonexistent-package/audit-logs"
)
assert response.status_code == 404
class TestPackageCascadeDelete:
"""Tests for cascade delete behavior when deleting packages."""
@pytest.mark.integration
def test_ref_count_decrements_on_package_delete(
self, integration_client, unique_test_id
):
"""Test ref_count decrements for all tags when package is deleted."""
project_name = f"cascade-pkg-{unique_test_id}"
package_name = f"test-pkg-{unique_test_id}"
# Create project
response = integration_client.post(
"/api/v1/projects",
json={
"name": project_name,
"description": "Test project",
"is_public": True,
},
)
assert response.status_code == 200
# Create package
response = integration_client.post(
f"/api/v1/project/{project_name}/packages",
json={"name": package_name, "description": "Test package"},
)
assert response.status_code == 200
# Upload content with multiple tags
content = f"cascade delete test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package_name, content, tag="v1"
)
upload_test_file(
integration_client, project_name, package_name, content, tag="v2"
)
upload_test_file(
integration_client, project_name, package_name, content, tag="v3"
)
# Verify ref_count is 3
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 3
# Delete the package
delete_response = integration_client.delete(
f"/api/v1/project/{project_name}/packages/{package_name}"
)
assert delete_response.status_code == 204
# Verify ref_count is 0
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
# Cleanup
integration_client.delete(f"/api/v1/projects/{project_name}")
class TestPackageUploads:
"""Tests for package-level uploads endpoint."""
@pytest.mark.integration
def test_package_uploads_returns_200(self, integration_client, test_package):
"""Test package uploads endpoint returns 200."""
project_name, package_name = test_package
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/uploads"
)
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_package_uploads_after_upload(self, integration_client, test_package):
"""Test uploads are recorded after file upload."""
project_name, package_name = test_package
# Upload a file
upload_result = upload_test_file(
integration_client,
project_name,
package_name,
b"test upload content",
"test.txt",
)
assert upload_result["artifact_id"]
# Check uploads endpoint
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/uploads"
)
assert response.status_code == 200
data = response.json()
assert len(data["items"]) >= 1
# Verify upload record fields
upload = data["items"][0]
assert "artifact_id" in upload
assert "package_name" in upload
assert "project_name" in upload
assert "uploaded_at" in upload
assert "uploaded_by" in upload
@pytest.mark.integration
def test_package_uploads_project_not_found(self, integration_client):
"""Test non-existent project returns 404."""
response = integration_client.get(
"/api/v1/project/nonexistent/nonexistent/uploads"
)
assert response.status_code == 404

View File

@@ -0,0 +1,322 @@
"""
Integration tests for project API endpoints.
Tests cover:
- Project CRUD operations
- Project listing with pagination, search, and sorting
- Project stats endpoint
- Project-level audit logs
- Cascade delete behavior
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestProjectCRUD:
"""Tests for project create, read, update, delete operations."""
@pytest.mark.integration
def test_create_project(self, integration_client, unique_test_id):
"""Test creating a new project."""
project_name = f"test-create-{unique_test_id}"
try:
response = integration_client.post(
"/api/v1/projects",
json={
"name": project_name,
"description": "Test project",
"is_public": True,
},
)
assert response.status_code == 200
data = response.json()
assert data["name"] == project_name
assert data["description"] == "Test project"
assert data["is_public"] is True
assert "id" in data
assert "created_at" in data
finally:
integration_client.delete(f"/api/v1/projects/{project_name}")
@pytest.mark.integration
def test_get_project(self, integration_client, test_project):
"""Test getting a project by name."""
response = integration_client.get(f"/api/v1/projects/{test_project}")
assert response.status_code == 200
data = response.json()
assert data["name"] == test_project
@pytest.mark.integration
def test_get_nonexistent_project(self, integration_client):
"""Test getting a non-existent project returns 404."""
response = integration_client.get("/api/v1/projects/nonexistent-project-xyz")
assert response.status_code == 404
@pytest.mark.integration
def test_list_projects(self, integration_client, test_project):
"""Test listing projects includes created project."""
response = integration_client.get("/api/v1/projects")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
project_names = [p["name"] for p in data["items"]]
assert test_project in project_names
@pytest.mark.integration
def test_delete_project(self, integration_client, unique_test_id):
"""Test deleting a project."""
project_name = f"test-delete-{unique_test_id}"
# Create project
integration_client.post(
"/api/v1/projects",
json={"name": project_name, "description": "To be deleted"},
)
# Delete project
response = integration_client.delete(f"/api/v1/projects/{project_name}")
assert response.status_code == 204
# Verify deleted
response = integration_client.get(f"/api/v1/projects/{project_name}")
assert response.status_code == 404
class TestProjectListingFilters:
"""Tests for project listing with filters and pagination."""
@pytest.mark.integration
def test_projects_pagination(self, integration_client):
"""Test project listing respects pagination parameters."""
response = integration_client.get("/api/v1/projects?page=1&limit=5")
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
assert data["pagination"]["page"] == 1
assert "has_more" in data["pagination"]
@pytest.mark.integration
def test_projects_search(self, integration_client, test_project):
"""Test project search by name."""
# Search for our test project
response = integration_client.get(
f"/api/v1/projects?search={test_project[:10]}"
)
assert response.status_code == 200
data = response.json()
# Our project should be in results
project_names = [p["name"] for p in data["items"]]
assert test_project in project_names
@pytest.mark.integration
def test_projects_sort_by_name(self, integration_client):
"""Test project sorting by name."""
response = integration_client.get("/api/v1/projects?sort=name&order=asc")
assert response.status_code == 200
data = response.json()
names = [p["name"] for p in data["items"]]
assert names == sorted(names)
class TestProjectStats:
"""Tests for project statistics endpoint."""
@pytest.mark.integration
def test_project_stats_returns_valid_response(
self, integration_client, test_project
):
"""Test project stats endpoint returns expected fields."""
response = integration_client.get(f"/api/v1/projects/{test_project}/stats")
assert response.status_code == 200
data = response.json()
assert "project_id" in data
assert "project_name" in data
assert "package_count" in data
assert "tag_count" in data
assert "artifact_count" in data
assert "total_size_bytes" in data
assert "upload_count" in data
assert "deduplicated_uploads" in data
assert "storage_saved_bytes" in data
assert "deduplication_ratio" in data
@pytest.mark.integration
def test_project_stats_not_found(self, integration_client):
"""Test project stats returns 404 for non-existent project."""
response = integration_client.get("/api/v1/projects/nonexistent-project/stats")
assert response.status_code == 404
class TestProjectAuditLogs:
"""Tests for project-level audit logs endpoint."""
@pytest.mark.integration
def test_project_audit_logs_returns_200(self, integration_client, test_project):
"""Test project audit logs endpoint returns 200."""
response = integration_client.get(f"/api/v1/projects/{test_project}/audit-logs")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_project_audit_logs_not_found(self, integration_client):
"""Test non-existent project returns 404."""
response = integration_client.get(
"/api/v1/projects/nonexistent-project/audit-logs"
)
assert response.status_code == 404
class TestProjectCascadeDelete:
"""Tests for cascade delete behavior when deleting projects."""
@pytest.mark.integration
def test_project_delete_cascades_to_packages(
self, integration_client, unique_test_id
):
"""Test deleting project cascades to packages."""
project_name = f"cascade-proj-{unique_test_id}"
package_name = f"cascade-pkg-{unique_test_id}"
try:
# Create project and package
integration_client.post(
"/api/v1/projects",
json={"name": project_name, "description": "Test", "is_public": True},
)
integration_client.post(
f"/api/v1/project/{project_name}/packages",
json={"name": package_name, "description": "Test package"},
)
# Verify package exists
response = integration_client.get(
f"/api/v1/project/{project_name}/packages/{package_name}"
)
assert response.status_code == 200
# Delete project
integration_client.delete(f"/api/v1/projects/{project_name}")
# Verify project is deleted (and package with it)
response = integration_client.get(f"/api/v1/projects/{project_name}")
assert response.status_code == 404
except Exception:
# Cleanup if test fails
integration_client.delete(f"/api/v1/projects/{project_name}")
raise
@pytest.mark.integration
def test_ref_count_decrements_on_project_delete(
self, integration_client, unique_test_id
):
"""Test ref_count decrements for all tags when project is deleted."""
project_name = f"cascade-proj-{unique_test_id}"
package1_name = f"pkg1-{unique_test_id}"
package2_name = f"pkg2-{unique_test_id}"
# Create project
response = integration_client.post(
"/api/v1/projects",
json={
"name": project_name,
"description": "Test project",
"is_public": True,
},
)
assert response.status_code == 200
# Create two packages
for pkg_name in [package1_name, package2_name]:
response = integration_client.post(
f"/api/v1/project/{project_name}/packages",
json={"name": pkg_name, "description": "Test package"},
)
assert response.status_code == 200
# Upload same content with tags in both packages
content = f"project cascade test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package1_name, content, tag="v1"
)
upload_test_file(
integration_client, project_name, package1_name, content, tag="v2"
)
upload_test_file(
integration_client, project_name, package2_name, content, tag="latest"
)
upload_test_file(
integration_client, project_name, package2_name, content, tag="stable"
)
# Verify ref_count is 4 (2 tags in each of 2 packages)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 4
# Delete the project
delete_response = integration_client.delete(f"/api/v1/projects/{project_name}")
assert delete_response.status_code == 204
# Verify ref_count is 0
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
class TestProjectUploads:
"""Tests for project-level uploads endpoint."""
@pytest.mark.integration
def test_project_uploads_returns_200(self, integration_client, test_project):
"""Test project uploads endpoint returns 200."""
response = integration_client.get(f"/api/v1/project/{test_project}/uploads")
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
@pytest.mark.integration
def test_project_uploads_after_upload(self, integration_client, test_package):
"""Test uploads are recorded in project uploads."""
project_name, package_name = test_package
# Upload a file
upload_test_file(
integration_client,
project_name,
package_name,
b"project uploads test",
"project.txt",
)
response = integration_client.get(f"/api/v1/project/{project_name}/uploads")
assert response.status_code == 200
data = response.json()
assert len(data["items"]) >= 1
# Verify project name matches
for item in data["items"]:
assert item["project_name"] == project_name
@pytest.mark.integration
def test_project_uploads_not_found(self, integration_client):
"""Test non-existent project returns 404."""
response = integration_client.get("/api/v1/project/nonexistent/uploads")
assert response.status_code == 404

View File

@@ -0,0 +1,403 @@
"""
Integration tests for tag API endpoints.
Tests cover:
- Tag CRUD operations
- Tag listing with pagination and search
- Tag history tracking
- ref_count behavior with tag operations
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestTagCRUD:
"""Tests for tag create, read, delete operations."""
@pytest.mark.integration
def test_create_tag_via_upload(self, integration_client, test_package):
"""Test creating a tag via upload endpoint."""
project_name, package_name = test_package
result = upload_test_file(
integration_client,
project_name,
package_name,
b"tag create test",
tag="v1.0.0",
)
assert result["tag"] == "v1.0.0"
assert result["artifact_id"]
@pytest.mark.integration
def test_create_tag_via_post(
self, integration_client, test_package, unique_test_id
):
"""Test creating a tag via POST /tags endpoint."""
project_name, package_name = test_package
# First upload an artifact
result = upload_test_file(
integration_client,
project_name,
package_name,
b"artifact for tag",
)
artifact_id = result["artifact_id"]
# Create tag via POST
tag_name = f"post-tag-{unique_test_id}"
response = integration_client.post(
f"/api/v1/project/{project_name}/{package_name}/tags",
json={"name": tag_name, "artifact_id": artifact_id},
)
assert response.status_code == 200
data = response.json()
assert data["name"] == tag_name
assert data["artifact_id"] == artifact_id
@pytest.mark.integration
def test_get_tag(self, integration_client, test_package):
"""Test getting a tag by name."""
project_name, package_name = test_package
upload_test_file(
integration_client,
project_name,
package_name,
b"get tag test",
tag="get-tag",
)
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags/get-tag"
)
assert response.status_code == 200
data = response.json()
assert data["name"] == "get-tag"
assert "artifact_id" in data
assert "artifact_size" in data
assert "artifact_content_type" in data
@pytest.mark.integration
def test_list_tags(self, integration_client, test_package):
"""Test listing tags for a package."""
project_name, package_name = test_package
# Create some tags
upload_test_file(
integration_client,
project_name,
package_name,
b"list tags test",
tag="list-v1",
)
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags"
)
assert response.status_code == 200
data = response.json()
assert "items" in data
assert "pagination" in data
tag_names = [t["name"] for t in data["items"]]
assert "list-v1" in tag_names
@pytest.mark.integration
def test_delete_tag(self, integration_client, test_package):
"""Test deleting a tag."""
project_name, package_name = test_package
upload_test_file(
integration_client,
project_name,
package_name,
b"delete tag test",
tag="to-delete",
)
# Delete tag
response = integration_client.delete(
f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"
)
assert response.status_code == 204
# Verify deleted
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"
)
assert response.status_code == 404
class TestTagListingFilters:
"""Tests for tag listing with filters and search."""
@pytest.mark.integration
def test_tags_pagination(self, integration_client, test_package):
"""Test tag listing respects pagination."""
project_name, package_name = test_package
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags?limit=5"
)
assert response.status_code == 200
data = response.json()
assert len(data["items"]) <= 5
assert data["pagination"]["limit"] == 5
@pytest.mark.integration
def test_tags_search(self, integration_client, test_package, unique_test_id):
"""Test tag search by name."""
project_name, package_name = test_package
tag_name = f"searchable-{unique_test_id}"
upload_test_file(
integration_client,
project_name,
package_name,
b"search test",
tag=tag_name,
)
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags?search=searchable"
)
assert response.status_code == 200
data = response.json()
tag_names = [t["name"] for t in data["items"]]
assert tag_name in tag_names
class TestTagHistory:
"""Tests for tag history tracking."""
@pytest.mark.integration
def test_tag_history_on_create(self, integration_client, test_package):
"""Test tag history is created when tag is created."""
project_name, package_name = test_package
upload_test_file(
integration_client,
project_name,
package_name,
b"history create test",
tag="history-create",
)
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags/history-create/history"
)
assert response.status_code == 200
data = response.json()
assert len(data) >= 1
@pytest.mark.integration
def test_tag_history_on_update(
self, integration_client, test_package, unique_test_id
):
"""Test tag history is created when tag is updated."""
project_name, package_name = test_package
tag_name = f"history-update-{unique_test_id}"
# Create tag with first artifact
upload_test_file(
integration_client,
project_name,
package_name,
b"first content",
tag=tag_name,
)
# Update tag with second artifact
upload_test_file(
integration_client,
project_name,
package_name,
b"second content",
tag=tag_name,
)
response = integration_client.get(
f"/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history"
)
assert response.status_code == 200
data = response.json()
# Should have at least 2 history entries (create + update)
assert len(data) >= 2
class TestTagRefCount:
"""Tests for ref_count behavior with tag operations."""
@pytest.mark.integration
def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package):
"""Test ref_count decrements when a tag is deleted."""
project_name, package_name = test_package
content = b"ref count delete test"
expected_hash = compute_sha256(content)
# Upload with two tags
upload_test_file(
integration_client, project_name, package_name, content, tag="rc-v1"
)
upload_test_file(
integration_client, project_name, package_name, content, tag="rc-v2"
)
# Verify ref_count is 2
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 2
# Delete one tag
delete_response = integration_client.delete(
f"/api/v1/project/{project_name}/{package_name}/tags/rc-v1"
)
assert delete_response.status_code == 204
# Verify ref_count is now 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
@pytest.mark.integration
def test_ref_count_zero_after_all_tags_deleted(
self, integration_client, test_package
):
"""Test ref_count goes to 0 when all tags are deleted."""
project_name, package_name = test_package
content = b"orphan test content"
expected_hash = compute_sha256(content)
# Upload with one tag
upload_test_file(
integration_client, project_name, package_name, content, tag="only-tag"
)
# Delete the tag
integration_client.delete(
f"/api/v1/project/{project_name}/{package_name}/tags/only-tag"
)
# Verify ref_count is 0
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
@pytest.mark.integration
def test_ref_count_adjusts_on_tag_update(
self, integration_client, test_package, unique_test_id
):
"""Test ref_count adjusts when a tag is updated to point to different artifact."""
project_name, package_name = test_package
# Upload two different artifacts
content1 = f"artifact one {unique_test_id}".encode()
content2 = f"artifact two {unique_test_id}".encode()
hash1 = compute_sha256(content1)
hash2 = compute_sha256(content2)
# Upload first artifact with tag "latest"
upload_test_file(
integration_client, project_name, package_name, content1, tag="latest"
)
# Verify first artifact has ref_count 1
response = integration_client.get(f"/api/v1/artifact/{hash1}")
assert response.json()["ref_count"] == 1
# Upload second artifact with different tag
upload_test_file(
integration_client, project_name, package_name, content2, tag="stable"
)
# Now update "latest" tag to point to second artifact
upload_test_file(
integration_client, project_name, package_name, content2, tag="latest"
)
# Verify first artifact ref_count decreased to 0
response = integration_client.get(f"/api/v1/artifact/{hash1}")
assert response.json()["ref_count"] == 0
# Verify second artifact ref_count increased to 2
response = integration_client.get(f"/api/v1/artifact/{hash2}")
assert response.json()["ref_count"] == 2
@pytest.mark.integration
def test_ref_count_unchanged_when_tag_same_artifact(
self, integration_client, test_package, unique_test_id
):
"""Test ref_count doesn't change when tag is 'updated' to same artifact."""
project_name, package_name = test_package
content = f"same artifact {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload with tag
upload_test_file(
integration_client, project_name, package_name, content, tag="same-v1"
)
# Verify ref_count is 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
# Upload same content with same tag (no-op)
upload_test_file(
integration_client, project_name, package_name, content, tag="same-v1"
)
# Verify ref_count is still 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
@pytest.mark.integration
def test_tag_via_post_endpoint_increments_ref_count(
self, integration_client, test_package, unique_test_id
):
"""Test creating tag via POST /tags endpoint increments ref_count."""
project_name, package_name = test_package
content = f"tag endpoint test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload artifact without tag
result = upload_test_file(
integration_client, project_name, package_name, content, filename="test.bin"
)
artifact_id = result["artifact_id"]
# Verify ref_count is 0 (no tags yet)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
# Create tag via POST endpoint
tag_response = integration_client.post(
f"/api/v1/project/{project_name}/{package_name}/tags",
json={"name": "post-v1", "artifact_id": artifact_id},
)
assert tag_response.status_code == 200
# Verify ref_count is now 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
# Create another tag via POST endpoint
tag_response = integration_client.post(
f"/api/v1/project/{project_name}/{package_name}/tags",
json={"name": "post-latest", "artifact_id": artifact_id},
)
assert tag_response.status_code == 200
# Verify ref_count is now 2
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 2

View File

@@ -1,33 +1,109 @@
"""
Integration tests for duplicate uploads and storage verification.
These tests require the full stack to be running (docker-compose.local.yml).
Integration tests for upload and download API endpoints.
Tests cover:
- Duplicate upload scenarios across packages and projects
- Storage verification (single S3 object, single artifact row)
- Upload table tracking
- Content integrity verification
- Upload functionality and deduplication
- Download by tag and artifact ID
- Concurrent upload handling
- Failure cleanup
- File size validation
- Upload failure cleanup
- S3 storage verification
"""
import pytest
import io
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.conftest import (
from tests.factories import (
compute_sha256,
upload_test_file,
list_s3_objects_by_hash,
s3_object_exists,
delete_s3_object_by_hash,
)
class TestDuplicateUploadScenarios:
"""Integration tests for duplicate upload behavior."""
class TestUploadBasics:
"""Tests for basic upload functionality."""
@pytest.mark.integration
def test_upload_returns_artifact_id(self, integration_client, test_package):
"""Test upload returns the artifact ID (SHA256 hash)."""
project_name, package_name = test_package
content = b"basic upload test"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project_name, package_name, content, tag="v1"
)
assert result["artifact_id"] == expected_hash
@pytest.mark.integration
def test_upload_response_has_upload_id(self, integration_client, test_package):
"""Test upload response includes upload_id."""
project_name, package_name = test_package
result = upload_test_file(
integration_client,
project_name,
package_name,
b"upload id test",
"uploadid.txt",
)
assert "upload_id" in result
assert result["upload_id"] is not None
@pytest.mark.integration
def test_upload_response_has_content_type(self, integration_client, test_package):
"""Test upload response includes content_type."""
project_name, package_name = test_package
result = upload_test_file(
integration_client,
project_name,
package_name,
b"content type test",
"content.txt",
)
assert "content_type" in result
@pytest.mark.integration
def test_upload_response_has_original_name(self, integration_client, test_package):
"""Test upload response includes original_name."""
project_name, package_name = test_package
result = upload_test_file(
integration_client,
project_name,
package_name,
b"original name test",
"originalname.txt",
)
assert "original_name" in result
assert result["original_name"] == "originalname.txt"
@pytest.mark.integration
def test_upload_response_has_created_at(self, integration_client, test_package):
"""Test upload response includes created_at."""
project_name, package_name = test_package
result = upload_test_file(
integration_client,
project_name,
package_name,
b"created at test",
"createdat.txt",
)
assert "created_at" in result
assert result["created_at"] is not None
class TestDuplicateUploads:
"""Tests for duplicate upload deduplication behavior."""
@pytest.mark.integration
def test_same_file_twice_returns_same_artifact_id(
@@ -103,62 +179,11 @@ class TestDuplicateUploadScenarios:
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
@pytest.mark.integration
def test_same_file_different_projects_shares_artifact(
self, integration_client, unique_test_id
):
"""Test uploading same file to different projects shares artifact."""
content = f"content shared across projects {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Create two projects with packages
proj1 = f"project-x-{unique_test_id}"
proj2 = f"project-y-{unique_test_id}"
pkg_name = "shared-pkg"
try:
# Create projects and packages
integration_client.post(
"/api/v1/projects",
json={"name": proj1, "description": "Project X", "is_public": True},
)
integration_client.post(
"/api/v1/projects",
json={"name": proj2, "description": "Project Y", "is_public": True},
)
integration_client.post(
f"/api/v1/project/{proj1}/packages",
json={"name": pkg_name, "description": "Package"},
)
integration_client.post(
f"/api/v1/project/{proj2}/packages",
json={"name": pkg_name, "description": "Package"},
)
# Upload to first project
result1 = upload_test_file(
integration_client, proj1, pkg_name, content, tag="v1"
)
assert result1["artifact_id"] == expected_hash
assert result1["deduplicated"] is False
# Upload to second project
result2 = upload_test_file(
integration_client, proj2, pkg_name, content, tag="v1"
)
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
finally:
# Cleanup
integration_client.delete(f"/api/v1/projects/{proj1}")
integration_client.delete(f"/api/v1/projects/{proj2}")
@pytest.mark.integration
def test_same_file_different_filenames_shares_artifact(
self, integration_client, test_package
):
"""Test uploading same file with different original filenames shares artifact."""
"""Test uploading same file with different filenames shares artifact."""
project, package = test_package
content = b"content with different filenames"
expected_hash = compute_sha256(content)
@@ -186,110 +211,68 @@ class TestDuplicateUploadScenarios:
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
@pytest.mark.integration
def test_same_file_different_tags_shares_artifact(
self, integration_client, test_package, unique_test_id
):
"""Test uploading same file with different tags shares artifact."""
project, package = test_package
content = f"content with different tags {unique_test_id}".encode()
expected_hash = compute_sha256(content)
tags = ["latest", "stable", "v1.0.0", "release"]
for i, tag in enumerate(tags):
result = upload_test_file(
integration_client, project, package, content, tag=tag
)
assert result["artifact_id"] == expected_hash
if i == 0:
assert result["deduplicated"] is False
else:
assert result["deduplicated"] is True
class TestStorageVerification:
"""Tests to verify storage behavior after duplicate uploads."""
class TestDownload:
"""Tests for download functionality."""
@pytest.mark.integration
def test_artifact_table_single_row_after_duplicates(
self, integration_client, test_package
):
"""Test artifact table contains only one row after duplicate uploads."""
def test_download_by_tag(self, integration_client, test_package):
"""Test downloading artifact by tag name."""
project, package = test_package
content = b"content for single row test"
expected_hash = compute_sha256(content)
original_content = b"download by tag test"
# Upload same content multiple times with different tags
for tag in ["v1", "v2", "v3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
upload_test_file(
integration_client, project, package, original_content, tag="download-tag"
)
# Query artifact - should exist and be unique
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
artifact = response.json()
assert artifact["id"] == expected_hash
assert artifact["ref_count"] == 3
@pytest.mark.integration
def test_upload_table_multiple_rows_for_duplicates(
self, integration_client, test_package
):
"""Test upload table contains multiple rows for duplicate uploads (event tracking)."""
project, package = test_package
content = b"content for upload tracking test"
# Upload same content 3 times
for tag in ["upload1", "upload2", "upload3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Check package stats - should show 3 uploads but fewer unique artifacts
response = integration_client.get(
f"/api/v1/project/{project}/packages/{package}"
f"/api/v1/project/{project}/{package}/+/download-tag",
params={"mode": "proxy"},
)
assert response.status_code == 200
pkg_info = response.json()
assert pkg_info["tag_count"] == 3
assert response.content == original_content
@pytest.mark.integration
def test_artifact_content_matches_original(self, integration_client, test_package):
"""Test artifact content retrieved matches original content exactly."""
def test_download_by_artifact_id(self, integration_client, test_package):
"""Test downloading artifact by artifact ID."""
project, package = test_package
original_content = b"download by id test"
expected_hash = compute_sha256(original_content)
upload_test_file(integration_client, project, package, original_content)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/artifact:{expected_hash}",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == original_content
@pytest.mark.integration
def test_download_nonexistent_tag(self, integration_client, test_package):
"""Test downloading nonexistent tag returns 404."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
)
assert response.status_code == 404
@pytest.mark.integration
def test_content_matches_original(self, integration_client, test_package):
"""Test downloaded content matches original exactly."""
project, package = test_package
original_content = b"exact content verification test data 12345"
# Upload
result = upload_test_file(
upload_test_file(
integration_client, project, package, original_content, tag="verify"
)
# Download and compare
download_response = integration_client.get(
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/verify", params={"mode": "proxy"}
)
assert download_response.status_code == 200
downloaded_content = download_response.content
assert downloaded_content == original_content
@pytest.mark.integration
def test_storage_stats_reflect_deduplication(
self, integration_client, test_package
):
"""Test total storage size matches single artifact size after duplicates."""
project, package = test_package
content = b"content for storage stats test - should only count once"
content_size = len(content)
# Upload same content 5 times
for tag in ["a", "b", "c", "d", "e"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Check global stats
response = integration_client.get("/api/v1/stats")
assert response.status_code == 200
stats = response.json()
# Deduplication should show savings
assert stats["deduplicated_uploads"] > 0
assert stats["storage_saved_bytes"] > 0
assert response.content == original_content
class TestConcurrentUploads:
@@ -308,7 +291,6 @@ class TestConcurrentUploads:
def upload_worker(tag_suffix):
try:
# Create a new client for this thread
from httpx import Client
base_url = "http://localhost:8080"
@@ -332,13 +314,11 @@ class TestConcurrentUploads:
except Exception as e:
errors.append(str(e))
# Run concurrent uploads
with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
for future in as_completed(futures):
pass # Wait for all to complete
pass
# Verify results
assert len(errors) == 0, f"Errors during concurrent uploads: {errors}"
assert len(results) == num_concurrent
@@ -353,227 +333,27 @@ class TestConcurrentUploads:
assert response.json()["ref_count"] == num_concurrent
class TestDeduplicationAcrossRestarts:
"""Tests for deduplication persistence."""
@pytest.mark.integration
def test_deduplication_persists(
self, integration_client, test_package, unique_test_id
):
"""
Test deduplication works with persisted data.
This test uploads content, then uploads the same content again.
Since the database persists, the second upload should detect
the existing artifact even without server restart.
"""
project, package = test_package
content = f"persisted content for dedup test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# First upload
result1 = upload_test_file(
integration_client, project, package, content, tag="persist1"
)
assert result1["artifact_id"] == expected_hash
assert result1["deduplicated"] is False
# Second upload (simulating after restart - data is persisted)
result2 = upload_test_file(
integration_client, project, package, content, tag="persist2"
)
assert result2["artifact_id"] == expected_hash
assert result2["deduplicated"] is True
# Verify artifact exists with correct ref_count
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == 2
class TestS3ObjectVerification:
"""Tests to verify S3 storage behavior directly."""
@pytest.mark.integration
def test_s3_bucket_single_object_after_duplicates(
self, integration_client, test_package, unique_test_id
):
"""Test S3 bucket contains only one object after duplicate uploads."""
project, package = test_package
content = f"content for s3 object count test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload same content multiple times with different tags
for tag in ["s3test1", "s3test2", "s3test3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Verify only one S3 object exists for this hash
s3_objects = list_s3_objects_by_hash(expected_hash)
assert len(s3_objects) == 1, (
f"Expected 1 S3 object, found {len(s3_objects)}: {s3_objects}"
)
# Verify the object key follows expected pattern
expected_key = (
f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
)
assert s3_objects[0] == expected_key
class TestUploadFailureCleanup:
"""Tests for cleanup when uploads fail."""
@pytest.mark.integration
def test_upload_failure_invalid_project_no_orphaned_s3(
self, integration_client, unique_test_id
):
"""Test upload to non-existent project doesn't leave orphaned S3 objects."""
content = f"content for orphan s3 test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Attempt upload to non-existent project
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload",
files=files,
data={"tag": "test"},
)
# Upload should fail
assert response.status_code == 404
# Verify no S3 object was created
assert not s3_object_exists(expected_hash), (
"Orphaned S3 object found after failed upload"
)
@pytest.mark.integration
def test_upload_failure_invalid_package_no_orphaned_s3(
self, integration_client, test_project, unique_test_id
):
"""Test upload to non-existent package doesn't leave orphaned S3 objects."""
content = f"content for orphan s3 test pkg {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Attempt upload to non-existent package
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
files=files,
data={"tag": "test"},
)
# Upload should fail
assert response.status_code == 404
# Verify no S3 object was created
assert not s3_object_exists(expected_hash), (
"Orphaned S3 object found after failed upload"
)
@pytest.mark.integration
def test_upload_failure_empty_file_no_orphaned_s3(
self, integration_client, test_package, unique_test_id
):
"""Test upload of empty file doesn't leave orphaned S3 objects or DB records."""
project, package = test_package
content = b"" # Empty content
# Attempt upload of empty file
files = {"file": ("empty.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"empty-{unique_test_id}"},
)
# Upload should fail (empty files are rejected)
assert response.status_code in (400, 422), (
f"Expected 400/422, got {response.status_code}"
)
@pytest.mark.integration
def test_upload_failure_no_orphaned_database_records(
self, integration_client, test_project, unique_test_id
):
"""Test failed upload doesn't leave orphaned database records."""
content = f"content for db orphan test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Attempt upload to non-existent package (should fail before DB insert)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
files=files,
data={"tag": "test"},
)
# Upload should fail
assert response.status_code == 404
# Verify no artifact record was created
artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert artifact_response.status_code == 404, (
"Orphaned artifact record found after failed upload"
)
@pytest.mark.integration
def test_duplicate_tag_upload_handles_gracefully(
self, integration_client, test_package, unique_test_id
):
"""Test uploading with duplicate tag is handled without orphaned data."""
project, package = test_package
content1 = f"content version 1 {unique_test_id}".encode()
content2 = f"content version 2 {unique_test_id}".encode()
tag = f"duplicate-tag-{unique_test_id}"
# First upload with tag
result1 = upload_test_file(
integration_client, project, package, content1, tag=tag
)
hash1 = result1["artifact_id"]
# Second upload with same tag (should update the tag to point to new artifact)
result2 = upload_test_file(
integration_client, project, package, content2, tag=tag
)
hash2 = result2["artifact_id"]
# Both artifacts should exist
assert integration_client.get(f"/api/v1/artifact/{hash1}").status_code == 200
assert integration_client.get(f"/api/v1/artifact/{hash2}").status_code == 200
# Tag should point to the second artifact
tag_response = integration_client.get(
f"/api/v1/project/{project}/{package}/tags/{tag}"
)
assert tag_response.status_code == 200
assert tag_response.json()["artifact_id"] == hash2
class TestFileSizeValidation:
"""Tests for file size limits and empty file rejection."""
@pytest.mark.integration
def test_empty_file_rejected(self, integration_client, test_package):
"""Test that empty files are rejected with appropriate error."""
"""Test empty files are rejected with appropriate error."""
project, package = test_package
# Try to upload empty content
files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
# Should be rejected (422 from storage layer or validation)
assert response.status_code in [422, 400]
@pytest.mark.integration
def test_small_valid_file_accepted(self, integration_client, test_package):
"""Test that small (1 byte) files are accepted."""
"""Test small (1 byte) files are accepted."""
project, package = test_package
content = b"X" # Single byte
content = b"X"
result = upload_test_file(
integration_client, project, package, content, tag="tiny"
@@ -586,7 +366,7 @@ class TestFileSizeValidation:
def test_file_size_reported_correctly(
self, integration_client, test_package, unique_test_id
):
"""Test that file size is correctly reported in response."""
"""Test file size is correctly reported in response."""
project, package = test_package
content = f"Test content for size check {unique_test_id}".encode()
expected_size = len(content)
@@ -602,3 +382,121 @@ class TestFileSizeValidation:
f"/api/v1/artifact/{result['artifact_id']}"
)
assert artifact_response.json()["size"] == expected_size
class TestUploadFailureCleanup:
"""Tests for cleanup when uploads fail."""
@pytest.mark.integration
def test_upload_failure_invalid_project_no_orphaned_s3(
self, integration_client, unique_test_id
):
"""Test upload to non-existent project doesn't leave orphaned S3 objects."""
content = f"content for orphan s3 test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload",
files=files,
data={"tag": "test"},
)
assert response.status_code == 404
# Verify no S3 object was created
assert not s3_object_exists(expected_hash), (
"Orphaned S3 object found after failed upload"
)
@pytest.mark.integration
def test_upload_failure_invalid_package_no_orphaned_s3(
self, integration_client, test_project, unique_test_id
):
"""Test upload to non-existent package doesn't leave orphaned S3 objects."""
content = f"content for orphan s3 test pkg {unique_test_id}".encode()
expected_hash = compute_sha256(content)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
files=files,
data={"tag": "test"},
)
assert response.status_code == 404
assert not s3_object_exists(expected_hash), (
"Orphaned S3 object found after failed upload"
)
@pytest.mark.integration
def test_upload_failure_no_orphaned_database_records(
self, integration_client, test_project, unique_test_id
):
"""Test failed upload doesn't leave orphaned database records."""
content = f"content for db orphan test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
files=files,
data={"tag": "test"},
)
assert response.status_code == 404
artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert artifact_response.status_code == 404, (
"Orphaned artifact record found after failed upload"
)
class TestS3StorageVerification:
"""Tests to verify S3 storage behavior."""
@pytest.mark.integration
def test_s3_single_object_after_duplicates(
self, integration_client, test_package, unique_test_id
):
"""Test S3 bucket contains only one object after duplicate uploads."""
project, package = test_package
content = f"content for s3 object count test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload same content multiple times
for tag in ["s3test1", "s3test2", "s3test3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Verify only one S3 object exists
s3_objects = list_s3_objects_by_hash(expected_hash)
assert len(s3_objects) == 1, (
f"Expected 1 S3 object, found {len(s3_objects)}: {s3_objects}"
)
# Verify object key follows expected pattern
expected_key = (
f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
)
assert s3_objects[0] == expected_key
@pytest.mark.integration
def test_artifact_table_single_row_after_duplicates(
self, integration_client, test_package
):
"""Test artifact table contains only one row after duplicate uploads."""
project, package = test_package
content = b"content for single row test"
expected_hash = compute_sha256(content)
# Upload same content multiple times
for tag in ["v1", "v2", "v3"]:
upload_test_file(integration_client, project, package, content, tag=tag)
# Query artifact
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
artifact = response.json()
assert artifact["id"] == expected_hash
assert artifact["ref_count"] == 3

View File

@@ -1,207 +0,0 @@
"""
Unit tests for duplicate detection and deduplication logic.
Tests cover:
- _exists() method correctly identifies existing S3 keys
- S3 key generation follows expected pattern
- Storage layer skips upload when artifact already exists
- Storage layer performs upload when artifact does not exist
"""
import pytest
import io
from unittest.mock import MagicMock, patch
from tests.conftest import (
compute_sha256,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
)
class TestExistsMethod:
"""Tests for the _exists() method that checks S3 object existence."""
@pytest.mark.unit
def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client):
"""Test _exists() returns True when object exists."""
# Pre-populate the mock storage
test_key = "fruits/df/fd/test-hash"
mock_s3_client.objects[test_key] = b"content"
result = mock_storage._exists(test_key)
assert result is True
@pytest.mark.unit
def test_exists_returns_false_for_nonexistent_key(self, mock_storage):
"""Test _exists() returns False when object doesn't exist."""
result = mock_storage._exists("fruits/no/ne/nonexistent-key")
assert result is False
@pytest.mark.unit
def test_exists_handles_404_error(self, mock_storage):
"""Test _exists() handles 404 errors gracefully."""
# The mock client raises ClientError for nonexistent keys
result = mock_storage._exists("fruits/xx/yy/does-not-exist")
assert result is False
class TestS3KeyGeneration:
"""Tests for S3 key pattern generation."""
@pytest.mark.unit
def test_s3_key_pattern(self):
"""Test S3 key follows pattern: fruits/{hash[:2]}/{hash[2:4]}/{hash}"""
test_hash = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"
expected_key = f"fruits/{test_hash[:2]}/{test_hash[2:4]}/{test_hash}"
# Expected: fruits/ab/cd/abcdef1234567890...
assert expected_key == f"fruits/ab/cd/{test_hash}"
@pytest.mark.unit
def test_s3_key_generation_in_storage(self, mock_storage):
"""Test storage layer generates correct S3 key."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_key = (
f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
)
assert result.s3_key == expected_key
@pytest.mark.unit
def test_s3_key_uses_sha256_hash(self, mock_storage):
"""Test S3 key is derived from SHA256 hash."""
content = b"unique test content for key test"
file_obj = io.BytesIO(content)
expected_hash = compute_sha256(content)
result = mock_storage._store_simple(file_obj)
# Key should contain the hash
assert expected_hash in result.s3_key
class TestDeduplicationBehavior:
"""Tests for deduplication (skip upload when exists)."""
@pytest.mark.unit
def test_skips_upload_when_exists(self, mock_storage, mock_s3_client):
"""Test storage skips S3 upload when artifact already exists."""
content = TEST_CONTENT_HELLO
s3_key = (
f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
)
# Pre-populate storage (simulate existing artifact)
mock_s3_client.objects[s3_key] = content
# Track put_object calls
original_put = mock_s3_client.put_object
put_called = []
def tracked_put(*args, **kwargs):
put_called.append(True)
return original_put(*args, **kwargs)
mock_s3_client.put_object = tracked_put
# Store the same content
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
# put_object should NOT have been called (deduplication)
assert len(put_called) == 0
assert result.sha256 == TEST_HASH_HELLO
@pytest.mark.unit
def test_uploads_when_not_exists(self, mock_storage, mock_s3_client):
"""Test storage uploads to S3 when artifact doesn't exist."""
content = b"brand new unique content"
content_hash = compute_sha256(content)
s3_key = f"fruits/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}"
# Ensure object doesn't exist
assert s3_key not in mock_s3_client.objects
# Store the content
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
# Object should now exist in mock storage
assert s3_key in mock_s3_client.objects
assert mock_s3_client.objects[s3_key] == content
@pytest.mark.unit
def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client):
"""Test storing same content twice returns same hash."""
content = b"content to be stored twice"
# First store
file1 = io.BytesIO(content)
result1 = mock_storage._store_simple(file1)
# Second store (duplicate)
file2 = io.BytesIO(content)
result2 = mock_storage._store_simple(file2)
assert result1.sha256 == result2.sha256
assert result1.s3_key == result2.s3_key
@pytest.mark.unit
def test_different_content_different_keys(self, mock_storage):
"""Test different content produces different S3 keys."""
content1 = b"first content"
content2 = b"second content"
file1 = io.BytesIO(content1)
result1 = mock_storage._store_simple(file1)
file2 = io.BytesIO(content2)
result2 = mock_storage._store_simple(file2)
assert result1.sha256 != result2.sha256
assert result1.s3_key != result2.s3_key
class TestDeduplicationEdgeCases:
"""Edge case tests for deduplication."""
@pytest.mark.unit
def test_same_content_different_filenames(self, mock_storage):
"""Test same content with different metadata is deduplicated."""
content = b"identical content"
# Store with "filename1"
file1 = io.BytesIO(content)
result1 = mock_storage._store_simple(file1)
# Store with "filename2" (same content)
file2 = io.BytesIO(content)
result2 = mock_storage._store_simple(file2)
# Both should have same hash (content-addressable)
assert result1.sha256 == result2.sha256
@pytest.mark.unit
def test_whitespace_only_difference(self, mock_storage):
"""Test content differing only by whitespace produces different hashes."""
content1 = b"test content"
content2 = b"test content" # Extra space
content3 = b"test content " # Trailing space
file1 = io.BytesIO(content1)
file2 = io.BytesIO(content2)
file3 = io.BytesIO(content3)
result1 = mock_storage._store_simple(file1)
result2 = mock_storage._store_simple(file2)
result3 = mock_storage._store_simple(file3)
# All should be different (content-addressable)
assert len({result1.sha256, result2.sha256, result3.sha256}) == 3

View File

@@ -1,168 +0,0 @@
"""
Integration tests for garbage collection functionality.
Tests cover:
- Listing orphaned artifacts (ref_count=0)
- Garbage collection in dry-run mode
- Garbage collection actual deletion
- Verifying artifacts with refs are not deleted
"""
import pytest
from tests.conftest import (
compute_sha256,
upload_test_file,
)
class TestOrphanedArtifactsEndpoint:
"""Tests for GET /api/v1/admin/orphaned-artifacts endpoint."""
@pytest.mark.integration
def test_list_orphaned_artifacts_returns_list(self, integration_client):
"""Test orphaned artifacts endpoint returns a list."""
response = integration_client.get("/api/v1/admin/orphaned-artifacts")
assert response.status_code == 200
assert isinstance(response.json(), list)
@pytest.mark.integration
def test_orphaned_artifact_has_required_fields(self, integration_client):
"""Test orphaned artifact response has required fields."""
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
assert response.status_code == 200
data = response.json()
if len(data) > 0:
artifact = data[0]
assert "id" in artifact
assert "size" in artifact
assert "created_at" in artifact
assert "created_by" in artifact
assert "original_name" in artifact
@pytest.mark.integration
def test_orphaned_artifacts_respects_limit(self, integration_client):
"""Test orphaned artifacts endpoint respects limit parameter."""
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
assert response.status_code == 200
assert len(response.json()) <= 5
@pytest.mark.integration
def test_artifact_becomes_orphaned_when_tag_deleted(
self, integration_client, test_package, unique_test_id
):
"""Test artifact appears in orphaned list after tag is deleted."""
project, package = test_package
content = f"orphan test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload with tag
upload_test_file(integration_client, project, package, content, tag="temp-tag")
# Verify not in orphaned list (has ref_count=1)
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
orphaned_ids = [a["id"] for a in response.json()]
assert expected_hash not in orphaned_ids
# Delete the tag
integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag")
# Verify now in orphaned list (ref_count=0)
response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
orphaned_ids = [a["id"] for a in response.json()]
assert expected_hash in orphaned_ids
class TestGarbageCollectionEndpoint:
"""Tests for POST /api/v1/admin/garbage-collect endpoint."""
@pytest.mark.integration
def test_garbage_collect_dry_run_returns_response(self, integration_client):
"""Test garbage collection dry run returns valid response."""
response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
assert response.status_code == 200
data = response.json()
assert "artifacts_deleted" in data
assert "bytes_freed" in data
assert "artifact_ids" in data
assert "dry_run" in data
assert data["dry_run"] is True
@pytest.mark.integration
def test_garbage_collect_dry_run_doesnt_delete(
self, integration_client, test_package, unique_test_id
):
"""Test garbage collection dry run doesn't actually delete artifacts."""
project, package = test_package
content = f"dry run test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload and delete tag to create orphan
upload_test_file(integration_client, project, package, content, tag="dry-run")
integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")
# Verify artifact exists
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
# Run garbage collection in dry-run mode
gc_response = integration_client.post(
"/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
)
assert gc_response.status_code == 200
assert expected_hash in gc_response.json()["artifact_ids"]
# Verify artifact STILL exists (dry run didn't delete)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
@pytest.mark.integration
def test_garbage_collect_preserves_referenced_artifacts(
self, integration_client, test_package, unique_test_id
):
"""Test garbage collection doesn't delete artifacts with ref_count > 0."""
project, package = test_package
content = f"preserve test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload with tag (ref_count=1)
upload_test_file(integration_client, project, package, content, tag="keep-this")
# Verify artifact exists with ref_count=1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == 1
# Run garbage collection (dry_run to not affect other tests)
gc_response = integration_client.post(
"/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
)
assert gc_response.status_code == 200
# Verify artifact was NOT in delete list (has ref_count > 0)
assert expected_hash not in gc_response.json()["artifact_ids"]
# Verify artifact still exists
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == 1
@pytest.mark.integration
def test_garbage_collect_respects_limit(self, integration_client):
"""Test garbage collection respects limit parameter."""
response = integration_client.post(
"/api/v1/admin/garbage-collect?dry_run=true&limit=5"
)
assert response.status_code == 200
assert response.json()["artifacts_deleted"] <= 5
@pytest.mark.integration
def test_garbage_collect_returns_bytes_freed(self, integration_client):
"""Test garbage collection returns accurate bytes_freed."""
response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
assert response.status_code == 200
data = response.json()
assert data["bytes_freed"] >= 0
assert isinstance(data["bytes_freed"], int)

View File

@@ -1,215 +0,0 @@
"""
Unit tests for SHA256 hash calculation and deduplication logic.
Tests cover:
- Hash computation produces consistent results
- Hash is always 64 character lowercase hexadecimal
- Different content produces different hashes
- Binary content handling
- Large file handling (streaming)
"""
import pytest
import hashlib
import io
from tests.conftest import (
create_test_file,
compute_sha256,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_CONTENT_BINARY,
TEST_HASH_BINARY,
)
class TestHashComputation:
"""Unit tests for hash calculation functionality."""
@pytest.mark.unit
def test_sha256_consistent_results(self):
"""Test SHA256 hash produces consistent results for identical content."""
content = b"test content for hashing"
# Compute hash multiple times
hash1 = compute_sha256(content)
hash2 = compute_sha256(content)
hash3 = compute_sha256(content)
assert hash1 == hash2 == hash3
@pytest.mark.unit
def test_sha256_different_content_different_hash(self):
"""Test SHA256 produces different hashes for different content."""
content1 = b"content version 1"
content2 = b"content version 2"
hash1 = compute_sha256(content1)
hash2 = compute_sha256(content2)
assert hash1 != hash2
@pytest.mark.unit
def test_sha256_format_64_char_hex(self):
"""Test SHA256 hash is always 64 character lowercase hexadecimal."""
test_cases = [
b"", # Empty
b"a", # Single char
b"Hello, World!", # Normal string
bytes(range(256)), # All byte values
b"x" * 10000, # Larger content
]
for content in test_cases:
hash_value = compute_sha256(content)
# Check length
assert len(hash_value) == 64, (
f"Hash length should be 64, got {len(hash_value)}"
)
# Check lowercase
assert hash_value == hash_value.lower(), "Hash should be lowercase"
# Check hexadecimal
assert all(c in "0123456789abcdef" for c in hash_value), (
"Hash should be hex"
)
@pytest.mark.unit
def test_sha256_known_value(self):
"""Test SHA256 produces expected hash for known input."""
assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO
@pytest.mark.unit
def test_sha256_binary_content(self):
"""Test SHA256 handles binary content correctly."""
assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY
# Test with null bytes
content_with_nulls = b"\x00\x00test\x00\x00"
hash_value = compute_sha256(content_with_nulls)
assert len(hash_value) == 64
@pytest.mark.unit
def test_sha256_streaming_computation(self):
"""Test SHA256 can be computed in chunks (streaming)."""
# Large content
chunk_size = 8192
total_size = chunk_size * 10 # 80KB
content = b"x" * total_size
# Direct computation
direct_hash = compute_sha256(content)
# Streaming computation
hasher = hashlib.sha256()
for i in range(0, total_size, chunk_size):
hasher.update(content[i : i + chunk_size])
streaming_hash = hasher.hexdigest()
assert direct_hash == streaming_hash
@pytest.mark.unit
def test_sha256_order_matters(self):
"""Test that content order affects hash (not just content set)."""
content1 = b"AB"
content2 = b"BA"
assert compute_sha256(content1) != compute_sha256(content2)
class TestStorageHashComputation:
"""Tests for hash computation in the storage layer."""
@pytest.mark.unit
def test_storage_computes_sha256(self, mock_storage):
"""Test storage layer correctly computes SHA256 hash."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
assert result.sha256 == TEST_HASH_HELLO
@pytest.mark.unit
def test_storage_computes_md5(self, mock_storage):
"""Test storage layer also computes MD5 hash."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_md5 = hashlib.md5(content).hexdigest()
assert result.md5 == expected_md5
@pytest.mark.unit
def test_storage_computes_sha1(self, mock_storage):
"""Test storage layer also computes SHA1 hash."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_sha1 = hashlib.sha1(content).hexdigest()
assert result.sha1 == expected_sha1
@pytest.mark.unit
def test_storage_returns_correct_size(self, mock_storage):
"""Test storage layer returns correct file size."""
content = b"test content with known size"
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
assert result.size == len(content)
@pytest.mark.unit
def test_storage_generates_correct_s3_key(self, mock_storage):
"""Test storage layer generates correct S3 key pattern."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
# Key should be: fruits/{hash[:2]}/{hash[2:4]}/{hash}
expected_key = (
f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
)
assert result.s3_key == expected_key
class TestHashEdgeCases:
"""Edge case tests for hash computation."""
@pytest.mark.unit
def test_hash_empty_content_rejected(self, mock_storage):
"""Test that empty content is rejected."""
from app.storage import HashComputationError
file_obj = io.BytesIO(b"")
with pytest.raises(HashComputationError):
mock_storage._store_simple(file_obj)
@pytest.mark.unit
def test_hash_large_file_streaming(self, mock_storage):
"""Test hash computation for large files uses streaming."""
# Create a 10MB file
size = 10 * 1024 * 1024
content = b"x" * size
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_hash = compute_sha256(content)
assert result.sha256 == expected_hash
@pytest.mark.unit
def test_hash_special_bytes(self):
"""Test hash handles all byte values correctly."""
# All possible byte values
content = bytes(range(256))
hash_value = compute_sha256(content)
assert len(hash_value) == 64
assert hash_value == TEST_HASH_BINARY

View File

@@ -1,458 +0,0 @@
"""
Unit and integration tests for reference counting behavior.
Tests cover:
- ref_count is set correctly for new artifacts
- ref_count increments on duplicate uploads
- ref_count query correctly identifies existing artifacts
- Artifact lookup by SHA256 hash works correctly
"""
import pytest
import io
from tests.conftest import (
compute_sha256,
upload_test_file,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
)
class TestRefCountQuery:
"""Tests for ref_count querying and artifact lookup."""
@pytest.mark.integration
def test_artifact_lookup_by_sha256(self, integration_client, test_package):
"""Test artifact lookup by SHA256 hash (primary key) works correctly."""
project, package = test_package
content = b"unique content for lookup test"
expected_hash = compute_sha256(content)
# Upload a file
upload_result = upload_test_file(
integration_client, project, package, content, tag="v1"
)
assert upload_result["artifact_id"] == expected_hash
# Look up artifact by ID (SHA256)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
artifact = response.json()
assert artifact["id"] == expected_hash
assert artifact["sha256"] == expected_hash
assert artifact["size"] == len(content)
@pytest.mark.integration
def test_ref_count_query_identifies_existing_artifact(
self, integration_client, test_package
):
"""Test ref_count query correctly identifies existing artifacts by hash."""
project, package = test_package
content = b"content for ref count query test"
expected_hash = compute_sha256(content)
# Upload a file with a tag
upload_result = upload_test_file(
integration_client, project, package, content, tag="v1"
)
# Query artifact and check ref_count
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
artifact = response.json()
assert artifact["ref_count"] >= 1 # At least 1 from the tag
@pytest.mark.integration
def test_ref_count_set_to_1_for_new_artifact_with_tag(
self, integration_client, test_package, unique_test_id
):
"""Test ref_count is set to 1 for new artifacts when created with a tag."""
project, package = test_package
content = f"brand new content for ref count test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload a new file with a tag
upload_result = upload_test_file(
integration_client, project, package, content, tag="initial"
)
assert upload_result["artifact_id"] == expected_hash
assert upload_result["ref_count"] == 1
assert upload_result["deduplicated"] is False
@pytest.mark.integration
def test_ref_count_increments_on_duplicate_upload_with_tag(
self, integration_client, test_package, unique_test_id
):
"""Test ref_count is incremented when duplicate content is uploaded with a new tag."""
project, package = test_package
content = f"content that will be uploaded twice {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# First upload with tag
result1 = upload_test_file(
integration_client, project, package, content, tag="v1"
)
assert result1["ref_count"] == 1
assert result1["deduplicated"] is False
# Second upload with different tag (same content)
result2 = upload_test_file(
integration_client, project, package, content, tag="v2"
)
assert result2["artifact_id"] == expected_hash
assert result2["ref_count"] == 2
assert result2["deduplicated"] is True
@pytest.mark.integration
def test_ref_count_after_multiple_tags(self, integration_client, test_package):
"""Test ref_count correctly reflects number of tags pointing to artifact."""
project, package = test_package
content = b"content for multiple tag test"
expected_hash = compute_sha256(content)
# Upload with multiple tags
tags = ["v1", "v2", "v3", "latest"]
for i, tag in enumerate(tags):
result = upload_test_file(
integration_client, project, package, content, tag=tag
)
assert result["artifact_id"] == expected_hash
assert result["ref_count"] == i + 1
# Verify final ref_count via artifact endpoint
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == len(tags)
class TestRefCountWithDeletion:
"""Tests for ref_count behavior when tags are deleted."""
@pytest.mark.integration
def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package):
"""Test ref_count decrements when a tag is deleted."""
project, package = test_package
content = b"content for delete test"
expected_hash = compute_sha256(content)
# Upload with two tags
upload_test_file(integration_client, project, package, content, tag="v1")
upload_test_file(integration_client, project, package, content, tag="v2")
# Verify ref_count is 2
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 2
# Delete one tag
delete_response = integration_client.delete(
f"/api/v1/project/{project}/{package}/tags/v1"
)
assert delete_response.status_code == 204
# Verify ref_count is now 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
@pytest.mark.integration
def test_ref_count_zero_after_all_tags_deleted(
self, integration_client, test_package
):
"""Test ref_count goes to 0 when all tags are deleted."""
project, package = test_package
content = b"content that will be orphaned"
expected_hash = compute_sha256(content)
# Upload with one tag
upload_test_file(integration_client, project, package, content, tag="only-tag")
# Delete the tag
integration_client.delete(f"/api/v1/project/{project}/{package}/tags/only-tag")
# Verify ref_count is 0
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
class TestRefCountCascadeDelete:
"""Tests for ref_count behavior during cascade deletions."""
@pytest.mark.integration
def test_ref_count_decrements_on_package_delete(
self, integration_client, unique_test_id
):
"""Test ref_count decrements for all tags when package is deleted."""
# Create a project and package manually (not using fixtures to control cleanup)
project_name = f"cascade-pkg-{unique_test_id}"
package_name = f"test-pkg-{unique_test_id}"
# Create project
response = integration_client.post(
"/api/v1/projects",
json={
"name": project_name,
"description": "Test project",
"is_public": True,
},
)
assert response.status_code == 200
# Create package
response = integration_client.post(
f"/api/v1/project/{project_name}/packages",
json={"name": package_name, "description": "Test package"},
)
assert response.status_code == 200
# Upload content with multiple tags
content = f"cascade delete test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package_name, content, tag="v1"
)
upload_test_file(
integration_client, project_name, package_name, content, tag="v2"
)
upload_test_file(
integration_client, project_name, package_name, content, tag="v3"
)
# Verify ref_count is 3
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 3
# Delete the package (should cascade delete all tags and decrement ref_count)
delete_response = integration_client.delete(
f"/api/v1/project/{project_name}/packages/{package_name}"
)
assert delete_response.status_code == 204
# Verify ref_count is 0 (all tags were deleted)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
# Cleanup: delete the project
integration_client.delete(f"/api/v1/projects/{project_name}")
@pytest.mark.integration
def test_ref_count_decrements_on_project_delete(
self, integration_client, unique_test_id
):
"""Test ref_count decrements for all tags in all packages when project is deleted."""
# Create a project manually (not using fixtures to control cleanup)
project_name = f"cascade-proj-{unique_test_id}"
package1_name = f"pkg1-{unique_test_id}"
package2_name = f"pkg2-{unique_test_id}"
# Create project
response = integration_client.post(
"/api/v1/projects",
json={
"name": project_name,
"description": "Test project",
"is_public": True,
},
)
assert response.status_code == 200
# Create two packages
for pkg_name in [package1_name, package2_name]:
response = integration_client.post(
f"/api/v1/project/{project_name}/packages",
json={"name": pkg_name, "description": "Test package"},
)
assert response.status_code == 200
# Upload same content with tags in both packages
content = f"project cascade test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package1_name, content, tag="v1"
)
upload_test_file(
integration_client, project_name, package1_name, content, tag="v2"
)
upload_test_file(
integration_client, project_name, package2_name, content, tag="latest"
)
upload_test_file(
integration_client, project_name, package2_name, content, tag="stable"
)
# Verify ref_count is 4 (2 tags in each of 2 packages)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 4
# Delete the project (should cascade delete all packages, tags, and decrement ref_count)
delete_response = integration_client.delete(f"/api/v1/projects/{project_name}")
assert delete_response.status_code == 204
# Verify ref_count is 0
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
@pytest.mark.integration
def test_shared_artifact_ref_count_partial_decrement(
self, integration_client, unique_test_id
):
"""Test ref_count correctly decrements when artifact is shared across packages."""
# Create project with two packages
project_name = f"shared-artifact-{unique_test_id}"
package1_name = f"pkg1-{unique_test_id}"
package2_name = f"pkg2-{unique_test_id}"
# Create project
response = integration_client.post(
"/api/v1/projects",
json={
"name": project_name,
"description": "Test project",
"is_public": True,
},
)
assert response.status_code == 200
# Create two packages
for pkg_name in [package1_name, package2_name]:
response = integration_client.post(
f"/api/v1/project/{project_name}/packages",
json={"name": pkg_name, "description": "Test package"},
)
assert response.status_code == 200
# Upload same content to both packages
content = f"shared artifact {unique_test_id}".encode()
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project_name, package1_name, content, tag="v1"
)
upload_test_file(
integration_client, project_name, package2_name, content, tag="v1"
)
# Verify ref_count is 2
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 2
# Delete only package1 (package2 still references the artifact)
delete_response = integration_client.delete(
f"/api/v1/project/{project_name}/packages/{package1_name}"
)
assert delete_response.status_code == 204
# Verify ref_count is 1 (only package2's tag remains)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
# Cleanup
integration_client.delete(f"/api/v1/projects/{project_name}")
class TestRefCountTagUpdate:
"""Tests for ref_count behavior when tags are updated to point to different artifacts."""
@pytest.mark.integration
def test_ref_count_adjusts_on_tag_update(
self, integration_client, test_package, unique_test_id
):
"""Test ref_count adjusts when a tag is updated to point to a different artifact."""
project, package = test_package
# Upload two different artifacts
content1 = f"artifact one {unique_test_id}".encode()
content2 = f"artifact two {unique_test_id}".encode()
hash1 = compute_sha256(content1)
hash2 = compute_sha256(content2)
# Upload first artifact with tag "latest"
upload_test_file(integration_client, project, package, content1, tag="latest")
# Verify first artifact has ref_count 1
response = integration_client.get(f"/api/v1/artifact/{hash1}")
assert response.json()["ref_count"] == 1
# Upload second artifact with different tag
upload_test_file(integration_client, project, package, content2, tag="stable")
# Now update "latest" tag to point to second artifact
# This is done by uploading the same content with the same tag
upload_test_file(integration_client, project, package, content2, tag="latest")
# Verify first artifact ref_count decreased to 0 (tag moved away)
response = integration_client.get(f"/api/v1/artifact/{hash1}")
assert response.json()["ref_count"] == 0
# Verify second artifact ref_count increased to 2 (stable + latest)
response = integration_client.get(f"/api/v1/artifact/{hash2}")
assert response.json()["ref_count"] == 2
@pytest.mark.integration
def test_ref_count_unchanged_when_tag_same_artifact(
self, integration_client, test_package, unique_test_id
):
"""Test ref_count doesn't change when tag is 'updated' to same artifact."""
project, package = test_package
content = f"same artifact {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload with tag
upload_test_file(integration_client, project, package, content, tag="v1")
# Verify ref_count is 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
# Upload same content with same tag (no-op)
upload_test_file(integration_client, project, package, content, tag="v1")
# Verify ref_count is still 1 (no double-counting)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
@pytest.mark.integration
def test_tag_via_post_endpoint_increments_ref_count(
self, integration_client, test_package, unique_test_id
):
"""Test creating tag via POST /tags endpoint increments ref_count."""
project, package = test_package
content = f"tag endpoint test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload artifact without tag
result = upload_test_file(
integration_client, project, package, content, filename="test.bin", tag=None
)
artifact_id = result["artifact_id"]
# Verify ref_count is 0 (no tags yet)
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 0
# Create tag via POST endpoint
tag_response = integration_client.post(
f"/api/v1/project/{project}/{package}/tags",
json={"name": "v1.0.0", "artifact_id": artifact_id},
)
assert tag_response.status_code == 200
# Verify ref_count is now 1
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 1
# Create another tag via POST endpoint
tag_response = integration_client.post(
f"/api/v1/project/{project}/{package}/tags",
json={"name": "latest", "artifact_id": artifact_id},
)
assert tag_response.status_code == 200
# Verify ref_count is now 2
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.json()["ref_count"] == 2

View File

@@ -1,488 +0,0 @@
"""
Integration tests for statistics endpoints.
Tests cover:
- Global stats endpoint
- Deduplication stats endpoint
- Cross-project deduplication
- Timeline stats
- Export and report endpoints
- Package and artifact stats
"""
import pytest
from tests.conftest import compute_sha256, upload_test_file
class TestGlobalStats:
"""Tests for GET /api/v1/stats endpoint."""
@pytest.mark.integration
def test_stats_returns_valid_response(self, integration_client):
"""Test stats endpoint returns expected fields."""
response = integration_client.get("/api/v1/stats")
assert response.status_code == 200
data = response.json()
# Check all required fields exist
assert "total_artifacts" in data
assert "total_size_bytes" in data
assert "unique_artifacts" in data
assert "orphaned_artifacts" in data
assert "orphaned_size_bytes" in data
assert "total_uploads" in data
assert "deduplicated_uploads" in data
assert "deduplication_ratio" in data
assert "storage_saved_bytes" in data
@pytest.mark.integration
def test_stats_values_are_non_negative(self, integration_client):
"""Test all stat values are non-negative."""
response = integration_client.get("/api/v1/stats")
assert response.status_code == 200
data = response.json()
assert data["total_artifacts"] >= 0
assert data["total_size_bytes"] >= 0
assert data["unique_artifacts"] >= 0
assert data["orphaned_artifacts"] >= 0
assert data["total_uploads"] >= 0
assert data["deduplicated_uploads"] >= 0
assert data["deduplication_ratio"] >= 0
assert data["storage_saved_bytes"] >= 0
@pytest.mark.integration
def test_stats_update_after_upload(
self, integration_client, test_package, unique_test_id
):
"""Test stats update after uploading an artifact."""
project, package = test_package
# Get initial stats
initial_response = integration_client.get("/api/v1/stats")
initial_stats = initial_response.json()
# Upload a new file
content = f"stats test content {unique_test_id}".encode()
upload_test_file(
integration_client, project, package, content, tag=f"stats-{unique_test_id}"
)
# Get updated stats
updated_response = integration_client.get("/api/v1/stats")
updated_stats = updated_response.json()
# Verify stats increased
assert updated_stats["total_uploads"] >= initial_stats["total_uploads"]
class TestDeduplicationStats:
"""Tests for GET /api/v1/stats/deduplication endpoint."""
@pytest.mark.integration
def test_dedup_stats_returns_valid_response(self, integration_client):
"""Test deduplication stats returns expected fields."""
response = integration_client.get("/api/v1/stats/deduplication")
assert response.status_code == 200
data = response.json()
assert "total_logical_bytes" in data
assert "total_physical_bytes" in data
assert "bytes_saved" in data
assert "savings_percentage" in data
assert "total_uploads" in data
assert "unique_artifacts" in data
assert "duplicate_uploads" in data
assert "average_ref_count" in data
assert "max_ref_count" in data
assert "most_referenced_artifacts" in data
@pytest.mark.integration
def test_most_referenced_artifacts_format(self, integration_client):
"""Test most_referenced_artifacts has correct structure."""
response = integration_client.get("/api/v1/stats/deduplication")
assert response.status_code == 200
data = response.json()
artifacts = data["most_referenced_artifacts"]
assert isinstance(artifacts, list)
if len(artifacts) > 0:
artifact = artifacts[0]
assert "artifact_id" in artifact
assert "ref_count" in artifact
assert "size" in artifact
assert "storage_saved" in artifact
@pytest.mark.integration
def test_dedup_stats_with_top_n_param(self, integration_client):
"""Test deduplication stats respects top_n parameter."""
response = integration_client.get("/api/v1/stats/deduplication?top_n=3")
assert response.status_code == 200
data = response.json()
assert len(data["most_referenced_artifacts"]) <= 3
@pytest.mark.integration
def test_savings_percentage_valid_range(self, integration_client):
"""Test savings percentage is between 0 and 100."""
response = integration_client.get("/api/v1/stats/deduplication")
assert response.status_code == 200
data = response.json()
assert 0 <= data["savings_percentage"] <= 100
class TestCrossProjectStats:
"""Tests for GET /api/v1/stats/cross-project endpoint."""
@pytest.mark.integration
def test_cross_project_returns_valid_response(self, integration_client):
"""Test cross-project stats returns expected fields."""
response = integration_client.get("/api/v1/stats/cross-project")
assert response.status_code == 200
data = response.json()
assert "shared_artifacts_count" in data
assert "total_cross_project_savings" in data
assert "shared_artifacts" in data
assert isinstance(data["shared_artifacts"], list)
@pytest.mark.integration
def test_cross_project_respects_limit(self, integration_client):
"""Test cross-project stats respects limit parameter."""
response = integration_client.get("/api/v1/stats/cross-project?limit=5")
assert response.status_code == 200
data = response.json()
assert len(data["shared_artifacts"]) <= 5
@pytest.mark.integration
def test_cross_project_detects_shared_artifacts(
self, integration_client, unique_test_id
):
"""Test cross-project deduplication is detected."""
content = f"shared across projects {unique_test_id}".encode()
# Create two projects
proj1 = f"cross-proj-a-{unique_test_id}"
proj2 = f"cross-proj-b-{unique_test_id}"
try:
# Create projects and packages
integration_client.post(
"/api/v1/projects",
json={"name": proj1, "description": "Test", "is_public": True},
)
integration_client.post(
"/api/v1/projects",
json={"name": proj2, "description": "Test", "is_public": True},
)
integration_client.post(
f"/api/v1/project/{proj1}/packages",
json={"name": "pkg", "description": "Test"},
)
integration_client.post(
f"/api/v1/project/{proj2}/packages",
json={"name": "pkg", "description": "Test"},
)
# Upload same content to both projects
upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
upload_test_file(integration_client, proj2, "pkg", content, tag="v1")
# Check cross-project stats
response = integration_client.get("/api/v1/stats/cross-project")
assert response.status_code == 200
data = response.json()
assert data["shared_artifacts_count"] >= 1
finally:
# Cleanup
integration_client.delete(f"/api/v1/projects/{proj1}")
integration_client.delete(f"/api/v1/projects/{proj2}")
class TestTimelineStats:
"""Tests for GET /api/v1/stats/timeline endpoint."""
@pytest.mark.integration
def test_timeline_returns_valid_response(self, integration_client):
"""Test timeline stats returns expected fields."""
response = integration_client.get("/api/v1/stats/timeline")
assert response.status_code == 200
data = response.json()
assert "period" in data
assert "start_date" in data
assert "end_date" in data
assert "data_points" in data
assert isinstance(data["data_points"], list)
@pytest.mark.integration
def test_timeline_daily_period(self, integration_client):
"""Test timeline with daily period."""
response = integration_client.get("/api/v1/stats/timeline?period=daily")
assert response.status_code == 200
data = response.json()
assert data["period"] == "daily"
@pytest.mark.integration
def test_timeline_weekly_period(self, integration_client):
"""Test timeline with weekly period."""
response = integration_client.get("/api/v1/stats/timeline?period=weekly")
assert response.status_code == 200
data = response.json()
assert data["period"] == "weekly"
@pytest.mark.integration
def test_timeline_monthly_period(self, integration_client):
"""Test timeline with monthly period."""
response = integration_client.get("/api/v1/stats/timeline?period=monthly")
assert response.status_code == 200
data = response.json()
assert data["period"] == "monthly"
@pytest.mark.integration
def test_timeline_invalid_period_rejected(self, integration_client):
"""Test timeline rejects invalid period."""
response = integration_client.get("/api/v1/stats/timeline?period=invalid")
assert response.status_code == 422
@pytest.mark.integration
def test_timeline_data_point_structure(self, integration_client):
"""Test timeline data points have correct structure."""
response = integration_client.get("/api/v1/stats/timeline")
assert response.status_code == 200
data = response.json()
if len(data["data_points"]) > 0:
point = data["data_points"][0]
assert "date" in point
assert "total_uploads" in point
assert "unique_artifacts" in point
assert "duplicated_uploads" in point
assert "bytes_saved" in point
class TestExportEndpoint:
"""Tests for GET /api/v1/stats/export endpoint."""
@pytest.mark.integration
def test_export_json_format(self, integration_client):
"""Test export with JSON format."""
response = integration_client.get("/api/v1/stats/export?format=json")
assert response.status_code == 200
data = response.json()
assert "total_artifacts" in data
assert "generated_at" in data
@pytest.mark.integration
def test_export_csv_format(self, integration_client):
"""Test export with CSV format."""
response = integration_client.get("/api/v1/stats/export?format=csv")
assert response.status_code == 200
assert "text/csv" in response.headers.get("content-type", "")
content = response.text
assert "Metric,Value" in content
assert "total_artifacts" in content
@pytest.mark.integration
def test_export_invalid_format_rejected(self, integration_client):
"""Test export rejects invalid format."""
response = integration_client.get("/api/v1/stats/export?format=xml")
assert response.status_code == 422
class TestReportEndpoint:
"""Tests for GET /api/v1/stats/report endpoint."""
@pytest.mark.integration
def test_report_markdown_format(self, integration_client):
"""Test report with markdown format."""
response = integration_client.get("/api/v1/stats/report?format=markdown")
assert response.status_code == 200
data = response.json()
assert data["format"] == "markdown"
assert "generated_at" in data
assert "content" in data
assert "# Orchard Storage Report" in data["content"]
@pytest.mark.integration
def test_report_json_format(self, integration_client):
"""Test report with JSON format."""
response = integration_client.get("/api/v1/stats/report?format=json")
assert response.status_code == 200
data = response.json()
assert data["format"] == "json"
assert "content" in data
@pytest.mark.integration
def test_report_contains_sections(self, integration_client):
"""Test markdown report contains expected sections."""
response = integration_client.get("/api/v1/stats/report?format=markdown")
assert response.status_code == 200
content = response.json()["content"]
assert "## Overview" in content
assert "## Storage" in content
assert "## Uploads" in content
class TestProjectStats:
"""Tests for GET /api/v1/projects/:project/stats endpoint."""
@pytest.mark.integration
def test_project_stats_returns_valid_response(
self, integration_client, test_project
):
"""Test project stats returns expected fields."""
response = integration_client.get(f"/api/v1/projects/{test_project}/stats")
assert response.status_code == 200
data = response.json()
assert "project_id" in data
assert "project_name" in data
assert "package_count" in data
assert "tag_count" in data
assert "artifact_count" in data
assert "total_size_bytes" in data
assert "upload_count" in data
assert "deduplicated_uploads" in data
assert "storage_saved_bytes" in data
assert "deduplication_ratio" in data
@pytest.mark.integration
def test_project_stats_not_found(self, integration_client):
"""Test project stats returns 404 for non-existent project."""
response = integration_client.get("/api/v1/projects/nonexistent-project/stats")
assert response.status_code == 404
class TestPackageStats:
"""Tests for GET /api/v1/project/:project/packages/:package/stats endpoint."""
@pytest.mark.integration
def test_package_stats_returns_valid_response(
self, integration_client, test_package
):
"""Test package stats returns expected fields."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/packages/{package}/stats"
)
assert response.status_code == 200
data = response.json()
assert "package_id" in data
assert "package_name" in data
assert "project_name" in data
assert "tag_count" in data
assert "artifact_count" in data
assert "total_size_bytes" in data
assert "upload_count" in data
assert "deduplicated_uploads" in data
assert "storage_saved_bytes" in data
assert "deduplication_ratio" in data
@pytest.mark.integration
def test_package_stats_not_found(self, integration_client, test_project):
"""Test package stats returns 404 for non-existent package."""
response = integration_client.get(
f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
)
assert response.status_code == 404
class TestArtifactStats:
"""Tests for GET /api/v1/artifact/:id/stats endpoint."""
@pytest.mark.integration
def test_artifact_stats_returns_valid_response(
self, integration_client, test_package, unique_test_id
):
"""Test artifact stats returns expected fields."""
project, package = test_package
content = f"artifact stats test {unique_test_id}".encode()
expected_hash = compute_sha256(content)
# Upload artifact
upload_test_file(
integration_client, project, package, content, tag=f"art-{unique_test_id}"
)
# Get artifact stats
response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
assert response.status_code == 200
data = response.json()
assert "artifact_id" in data
assert "sha256" in data
assert "size" in data
assert "ref_count" in data
assert "storage_savings" in data
assert "tags" in data
assert "projects" in data
assert "packages" in data
@pytest.mark.integration
def test_artifact_stats_not_found(self, integration_client):
"""Test artifact stats returns 404 for non-existent artifact."""
fake_hash = "0" * 64
response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
assert response.status_code == 404
@pytest.mark.integration
def test_artifact_stats_shows_correct_projects(
self, integration_client, unique_test_id
):
"""Test artifact stats shows all projects using the artifact."""
content = f"multi-project artifact {unique_test_id}".encode()
expected_hash = compute_sha256(content)
proj1 = f"art-stats-a-{unique_test_id}"
proj2 = f"art-stats-b-{unique_test_id}"
try:
# Create projects and packages
integration_client.post(
"/api/v1/projects",
json={"name": proj1, "description": "Test", "is_public": True},
)
integration_client.post(
"/api/v1/projects",
json={"name": proj2, "description": "Test", "is_public": True},
)
integration_client.post(
f"/api/v1/project/{proj1}/packages",
json={"name": "pkg", "description": "Test"},
)
integration_client.post(
f"/api/v1/project/{proj2}/packages",
json={"name": "pkg", "description": "Test"},
)
# Upload same content to both projects
upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
upload_test_file(integration_client, proj2, "pkg", content, tag="v1")
# Check artifact stats
response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
assert response.status_code == 200
data = response.json()
assert len(data["projects"]) == 2
assert proj1 in data["projects"]
assert proj2 in data["projects"]
finally:
integration_client.delete(f"/api/v1/projects/{proj1}")
integration_client.delete(f"/api/v1/projects/{proj2}")

View File

View File

@@ -0,0 +1,271 @@
"""
Unit tests for SQLAlchemy models.
Tests cover:
- Model instantiation and defaults
- Property aliases (sha256, format_metadata)
- Relationship definitions
- Constraint definitions
"""
import pytest
import uuid
from datetime import datetime
class TestArtifactModel:
"""Tests for the Artifact model."""
@pytest.mark.unit
def test_artifact_sha256_property(self):
"""Test sha256 property is an alias for id."""
from app.models import Artifact
artifact = Artifact(
id="a" * 64,
size=1024,
created_by="test-user",
s3_key="fruits/aa/aa/test",
)
assert artifact.sha256 == artifact.id
assert artifact.sha256 == "a" * 64
@pytest.mark.unit
def test_artifact_format_metadata_alias(self):
"""Test format_metadata is an alias for artifact_metadata."""
from app.models import Artifact
test_metadata = {"format": "tarball", "version": "1.0.0"}
artifact = Artifact(
id="b" * 64,
size=2048,
created_by="test-user",
s3_key="fruits/bb/bb/test",
artifact_metadata=test_metadata,
)
assert artifact.format_metadata == test_metadata
assert artifact.format_metadata == artifact.artifact_metadata
@pytest.mark.unit
def test_artifact_format_metadata_setter(self):
"""Test format_metadata setter updates artifact_metadata."""
from app.models import Artifact
artifact = Artifact(
id="c" * 64,
size=512,
created_by="test-user",
s3_key="fruits/cc/cc/test",
)
new_metadata = {"type": "rpm", "arch": "x86_64"}
artifact.format_metadata = new_metadata
assert artifact.artifact_metadata == new_metadata
assert artifact.format_metadata == new_metadata
@pytest.mark.unit
def test_artifact_default_ref_count(self):
"""Test artifact ref_count column has default value of 1."""
from app.models import Artifact
# Check the column definition has the right default
ref_count_col = Artifact.__table__.columns["ref_count"]
assert ref_count_col.default is not None
assert ref_count_col.default.arg == 1
@pytest.mark.unit
def test_artifact_default_metadata_is_dict(self):
"""Test artifact default metadata is an empty dict."""
from app.models import Artifact
artifact = Artifact(
id="e" * 64,
size=100,
created_by="test-user",
s3_key="fruits/ee/ee/test",
)
# Default might be None until saved, but the column default is dict
assert artifact.artifact_metadata is None or isinstance(
artifact.artifact_metadata, dict
)
class TestProjectModel:
"""Tests for the Project model."""
@pytest.mark.unit
def test_project_default_is_public(self):
"""Test project is_public column has default value of True."""
from app.models import Project
# Check the column definition has the right default
is_public_col = Project.__table__.columns["is_public"]
assert is_public_col.default is not None
assert is_public_col.default.arg is True
@pytest.mark.unit
def test_project_uuid_generation(self):
"""Test project generates UUID by default."""
from app.models import Project
project = Project(
name="uuid-test-project",
created_by="test-user",
)
# UUID should be set by default function
assert project.id is not None or hasattr(Project.id, "default")
class TestPackageModel:
"""Tests for the Package model."""
@pytest.mark.unit
def test_package_default_format(self):
"""Test package format column has default value of 'generic'."""
from app.models import Package
# Check the column definition has the right default
format_col = Package.__table__.columns["format"]
assert format_col.default is not None
assert format_col.default.arg == "generic"
@pytest.mark.unit
def test_package_default_platform(self):
"""Test package platform column has default value of 'any'."""
from app.models import Package
# Check the column definition has the right default
platform_col = Package.__table__.columns["platform"]
assert platform_col.default is not None
assert platform_col.default.arg == "any"
class TestTagModel:
"""Tests for the Tag model."""
@pytest.mark.unit
def test_tag_requires_package_id(self):
"""Test tag requires package_id."""
from app.models import Tag
tag = Tag(
name="v1.0.0",
package_id=uuid.uuid4(),
artifact_id="f" * 64,
created_by="test-user",
)
assert tag.package_id is not None
assert tag.artifact_id == "f" * 64
class TestTagHistoryModel:
"""Tests for the TagHistory model."""
@pytest.mark.unit
def test_tag_history_default_change_type(self):
"""Test tag history change_type column has default value of 'update'."""
from app.models import TagHistory
# Check the column definition has the right default
change_type_col = TagHistory.__table__.columns["change_type"]
assert change_type_col.default is not None
assert change_type_col.default.arg == "update"
@pytest.mark.unit
def test_tag_history_allows_null_old_artifact(self):
"""Test tag history allows null old_artifact_id (for create events)."""
from app.models import TagHistory
history = TagHistory(
tag_id=uuid.uuid4(),
old_artifact_id=None,
new_artifact_id="h" * 64,
change_type="create",
changed_by="test-user",
)
assert history.old_artifact_id is None
class TestUploadModel:
"""Tests for the Upload model."""
@pytest.mark.unit
def test_upload_default_deduplicated_is_false(self):
"""Test upload deduplicated column has default value of False."""
from app.models import Upload
# Check the column definition has the right default
deduplicated_col = Upload.__table__.columns["deduplicated"]
assert deduplicated_col.default is not None
assert deduplicated_col.default.arg is False
@pytest.mark.unit
def test_upload_default_checksum_verified_is_true(self):
"""Test upload checksum_verified column has default value of True."""
from app.models import Upload
# Check the column definition has the right default
checksum_verified_col = Upload.__table__.columns["checksum_verified"]
assert checksum_verified_col.default is not None
assert checksum_verified_col.default.arg is True
class TestAccessPermissionModel:
"""Tests for the AccessPermission model."""
@pytest.mark.unit
def test_access_permission_levels(self):
"""Test valid access permission levels."""
from app.models import AccessPermission
# This tests the check constraint values
valid_levels = ["read", "write", "admin"]
for level in valid_levels:
permission = AccessPermission(
project_id=uuid.uuid4(),
user_id="test-user",
level=level,
)
assert permission.level == level
class TestAuditLogModel:
"""Tests for the AuditLog model."""
@pytest.mark.unit
def test_audit_log_required_fields(self):
"""Test audit log has all required fields."""
from app.models import AuditLog
log = AuditLog(
action="project.create",
resource="/projects/test-project",
user_id="test-user",
)
assert log.action == "project.create"
assert log.resource == "/projects/test-project"
assert log.user_id == "test-user"
@pytest.mark.unit
def test_audit_log_optional_details(self):
"""Test audit log can have optional details JSON."""
from app.models import AuditLog
details = {"old_value": "v1", "new_value": "v2"}
log = AuditLog(
action="tag.update",
resource="/projects/test/packages/pkg/tags/latest",
user_id="test-user",
details=details,
)
assert log.details == details

View File

@@ -0,0 +1,439 @@
"""
Unit tests for S3 storage layer.
Tests cover:
- SHA256 hash calculation and consistency
- Hash format validation (64-char hex)
- S3 key generation pattern
- Deduplication behavior (_exists method)
- Storage result computation (MD5, SHA1, size)
- Edge cases (empty files, large files, binary content)
"""
import pytest
import hashlib
import io
from tests.factories import (
compute_sha256,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_CONTENT_BINARY,
TEST_HASH_BINARY,
)
# =============================================================================
# Hash Computation Tests
# =============================================================================
class TestHashComputation:
"""Unit tests for hash calculation functionality."""
@pytest.mark.unit
def test_sha256_consistent_results(self):
"""Test SHA256 hash produces consistent results for identical content."""
content = b"test content for hashing"
# Compute hash multiple times
hash1 = compute_sha256(content)
hash2 = compute_sha256(content)
hash3 = compute_sha256(content)
assert hash1 == hash2 == hash3
@pytest.mark.unit
def test_sha256_different_content_different_hash(self):
"""Test SHA256 produces different hashes for different content."""
content1 = b"content version 1"
content2 = b"content version 2"
hash1 = compute_sha256(content1)
hash2 = compute_sha256(content2)
assert hash1 != hash2
@pytest.mark.unit
def test_sha256_format_64_char_hex(self):
"""Test SHA256 hash is always 64 character lowercase hexadecimal."""
test_cases = [
b"", # Empty
b"a", # Single char
b"Hello, World!", # Normal string
bytes(range(256)), # All byte values
b"x" * 10000, # Larger content
]
for content in test_cases:
hash_value = compute_sha256(content)
# Check length
assert len(hash_value) == 64, (
f"Hash length should be 64, got {len(hash_value)}"
)
# Check lowercase
assert hash_value == hash_value.lower(), "Hash should be lowercase"
# Check hexadecimal
assert all(c in "0123456789abcdef" for c in hash_value), (
"Hash should be hex"
)
@pytest.mark.unit
def test_sha256_known_value(self):
"""Test SHA256 produces expected hash for known input."""
assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO
@pytest.mark.unit
def test_sha256_binary_content(self):
"""Test SHA256 handles binary content correctly."""
assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY
# Test with null bytes
content_with_nulls = b"\x00\x00test\x00\x00"
hash_value = compute_sha256(content_with_nulls)
assert len(hash_value) == 64
@pytest.mark.unit
def test_sha256_streaming_computation(self):
"""Test SHA256 can be computed in chunks (streaming)."""
# Large content
chunk_size = 8192
total_size = chunk_size * 10 # 80KB
content = b"x" * total_size
# Direct computation
direct_hash = compute_sha256(content)
# Streaming computation
hasher = hashlib.sha256()
for i in range(0, total_size, chunk_size):
hasher.update(content[i : i + chunk_size])
streaming_hash = hasher.hexdigest()
assert direct_hash == streaming_hash
@pytest.mark.unit
def test_sha256_order_matters(self):
"""Test that content order affects hash (not just content set)."""
content1 = b"AB"
content2 = b"BA"
assert compute_sha256(content1) != compute_sha256(content2)
# =============================================================================
# Storage Hash Computation Tests
# =============================================================================
class TestStorageHashComputation:
"""Tests for hash computation in the storage layer."""
@pytest.mark.unit
def test_storage_computes_sha256(self, mock_storage):
"""Test storage layer correctly computes SHA256 hash."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
assert result.sha256 == TEST_HASH_HELLO
@pytest.mark.unit
def test_storage_computes_md5(self, mock_storage):
"""Test storage layer also computes MD5 hash."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_md5 = hashlib.md5(content).hexdigest()
assert result.md5 == expected_md5
@pytest.mark.unit
def test_storage_computes_sha1(self, mock_storage):
"""Test storage layer also computes SHA1 hash."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_sha1 = hashlib.sha1(content).hexdigest()
assert result.sha1 == expected_sha1
@pytest.mark.unit
def test_storage_returns_correct_size(self, mock_storage):
"""Test storage layer returns correct file size."""
content = b"test content with known size"
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
assert result.size == len(content)
@pytest.mark.unit
def test_storage_generates_correct_s3_key(self, mock_storage):
"""Test storage layer generates correct S3 key pattern."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
# Key should be: fruits/{hash[:2]}/{hash[2:4]}/{hash}
expected_key = (
f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
)
assert result.s3_key == expected_key
# =============================================================================
# Hash Edge Cases
# =============================================================================
class TestHashEdgeCases:
"""Edge case tests for hash computation."""
@pytest.mark.unit
def test_hash_empty_content_rejected(self, mock_storage):
"""Test that empty content is rejected."""
from app.storage import HashComputationError
file_obj = io.BytesIO(b"")
with pytest.raises(HashComputationError):
mock_storage._store_simple(file_obj)
@pytest.mark.unit
def test_hash_large_file_streaming(self, mock_storage):
"""Test hash computation for large files uses streaming."""
# Create a 10MB file
size = 10 * 1024 * 1024
content = b"x" * size
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_hash = compute_sha256(content)
assert result.sha256 == expected_hash
@pytest.mark.unit
def test_hash_special_bytes(self):
"""Test hash handles all byte values correctly."""
# All possible byte values
content = bytes(range(256))
hash_value = compute_sha256(content)
assert len(hash_value) == 64
assert hash_value == TEST_HASH_BINARY
# =============================================================================
# S3 Existence Check Tests
# =============================================================================
class TestExistsMethod:
"""Tests for the _exists() method that checks S3 object existence."""
@pytest.mark.unit
def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client):
"""Test _exists() returns True when object exists."""
# Pre-populate the mock storage
test_key = "fruits/df/fd/test-hash"
mock_s3_client.objects[test_key] = b"content"
result = mock_storage._exists(test_key)
assert result is True
@pytest.mark.unit
def test_exists_returns_false_for_nonexistent_key(self, mock_storage):
"""Test _exists() returns False when object doesn't exist."""
result = mock_storage._exists("fruits/no/ne/nonexistent-key")
assert result is False
@pytest.mark.unit
def test_exists_handles_404_error(self, mock_storage):
"""Test _exists() handles 404 errors gracefully."""
# The mock client raises ClientError for nonexistent keys
result = mock_storage._exists("fruits/xx/yy/does-not-exist")
assert result is False
# =============================================================================
# S3 Key Generation Tests
# =============================================================================
class TestS3KeyGeneration:
"""Tests for S3 key pattern generation."""
@pytest.mark.unit
def test_s3_key_pattern(self):
"""Test S3 key follows pattern: fruits/{hash[:2]}/{hash[2:4]}/{hash}"""
test_hash = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"
expected_key = f"fruits/{test_hash[:2]}/{test_hash[2:4]}/{test_hash}"
# Expected: fruits/ab/cd/abcdef1234567890...
assert expected_key == f"fruits/ab/cd/{test_hash}"
@pytest.mark.unit
def test_s3_key_generation_in_storage(self, mock_storage):
"""Test storage layer generates correct S3 key."""
content = TEST_CONTENT_HELLO
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
expected_key = (
f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
)
assert result.s3_key == expected_key
@pytest.mark.unit
def test_s3_key_uses_sha256_hash(self, mock_storage):
"""Test S3 key is derived from SHA256 hash."""
content = b"unique test content for key test"
file_obj = io.BytesIO(content)
expected_hash = compute_sha256(content)
result = mock_storage._store_simple(file_obj)
# Key should contain the hash
assert expected_hash in result.s3_key
# =============================================================================
# Deduplication Behavior Tests
# =============================================================================
class TestDeduplicationBehavior:
"""Tests for deduplication (skip upload when exists)."""
@pytest.mark.unit
def test_skips_upload_when_exists(self, mock_storage, mock_s3_client):
"""Test storage skips S3 upload when artifact already exists."""
content = TEST_CONTENT_HELLO
s3_key = (
f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
)
# Pre-populate storage (simulate existing artifact)
mock_s3_client.objects[s3_key] = content
# Track put_object calls
original_put = mock_s3_client.put_object
put_called = []
def tracked_put(*args, **kwargs):
put_called.append(True)
return original_put(*args, **kwargs)
mock_s3_client.put_object = tracked_put
# Store the same content
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
# put_object should NOT have been called (deduplication)
assert len(put_called) == 0
assert result.sha256 == TEST_HASH_HELLO
@pytest.mark.unit
def test_uploads_when_not_exists(self, mock_storage, mock_s3_client):
"""Test storage uploads to S3 when artifact doesn't exist."""
content = b"brand new unique content"
content_hash = compute_sha256(content)
s3_key = f"fruits/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}"
# Ensure object doesn't exist
assert s3_key not in mock_s3_client.objects
# Store the content
file_obj = io.BytesIO(content)
result = mock_storage._store_simple(file_obj)
# Object should now exist in mock storage
assert s3_key in mock_s3_client.objects
assert mock_s3_client.objects[s3_key] == content
@pytest.mark.unit
def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client):
"""Test storing same content twice returns same hash."""
content = b"content to be stored twice"
# First store
file1 = io.BytesIO(content)
result1 = mock_storage._store_simple(file1)
# Second store (duplicate)
file2 = io.BytesIO(content)
result2 = mock_storage._store_simple(file2)
assert result1.sha256 == result2.sha256
assert result1.s3_key == result2.s3_key
@pytest.mark.unit
def test_different_content_different_keys(self, mock_storage):
"""Test different content produces different S3 keys."""
content1 = b"first content"
content2 = b"second content"
file1 = io.BytesIO(content1)
result1 = mock_storage._store_simple(file1)
file2 = io.BytesIO(content2)
result2 = mock_storage._store_simple(file2)
assert result1.sha256 != result2.sha256
assert result1.s3_key != result2.s3_key
# =============================================================================
# Deduplication Edge Cases
# =============================================================================
class TestDeduplicationEdgeCases:
"""Edge case tests for deduplication."""
@pytest.mark.unit
def test_same_content_different_filenames(self, mock_storage):
"""Test same content with different metadata is deduplicated."""
content = b"identical content"
# Store with "filename1"
file1 = io.BytesIO(content)
result1 = mock_storage._store_simple(file1)
# Store with "filename2" (same content)
file2 = io.BytesIO(content)
result2 = mock_storage._store_simple(file2)
# Both should have same hash (content-addressable)
assert result1.sha256 == result2.sha256
@pytest.mark.unit
def test_whitespace_only_difference(self, mock_storage):
"""Test content differing only by whitespace produces different hashes."""
content1 = b"test content"
content2 = b"test content" # Extra space
content3 = b"test content " # Trailing space
file1 = io.BytesIO(content1)
file2 = io.BytesIO(content2)
file3 = io.BytesIO(content3)
result1 = mock_storage._store_simple(file1)
result2 = mock_storage._store_simple(file2)
result3 = mock_storage._store_simple(file3)
# All should be different (content-addressable)
assert len({result1.sha256, result2.sha256, result3.sha256}) == 3