Metadata database tracks all uploads with project, package, tag, and timestamp queryable via API

This commit is contained in:
Mondo Diaz
2026-01-07 12:31:44 -06:00
parent 81458b3bcb
commit 2f1891cf01
24 changed files with 5044 additions and 2123 deletions

View File

View File

@@ -0,0 +1,638 @@
"""
Integration tests for artifact API endpoints.
Tests cover:
- Artifact retrieval by ID
- Artifact stats endpoint
- Artifact provenance/history
- Artifact uploads listing
- Garbage collection endpoints
- Orphaned artifacts management
- Global uploads, artifacts, and tags listings
- Global audit logs listing
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestArtifactRetrieval:
    """Integration checks for fetching a single artifact by its hash."""

    @pytest.mark.integration
    def test_get_artifact_by_id(self, integration_client, test_package):
        """An uploaded artifact can be fetched back using its SHA256 as the ID."""
        project_name, package_name = test_package
        payload = b"artifact retrieval test"
        digest = compute_sha256(payload)
        upload_test_file(
            integration_client, project_name, package_name, payload, tag="v1"
        )

        resp = integration_client.get(f"/api/v1/artifact/{digest}")
        assert resp.status_code == 200

        body = resp.json()
        # The artifact is addressed by its content hash, so id and sha256 match.
        assert body["id"] == digest
        assert body["sha256"] == digest
        assert body["size"] == len(payload)
        for field in ("ref_count", "created_at"):
            assert field in body

    @pytest.mark.integration
    def test_get_nonexistent_artifact(self, integration_client):
        """Looking up a hash that was never uploaded yields 404."""
        missing_id = "a" * 64
        resp = integration_client.get(f"/api/v1/artifact/{missing_id}")
        assert resp.status_code == 404

    @pytest.mark.integration
    def test_artifact_includes_tags(self, integration_client, test_package):
        """The artifact payload lists at least one tag with its naming context."""
        project_name, package_name = test_package
        payload = b"artifact with tags test"
        digest = compute_sha256(payload)
        upload_test_file(
            integration_client, project_name, package_name, payload, tag="tagged-v1"
        )

        resp = integration_client.get(f"/api/v1/artifact/{digest}")
        assert resp.status_code == 200

        body = resp.json()
        assert "tags" in body
        assert len(body["tags"]) >= 1

        first_tag = body["tags"][0]
        for field in ("name", "package_name", "project_name"):
            assert field in first_tag
class TestArtifactStats:
    """Tests for artifact statistics endpoint."""

    @pytest.mark.integration
    def test_artifact_stats_returns_valid_response(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact stats returns expected fields."""
        project, package = test_package
        content = f"artifact stats test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project, package, content, tag=f"art-{unique_test_id}"
        )

        response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
        assert response.status_code == 200

        data = response.json()
        required_fields = (
            "artifact_id",
            "sha256",
            "size",
            "ref_count",
            "storage_savings",
            "tags",
            "projects",
            "packages",
        )
        for field in required_fields:
            assert field in data

    @pytest.mark.integration
    def test_artifact_stats_not_found(self, integration_client):
        """Test artifact stats returns 404 for non-existent artifact."""
        fake_hash = "0" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_stats_shows_correct_projects(
        self, integration_client, unique_test_id
    ):
        """Test artifact stats shows all projects using the artifact.

        The same content is uploaded to a package in two freshly created
        projects; the stats response must then list exactly those two
        projects. Both projects are deleted afterwards regardless of outcome.
        """
        content = f"multi-project artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        projects = (
            f"art-stats-a-{unique_test_id}",
            f"art-stats-b-{unique_test_id}",
        )
        try:
            # Create projects and packages. Each setup call is checked so that
            # a failed creation is reported here rather than as a confusing
            # mismatch in the stats assertions below.
            for project in projects:
                created = integration_client.post(
                    "/api/v1/projects",
                    json={"name": project, "description": "Test", "is_public": True},
                )
                assert created.status_code == 200
            for project in projects:
                created = integration_client.post(
                    f"/api/v1/project/{project}/packages",
                    json={"name": "pkg", "description": "Test"},
                )
                assert created.status_code == 200

            # Upload same content to both projects
            for project in projects:
                upload_test_file(integration_client, project, "pkg", content, tag="v1")

            # Check artifact stats
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
            assert response.status_code == 200
            data = response.json()
            assert len(data["projects"]) == 2
            for project in projects:
                assert project in data["projects"]
        finally:
            # Best-effort cleanup; runs even when setup or assertions fail.
            for project in projects:
                integration_client.delete(f"/api/v1/projects/{project}")
class TestArtifactProvenance:
    """Integration checks for the per-artifact history (provenance) endpoint."""

    @pytest.mark.integration
    def test_artifact_history_returns_200(self, integration_client, test_package):
        """History of a freshly uploaded artifact is served with status 200."""
        project_name, package_name = test_package
        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"provenance test content",
            "prov.txt",
        )
        history_url = f"/api/v1/artifact/{uploaded['artifact_id']}/history"

        resp = integration_client.get(history_url)
        assert resp.status_code == 200

    @pytest.mark.integration
    def test_artifact_history_has_required_fields(
        self, integration_client, test_package
    ):
        """The history payload exposes every expected top-level key."""
        project_name, package_name = test_package
        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"provenance fields test",
            "fields.txt",
        )
        history_url = f"/api/v1/artifact/{uploaded['artifact_id']}/history"

        resp = integration_client.get(history_url)
        assert resp.status_code == 200

        body = resp.json()
        expected_keys = (
            "artifact_id",
            "sha256",
            "size",
            "created_at",
            "created_by",
            "ref_count",
            "first_uploaded_at",
            "first_uploaded_by",
            "upload_count",
            "packages",
            "tags",
            "uploads",
        )
        for key in expected_keys:
            assert key in body

    @pytest.mark.integration
    def test_artifact_history_not_found(self, integration_client):
        """Requesting history for an unknown hash yields 404."""
        missing_id = "b" * 64
        resp = integration_client.get(f"/api/v1/artifact/{missing_id}/history")
        assert resp.status_code == 404

    @pytest.mark.integration
    def test_artifact_history_with_tag(self, integration_client, test_package):
        """A tagged upload shows up in the history's tag list with its context."""
        project_name, package_name = test_package
        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tagged provenance test",
            "tagged.txt",
            tag="v1.0.0",
        )
        history_url = f"/api/v1/artifact/{uploaded['artifact_id']}/history"

        resp = integration_client.get(history_url)
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["tags"]) >= 1
        first_tag = body["tags"][0]
        for key in ("project_name", "package_name", "tag_name"):
            assert key in first_tag
class TestArtifactUploads:
    """Integration checks for the per-artifact uploads listing."""

    @pytest.mark.integration
    def test_artifact_uploads_returns_200(self, integration_client, test_package):
        """After one upload, the listing is paginated and has at least one item."""
        project_name, package_name = test_package
        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact upload test",
            "artifact.txt",
        )
        uploads_url = f"/api/v1/artifact/{uploaded['artifact_id']}/uploads"

        resp = integration_client.get(uploads_url)
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body
        assert len(body["items"]) >= 1

    @pytest.mark.integration
    def test_artifact_uploads_not_found(self, integration_client):
        """Listing uploads for an unknown hash yields 404."""
        missing_id = "a" * 64
        resp = integration_client.get(f"/api/v1/artifact/{missing_id}/uploads")
        assert resp.status_code == 404
class TestOrphanedArtifacts:
    """Tests for orphaned artifacts management."""

    # Listing URL with a limit high enough to include the artifact under test.
    _WIDE_LISTING = "/api/v1/admin/orphaned-artifacts?limit=1000"

    @pytest.mark.integration
    def test_list_orphaned_artifacts_returns_list(self, integration_client):
        """Test orphaned artifacts endpoint returns a list."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts")
        assert response.status_code == 200
        assert isinstance(response.json(), list)

    @pytest.mark.integration
    def test_orphaned_artifact_has_required_fields(self, integration_client):
        """Test orphaned artifact response has required fields.

        The field checks only run when the listing is non-empty; an empty
        listing passes trivially.
        """
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
        assert response.status_code == 200
        data = response.json()
        if data:
            artifact = data[0]
            for field in ("id", "size", "created_at", "created_by", "original_name"):
                assert field in artifact

    @pytest.mark.integration
    def test_orphaned_artifacts_respects_limit(self, integration_client):
        """Test orphaned artifacts endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
        assert response.status_code == 200
        assert len(response.json()) <= 5

    @pytest.mark.integration
    def test_artifact_becomes_orphaned_when_tag_deleted(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact appears in orphaned list after tag is deleted."""
        project, package = test_package
        content = f"orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag
        upload_test_file(integration_client, project, package, content, tag="temp-tag")

        # Verify not in orphaned list. The status is checked first so an error
        # response is reported as such instead of failing inside .json().
        response = integration_client.get(self._WIDE_LISTING)
        assert response.status_code == 200
        orphaned_ids = {a["id"] for a in response.json()}
        assert expected_hash not in orphaned_ids

        # Delete the tag. Any 2xx counts as success; the exact code returned
        # by this endpoint is not pinned here.
        delete_response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/tags/temp-tag"
        )
        assert 200 <= delete_response.status_code < 300

        # Verify now in orphaned list
        response = integration_client.get(self._WIDE_LISTING)
        assert response.status_code == 200
        orphaned_ids = {a["id"] for a in response.json()}
        assert expected_hash in orphaned_ids
class TestGarbageCollection:
    """Integration checks for the admin garbage-collect endpoint.

    Every request in this class is issued with dry_run=true.
    """

    @pytest.mark.integration
    def test_garbage_collect_dry_run_returns_response(self, integration_client):
        """A dry run reports the expected summary keys and echoes dry_run."""
        resp = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert resp.status_code == 200

        summary = resp.json()
        for key in ("artifacts_deleted", "bytes_freed", "artifact_ids", "dry_run"):
            assert key in summary
        assert summary["dry_run"] is True

    @pytest.mark.integration
    def test_garbage_collect_dry_run_doesnt_delete(
        self, integration_client, test_package, unique_test_id
    ):
        """A dry run lists an orphaned artifact but leaves it retrievable."""
        project, package = test_package
        payload = f"dry run test {unique_test_id}".encode()
        digest = compute_sha256(payload)
        artifact_url = f"/api/v1/artifact/{digest}"

        # Orphan the artifact: upload it under a tag, then remove that tag.
        upload_test_file(integration_client, project, package, payload, tag="dry-run")
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")

        # The artifact is present before collection runs.
        before = integration_client.get(artifact_url)
        assert before.status_code == 200

        gc_resp = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_resp.status_code == 200
        assert digest in gc_resp.json()["artifact_ids"]

        # ...and is still present afterwards.
        after = integration_client.get(artifact_url)
        assert after.status_code == 200

    @pytest.mark.integration
    def test_garbage_collect_preserves_referenced_artifacts(
        self, integration_client, test_package, unique_test_id
    ):
        """An artifact that still has a tag is not listed for collection."""
        project, package = test_package
        payload = f"preserve test {unique_test_id}".encode()
        digest = compute_sha256(payload)
        artifact_url = f"/api/v1/artifact/{digest}"

        # One tagged upload, so the artifact has a single reference.
        upload_test_file(integration_client, project, package, payload, tag="keep-this")

        before = integration_client.get(artifact_url)
        assert before.status_code == 200
        assert before.json()["ref_count"] == 1

        # Dry run only, so other tests' data is left untouched.
        gc_resp = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_resp.status_code == 200
        assert digest not in gc_resp.json()["artifact_ids"]

        after = integration_client.get(artifact_url)
        assert after.status_code == 200
        assert after.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_garbage_collect_respects_limit(self, integration_client):
        """The reported deletion count never exceeds the requested limit."""
        resp = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=5"
        )
        assert resp.status_code == 200
        assert resp.json()["artifacts_deleted"] <= 5

    @pytest.mark.integration
    def test_garbage_collect_returns_bytes_freed(self, integration_client):
        """bytes_freed is a non-negative integer."""
        resp = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert resp.status_code == 200

        freed = resp.json()["bytes_freed"]
        assert freed >= 0
        assert isinstance(freed, int)
class TestGlobalUploads:
    """Integration checks for the cross-project uploads listing."""

    @pytest.mark.integration
    def test_global_uploads_returns_200(self, integration_client):
        """The listing responds 200 with items and pagination sections."""
        resp = integration_client.get("/api/v1/uploads")
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_global_uploads_pagination(self, integration_client):
        """limit and page are honoured and echoed back in pagination."""
        resp = integration_client.get("/api/v1/uploads?limit=5&page=1")
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["items"]) <= 5
        paging = body["pagination"]
        assert paging["limit"] == 5
        assert paging["page"] == 1

    @pytest.mark.integration
    def test_global_uploads_filter_by_project(self, integration_client, test_package):
        """With a project filter, every returned item belongs to that project."""
        project_name, package_name = test_package
        # Upload once so the project has something to list.
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"global filter test",
            "global.txt",
        )

        resp = integration_client.get(f"/api/v1/uploads?project={project_name}")
        assert resp.status_code == 200

        for entry in resp.json()["items"]:
            assert entry["project_name"] == project_name

    @pytest.mark.integration
    def test_global_uploads_has_more_field(self, integration_client):
        """pagination carries a boolean has_more flag."""
        resp = integration_client.get("/api/v1/uploads?limit=1")
        assert resp.status_code == 200

        paging = resp.json()["pagination"]
        assert "has_more" in paging
        assert isinstance(paging["has_more"], bool)
class TestGlobalArtifacts:
    """Integration checks for the cross-project artifacts listing."""

    @pytest.mark.integration
    def test_global_artifacts_returns_200(self, integration_client):
        """The listing responds 200 with items and pagination sections."""
        resp = integration_client.get("/api/v1/artifacts")
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_global_artifacts_pagination(self, integration_client):
        """The limit parameter caps the page size and is echoed back."""
        resp = integration_client.get("/api/v1/artifacts?limit=5&page=1")
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["items"]) <= 5
        assert body["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_artifacts_filter_by_size(self, integration_client):
        """With min_size/max_size set, every item's size lies inside the range."""
        resp = integration_client.get(
            "/api/v1/artifacts?min_size=1&max_size=1000000"
        )
        assert resp.status_code == 200

        for entry in resp.json()["items"]:
            assert 1 <= entry["size"] <= 1000000

    @pytest.mark.integration
    def test_global_artifacts_sort_by_size(self, integration_client):
        """Descending size sort returns items largest-first."""
        resp = integration_client.get("/api/v1/artifacts?sort=size&order=desc")
        assert resp.status_code == 200

        entries = resp.json()["items"]
        # Ordering is only checked when there are at least two items.
        if len(entries) > 1:
            observed = [entry["size"] for entry in entries]
            assert observed == sorted(observed, reverse=True)

    @pytest.mark.integration
    def test_global_artifacts_invalid_sort_returns_400(self, integration_client):
        """An unrecognised sort field is rejected with 400."""
        resp = integration_client.get("/api/v1/artifacts?sort=invalid_field")
        assert resp.status_code == 400
class TestGlobalTags:
    """Integration checks for the cross-project tags listing."""

    @pytest.mark.integration
    def test_global_tags_returns_200(self, integration_client):
        """The listing responds 200 with items and pagination sections."""
        resp = integration_client.get("/api/v1/tags")
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_global_tags_pagination(self, integration_client):
        """The limit parameter caps the page size and is echoed back."""
        resp = integration_client.get("/api/v1/tags?limit=5&page=1")
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["items"]) <= 5
        assert body["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_tags_has_project_context(self, integration_client):
        """A returned tag names its project, package and artifact."""
        resp = integration_client.get("/api/v1/tags?limit=1")
        assert resp.status_code == 200

        entries = resp.json()["items"]
        # Only inspected when the listing is non-empty.
        if len(entries) > 0:
            first = entries[0]
            for key in ("project_name", "package_name", "artifact_id"):
                assert key in first

    @pytest.mark.integration
    def test_global_tags_search_with_wildcard(self, integration_client):
        """A wildcard search term is accepted without an error status."""
        resp = integration_client.get("/api/v1/tags?search=v*")
        # Only the status is checked; the matching results depend on existing data.
        assert resp.status_code == 200
class TestAuditLogs:
    """Tests for global audit logs endpoint."""

    @pytest.mark.integration
    def test_list_audit_logs_returns_valid_response(self, integration_client):
        """Test audit logs endpoint returns valid paginated response."""
        response = integration_client.get("/api/v1/audit-logs")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data
        assert isinstance(data["items"], list)

        pagination = data["pagination"]
        for field in ("page", "limit", "total", "total_pages"):
            assert field in pagination

    @pytest.mark.integration
    def test_audit_logs_respects_pagination(self, integration_client):
        """Test audit logs endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/audit-logs?limit=5")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_audit_logs_filter_by_action(self, integration_client, test_package):
        """Test filtering audit logs by action type.

        The ``test_package`` fixture is requested but its value is not used.
        NOTE(review): presumably it is there so that at least one
        ``project.create`` entry exists before querying -- confirm against
        the fixture.
        """
        response = integration_client.get("/api/v1/audit-logs?action=project.create")
        assert response.status_code == 200

        data = response.json()
        for item in data["items"]:
            assert item["action"] == "project.create"

    @pytest.mark.integration
    def test_audit_log_entry_has_required_fields(
        self, integration_client, test_project
    ):
        """Test audit log entries have all required fields.

        Field checks run against the first entry only, and only when the
        listing is non-empty.
        """
        response = integration_client.get("/api/v1/audit-logs?limit=10")
        assert response.status_code == 200

        data = response.json()
        if data["items"]:
            item = data["items"][0]
            for field in ("id", "action", "resource", "user_id", "timestamp"):
                assert field in item

View File

@@ -0,0 +1,345 @@
"""
Integration tests for package API endpoints.
Tests cover:
- Package CRUD operations
- Package listing with pagination, search, filtering
- Package stats endpoint
- Package-level audit logs
- Cascade delete behavior
- Package-level uploads listing
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestPackageCRUD:
    """Integration checks for creating, reading, listing and deleting packages."""

    @pytest.mark.integration
    def test_create_package(self, integration_client, test_project, unique_test_id):
        """Creating a package echoes back the submitted attributes."""
        package_name = f"test-create-pkg-{unique_test_id}"
        new_package = {
            "name": package_name,
            "description": "Test package",
            "format": "npm",
            "platform": "linux",
        }

        resp = integration_client.post(
            f"/api/v1/project/{test_project}/packages", json=new_package
        )
        assert resp.status_code == 200

        body = resp.json()
        assert body["name"] == package_name
        assert body["description"] == "Test package"
        assert body["format"] == "npm"
        assert body["platform"] == "linux"

    @pytest.mark.integration
    def test_get_package(self, integration_client, test_package):
        """An existing package can be fetched by project and package name."""
        project_name, package_name = test_package
        package_url = f"/api/v1/project/{project_name}/packages/{package_name}"

        resp = integration_client.get(package_url)
        assert resp.status_code == 200
        assert resp.json()["name"] == package_name

    @pytest.mark.integration
    def test_get_nonexistent_package(self, integration_client, test_project):
        """Fetching an unknown package inside an existing project yields 404."""
        resp = integration_client.get(
            f"/api/v1/project/{test_project}/packages/nonexistent-pkg"
        )
        assert resp.status_code == 404

    @pytest.mark.integration
    def test_list_packages(self, integration_client, test_package):
        """The project's package listing contains the fixture package."""
        project_name, package_name = test_package

        resp = integration_client.get(f"/api/v1/project/{project_name}/packages")
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body
        listed_names = [entry["name"] for entry in body["items"]]
        assert package_name in listed_names

    @pytest.mark.integration
    def test_delete_package(self, integration_client, test_project, unique_test_id):
        """Deleting a package returns 204 and the package is then gone."""
        package_name = f"test-delete-pkg-{unique_test_id}"
        package_url = f"/api/v1/project/{test_project}/packages/{package_name}"

        # Set up the package that will be removed.
        integration_client.post(
            f"/api/v1/project/{test_project}/packages",
            json={"name": package_name, "description": "To be deleted"},
        )

        deleted = integration_client.delete(package_url)
        assert deleted.status_code == 204

        # A follow-up read must no longer find it.
        lookup = integration_client.get(package_url)
        assert lookup.status_code == 404
class TestPackageListingFilters:
    """Integration checks for pagination and filters on the package listing."""

    @pytest.mark.integration
    def test_packages_pagination(self, integration_client, test_project):
        """page and limit are honoured and echoed back in pagination."""
        resp = integration_client.get(
            f"/api/v1/project/{test_project}/packages?page=1&limit=5"
        )
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["items"]) <= 5
        paging = body["pagination"]
        assert paging["limit"] == 5
        assert paging["page"] == 1

    @pytest.mark.integration
    def test_packages_filter_by_format(
        self, integration_client, test_project, unique_test_id
    ):
        """With format=npm, every listed package has the npm format."""
        packages_url = f"/api/v1/project/{test_project}/packages"
        # Create one npm package so the project has something to filter.
        integration_client.post(
            packages_url,
            json={"name": f"npm-pkg-{unique_test_id}", "format": "npm"},
        )

        resp = integration_client.get(
            f"/api/v1/project/{test_project}/packages?format=npm"
        )
        assert resp.status_code == 200

        for entry in resp.json()["items"]:
            assert entry["format"] == "npm"

    @pytest.mark.integration
    def test_packages_filter_by_platform(
        self, integration_client, test_project, unique_test_id
    ):
        """With platform=linux, every listed package has the linux platform."""
        packages_url = f"/api/v1/project/{test_project}/packages"
        # Create one linux package so the project has something to filter.
        integration_client.post(
            packages_url,
            json={"name": f"linux-pkg-{unique_test_id}", "platform": "linux"},
        )

        resp = integration_client.get(
            f"/api/v1/project/{test_project}/packages?platform=linux"
        )
        assert resp.status_code == 200

        for entry in resp.json()["items"]:
            assert entry["platform"] == "linux"
class TestPackageStats:
    """Integration checks for the per-package statistics endpoint."""

    @pytest.mark.integration
    def test_package_stats_returns_valid_response(
        self, integration_client, test_package
    ):
        """The stats payload exposes every expected key."""
        project, package = test_package

        resp = integration_client.get(
            f"/api/v1/project/{project}/packages/{package}/stats"
        )
        assert resp.status_code == 200

        stats = resp.json()
        expected_keys = (
            "package_id",
            "package_name",
            "project_name",
            "tag_count",
            "artifact_count",
            "total_size_bytes",
            "upload_count",
            "deduplicated_uploads",
            "storage_saved_bytes",
            "deduplication_ratio",
        )
        for key in expected_keys:
            assert key in stats

    @pytest.mark.integration
    def test_package_stats_not_found(self, integration_client, test_project):
        """Stats for an unknown package inside an existing project yield 404."""
        resp = integration_client.get(
            f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
        )
        assert resp.status_code == 404
class TestPackageAuditLogs:
    """Integration checks for the per-package audit log listing."""

    @pytest.mark.integration
    def test_package_audit_logs_returns_200(self, integration_client, test_package):
        """The listing responds 200 with items and pagination sections."""
        project_name, package_name = test_package

        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/audit-logs"
        )
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_package_audit_logs_project_not_found(self, integration_client):
        """An unknown project name yields 404."""
        resp = integration_client.get(
            "/api/v1/project/nonexistent/nonexistent/audit-logs"
        )
        assert resp.status_code == 404

    @pytest.mark.integration
    def test_package_audit_logs_package_not_found(
        self, integration_client, test_project
    ):
        """An unknown package inside an existing project yields 404."""
        resp = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-package/audit-logs"
        )
        assert resp.status_code == 404
class TestPackageCascadeDelete:
    """Tests for cascade delete behavior when deleting packages."""

    @pytest.mark.integration
    def test_ref_count_decrements_on_package_delete(
        self, integration_client, unique_test_id
    ):
        """Test ref_count decrements for all tags when package is deleted.

        The same content is uploaded under three tags of one package, so the
        artifact's ref_count is expected to be 3; deleting the package must
        bring it back to 0. The dedicated project is removed in a ``finally``
        block so that a failing assertion does not leave it behind.
        """
        project_name = f"cascade-pkg-{unique_test_id}"
        package_name = f"test-pkg-{unique_test_id}"

        # Create project
        response = integration_client.post(
            "/api/v1/projects",
            json={
                "name": project_name,
                "description": "Test project",
                "is_public": True,
            },
        )
        assert response.status_code == 200

        try:
            # Create package
            response = integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": package_name, "description": "Test package"},
            )
            assert response.status_code == 200

            # Upload content with multiple tags
            content = f"cascade delete test {unique_test_id}".encode()
            expected_hash = compute_sha256(content)
            for tag in ("v1", "v2", "v3"):
                upload_test_file(
                    integration_client, project_name, package_name, content, tag=tag
                )

            # Verify ref_count is 3
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
            assert response.json()["ref_count"] == 3

            # Delete the package
            delete_response = integration_client.delete(
                f"/api/v1/project/{project_name}/packages/{package_name}"
            )
            assert delete_response.status_code == 204

            # Verify ref_count is 0
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
            assert response.json()["ref_count"] == 0
        finally:
            # Cleanup runs whether or not the assertions above passed.
            integration_client.delete(f"/api/v1/projects/{project_name}")
class TestPackageUploads:
    """Integration checks for the per-package uploads listing."""

    @pytest.mark.integration
    def test_package_uploads_returns_200(self, integration_client, test_package):
        """The listing responds 200 with items and pagination sections."""
        project_name, package_name = test_package

        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/uploads"
        )
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_package_uploads_after_upload(self, integration_client, test_package):
        """After a file upload, the listing has an entry with the expected keys."""
        project_name, package_name = test_package

        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"test upload content",
            "test.txt",
        )
        assert uploaded["artifact_id"]

        resp = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/uploads"
        )
        assert resp.status_code == 200

        entries = resp.json()["items"]
        assert len(entries) >= 1

        # Inspect the first record for the expected keys.
        first = entries[0]
        expected_keys = (
            "artifact_id",
            "package_name",
            "project_name",
            "uploaded_at",
            "uploaded_by",
        )
        for key in expected_keys:
            assert key in first

    @pytest.mark.integration
    def test_package_uploads_project_not_found(self, integration_client):
        """An unknown project name yields 404."""
        resp = integration_client.get(
            "/api/v1/project/nonexistent/nonexistent/uploads"
        )
        assert resp.status_code == 404

View File

@@ -0,0 +1,322 @@
"""
Integration tests for project API endpoints.
Tests cover:
- Project CRUD operations
- Project listing with pagination, search, and sorting
- Project stats endpoint
- Project-level audit logs
- Cascade delete behavior
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestProjectCRUD:
    """Integration checks for creating, reading, listing and deleting projects."""

    @pytest.mark.integration
    def test_create_project(self, integration_client, unique_test_id):
        """Creating a project echoes its attributes and adds id/created_at."""
        project_name = f"test-create-{unique_test_id}"
        new_project = {
            "name": project_name,
            "description": "Test project",
            "is_public": True,
        }
        try:
            resp = integration_client.post("/api/v1/projects", json=new_project)
            assert resp.status_code == 200

            body = resp.json()
            assert body["name"] == project_name
            assert body["description"] == "Test project"
            assert body["is_public"] is True
            for key in ("id", "created_at"):
                assert key in body
        finally:
            # Remove the project whether or not the assertions passed.
            integration_client.delete(f"/api/v1/projects/{project_name}")

    @pytest.mark.integration
    def test_get_project(self, integration_client, test_project):
        """An existing project can be fetched by name."""
        resp = integration_client.get(f"/api/v1/projects/{test_project}")
        assert resp.status_code == 200
        assert resp.json()["name"] == test_project

    @pytest.mark.integration
    def test_get_nonexistent_project(self, integration_client):
        """Fetching an unknown project name yields 404."""
        resp = integration_client.get("/api/v1/projects/nonexistent-project-xyz")
        assert resp.status_code == 404

    @pytest.mark.integration
    def test_list_projects(self, integration_client, test_project):
        """The project listing contains the fixture project."""
        resp = integration_client.get("/api/v1/projects")
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body
        listed_names = [entry["name"] for entry in body["items"]]
        assert test_project in listed_names

    @pytest.mark.integration
    def test_delete_project(self, integration_client, unique_test_id):
        """Deleting a project returns 204 and the project is then gone."""
        project_name = f"test-delete-{unique_test_id}"
        project_url = f"/api/v1/projects/{project_name}"

        # Set up the project that will be removed.
        integration_client.post(
            "/api/v1/projects",
            json={"name": project_name, "description": "To be deleted"},
        )

        deleted = integration_client.delete(project_url)
        assert deleted.status_code == 204

        # A follow-up read must no longer find it.
        lookup = integration_client.get(project_url)
        assert lookup.status_code == 404
class TestProjectListingFilters:
    """Project listing: pagination, search and sort query parameters."""

    @pytest.mark.integration
    def test_projects_pagination(self, integration_client):
        """A page/limit query caps the item count and is echoed in `pagination`."""
        resp = integration_client.get("/api/v1/projects?page=1&limit=5")
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["items"]) <= 5
        assert body["pagination"]["limit"] == 5
        assert body["pagination"]["page"] == 1
        assert "has_more" in body["pagination"]

    @pytest.mark.integration
    def test_projects_search(self, integration_client, test_project):
        """Searching by the first ten characters of the name finds the project."""
        prefix = test_project[:10]
        resp = integration_client.get(f"/api/v1/projects?search={prefix}")
        assert resp.status_code == 200

        body = resp.json()
        # The fixture project must be among the matches.
        matched_names = [entry["name"] for entry in body["items"]]
        assert test_project in matched_names

    @pytest.mark.integration
    def test_projects_sort_by_name(self, integration_client):
        """With sort=name&order=asc the returned names are in ascending order."""
        resp = integration_client.get("/api/v1/projects?sort=name&order=asc")
        assert resp.status_code == 200

        body = resp.json()
        returned = [entry["name"] for entry in body["items"]]
        assert returned == sorted(returned)
class TestProjectStats:
    """Project statistics endpoint."""

    @pytest.mark.integration
    def test_project_stats_returns_valid_response(
        self, integration_client, test_project
    ):
        """The stats payload for an existing project carries every expected key."""
        expected_fields = (
            "project_id",
            "project_name",
            "package_count",
            "tag_count",
            "artifact_count",
            "total_size_bytes",
            "upload_count",
            "deduplicated_uploads",
            "storage_saved_bytes",
            "deduplication_ratio",
        )

        resp = integration_client.get(f"/api/v1/projects/{test_project}/stats")
        assert resp.status_code == 200

        stats = resp.json()
        # Only presence is checked; values depend on server state.
        for field in expected_fields:
            assert field in stats

    @pytest.mark.integration
    def test_project_stats_not_found(self, integration_client):
        """Stats for a project that does not exist return 404."""
        resp = integration_client.get("/api/v1/projects/nonexistent-project/stats")
        assert resp.status_code == 404
class TestProjectAuditLogs:
    """Project-level audit logs endpoint."""

    @pytest.mark.integration
    def test_project_audit_logs_returns_200(self, integration_client, test_project):
        """Audit logs for an existing project come back as a paged envelope."""
        logs_url = f"/api/v1/projects/{test_project}/audit-logs"
        resp = integration_client.get(logs_url)
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_project_audit_logs_not_found(self, integration_client):
        """Audit logs for an unknown project return 404."""
        logs_url = "/api/v1/projects/nonexistent-project/audit-logs"
        resp = integration_client.get(logs_url)
        assert resp.status_code == 404
class TestProjectCascadeDelete:
    """Tests for cascade delete behavior when deleting projects."""

    @pytest.mark.integration
    def test_project_delete_cascades_to_packages(
        self, integration_client, unique_test_id
    ):
        """Deleting a project also makes its package unreachable.

        Every setup and teardown response is asserted so a failure points at
        the step that actually broke. If anything fails, the project is
        deleted on a best-effort basis before the error is re-raised.
        """
        project_name = f"cascade-proj-{unique_test_id}"
        package_name = f"cascade-pkg-{unique_test_id}"
        project_url = f"/api/v1/projects/{project_name}"
        package_url = f"/api/v1/project/{project_name}/packages/{package_name}"

        try:
            # Create project and package
            response = integration_client.post(
                "/api/v1/projects",
                json={"name": project_name, "description": "Test", "is_public": True},
            )
            assert response.status_code == 200

            response = integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": package_name, "description": "Test package"},
            )
            assert response.status_code == 200

            # Verify package exists
            response = integration_client.get(package_url)
            assert response.status_code == 200

            # Delete project
            response = integration_client.delete(project_url)
            assert response.status_code == 204

            # Verify project is deleted ...
            response = integration_client.get(project_url)
            assert response.status_code == 404

            # ... and that the package went with it. This is the behavior the
            # test is named after, so it is checked directly.
            response = integration_client.get(package_url)
            assert response.status_code == 404
        except Exception:
            # Cleanup if test fails
            integration_client.delete(project_url)
            raise

    @pytest.mark.integration
    def test_ref_count_decrements_on_project_delete(
        self, integration_client, unique_test_id
    ):
        """Deleting a project drops the artifact's ref_count from 4 to 0.

        The same content is uploaded under two tags in each of two packages
        of one project; the test expects one reference per tag.
        """
        project_name = f"cascade-proj-{unique_test_id}"
        package1_name = f"pkg1-{unique_test_id}"
        package2_name = f"pkg2-{unique_test_id}"
        project_url = f"/api/v1/projects/{project_name}"

        try:
            # Create project
            response = integration_client.post(
                "/api/v1/projects",
                json={
                    "name": project_name,
                    "description": "Test project",
                    "is_public": True,
                },
            )
            assert response.status_code == 200

            # Create two packages
            for pkg_name in (package1_name, package2_name):
                response = integration_client.post(
                    f"/api/v1/project/{project_name}/packages",
                    json={"name": pkg_name, "description": "Test package"},
                )
                assert response.status_code == 200

            # Upload same content with tags in both packages
            content = f"project cascade test {unique_test_id}".encode()
            expected_hash = compute_sha256(content)
            artifact_url = f"/api/v1/artifact/{expected_hash}"

            for pkg_name, tag in (
                (package1_name, "v1"),
                (package1_name, "v2"),
                (package2_name, "latest"),
                (package2_name, "stable"),
            ):
                upload_test_file(
                    integration_client, project_name, pkg_name, content, tag=tag
                )

            # Verify ref_count is 4 (2 tags in each of 2 packages)
            response = integration_client.get(artifact_url)
            assert response.status_code == 200
            assert response.json()["ref_count"] == 4

            # Delete the project
            delete_response = integration_client.delete(project_url)
            assert delete_response.status_code == 204

            # Verify ref_count is 0
            response = integration_client.get(artifact_url)
            assert response.status_code == 200
            assert response.json()["ref_count"] == 0
        except Exception:
            # Do not leave the project behind when the test fails part-way.
            integration_client.delete(project_url)
            raise
class TestProjectUploads:
    """Project-level uploads endpoint."""

    @pytest.mark.integration
    def test_project_uploads_returns_200(self, integration_client, test_project):
        """Uploads for an existing project come back as a paged envelope."""
        resp = integration_client.get(f"/api/v1/project/{test_project}/uploads")
        assert resp.status_code == 200

        body = resp.json()
        for key in ("items", "pagination"):
            assert key in body

    @pytest.mark.integration
    def test_project_uploads_after_upload(self, integration_client, test_package):
        """After one upload the project's listing is non-empty and scoped to it."""
        project_name, package_name = test_package

        # Upload a file
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"project uploads test",
            "project.txt",
        )

        resp = integration_client.get(f"/api/v1/project/{project_name}/uploads")
        assert resp.status_code == 200

        uploads = resp.json()["items"]
        assert len(uploads) >= 1
        # Every listed upload must belong to this project.
        assert all(entry["project_name"] == project_name for entry in uploads)

    @pytest.mark.integration
    def test_project_uploads_not_found(self, integration_client):
        """Uploads for an unknown project return 404."""
        resp = integration_client.get("/api/v1/project/nonexistent/uploads")
        assert resp.status_code == 404

View File

@@ -0,0 +1,403 @@
"""
Integration tests for tag API endpoints.
Tests cover:
- Tag CRUD operations
- Tag listing with pagination and search
- Tag history tracking
- ref_count behavior with tag operations
"""
import pytest
from tests.factories import compute_sha256, upload_test_file
class TestTagCRUD:
    """Create, read, list and delete operations on tags."""

    @pytest.mark.integration
    def test_create_tag_via_upload(self, integration_client, test_package):
        """Uploading with a tag name returns that tag and a non-empty artifact id."""
        project_name, package_name = test_package

        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tag create test",
            tag="v1.0.0",
        )

        assert uploaded["tag"] == "v1.0.0"
        assert uploaded["artifact_id"]

    @pytest.mark.integration
    def test_create_tag_via_post(
        self, integration_client, test_package, unique_test_id
    ):
        """POSTing to the tags collection creates a tag for an uploaded artifact."""
        project_name, package_name = test_package

        # Upload an artifact (no tag) to obtain an id to point the tag at.
        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact for tag",
        )
        artifact_id = uploaded["artifact_id"]

        # Create the tag through the dedicated endpoint.
        tag_name = f"post-tag-{unique_test_id}"
        new_tag = {"name": tag_name, "artifact_id": artifact_id}
        resp = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json=new_tag,
        )
        assert resp.status_code == 200

        created = resp.json()
        assert created["name"] == tag_name
        assert created["artifact_id"] == artifact_id

    @pytest.mark.integration
    def test_get_tag(self, integration_client, test_package):
        """Fetching a tag by name returns the tag plus artifact metadata keys."""
        project_name, package_name = test_package
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"get tag test",
            tag="get-tag",
        )

        tag_url = f"/api/v1/project/{project_name}/{package_name}/tags/get-tag"
        resp = integration_client.get(tag_url)
        assert resp.status_code == 200

        tag = resp.json()
        assert tag["name"] == "get-tag"
        for field in ("artifact_id", "artifact_size", "artifact_content_type"):
            assert field in tag

    @pytest.mark.integration
    def test_list_tags(self, integration_client, test_package):
        """A tag created by upload appears in the package's tag listing."""
        project_name, package_name = test_package

        # Create a tag to look for.
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"list tags test",
            tag="list-v1",
        )

        tags_url = f"/api/v1/project/{project_name}/{package_name}/tags"
        resp = integration_client.get(tags_url)
        assert resp.status_code == 200

        listing = resp.json()
        for key in ("items", "pagination"):
            assert key in listing

        listed_names = [entry["name"] for entry in listing["items"]]
        assert "list-v1" in listed_names

    @pytest.mark.integration
    def test_delete_tag(self, integration_client, test_package):
        """Deleting a tag returns 204 and a later fetch of it returns 404."""
        project_name, package_name = test_package
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"delete tag test",
            tag="to-delete",
        )
        tag_url = f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"

        # Delete tag
        resp = integration_client.delete(tag_url)
        assert resp.status_code == 204

        # Verify deleted
        resp = integration_client.get(tag_url)
        assert resp.status_code == 404
class TestTagListingFilters:
    """Tag listing: limit and search query parameters."""

    @pytest.mark.integration
    def test_tags_pagination(self, integration_client, test_package):
        """A limit query caps the item count and is echoed in `pagination`."""
        project_name, package_name = test_package

        tags_url = f"/api/v1/project/{project_name}/{package_name}/tags?limit=5"
        resp = integration_client.get(tags_url)
        assert resp.status_code == 200

        body = resp.json()
        assert len(body["items"]) <= 5
        assert body["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_tags_search(self, integration_client, test_package, unique_test_id):
        """Searching for the shared prefix finds the uniquely named tag."""
        project_name, package_name = test_package
        tag_name = f"searchable-{unique_test_id}"

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"search test",
            tag=tag_name,
        )

        search_url = (
            f"/api/v1/project/{project_name}/{package_name}/tags?search=searchable"
        )
        resp = integration_client.get(search_url)
        assert resp.status_code == 200

        body = resp.json()
        matched_names = [entry["name"] for entry in body["items"]]
        assert tag_name in matched_names
class TestTagHistory:
    """Tag history endpoint after tag creation and re-pointing."""

    @pytest.mark.integration
    def test_tag_history_on_create(self, integration_client, test_package):
        """A freshly created tag has at least one history entry."""
        project_name, package_name = test_package
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"history create test",
            tag="history-create",
        )

        history_url = (
            f"/api/v1/project/{project_name}/{package_name}/tags/history-create/history"
        )
        resp = integration_client.get(history_url)
        assert resp.status_code == 200

        entries = resp.json()
        assert len(entries) >= 1

    @pytest.mark.integration
    def test_tag_history_on_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Uploading different content under the same tag yields >= 2 entries."""
        project_name, package_name = test_package
        tag_name = f"history-update-{unique_test_id}"

        # The first upload creates the tag; the second re-uses its name with
        # different content.
        for blob in (b"first content", b"second content"):
            upload_test_file(
                integration_client,
                project_name,
                package_name,
                blob,
                tag=tag_name,
            )

        history_url = (
            f"/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history"
        )
        resp = integration_client.get(history_url)
        assert resp.status_code == 200

        entries = resp.json()
        # Should have at least 2 history entries (create + update)
        assert len(entries) >= 2
class TestTagRefCount:
    """Tests for ref_count behavior with tag operations."""

    @staticmethod
    def _ref_count(client, artifact_hash):
        """Return the artifact's ref_count, asserting the lookup itself succeeded.

        Checking the status first turns a missing artifact into a clear
        status-code failure instead of a KeyError on the JSON body.
        """
        response = client.get(f"/api/v1/artifact/{artifact_hash}")
        assert response.status_code == 200
        return response.json()["ref_count"]

    @pytest.mark.integration
    def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package):
        """Test ref_count decrements when a tag is deleted."""
        project_name, package_name = test_package
        content = b"ref count delete test"
        expected_hash = compute_sha256(content)

        # Upload with two tags
        for tag in ("rc-v1", "rc-v2"):
            upload_test_file(
                integration_client, project_name, package_name, content, tag=tag
            )

        # Verify ref_count is 2
        assert self._ref_count(integration_client, expected_hash) == 2

        # Delete one tag
        delete_response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/rc-v1"
        )
        assert delete_response.status_code == 204

        # Verify ref_count is now 1
        assert self._ref_count(integration_client, expected_hash) == 1

    @pytest.mark.integration
    def test_ref_count_zero_after_all_tags_deleted(
        self, integration_client, test_package
    ):
        """Test ref_count goes to 0 when all tags are deleted."""
        project_name, package_name = test_package
        content = b"orphan test content"
        expected_hash = compute_sha256(content)

        # Upload with one tag
        upload_test_file(
            integration_client, project_name, package_name, content, tag="only-tag"
        )

        # Delete the tag. The status is asserted so a failed delete is
        # reported as such rather than as a wrong ref_count below.
        delete_response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/only-tag"
        )
        assert delete_response.status_code == 204

        # Verify ref_count is 0
        assert self._ref_count(integration_client, expected_hash) == 0

    @pytest.mark.integration
    def test_ref_count_adjusts_on_tag_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count adjusts when a tag is updated to point to different artifact."""
        project_name, package_name = test_package

        # Two different artifacts
        content1 = f"artifact one {unique_test_id}".encode()
        content2 = f"artifact two {unique_test_id}".encode()
        hash1 = compute_sha256(content1)
        hash2 = compute_sha256(content2)

        # Upload first artifact with tag "latest"
        upload_test_file(
            integration_client, project_name, package_name, content1, tag="latest"
        )

        # Verify first artifact has ref_count 1
        assert self._ref_count(integration_client, hash1) == 1

        # Upload second artifact with different tag
        upload_test_file(
            integration_client, project_name, package_name, content2, tag="stable"
        )

        # Now update "latest" tag to point to second artifact
        upload_test_file(
            integration_client, project_name, package_name, content2, tag="latest"
        )

        # Verify first artifact ref_count decreased to 0
        assert self._ref_count(integration_client, hash1) == 0

        # Verify second artifact ref_count increased to 2
        assert self._ref_count(integration_client, hash2) == 2

    @pytest.mark.integration
    def test_ref_count_unchanged_when_tag_same_artifact(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count doesn't change when tag is 'updated' to same artifact."""
        project_name, package_name = test_package
        content = f"same artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag
        upload_test_file(
            integration_client, project_name, package_name, content, tag="same-v1"
        )

        # Verify ref_count is 1
        assert self._ref_count(integration_client, expected_hash) == 1

        # Upload same content with same tag (no-op)
        upload_test_file(
            integration_client, project_name, package_name, content, tag="same-v1"
        )

        # Verify ref_count is still 1
        assert self._ref_count(integration_client, expected_hash) == 1

    @pytest.mark.integration
    def test_tag_via_post_endpoint_increments_ref_count(
        self, integration_client, test_package, unique_test_id
    ):
        """Test creating tag via POST /tags endpoint increments ref_count."""
        project_name, package_name = test_package
        content = f"tag endpoint test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)
        tags_url = f"/api/v1/project/{project_name}/{package_name}/tags"

        # Upload artifact without tag
        result = upload_test_file(
            integration_client, project_name, package_name, content, filename="test.bin"
        )
        artifact_id = result["artifact_id"]

        # Verify ref_count is 0 (no tags yet)
        assert self._ref_count(integration_client, expected_hash) == 0

        # Each tag created through the POST endpoint should add one reference.
        for expected_count, tag_name in enumerate(("post-v1", "post-latest"), start=1):
            tag_response = integration_client.post(
                tags_url,
                json={"name": tag_name, "artifact_id": artifact_id},
            )
            assert tag_response.status_code == 200
            assert self._ref_count(integration_client, expected_hash) == expected_count

View File

@@ -0,0 +1,502 @@
"""
Integration tests for upload and download API endpoints.
Tests cover:
- Upload functionality and deduplication
- Download by tag and artifact ID
- Concurrent upload handling
- File size validation
- Upload failure cleanup
- S3 storage verification
"""
import pytest
import io
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.factories import (
compute_sha256,
upload_test_file,
list_s3_objects_by_hash,
s3_object_exists,
)
class TestUploadBasics:
    """Fields returned by a successful upload."""

    @pytest.mark.integration
    def test_upload_returns_artifact_id(self, integration_client, test_package):
        """The returned artifact_id equals the SHA256 of the uploaded bytes."""
        project_name, package_name = test_package
        content = b"basic upload test"

        uploaded = upload_test_file(
            integration_client, project_name, package_name, content, tag="v1"
        )

        assert uploaded["artifact_id"] == compute_sha256(content)

    @pytest.mark.integration
    def test_upload_response_has_upload_id(self, integration_client, test_package):
        """The response carries a non-null upload_id."""
        project_name, package_name = test_package

        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"upload id test",
            "uploadid.txt",
        )

        assert "upload_id" in uploaded
        assert uploaded["upload_id"] is not None

    @pytest.mark.integration
    def test_upload_response_has_content_type(self, integration_client, test_package):
        """The response carries a content_type key."""
        project_name, package_name = test_package

        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"content type test",
            "content.txt",
        )

        assert "content_type" in uploaded

    @pytest.mark.integration
    def test_upload_response_has_original_name(self, integration_client, test_package):
        """The response echoes the uploaded file name as original_name."""
        project_name, package_name = test_package

        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"original name test",
            "originalname.txt",
        )

        assert "original_name" in uploaded
        assert uploaded["original_name"] == "originalname.txt"

    @pytest.mark.integration
    def test_upload_response_has_created_at(self, integration_client, test_package):
        """The response carries a non-null created_at."""
        project_name, package_name = test_package

        uploaded = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"created at test",
            "createdat.txt",
        )

        assert "created_at" in uploaded
        assert uploaded["created_at"] is not None
class TestDuplicateUploads:
    """Tests for duplicate upload deduplication behavior."""

    @pytest.mark.integration
    def test_same_file_twice_returns_same_artifact_id(
        self, integration_client, test_package
    ):
        """Test uploading same file twice returns same artifact_id."""
        project, package = test_package
        content = b"content uploaded twice for same artifact test"
        expected_hash = compute_sha256(content)

        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag="first"
        )
        assert result1["artifact_id"] == expected_hash

        # Second upload
        result2 = upload_test_file(
            integration_client, project, package, content, tag="second"
        )
        assert result2["artifact_id"] == expected_hash
        assert result1["artifact_id"] == result2["artifact_id"]

    @pytest.mark.integration
    def test_same_file_twice_increments_ref_count(
        self, integration_client, test_package
    ):
        """Test uploading same file twice increments ref_count to 2."""
        project, package = test_package
        content = b"content for ref count increment test"

        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag="v1"
        )
        assert result1["ref_count"] == 1

        # Second upload
        result2 = upload_test_file(
            integration_client, project, package, content, tag="v2"
        )
        assert result2["ref_count"] == 2

    @pytest.mark.integration
    def test_same_file_different_packages_shares_artifact(
        self, integration_client, test_project, unique_test_id
    ):
        """Test uploading same file to different packages shares artifact."""
        project = test_project
        content = f"content shared across packages {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Create two packages. The responses are asserted so a failed setup
        # step is reported here rather than as an upload error further down.
        pkg1 = f"package-a-{unique_test_id}"
        pkg2 = f"package-b-{unique_test_id}"
        for pkg_name, description in ((pkg1, "Package A"), (pkg2, "Package B")):
            response = integration_client.post(
                f"/api/v1/project/{project}/packages",
                json={"name": pkg_name, "description": description},
            )
            assert response.status_code == 200

        # Upload to first package
        result1 = upload_test_file(integration_client, project, pkg1, content, tag="v1")
        assert result1["artifact_id"] == expected_hash
        assert result1["deduplicated"] is False

        # Upload to second package
        result2 = upload_test_file(integration_client, project, pkg2, content, tag="v1")
        assert result2["artifact_id"] == expected_hash
        assert result2["deduplicated"] is True

    @pytest.mark.integration
    def test_same_file_different_filenames_shares_artifact(
        self, integration_client, test_package
    ):
        """Test uploading same file with different filenames shares artifact."""
        project, package = test_package
        content = b"content with different filenames"
        expected_hash = compute_sha256(content)

        # Upload with filename1
        result1 = upload_test_file(
            integration_client,
            project,
            package,
            content,
            filename="file1.bin",
            tag="v1",
        )
        assert result1["artifact_id"] == expected_hash

        # Upload with filename2
        result2 = upload_test_file(
            integration_client,
            project,
            package,
            content,
            filename="file2.bin",
            tag="v2",
        )
        assert result2["artifact_id"] == expected_hash
        assert result2["deduplicated"] is True
class TestDownload:
    """Downloading artifacts by tag and by artifact id."""

    @pytest.mark.integration
    def test_download_by_tag(self, integration_client, test_package):
        """Downloading by tag name in proxy mode returns the uploaded bytes."""
        project, package = test_package
        payload = b"download by tag test"
        upload_test_file(
            integration_client, project, package, payload, tag="download-tag"
        )

        download_url = f"/api/v1/project/{project}/{package}/+/download-tag"
        resp = integration_client.get(download_url, params={"mode": "proxy"})

        assert resp.status_code == 200
        assert resp.content == payload

    @pytest.mark.integration
    def test_download_by_artifact_id(self, integration_client, test_package):
        """Downloading via the `artifact:<sha256>` reference returns the bytes."""
        project, package = test_package
        payload = b"download by id test"
        artifact_hash = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload)

        download_url = f"/api/v1/project/{project}/{package}/+/artifact:{artifact_hash}"
        resp = integration_client.get(download_url, params={"mode": "proxy"})

        assert resp.status_code == 200
        assert resp.content == payload

    @pytest.mark.integration
    def test_download_nonexistent_tag(self, integration_client, test_package):
        """Downloading a tag that was never created returns 404."""
        project, package = test_package

        download_url = f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
        resp = integration_client.get(download_url)

        assert resp.status_code == 404

    @pytest.mark.integration
    def test_content_matches_original(self, integration_client, test_package):
        """The downloaded body is byte-for-byte equal to what was uploaded."""
        project, package = test_package
        payload = b"exact content verification test data 12345"
        upload_test_file(integration_client, project, package, payload, tag="verify")

        download_url = f"/api/v1/project/{project}/{package}/+/verify"
        resp = integration_client.get(download_url, params={"mode": "proxy"})

        assert resp.status_code == 200
        assert resp.content == payload
class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_uploads_same_file(self, integration_client, test_package):
        """Test concurrent uploads of same file handle deduplication correctly.

        Five workers upload identical bytes under distinct tags, each through
        its own HTTP client. All uploads must succeed, report the same
        artifact id, and leave the artifact with one reference per upload.
        """
        from httpx import Client

        project, package = test_package
        content = b"content for concurrent upload test"
        expected_hash = compute_sha256(content)
        num_concurrent = 5

        # Target the same server as the fixture instead of a hard-coded URL.
        # NOTE(review): assumes integration_client exposes `base_url` like an
        # httpx.Client; falls back to the previous default otherwise - confirm.
        base_url = getattr(integration_client, "base_url", None) or "http://localhost:8080"

        def upload_worker(tag_suffix):
            """Upload `content` under a per-worker tag and return the response JSON.

            Raises RuntimeError carrying the status and body on a non-200 reply.
            """
            with Client(base_url=base_url, timeout=30.0) as client:
                files = {
                    "file": (
                        f"concurrent-{tag_suffix}.bin",
                        io.BytesIO(content),
                        "application/octet-stream",
                    )
                }
                response = client.post(
                    f"/api/v1/project/{project}/{package}/upload",
                    files=files,
                    data={"tag": f"concurrent-{tag_suffix}"},
                )
                if response.status_code != 200:
                    raise RuntimeError(
                        f"Status {response.status_code}: {response.text}"
                    )
                return response.json()

        # Outcomes are gathered from the futures in this thread, so the
        # workers never mutate shared lists.
        results = []
        errors = []
        with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
            futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
            for future in as_completed(futures):
                try:
                    results.append(future.result())
                except Exception as e:
                    errors.append(str(e))

        assert len(errors) == 0, f"Errors during concurrent uploads: {errors}"
        assert len(results) == num_concurrent

        # All should have same artifact_id
        artifact_ids = {r["artifact_id"] for r in results}
        assert len(artifact_ids) == 1
        assert expected_hash in artifact_ids

        # Verify final ref_count
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == num_concurrent
class TestFileSizeValidation:
    """Empty-file rejection and reported file sizes."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """Uploading a zero-byte file is refused with 422 or 400."""
        project, package = test_package

        empty_upload = {
            "file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")
        }
        resp = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=empty_upload,
        )

        assert resp.status_code in (422, 400)

    @pytest.mark.integration
    def test_small_valid_file_accepted(self, integration_client, test_package):
        """A one-byte file uploads successfully and reports size 1."""
        project, package = test_package

        uploaded = upload_test_file(
            integration_client, project, package, b"X", tag="tiny"
        )

        assert uploaded["artifact_id"] is not None
        assert uploaded["size"] == 1

    @pytest.mark.integration
    def test_file_size_reported_correctly(
        self, integration_client, test_package, unique_test_id
    ):
        """The upload response and the artifact endpoint agree on the byte size."""
        project, package = test_package
        content = f"Test content for size check {unique_test_id}".encode()
        byte_count = len(content)

        uploaded = upload_test_file(
            integration_client, project, package, content, tag="size-test"
        )
        assert uploaded["size"] == byte_count

        # Cross-check through the artifact endpoint.
        artifact_id = uploaded["artifact_id"]
        artifact_resp = integration_client.get(f"/api/v1/artifact/{artifact_id}")
        assert artifact_resp.json()["size"] == byte_count
class TestUploadFailureCleanup:
    """Rejected uploads must leave nothing behind in S3 or the database."""

    @pytest.mark.integration
    def test_upload_failure_invalid_project_no_orphaned_s3(
        self, integration_client, unique_test_id
    ):
        """An upload to an unknown project returns 404 and stores no S3 object."""
        content = f"content for orphan s3 test {unique_test_id}".encode()
        content_hash = compute_sha256(content)
        upload_url = (
            f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload"
        )

        resp = integration_client.post(
            upload_url,
            files={
                "file": ("test.bin", io.BytesIO(content), "application/octet-stream")
            },
            data={"tag": "test"},
        )
        assert resp.status_code == 404

        # Verify no S3 object was created
        stored = s3_object_exists(content_hash)
        assert not stored, "Orphaned S3 object found after failed upload"

    @pytest.mark.integration
    def test_upload_failure_invalid_package_no_orphaned_s3(
        self, integration_client, test_project, unique_test_id
    ):
        """An upload to an unknown package returns 404 and stores no S3 object."""
        content = f"content for orphan s3 test pkg {unique_test_id}".encode()
        content_hash = compute_sha256(content)
        upload_url = (
            f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload"
        )

        resp = integration_client.post(
            upload_url,
            files={
                "file": ("test.bin", io.BytesIO(content), "application/octet-stream")
            },
            data={"tag": "test"},
        )
        assert resp.status_code == 404

        stored = s3_object_exists(content_hash)
        assert not stored, "Orphaned S3 object found after failed upload"

    @pytest.mark.integration
    def test_upload_failure_no_orphaned_database_records(
        self, integration_client, test_project, unique_test_id
    ):
        """After a rejected upload the artifact endpoint returns 404 for its hash."""
        content = f"content for db orphan test {unique_test_id}".encode()
        content_hash = compute_sha256(content)
        upload_url = (
            f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload"
        )

        resp = integration_client.post(
            upload_url,
            files={
                "file": ("test.bin", io.BytesIO(content), "application/octet-stream")
            },
            data={"tag": "test"},
        )
        assert resp.status_code == 404

        lookup = integration_client.get(f"/api/v1/artifact/{content_hash}")
        assert (
            lookup.status_code == 404
        ), "Orphaned artifact record found after failed upload"
class TestS3StorageVerification:
    """Storage-level checks after duplicate uploads."""

    @pytest.mark.integration
    def test_s3_single_object_after_duplicates(
        self, integration_client, test_package, unique_test_id
    ):
        """Three uploads of the same bytes leave one S3 object at the expected key."""
        project, package = test_package
        content = f"content for s3 object count test {unique_test_id}".encode()
        content_hash = compute_sha256(content)

        # Upload same content multiple times
        for tag_name in ("s3test1", "s3test2", "s3test3"):
            upload_test_file(
                integration_client, project, package, content, tag=tag_name
            )

        # Verify only one S3 object exists
        stored_keys = list_s3_objects_by_hash(content_hash)
        assert (
            len(stored_keys) == 1
        ), f"Expected 1 S3 object, found {len(stored_keys)}: {stored_keys}"

        # The key is the hash nested under its first two byte-pairs.
        first_pair, second_pair = content_hash[:2], content_hash[2:4]
        expected_key = f"fruits/{first_pair}/{second_pair}/{content_hash}"
        assert stored_keys[0] == expected_key

    @pytest.mark.integration
    def test_artifact_table_single_row_after_duplicates(
        self, integration_client, test_package
    ):
        """Three uploads of the same bytes give one artifact with ref_count 3."""
        project, package = test_package
        content = b"content for single row test"
        content_hash = compute_sha256(content)

        # Upload same content multiple times
        for tag_name in ("v1", "v2", "v3"):
            upload_test_file(
                integration_client, project, package, content, tag=tag_name
            )

        # Query artifact
        resp = integration_client.get(f"/api/v1/artifact/{content_hash}")
        assert resp.status_code == 200

        record = resp.json()
        assert record["id"] == content_hash
        assert record["ref_count"] == 3