""" Integration tests for artifact API endpoints. Tests cover: - Artifact retrieval by ID - Artifact stats endpoint - Artifact provenance/history - Artifact uploads listing - Garbage collection endpoints - Orphaned artifacts management """ import pytest from tests.factories import compute_sha256, upload_test_file class TestArtifactRetrieval: """Tests for artifact retrieval endpoints.""" @pytest.mark.integration def test_get_artifact_by_id(self, integration_client, test_package): """Test retrieving an artifact by its SHA256 ID.""" project_name, package_name = test_package content = b"artifact retrieval test" expected_hash = compute_sha256(content) upload_test_file( integration_client, project_name, package_name, content, version="v1" ) response = integration_client.get(f"/api/v1/artifact/{expected_hash}") assert response.status_code == 200 data = response.json() assert data["id"] == expected_hash assert data["sha256"] == expected_hash assert data["size"] == len(content) assert "ref_count" in data assert "created_at" in data @pytest.mark.integration def test_get_nonexistent_artifact(self, integration_client): """Test getting a non-existent artifact returns 404.""" fake_hash = "a" * 64 response = integration_client.get(f"/api/v1/artifact/{fake_hash}") assert response.status_code == 404 @pytest.mark.integration def test_artifact_includes_tags(self, integration_client, test_package): """Test artifact response includes tags pointing to it.""" project_name, package_name = test_package content = b"artifact with tags test" expected_hash = compute_sha256(content) upload_test_file( integration_client, project_name, package_name, content, version="tagged-v1" ) response = integration_client.get(f"/api/v1/artifact/{expected_hash}") assert response.status_code == 200 data = response.json() assert "tags" in data assert len(data["tags"]) >= 1 tag = data["tags"][0] assert "name" in tag assert "package_name" in tag assert "project_name" in tag class TestArtifactStats: """Tests for artifact statistics endpoint.""" @pytest.mark.integration def test_artifact_stats_returns_valid_response( self, integration_client, test_package, unique_test_id ): """Test artifact stats returns expected fields.""" project, package = test_package content = f"artifact stats test {unique_test_id}".encode() expected_hash = compute_sha256(content) upload_test_file( integration_client, project, package, content, version=f"art-{unique_test_id}" ) response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats") assert response.status_code == 200 data = response.json() assert "artifact_id" in data assert "sha256" in data assert "size" in data assert "ref_count" in data assert "storage_savings" in data assert "tags" in data assert "projects" in data assert "packages" in data @pytest.mark.integration def test_artifact_stats_not_found(self, integration_client): """Test artifact stats returns 404 for non-existent artifact.""" fake_hash = "0" * 64 response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats") assert response.status_code == 404 @pytest.mark.integration def test_artifact_stats_shows_correct_projects( self, integration_client, unique_test_id ): """Test artifact stats shows all projects using the artifact.""" content = f"multi-project artifact {unique_test_id}".encode() expected_hash = compute_sha256(content) proj1 = f"art-stats-a-{unique_test_id}" proj2 = f"art-stats-b-{unique_test_id}" try: # Create projects and packages integration_client.post( "/api/v1/projects", json={"name": proj1, "description": "Test", 
"is_public": True}, ) integration_client.post( "/api/v1/projects", json={"name": proj2, "description": "Test", "is_public": True}, ) integration_client.post( f"/api/v1/project/{proj1}/packages", json={"name": "pkg", "description": "Test"}, ) integration_client.post( f"/api/v1/project/{proj2}/packages", json={"name": "pkg", "description": "Test"}, ) # Upload same content to both projects upload_test_file(integration_client, proj1, "pkg", content, version="v1") upload_test_file(integration_client, proj2, "pkg", content, version="v1") # Check artifact stats response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats") assert response.status_code == 200 data = response.json() assert len(data["projects"]) == 2 assert proj1 in data["projects"] assert proj2 in data["projects"] finally: integration_client.delete(f"/api/v1/projects/{proj1}") integration_client.delete(f"/api/v1/projects/{proj2}") class TestArtifactProvenance: """Tests for artifact provenance/history endpoint.""" @pytest.mark.integration def test_artifact_history_returns_200(self, integration_client, test_package): """Test artifact history endpoint returns 200.""" project_name, package_name = test_package upload_result = upload_test_file( integration_client, project_name, package_name, b"provenance test content", "prov.txt", ) artifact_id = upload_result["artifact_id"] response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history") assert response.status_code == 200 @pytest.mark.integration def test_artifact_history_has_required_fields( self, integration_client, test_package ): """Test artifact history has all required fields.""" project_name, package_name = test_package upload_result = upload_test_file( integration_client, project_name, package_name, b"provenance fields test", "fields.txt", ) artifact_id = upload_result["artifact_id"] response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history") assert response.status_code == 200 data = response.json() assert "artifact_id" in data assert "sha256" in data assert "size" in data assert "created_at" in data assert "created_by" in data assert "ref_count" in data assert "first_uploaded_at" in data assert "first_uploaded_by" in data assert "upload_count" in data assert "packages" in data assert "tags" in data assert "uploads" in data @pytest.mark.integration def test_artifact_history_not_found(self, integration_client): """Test non-existent artifact returns 404.""" fake_hash = "b" * 64 response = integration_client.get(f"/api/v1/artifact/{fake_hash}/history") assert response.status_code == 404 @pytest.mark.integration def test_artifact_history_with_tag(self, integration_client, test_package): """Test artifact history includes tag information when tagged.""" project_name, package_name = test_package upload_result = upload_test_file( integration_client, project_name, package_name, b"tagged provenance test", "tagged.txt", tag="v1.0.0", ) artifact_id = upload_result["artifact_id"] response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history") assert response.status_code == 200 data = response.json() assert len(data["tags"]) >= 1 tag = data["tags"][0] assert "project_name" in tag assert "package_name" in tag assert "tag_name" in tag class TestArtifactUploads: """Tests for artifact uploads listing endpoint.""" @pytest.mark.integration def test_artifact_uploads_returns_200(self, integration_client, test_package): """Test artifact uploads endpoint returns 200.""" project_name, package_name = test_package upload_result = upload_test_file( 

class TestArtifactUploads:
    """Tests for artifact uploads listing endpoint."""

    @pytest.mark.integration
    def test_artifact_uploads_returns_200(self, integration_client, test_package):
        """Test artifact uploads endpoint returns 200."""
        project_name, package_name = test_package
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact upload test",
            "artifact.txt",
        )
        artifact_id = upload_result["artifact_id"]

        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/uploads")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data
        assert len(data["items"]) >= 1

    @pytest.mark.integration
    def test_artifact_uploads_not_found(self, integration_client):
        """Test non-existent artifact returns 404."""
        fake_hash = "a" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/uploads")
        assert response.status_code == 404


class TestOrphanedArtifacts:
    """Tests for orphaned artifacts management."""

    @pytest.mark.integration
    def test_list_orphaned_artifacts_returns_list(self, integration_client):
        """Test orphaned artifacts endpoint returns a list."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts")
        assert response.status_code == 200
        assert isinstance(response.json(), list)

    @pytest.mark.integration
    def test_orphaned_artifact_has_required_fields(self, integration_client):
        """Test orphaned artifact response has required fields."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
        assert response.status_code == 200
        data = response.json()
        if len(data) > 0:
            artifact = data[0]
            assert "id" in artifact
            assert "size" in artifact
            assert "created_at" in artifact
            assert "created_by" in artifact
            assert "original_name" in artifact

    @pytest.mark.integration
    def test_orphaned_artifacts_respects_limit(self, integration_client):
        """Test orphaned artifacts endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
        assert response.status_code == 200
        assert len(response.json()) <= 5

    @pytest.mark.integration
    def test_artifact_becomes_orphaned_when_tag_deleted(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact appears in orphaned list after tag is deleted."""
        project, package = test_package
        content = f"orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag
        upload_test_file(
            integration_client, project, package, content, version="temp-tag"
        )

        # Verify not in orphaned list
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        orphaned_ids = [a["id"] for a in response.json()]
        assert expected_hash not in orphaned_ids

        # Delete the tag
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag")

        # Verify now in orphaned list
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        orphaned_ids = [a["id"] for a in response.json()]
        assert expected_hash in orphaned_ids
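
# Lifecycle assumed by the garbage collection tests below, as exercised by the
# assertions in this module: uploading a tagged file leaves the artifact with
# ref_count >= 1; deleting the tag drops that reference, the artifact becomes
# orphaned, and only then is it a garbage collection candidate.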

class TestGarbageCollection:
    """Tests for garbage collection endpoint."""

    @pytest.mark.integration
    def test_garbage_collect_dry_run_returns_response(self, integration_client):
        """Test garbage collection dry run returns valid response."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200
        data = response.json()
        assert "artifacts_deleted" in data
        assert "bytes_freed" in data
        assert "artifact_ids" in data
        assert "dry_run" in data
        assert data["dry_run"] is True

    @pytest.mark.integration
    def test_garbage_collect_dry_run_doesnt_delete(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection dry run doesn't actually delete artifacts."""
        project, package = test_package
        content = f"dry run test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload and delete tag to create orphan
        upload_test_file(
            integration_client, project, package, content, version="dry-run"
        )
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")

        # Verify artifact exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

        # Run garbage collection in dry-run mode
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200
        assert expected_hash in gc_response.json()["artifact_ids"]

        # Verify artifact STILL exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

    @pytest.mark.integration
    def test_garbage_collect_preserves_referenced_artifacts(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection doesn't delete artifacts with ref_count > 0."""
        project, package = test_package
        content = f"preserve test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag (ref_count=1)
        upload_test_file(
            integration_client, project, package, content, version="keep-this"
        )

        # Verify artifact exists with ref_count=1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == 1

        # Run garbage collection (dry_run to not affect other tests)
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200

        # Verify artifact was NOT in delete list
        assert expected_hash not in gc_response.json()["artifact_ids"]

        # Verify artifact still exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_garbage_collect_respects_limit(self, integration_client):
        """Test garbage collection respects limit parameter."""
        response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=5"
        )
        assert response.status_code == 200
        assert response.json()["artifacts_deleted"] <= 5

    @pytest.mark.integration
    def test_garbage_collect_returns_bytes_freed(self, integration_client):
        """Test garbage collection returns accurate bytes_freed."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200
        data = response.json()
        assert data["bytes_freed"] >= 0
        assert isinstance(data["bytes_freed"], int)

class TestGlobalUploads:
    """Tests for global uploads endpoint."""

    @pytest.mark.integration
    def test_global_uploads_returns_200(self, integration_client):
        """Test global uploads endpoint returns 200."""
        response = integration_client.get("/api/v1/uploads")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_uploads_pagination(self, integration_client):
        """Test global uploads endpoint respects pagination."""
        response = integration_client.get("/api/v1/uploads?limit=5&page=1")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5
        assert data["pagination"]["page"] == 1

    @pytest.mark.integration
    def test_global_uploads_filter_by_project(self, integration_client, test_package):
        """Test filtering global uploads by project name."""
        project_name, package_name = test_package

        # Upload a file
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"global filter test",
            "global.txt",
        )

        response = integration_client.get(f"/api/v1/uploads?project={project_name}")
        assert response.status_code == 200
        data = response.json()
        for item in data["items"]:
            assert item["project_name"] == project_name

    @pytest.mark.integration
    def test_global_uploads_has_more_field(self, integration_client):
        """Test pagination includes has_more field."""
        response = integration_client.get("/api/v1/uploads?limit=1")
        assert response.status_code == 200
        data = response.json()
        assert "has_more" in data["pagination"]
        assert isinstance(data["pagination"]["has_more"], bool)


class TestGlobalArtifacts:
    """Tests for global artifacts endpoint."""

    @pytest.mark.integration
    def test_global_artifacts_returns_200(self, integration_client):
        """Test global artifacts endpoint returns 200."""
        response = integration_client.get("/api/v1/artifacts")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_artifacts_pagination(self, integration_client):
        """Test global artifacts endpoint respects pagination."""
        response = integration_client.get("/api/v1/artifacts?limit=5&page=1")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_artifacts_filter_by_size(self, integration_client):
        """Test filtering global artifacts by size range."""
        response = integration_client.get(
            "/api/v1/artifacts?min_size=1&max_size=1000000"
        )
        assert response.status_code == 200
        data = response.json()
        for item in data["items"]:
            assert 1 <= item["size"] <= 1000000

    @pytest.mark.integration
    def test_global_artifacts_sort_by_size(self, integration_client):
        """Test sorting global artifacts by size."""
        response = integration_client.get("/api/v1/artifacts?sort=size&order=desc")
        assert response.status_code == 200
        data = response.json()
        if len(data["items"]) > 1:
            sizes = [item["size"] for item in data["items"]]
            assert sizes == sorted(sizes, reverse=True)

    @pytest.mark.integration
    def test_global_artifacts_invalid_sort_returns_400(self, integration_client):
        """Test invalid sort field returns 400."""
        response = integration_client.get("/api/v1/artifacts?sort=invalid_field")
        assert response.status_code == 400


class TestGlobalTags:
    """Tests for global tags endpoint."""

    @pytest.mark.integration
    def test_global_tags_returns_200(self, integration_client):
        """Test global tags endpoint returns 200."""
        response = integration_client.get("/api/v1/tags")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_tags_pagination(self, integration_client):
        """Test global tags endpoint respects pagination."""
        response = integration_client.get("/api/v1/tags?limit=5&page=1")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_tags_has_project_context(self, integration_client):
        """Test global tags response includes project/package context."""
        response = integration_client.get("/api/v1/tags?limit=1")
        assert response.status_code == 200
        data = response.json()
        if len(data["items"]) > 0:
            item = data["items"][0]
            assert "project_name" in item
            assert "package_name" in item
            assert "artifact_id" in item

    @pytest.mark.integration
    def test_global_tags_search_with_wildcard(self, integration_client):
        """Test global tags search supports wildcards."""
        response = integration_client.get("/api/v1/tags?search=v*")
        assert response.status_code == 200
        # Just verify it doesn't error; results may vary

class TestAuditLogs:
    """Tests for global audit logs endpoint."""

    @pytest.mark.integration
    def test_list_audit_logs_returns_valid_response(self, integration_client):
        """Test audit logs endpoint returns valid paginated response."""
        response = integration_client.get("/api/v1/audit-logs")
        assert response.status_code == 200
        data = response.json()
        assert "items" in data
        assert "pagination" in data
        assert isinstance(data["items"], list)
        pagination = data["pagination"]
        assert "page" in pagination
        assert "limit" in pagination
        assert "total" in pagination
        assert "total_pages" in pagination

    @pytest.mark.integration
    def test_audit_logs_respects_pagination(self, integration_client):
        """Test audit logs endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/audit-logs?limit=5")
        assert response.status_code == 200
        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_audit_logs_filter_by_action(self, integration_client, test_package):
        """Test filtering audit logs by action type."""
        project_name, package_name = test_package
        response = integration_client.get("/api/v1/audit-logs?action=project.create")
        assert response.status_code == 200
        data = response.json()
        for item in data["items"]:
            assert item["action"] == "project.create"

    @pytest.mark.integration
    def test_audit_log_entry_has_required_fields(
        self, integration_client, test_project
    ):
        """Test audit log entries have all required fields."""
        response = integration_client.get("/api/v1/audit-logs?limit=10")
        assert response.status_code == 200
        data = response.json()
        if data["items"]:
            item = data["items"][0]
            assert "id" in item
            assert "action" in item
            assert "resource" in item
            assert "user_id" in item
            assert "timestamp" in item