diff --git a/backend/app/routes.py b/backend/app/routes.py
index b4c1b4f..a962ff6 100644
--- a/backend/app/routes.py
+++ b/backend/app/routes.py
@@ -2904,7 +2904,7 @@ def generate_stats_report(
     """Generate a summary report of storage and deduplication statistics."""
     # Gather stats
     total_artifacts = db.query(func.count(Artifact.id)).scalar() or 0
-    total_size = db.query(func.coalesce(func.sum(Artifact.size), 0)).scalar() or 0
+    total_size = int(db.query(func.coalesce(func.sum(Artifact.size), 0)).scalar() or 0)
     total_uploads = db.query(func.count(Upload.id)).scalar() or 0
     deduplicated_uploads = (
         db.query(func.count(Upload.id)).filter(Upload.deduplicated == True).scalar()
@@ -2917,7 +2917,7 @@ def generate_stats_report(
         db.query(func.count(Artifact.id)).filter(Artifact.ref_count == 0).scalar()
         or 0
     )
-    storage_saved = (
+    storage_saved = int(
         db.query(func.coalesce(func.sum(Artifact.size), 0))
         .join(Upload, Upload.artifact_id == Artifact.id)
         .filter(Upload.deduplicated == True)
diff --git a/backend/tests/test_stats_endpoints.py b/backend/tests/test_stats_endpoints.py
new file mode 100644
index 0000000..ce4da69
--- /dev/null
+++ b/backend/tests/test_stats_endpoints.py
@@ -0,0 +1,488 @@
+"""
+Integration tests for statistics endpoints.
+
+Tests cover:
+- Global stats endpoint
+- Deduplication stats endpoint
+- Cross-project deduplication
+- Timeline stats
+- Export and report endpoints
+- Package and artifact stats
+"""
+
+import pytest
+from tests.conftest import compute_sha256, upload_test_file
+
+
+class TestGlobalStats:
+    """Tests for GET /api/v1/stats endpoint."""
+
+    @pytest.mark.integration
+    def test_stats_returns_valid_response(self, integration_client):
+        """Test stats endpoint returns expected fields."""
+        response = integration_client.get("/api/v1/stats")
+        assert response.status_code == 200
+
+        data = response.json()
+        # Check all required fields exist
+        assert "total_artifacts" in data
+        assert "total_size_bytes" in data
+        assert "unique_artifacts" in data
+        assert "orphaned_artifacts" in data
+        assert "orphaned_size_bytes" in data
+        assert "total_uploads" in data
+        assert "deduplicated_uploads" in data
+        assert "deduplication_ratio" in data
+        assert "storage_saved_bytes" in data
+
+    @pytest.mark.integration
+    def test_stats_values_are_non_negative(self, integration_client):
+        """Test all stat values are non-negative."""
+        response = integration_client.get("/api/v1/stats")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data["total_artifacts"] >= 0
+        assert data["total_size_bytes"] >= 0
+        assert data["unique_artifacts"] >= 0
+        assert data["orphaned_artifacts"] >= 0
+        assert data["total_uploads"] >= 0
+        assert data["deduplicated_uploads"] >= 0
+        assert data["deduplication_ratio"] >= 0
+        assert data["storage_saved_bytes"] >= 0
+
+    @pytest.mark.integration
+    def test_stats_update_after_upload(
+        self, integration_client, test_package, unique_test_id
+    ):
+        """Test stats update after uploading an artifact."""
+        project, package = test_package
+
+        # Get initial stats
+        initial_response = integration_client.get("/api/v1/stats")
+        initial_stats = initial_response.json()
+
+        # Upload a new file
+        content = f"stats test content {unique_test_id}".encode()
+        upload_test_file(
+            integration_client, project, package, content, tag=f"stats-{unique_test_id}"
+        )
+
+        # Get updated stats
+        updated_response = integration_client.get("/api/v1/stats")
+        updated_stats = updated_response.json()
+
+        # Verify stats increased
+        assert updated_stats["total_uploads"] >= initial_stats["total_uploads"]
+
+
+class TestDeduplicationStats:
+    """Tests for GET /api/v1/stats/deduplication endpoint."""
+
+    @pytest.mark.integration
+    def test_dedup_stats_returns_valid_response(self, integration_client):
+        """Test deduplication stats returns expected fields."""
+        response = integration_client.get("/api/v1/stats/deduplication")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "total_logical_bytes" in data
+        assert "total_physical_bytes" in data
+        assert "bytes_saved" in data
+        assert "savings_percentage" in data
+        assert "total_uploads" in data
+        assert "unique_artifacts" in data
+        assert "duplicate_uploads" in data
+        assert "average_ref_count" in data
+        assert "max_ref_count" in data
+        assert "most_referenced_artifacts" in data
+
+    @pytest.mark.integration
+    def test_most_referenced_artifacts_format(self, integration_client):
+        """Test most_referenced_artifacts has correct structure."""
+        response = integration_client.get("/api/v1/stats/deduplication")
+        assert response.status_code == 200
+
+        data = response.json()
+        artifacts = data["most_referenced_artifacts"]
+        assert isinstance(artifacts, list)
+
+        if len(artifacts) > 0:
+            artifact = artifacts[0]
+            assert "artifact_id" in artifact
+            assert "ref_count" in artifact
+            assert "size" in artifact
+            assert "storage_saved" in artifact
+
+    @pytest.mark.integration
+    def test_dedup_stats_with_top_n_param(self, integration_client):
+        """Test deduplication stats respects top_n parameter."""
+        response = integration_client.get("/api/v1/stats/deduplication?top_n=3")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert len(data["most_referenced_artifacts"]) <= 3
+
+    @pytest.mark.integration
+    def test_savings_percentage_valid_range(self, integration_client):
+        """Test savings percentage is between 0 and 100."""
+        response = integration_client.get("/api/v1/stats/deduplication")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert 0 <= data["savings_percentage"] <= 100
+
+
+class TestCrossProjectStats:
+    """Tests for GET /api/v1/stats/cross-project endpoint."""
+
+    @pytest.mark.integration
+    def test_cross_project_returns_valid_response(self, integration_client):
+        """Test cross-project stats returns expected fields."""
+        response = integration_client.get("/api/v1/stats/cross-project")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "shared_artifacts_count" in data
+        assert "total_cross_project_savings" in data
+        assert "shared_artifacts" in data
+        assert isinstance(data["shared_artifacts"], list)
+
+    @pytest.mark.integration
+    def test_cross_project_respects_limit(self, integration_client):
+        """Test cross-project stats respects limit parameter."""
+        response = integration_client.get("/api/v1/stats/cross-project?limit=5")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert len(data["shared_artifacts"]) <= 5
+
+    @pytest.mark.integration
+    def test_cross_project_detects_shared_artifacts(
+        self, integration_client, unique_test_id
+    ):
+        """Test cross-project deduplication is detected."""
+        content = f"shared across projects {unique_test_id}".encode()
+
+        # Create two projects
+        proj1 = f"cross-proj-a-{unique_test_id}"
+        proj2 = f"cross-proj-b-{unique_test_id}"
+
+        try:
+            # Create projects and packages
+            integration_client.post(
+                "/api/v1/projects",
+                json={"name": proj1, "description": "Test", "is_public": True},
+            )
+            integration_client.post(
+                "/api/v1/projects",
+                json={"name": proj2, "description": "Test", "is_public": True},
+            )
+            integration_client.post(
+                f"/api/v1/project/{proj1}/packages",
+                json={"name": "pkg", "description": "Test"},
+            )
+            integration_client.post(
+                f"/api/v1/project/{proj2}/packages",
+                json={"name": "pkg", "description": "Test"},
+            )
+
+            # Upload same content to both projects
+            upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
+            upload_test_file(integration_client, proj2, "pkg", content, tag="v1")
+
+            # Check cross-project stats
+            response = integration_client.get("/api/v1/stats/cross-project")
+            assert response.status_code == 200
+
+            data = response.json()
+            assert data["shared_artifacts_count"] >= 1
+
+        finally:
+            # Cleanup
+            integration_client.delete(f"/api/v1/projects/{proj1}")
+            integration_client.delete(f"/api/v1/projects/{proj2}")
+
+
+class TestTimelineStats:
+    """Tests for GET /api/v1/stats/timeline endpoint."""
+
+    @pytest.mark.integration
+    def test_timeline_returns_valid_response(self, integration_client):
+        """Test timeline stats returns expected fields."""
+        response = integration_client.get("/api/v1/stats/timeline")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "period" in data
+        assert "start_date" in data
+        assert "end_date" in data
+        assert "data_points" in data
+        assert isinstance(data["data_points"], list)
+
+    @pytest.mark.integration
+    def test_timeline_daily_period(self, integration_client):
+        """Test timeline with daily period."""
+        response = integration_client.get("/api/v1/stats/timeline?period=daily")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data["period"] == "daily"
+
+    @pytest.mark.integration
+    def test_timeline_weekly_period(self, integration_client):
+        """Test timeline with weekly period."""
+        response = integration_client.get("/api/v1/stats/timeline?period=weekly")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data["period"] == "weekly"
+
+    @pytest.mark.integration
+    def test_timeline_monthly_period(self, integration_client):
+        """Test timeline with monthly period."""
+        response = integration_client.get("/api/v1/stats/timeline?period=monthly")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data["period"] == "monthly"
+
+    @pytest.mark.integration
+    def test_timeline_invalid_period_rejected(self, integration_client):
+        """Test timeline rejects invalid period."""
+        response = integration_client.get("/api/v1/stats/timeline?period=invalid")
+        assert response.status_code == 422
+
+    @pytest.mark.integration
+    def test_timeline_data_point_structure(self, integration_client):
+        """Test timeline data points have correct structure."""
+        response = integration_client.get("/api/v1/stats/timeline")
+        assert response.status_code == 200
+
+        data = response.json()
+        if len(data["data_points"]) > 0:
+            point = data["data_points"][0]
+            assert "date" in point
+            assert "total_uploads" in point
+            assert "unique_artifacts" in point
+            assert "duplicated_uploads" in point
+            assert "bytes_saved" in point
+
+
+class TestExportEndpoint:
+    """Tests for GET /api/v1/stats/export endpoint."""
+
+    @pytest.mark.integration
+    def test_export_json_format(self, integration_client):
+        """Test export with JSON format."""
+        response = integration_client.get("/api/v1/stats/export?format=json")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "total_artifacts" in data
+        assert "generated_at" in data
+
+    @pytest.mark.integration
+    def test_export_csv_format(self, integration_client):
+        """Test export with CSV format."""
+        response = integration_client.get("/api/v1/stats/export?format=csv")
+        assert response.status_code == 200
+        assert "text/csv" in response.headers.get("content-type", "")
+
+        content = response.text
+        assert "Metric,Value" in content
+        assert "total_artifacts" in content
+
+    @pytest.mark.integration
+    def test_export_invalid_format_rejected(self, integration_client):
+        """Test export rejects invalid format."""
+        response = integration_client.get("/api/v1/stats/export?format=xml")
+        assert response.status_code == 422
+
+
+class TestReportEndpoint:
+    """Tests for GET /api/v1/stats/report endpoint."""
+
+    @pytest.mark.integration
+    def test_report_markdown_format(self, integration_client):
+        """Test report with markdown format."""
+        response = integration_client.get("/api/v1/stats/report?format=markdown")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data["format"] == "markdown"
+        assert "generated_at" in data
+        assert "content" in data
+        assert "# Orchard Storage Report" in data["content"]
+
+    @pytest.mark.integration
+    def test_report_json_format(self, integration_client):
+        """Test report with JSON format."""
+        response = integration_client.get("/api/v1/stats/report?format=json")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data["format"] == "json"
+        assert "content" in data
+
+    @pytest.mark.integration
+    def test_report_contains_sections(self, integration_client):
+        """Test markdown report contains expected sections."""
+        response = integration_client.get("/api/v1/stats/report?format=markdown")
+        assert response.status_code == 200
+
+        content = response.json()["content"]
+        assert "## Overview" in content
+        assert "## Storage" in content
+        assert "## Uploads" in content
+
+
+class TestProjectStats:
+    """Tests for GET /api/v1/projects/:project/stats endpoint."""
+
+    @pytest.mark.integration
+    def test_project_stats_returns_valid_response(
+        self, integration_client, test_project
+    ):
+        """Test project stats returns expected fields."""
+        response = integration_client.get(f"/api/v1/projects/{test_project}/stats")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "project_id" in data
+        assert "project_name" in data
+        assert "package_count" in data
+        assert "tag_count" in data
+        assert "artifact_count" in data
+        assert "total_size_bytes" in data
+        assert "upload_count" in data
+        assert "deduplicated_uploads" in data
+        assert "storage_saved_bytes" in data
+        assert "deduplication_ratio" in data
+
+    @pytest.mark.integration
+    def test_project_stats_not_found(self, integration_client):
+        """Test project stats returns 404 for non-existent project."""
+        response = integration_client.get("/api/v1/projects/nonexistent-project/stats")
+        assert response.status_code == 404
+
+
+class TestPackageStats:
+    """Tests for GET /api/v1/project/:project/packages/:package/stats endpoint."""
+
+    @pytest.mark.integration
+    def test_package_stats_returns_valid_response(
+        self, integration_client, test_package
+    ):
+        """Test package stats returns expected fields."""
+        project, package = test_package
+        response = integration_client.get(
+            f"/api/v1/project/{project}/packages/{package}/stats"
+        )
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "package_id" in data
+        assert "package_name" in data
+        assert "project_name" in data
+        assert "tag_count" in data
+        assert "artifact_count" in data
+        assert "total_size_bytes" in data
+        assert "upload_count" in data
+        assert "deduplicated_uploads" in data
+        assert "storage_saved_bytes" in data
+        assert "deduplication_ratio" in data
+
+    @pytest.mark.integration
+    def test_package_stats_not_found(self, integration_client, test_project):
+        """Test package stats returns 404 for non-existent package."""
+        response = integration_client.get(
+            f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
+        )
+        assert response.status_code == 404
+
+
+class TestArtifactStats:
+    """Tests for GET /api/v1/artifact/:id/stats endpoint."""
+
+    @pytest.mark.integration
+    def test_artifact_stats_returns_valid_response(
+        self, integration_client, test_package, unique_test_id
+    ):
+        """Test artifact stats returns expected fields."""
+        project, package = test_package
+        content = f"artifact stats test {unique_test_id}".encode()
+        expected_hash = compute_sha256(content)
+
+        # Upload artifact
+        upload_test_file(
+            integration_client, project, package, content, tag=f"art-{unique_test_id}"
+        )
+
+        # Get artifact stats
+        response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
+        assert response.status_code == 200
+
+        data = response.json()
+        assert "artifact_id" in data
+        assert "sha256" in data
+        assert "size" in data
+        assert "ref_count" in data
+        assert "storage_savings" in data
+        assert "tags" in data
+        assert "projects" in data
+        assert "packages" in data
+
+    @pytest.mark.integration
+    def test_artifact_stats_not_found(self, integration_client):
+        """Test artifact stats returns 404 for non-existent artifact."""
+        fake_hash = "0" * 64
+        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
+        assert response.status_code == 404
+
+    @pytest.mark.integration
+    def test_artifact_stats_shows_correct_projects(
+        self, integration_client, unique_test_id
+    ):
+        """Test artifact stats shows all projects using the artifact."""
+        content = f"multi-project artifact {unique_test_id}".encode()
+        expected_hash = compute_sha256(content)
+
+        proj1 = f"art-stats-a-{unique_test_id}"
+        proj2 = f"art-stats-b-{unique_test_id}"
+
+        try:
+            # Create projects and packages
+            integration_client.post(
+                "/api/v1/projects",
+                json={"name": proj1, "description": "Test", "is_public": True},
+            )
+            integration_client.post(
+                "/api/v1/projects",
+                json={"name": proj2, "description": "Test", "is_public": True},
+            )
+            integration_client.post(
+                f"/api/v1/project/{proj1}/packages",
+                json={"name": "pkg", "description": "Test"},
+            )
+            integration_client.post(
+                f"/api/v1/project/{proj2}/packages",
+                json={"name": "pkg", "description": "Test"},
+            )
+
+            # Upload same content to both projects
+            upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
+            upload_test_file(integration_client, proj2, "pkg", content, tag="v1")
+
+            # Check artifact stats
+            response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
+            assert response.status_code == 200
+
+            data = response.json()
+            assert len(data["projects"]) == 2
+            assert proj1 in data["projects"]
+            assert proj2 in data["projects"]
+
+        finally:
+            integration_client.delete(f"/api/v1/projects/{proj1}")
+            integration_client.delete(f"/api/v1/projects/{proj2}")