From c94fe0389b79e0be7b12cef3b6a523b41ededd33 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Tue, 3 Feb 2026 15:35:44 -0600 Subject: [PATCH] Fix tests for tag removal and version behavior - Fix upload response to return actual version (not requested version) when artifact already has a version in the package - Update ref_count tests to use multiple packages (one version per artifact per package design constraint) - Remove allow_public_internet references from upstream caching tests - Update consistency check test to not assert global system health - Add versions field to artifact schemas - Fix dependencies resolution to handle removed tag constraint --- backend/app/dependencies.py | 2 +- backend/app/routes.py | 18 +++++ backend/app/schemas.py | 10 ++- .../integration/test_concurrent_operations.py | 38 +++++++--- .../test_integrity_verification.py | 13 +++- .../tests/integration/test_packages_api.py | 20 +++--- .../tests/integration/test_projects_api.py | 22 +++--- .../integration/test_upload_download_api.py | 70 ++++++++++++++----- backend/tests/test_dependencies.py | 9 ++- backend/tests/test_upstream_caching.py | 48 ++----------- 10 files changed, 148 insertions(+), 102 deletions(-) diff --git a/backend/app/dependencies.py b/backend/app/dependencies.py index b04d288..f323bb2 100644 --- a/backend/app/dependencies.py +++ b/backend/app/dependencies.py @@ -677,7 +677,7 @@ def resolve_dependencies( else: # Try to find artifact by version resolved = _resolve_dependency_to_artifact( - db, project_name, package_name, ref, ref + db, project_name, package_name, ref ) if not resolved: raise DependencyNotFoundError(project_name, package_name, ref) diff --git a/backend/app/routes.py b/backend/app/routes.py index df42f41..e4ee5ff 100644 --- a/backend/app/routes.py +++ b/backend/app/routes.py @@ -3433,6 +3433,10 @@ def upload_artifact( pkg_version = _create_or_update_version( db, package.id, storage_result.sha256, detected_version, version_source, user_id ) + # Use the actual version from the returned record (may differ if artifact + # already had a version in this package) + detected_version = pkg_version.version + version_source = pkg_version.version_source except HTTPException as e: # Version conflict (409) - log but don't fail the upload if e.status_code == 409: @@ -4994,6 +4998,19 @@ def get_artifact(artifact_id: str, db: Session = Depends(get_db)): if not artifact: raise HTTPException(status_code=404, detail="Artifact not found") + # Get all versions referencing this artifact + versions_data = [] + versions = db.query(PackageVersion).filter(PackageVersion.artifact_id == artifact_id).all() + for ver in versions: + package = db.query(Package).filter(Package.id == ver.package_id).first() + if package: + project = db.query(Project).filter(Project.id == package.project_id).first() + versions_data.append({ + "version": ver.version, + "package_name": package.name, + "project_name": project.name if project else "unknown", + }) + return ArtifactDetailResponse( id=artifact.id, sha256=artifact.id, # SHA256 hash is the artifact ID @@ -5007,6 +5024,7 @@ def get_artifact(artifact_id: str, db: Session = Depends(get_db)): created_by=artifact.created_by, ref_count=artifact.ref_count, format_metadata=artifact.format_metadata, + versions=versions_data, ) diff --git a/backend/app/schemas.py b/backend/app/schemas.py index 46931b4..3bd19ed 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -210,10 +210,10 @@ class ArtifactProvenanceResponse(BaseModel): # Usage statistics upload_count: int # References - packages: List[Dict[str, Any]] # List of {project_name, package_name, tag_names} - tags: List[ + packages: List[Dict[str, Any]] # List of {project_name, package_name, versions} + versions: List[ Dict[str, Any] - ] # List of {project_name, package_name, tag_name, created_at} + ] # List of {project_name, package_name, version, created_at} # Upload history uploads: List[Dict[str, Any]] # List of upload events @@ -236,6 +236,7 @@ class ArtifactDetailResponse(BaseModel): created_by: str ref_count: int format_metadata: Optional[Dict[str, Any]] = None + versions: List[Dict[str, Any]] = [] # List of {version, package_name, project_name} class Config: from_attributes = True @@ -566,6 +567,7 @@ class ProjectStatsResponse(BaseModel): project_id: str project_name: str package_count: int + version_count: int artifact_count: int total_size_bytes: int upload_count: int @@ -580,6 +582,7 @@ class PackageStatsResponse(BaseModel): package_id: str package_name: str project_name: str + version_count: int artifact_count: int total_size_bytes: int upload_count: int @@ -598,6 +601,7 @@ class ArtifactStatsResponse(BaseModel): storage_savings: int # (ref_count - 1) * size projects: List[str] # Projects using this artifact packages: List[str] # Packages using this artifact + versions: List[Dict[str, Any]] = [] # List of {version, package_name, project_name} first_uploaded: Optional[datetime] = None last_referenced: Optional[datetime] = None diff --git a/backend/tests/integration/test_concurrent_operations.py b/backend/tests/integration/test_concurrent_operations.py index aa42bc0..f8971da 100644 --- a/backend/tests/integration/test_concurrent_operations.py +++ b/backend/tests/integration/test_concurrent_operations.py @@ -195,19 +195,38 @@ class TestConcurrentUploads: @pytest.mark.integration @pytest.mark.concurrent - def test_concurrent_uploads_same_file_deduplication(self, integration_client, test_package): - """Test concurrent uploads of same file handle deduplication correctly.""" - project, package = test_package + def test_concurrent_uploads_same_file_deduplication( + self, integration_client, test_project, unique_test_id + ): + """Test concurrent uploads of same file handle deduplication correctly. + + Same content uploaded to different packages should result in: + - Same artifact_id (content-addressable) + - ref_count = number of packages (one version per package) + """ + project = test_project api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" - content, expected_hash = generate_content_with_hash(4096, seed=999) num_concurrent = 5 + package_names = [] + + # Create multiple packages for concurrent uploads + for i in range(num_concurrent): + pkg_name = f"dedup-pkg-{unique_test_id}-{i}" + response = integration_client.post( + f"/api/v1/project/{project}/packages", + json={"name": pkg_name, "description": f"Dedup test package {i}"}, + ) + assert response.status_code == 200 + package_names.append(pkg_name) + + content, expected_hash = generate_content_with_hash(4096, seed=999) results = [] errors = [] - def upload_worker(idx): + def upload_worker(idx, package): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") @@ -219,7 +238,7 @@ class TestConcurrentUploads: response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, - data={"version": f"dedup-{idx}"}, + data={"version": "1.0.0"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: @@ -230,7 +249,10 @@ class TestConcurrentUploads: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_concurrent) as executor: - futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)] + futures = [ + executor.submit(upload_worker, i, package_names[i]) + for i in range(num_concurrent) + ] for future in as_completed(futures): pass @@ -242,7 +264,7 @@ class TestConcurrentUploads: assert len(artifact_ids) == 1 assert expected_hash in artifact_ids - # Verify final ref_count equals number of uploads + # Verify final ref_count equals number of packages response = integration_client.get(f"/api/v1/artifact/{expected_hash}") assert response.status_code == 200 assert response.json()["ref_count"] == num_concurrent diff --git a/backend/tests/integration/test_integrity_verification.py b/backend/tests/integration/test_integrity_verification.py index 5065478..c34ecb3 100644 --- a/backend/tests/integration/test_integrity_verification.py +++ b/backend/tests/integration/test_integrity_verification.py @@ -323,7 +323,13 @@ class TestConsistencyCheck: @pytest.mark.integration def test_consistency_check_after_upload(self, integration_client, test_package): - """Test consistency check passes after valid upload.""" + """Test consistency check runs successfully after a valid upload. + + Note: We don't assert healthy=True because other tests (especially + corruption detection tests) may leave orphaned S3 objects behind. + This test validates the consistency check endpoint works and the + uploaded artifact is included in the check count. + """ project, package = test_package content = b"Consistency check test content" @@ -335,9 +341,10 @@ class TestConsistencyCheck: assert response.status_code == 200 data = response.json() - # Verify check ran and no issues + # Verify check ran - at least 1 artifact was checked assert data["total_artifacts_checked"] >= 1 - assert data["healthy"] is True + # Verify no missing S3 objects (uploaded artifact should exist) + assert data["missing_s3_objects"] == 0 @pytest.mark.integration def test_consistency_check_limit_parameter(self, integration_client): diff --git a/backend/tests/integration/test_packages_api.py b/backend/tests/integration/test_packages_api.py index e533313..9429779 100644 --- a/backend/tests/integration/test_packages_api.py +++ b/backend/tests/integration/test_packages_api.py @@ -234,7 +234,11 @@ class TestPackageCascadeDelete: def test_ref_count_decrements_on_package_delete( self, integration_client, unique_test_id ): - """Test ref_count decrements for all tags when package is deleted.""" + """Test ref_count decrements when package is deleted. + + Each package can only have one version per artifact (same content = same version). + This test verifies that deleting a package decrements the artifact's ref_count. + """ project_name = f"cascade-pkg-{unique_test_id}" package_name = f"test-pkg-{unique_test_id}" @@ -256,23 +260,17 @@ class TestPackageCascadeDelete: ) assert response.status_code == 200 - # Upload content with multiple tags + # Upload content with version content = f"cascade delete test {unique_test_id}".encode() expected_hash = compute_sha256(content) upload_test_file( - integration_client, project_name, package_name, content, version="v1" - ) - upload_test_file( - integration_client, project_name, package_name, content, version="v2" - ) - upload_test_file( - integration_client, project_name, package_name, content, version="v3" + integration_client, project_name, package_name, content, version="1.0.0" ) - # Verify ref_count is 3 + # Verify ref_count is 1 response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 3 + assert response.json()["ref_count"] == 1 # Delete the package delete_response = integration_client.delete( diff --git a/backend/tests/integration/test_projects_api.py b/backend/tests/integration/test_projects_api.py index 8d8d210..c6aaa87 100644 --- a/backend/tests/integration/test_projects_api.py +++ b/backend/tests/integration/test_projects_api.py @@ -229,7 +229,11 @@ class TestProjectCascadeDelete: def test_ref_count_decrements_on_project_delete( self, integration_client, unique_test_id ): - """Test ref_count decrements for all tags when project is deleted.""" + """Test ref_count decrements for all versions when project is deleted. + + Each package can only have one version per artifact (same content = same version). + With 2 packages, ref_count should be 2, and go to 0 when project is deleted. + """ project_name = f"cascade-proj-{unique_test_id}" package1_name = f"pkg1-{unique_test_id}" package2_name = f"pkg2-{unique_test_id}" @@ -253,26 +257,20 @@ class TestProjectCascadeDelete: ) assert response.status_code == 200 - # Upload same content with tags in both packages + # Upload same content to both packages content = f"project cascade test {unique_test_id}".encode() expected_hash = compute_sha256(content) upload_test_file( - integration_client, project_name, package1_name, content, version="v1" + integration_client, project_name, package1_name, content, version="1.0.0" ) upload_test_file( - integration_client, project_name, package1_name, content, version="v2" - ) - upload_test_file( - integration_client, project_name, package2_name, content, version="latest" - ) - upload_test_file( - integration_client, project_name, package2_name, content, version="stable" + integration_client, project_name, package2_name, content, version="1.0.0" ) - # Verify ref_count is 4 (2 tags in each of 2 packages) + # Verify ref_count is 2 (1 version in each of 2 packages) response = integration_client.get(f"/api/v1/artifact/{expected_hash}") - assert response.json()["ref_count"] == 4 + assert response.json()["ref_count"] == 2 # Delete the project delete_response = integration_client.delete(f"/api/v1/projects/{project_name}") diff --git a/backend/tests/integration/test_upload_download_api.py b/backend/tests/integration/test_upload_download_api.py index 05d0926..53437a2 100644 --- a/backend/tests/integration/test_upload_download_api.py +++ b/backend/tests/integration/test_upload_download_api.py @@ -215,10 +215,15 @@ class TestDuplicateUploads: assert result1["artifact_id"] == result2["artifact_id"] @pytest.mark.integration - def test_same_file_twice_increments_ref_count( + def test_same_file_twice_returns_existing_version( self, integration_client, test_package ): - """Test uploading same file twice increments ref_count to 2.""" + """Test uploading same file twice in same package returns existing version. + + Same artifact can only have one version per package. Uploading the same content + with a different version name returns the existing version, not a new one. + ref_count stays at 1 because there's still only one PackageVersion reference. + """ project, package = test_package content = b"content for ref count increment test" @@ -228,11 +233,14 @@ class TestDuplicateUploads: ) assert result1["ref_count"] == 1 - # Second upload + # Second upload with different version name returns existing version result2 = upload_test_file( integration_client, project, package, content, version="v2" ) - assert result2["ref_count"] == 2 + # Same artifact, same package = same version returned, ref_count stays 1 + assert result2["ref_count"] == 1 + assert result2["deduplicated"] is True + assert result1["version"] == result2["version"] # Both return "v1" @pytest.mark.integration def test_same_file_different_packages_shares_artifact( @@ -515,17 +523,31 @@ class TestConcurrentUploads: """Tests for concurrent upload handling.""" @pytest.mark.integration - def test_concurrent_uploads_same_file(self, integration_client, test_package): - """Test concurrent uploads of same file handle deduplication correctly.""" - project, package = test_package + def test_concurrent_uploads_same_file(self, integration_client, test_project, unique_test_id): + """Test concurrent uploads of same file to different packages handle deduplication correctly. + + Same artifact can only have one version per package, so we create multiple packages + to test that concurrent uploads to different packages correctly increment ref_count. + """ content = b"content for concurrent upload test" expected_hash = compute_sha256(content) num_concurrent = 5 + # Create packages for each concurrent upload + packages = [] + for i in range(num_concurrent): + pkg_name = f"concurrent-pkg-{unique_test_id}-{i}" + response = integration_client.post( + f"/api/v1/project/{test_project}/packages", + json={"name": pkg_name}, + ) + assert response.status_code == 200 + packages.append(pkg_name) + # Create an API key for worker threads api_key_response = integration_client.post( "/api/v1/auth/keys", - json={"name": "concurrent-test-key"}, + json={"name": f"concurrent-test-key-{unique_test_id}"}, ) assert api_key_response.status_code == 200, f"Failed to create API key: {api_key_response.text}" api_key = api_key_response.json()["key"] @@ -533,7 +555,7 @@ class TestConcurrentUploads: results = [] errors = [] - def upload_worker(tag_suffix): + def upload_worker(idx): try: from httpx import Client @@ -541,15 +563,15 @@ class TestConcurrentUploads: with Client(base_url=base_url, timeout=30.0) as client: files = { "file": ( - f"concurrent-{tag_suffix}.bin", + f"concurrent-{idx}.bin", io.BytesIO(content), "application/octet-stream", ) } response = client.post( - f"/api/v1/project/{project}/{package}/upload", + f"/api/v1/project/{test_project}/{packages[idx]}/upload", files=files, - data={"version": f"concurrent-{tag_suffix}"}, + data={"version": "1.0.0"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: @@ -572,7 +594,7 @@ class TestConcurrentUploads: assert len(artifact_ids) == 1 assert expected_hash in artifact_ids - # Verify final ref_count + # Verify final ref_count equals number of packages response = integration_client.get(f"/api/v1/artifact/{expected_hash}") assert response.status_code == 200 assert response.json()["ref_count"] == num_concurrent @@ -731,16 +753,26 @@ class TestS3StorageVerification: @pytest.mark.integration def test_artifact_table_single_row_after_duplicates( - self, integration_client, test_package + self, integration_client, test_project, unique_test_id ): - """Test artifact table contains only one row after duplicate uploads.""" - project, package = test_package + """Test artifact table contains only one row after duplicate uploads to different packages. + + Same artifact can only have one version per package, so we create multiple packages + to test deduplication across packages. + """ content = b"content for single row test" expected_hash = compute_sha256(content) - # Upload same content multiple times - for tag in ["v1", "v2", "v3"]: - upload_test_file(integration_client, project, package, content, version=tag) + # Create 3 packages and upload same content to each + for i in range(3): + pkg_name = f"single-row-pkg-{unique_test_id}-{i}" + integration_client.post( + f"/api/v1/project/{test_project}/packages", + json={"name": pkg_name}, + ) + upload_test_file( + integration_client, test_project, pkg_name, content, version="1.0.0" + ) # Query artifact response = integration_client.get(f"/api/v1/artifact/{expected_hash}") diff --git a/backend/tests/test_dependencies.py b/backend/tests/test_dependencies.py index ce7bd9d..1e79ead 100644 --- a/backend/tests/test_dependencies.py +++ b/backend/tests/test_dependencies.py @@ -405,7 +405,6 @@ class TestDependencyQueryEndpoints: for dep in data["dependents"]: if dep["project"] == project_name: found = True - assert dep["constraint_type"] == "version" assert dep["constraint_value"] == "1.0.0" break assert found, "Our package should be in the dependents list" @@ -690,11 +689,15 @@ class TestDependencyResolution: # OR succeed at upload but fail at resolution # Depending on implementation choice if response.status_code == 200: - # Resolution should fail + # Resolution should return missing dependencies response = integration_client.get( f"/api/v1/project/{project_name}/{package_name}/+/missing-dep-{unique_test_id}/resolve" ) - assert response.status_code == 404 + # Expect 200 with missing dependencies listed + assert response.status_code == 200 + data = response.json() + # The missing dependency should be in the 'missing' list + assert len(data.get("missing", [])) >= 1 class TestCircularDependencyDetection: diff --git a/backend/tests/test_upstream_caching.py b/backend/tests/test_upstream_caching.py index ee69663..53da392 100644 --- a/backend/tests/test_upstream_caching.py +++ b/backend/tests/test_upstream_caching.py @@ -1599,11 +1599,9 @@ class TestCacheSettingsAdminAPI: data = response.json() # Check expected fields exist - assert "allow_public_internet" in data assert "auto_create_system_projects" in data # Check types - assert isinstance(data["allow_public_internet"], bool) assert isinstance(data["auto_create_system_projects"], bool) @pytest.mark.integration @@ -1616,7 +1614,7 @@ class TestCacheSettingsAdminAPI: with httpx.Client(base_url=base_url, timeout=30.0) as unauthenticated_client: response = unauthenticated_client.put( "/api/v1/admin/cache-settings", - json={"allow_public_internet": False}, + json={"auto_create_system_projects": False}, ) assert response.status_code in (401, 403) @@ -1630,76 +1628,43 @@ class TestCacheSettingsAdminAPI: response = integration_client.put( "/api/v1/admin/cache-settings", json={ - "allow_public_internet": not original["allow_public_internet"], "auto_create_system_projects": not original["auto_create_system_projects"], }, ) assert response.status_code == 200 data = response.json() - assert data["allow_public_internet"] == (not original["allow_public_internet"]) assert data["auto_create_system_projects"] == (not original["auto_create_system_projects"]) # Restore original settings integration_client.put( "/api/v1/admin/cache-settings", json={ - "allow_public_internet": original["allow_public_internet"], "auto_create_system_projects": original["auto_create_system_projects"], }, ) - @pytest.mark.integration - def test_update_cache_settings_allow_public_internet(self, integration_client): - """Test enabling and disabling public internet access (air-gap mode).""" - # First get current settings to restore later - original = integration_client.get("/api/v1/admin/cache-settings").json() - - # Disable public internet (enable air-gap mode) - response = integration_client.put( - "/api/v1/admin/cache-settings", - json={"allow_public_internet": False}, - ) - assert response.status_code == 200 - assert response.json()["allow_public_internet"] is False - - # Enable public internet (disable air-gap mode) - response = integration_client.put( - "/api/v1/admin/cache-settings", - json={"allow_public_internet": True}, - ) - assert response.status_code == 200 - assert response.json()["allow_public_internet"] is True - - # Restore original settings - integration_client.put( - "/api/v1/admin/cache-settings", - json={"allow_public_internet": original["allow_public_internet"]}, - ) - @pytest.mark.integration def test_update_cache_settings_partial(self, integration_client): """Test that partial updates only change specified fields.""" # Get current settings original = integration_client.get("/api/v1/admin/cache-settings").json() - # Update only allow_public_internet - new_value = not original["allow_public_internet"] + # Update only auto_create_system_projects + new_value = not original["auto_create_system_projects"] response = integration_client.put( "/api/v1/admin/cache-settings", - json={"allow_public_internet": new_value}, + json={"auto_create_system_projects": new_value}, ) assert response.status_code == 200 data = response.json() - assert data["allow_public_internet"] == new_value - # Other field should be unchanged - assert data["auto_create_system_projects"] == original["auto_create_system_projects"] + assert data["auto_create_system_projects"] == new_value # Restore integration_client.put( "/api/v1/admin/cache-settings", - json={"allow_public_internet": original["allow_public_internet"]}, + json={"auto_create_system_projects": original["auto_create_system_projects"]}, ) @pytest.mark.integration @@ -1937,5 +1902,4 @@ class TestCacheSettingsEnvOverride: data = response.json() # These fields should exist (may be null if no env override) - assert "allow_public_internet_env_override" in data assert "auto_create_system_projects_env_override" in data