"""
Integration tests for large file upload functionality.

Tests cover:
- Large file uploads (100MB, 1GB)
- Multipart upload behavior
- Upload metrics (duration, throughput)
- Memory efficiency during uploads
- Upload progress tracking

Note: Large tests are marked with @pytest.mark.slow and will be skipped by default.
Run with `pytest --run-slow` to include them.
"""

import io
import time

import pytest

from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_10MB,
    SIZE_100MB,
    SIZE_1GB,
)
from tests.factories import (
    compute_sha256,
    upload_test_file,
    s3_object_exists,
)

# Fallback server address for tests that must open their own HTTP clients.
_DEFAULT_BASE_URL = "http://localhost:8080"


def _create_api_key(client, name: str) -> str:
    """Create an API key called *name* through *client* and return its value.

    Fails the calling test if the auth endpoint does not answer 200.
    """
    response = client.post("/api/v1/auth/keys", json={"name": name})
    assert response.status_code == 200
    return response.json()["key"]


def _bearer(api_key: str) -> dict:
    """Return the Authorization header dict for *api_key*."""
    return {"Authorization": f"Bearer {api_key}"}


def _resolve_base_url(client) -> str:
    """Return the base URL *client* is configured with.

    Falls back to ``_DEFAULT_BASE_URL`` when the client exposes no
    ``base_url`` attribute or the attribute renders as an empty string.
    """
    return str(getattr(client, "base_url", "") or "") or _DEFAULT_BASE_URL


class TestUploadMetrics:
    """Tests for upload duration and throughput metrics."""

    @pytest.mark.integration
    def test_upload_response_includes_duration_ms(
        self, integration_client, test_package
    ):
        """Test upload response includes duration_ms field."""
        project, package = test_package
        content = b"duration test content"

        result = upload_test_file(
            integration_client, project, package, content, tag="duration-test"
        )

        assert "duration_ms" in result
        assert result["duration_ms"] is not None
        assert result["duration_ms"] >= 0

    @pytest.mark.integration
    def test_upload_response_includes_throughput(
        self, integration_client, test_package
    ):
        """Test upload response includes throughput_mbps field."""
        project, package = test_package
        content = b"throughput test content"

        result = upload_test_file(
            integration_client, project, package, content, tag="throughput-test"
        )

        assert "throughput_mbps" in result
        # For small files throughput may be very high or None
        # Just verify the field exists

    @pytest.mark.integration
    def test_upload_duration_reasonable(
        self, integration_client, test_package, sized_content
    ):
        """Test upload duration is reasonable for file size."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=100)

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments during the upload.
        start = time.perf_counter()
        result = upload_test_file(
            integration_client, project, package, content, tag="duration-check"
        )
        actual_duration = (time.perf_counter() - start) * 1000  # ms

        # Reported duration should be close to actual
        assert result["duration_ms"] is not None
        # Allow some variance (network overhead)
        assert result["duration_ms"] <= actual_duration + 1000  # Within 1s


class TestLargeFileUploads:
    """Tests for large file uploads using multipart."""

    @pytest.mark.integration
    def test_upload_10mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=200)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-10mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB
        assert result["duration_ms"] is not None
        assert result["throughput_mbps"] is not None

    @pytest.mark.integration
    @pytest.mark.slow
    def test_upload_100mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 100MB file (triggers multipart upload)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=300)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-100mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100MB
        # Verify S3 object exists
        assert s3_object_exists(expected_hash)

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_1gb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 1GB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=400)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-1gb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1GB
        # Should have measurable throughput
        assert result["throughput_mbps"] is not None
        assert result["throughput_mbps"] > 0

    @pytest.mark.integration
    def test_large_file_deduplication(
        self, integration_client, test_package, sized_content, unique_test_id
    ):
        """Test deduplication works for large files."""
        project, package = test_package
        # Use unique_test_id to ensure unique content per test run.
        # NOTE(review): if unique_test_id is a str, hash() is salted per
        # interpreter process unless PYTHONHASHSEED is pinned, so the seed is
        # not reproducible across runs - confirm that is intended.
        seed = hash(unique_test_id) % 10000
        content, expected_hash = sized_content(SIZE_10MB, seed=seed)

        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-1"
        )
        # The flag's value is not checked here: it may already be True if a
        # previous test uploaded the same content. Only its presence matters.
        assert "deduplicated" in result1

        # Second upload of same content
        result2 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-2"
        )
        assert result2["artifact_id"] == expected_hash
        # Second upload MUST be deduplicated
        assert result2["deduplicated"] is True


class TestUploadProgress:
    """Tests for upload progress tracking endpoint."""

    @pytest.mark.integration
    def test_progress_endpoint_returns_not_found_for_invalid_id(
        self, integration_client, test_package
    ):
        """Test progress endpoint returns not_found status for invalid upload ID."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
        )

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "not_found"
        assert data["upload_id"] == "invalid-upload-id"

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_project(
        self, integration_client, unique_test_id
    ):
        """Test progress endpoint validates project exists."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_package(
        self, integration_client, test_project, unique_test_id
    ):
        """Test progress endpoint validates package exists."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
        )

        assert response.status_code == 404


class TestResumableUploadProgress:
    """Tests for progress tracking during resumable uploads."""

    @pytest.mark.integration
    def test_resumable_upload_init_and_progress(
        self, integration_client, test_package, sized_content
    ):
        """Test initializing resumable upload and checking progress."""
        project, package = test_package
        _, expected_hash = sized_content(SIZE_100KB, seed=600)

        # Get API key for auth
        headers = _bearer(_create_api_key(integration_client, "progress-test-key"))

        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "progress-test.bin",
                "size": SIZE_100KB,
            },
            headers=headers,
        )
        assert init_response.status_code == 200

        upload_id = init_response.json().get("upload_id")
        if not upload_id:
            # Without an upload session there is nothing to track; report a
            # skip instead of a green test that asserted nothing.
            pytest.skip("upload init returned no upload_id; nothing to track")

        try:
            # Check initial progress
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers=headers,
            )
            assert progress_response.status_code == 200
            progress = progress_response.json()
            assert progress["status"] == "in_progress"
            assert progress["bytes_uploaded"] == 0
            assert progress["bytes_total"] == SIZE_100KB
        finally:
            # Abort to clean up, even when one of the assertions above fails.
            integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers=headers,
            )


class TestUploadSizeLimits:
    """Tests for upload size limit enforcement."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """Test empty files are rejected."""
        project, package = test_package

        files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )

        assert response.status_code in [400, 422]

    @pytest.mark.integration
    def test_minimum_size_accepted(self, integration_client, test_package):
        """Test 1-byte file is accepted."""
        project, package = test_package
        content = b"X"

        result = upload_test_file(
            integration_client, project, package, content, tag="min-size"
        )

        assert result["size"] == 1

    @pytest.mark.integration
    def test_content_length_header_used_in_response(
        self, integration_client, test_package
    ):
        """Test that upload response size matches Content-Length."""
        project, package = test_package
        content = b"content length verification test"

        result = upload_test_file(
            integration_client, project, package, content, tag="content-length-test"
        )

        # Size in response should match actual content length
        assert result["size"] == len(content)


class TestUploadErrorHandling:
    """Tests for upload error handling."""

    @pytest.mark.integration
    def test_upload_to_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Test upload to nonexistent project returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_to_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to nonexistent package returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_without_file_returns_422(self, integration_client, test_package):
        """Test upload without file field returns 422."""
        project, package = test_package

        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data={"tag": "no-file"},
        )

        assert response.status_code == 422

    @pytest.mark.integration
    def test_upload_with_invalid_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with invalid checksum header format is rejected."""
        project, package = test_package
        content = b"checksum test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": "invalid-checksum"},
        )

        assert response.status_code == 400

    @pytest.mark.integration
    def test_upload_with_mismatched_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with wrong checksum is rejected."""
        project, package = test_package
        content = b"mismatch test"
        # Well-formed (64 hex chars) but not the digest of the content.
        wrong_hash = "0" * 64

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )

        assert response.status_code == 422
        assert "verification failed" in response.json().get("detail", "").lower()


class TestResumableUploadCancellation:
    """Tests for resumable upload cancellation."""

    @pytest.mark.integration
    def test_abort_resumable_upload(
        self, integration_client, test_package, sized_content
    ):
        """Test aborting a resumable upload cleans up properly."""
        project, package = test_package
        _, expected_hash = sized_content(SIZE_100KB, seed=700)

        # Get API key for auth
        headers = _bearer(_create_api_key(integration_client, "abort-test-key"))

        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "abort-test.bin",
                "size": SIZE_100KB,
            },
            headers=headers,
        )
        assert init_response.status_code == 200

        upload_id = init_response.json().get("upload_id")
        if not upload_id:
            # No session was opened, so there is nothing to abort; report a
            # skip instead of a green test that asserted nothing.
            pytest.skip("upload init returned no upload_id; nothing to abort")

        # Abort the upload (without uploading any parts)
        abort_response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/{upload_id}",
            headers=headers,
        )
        assert abort_response.status_code in [200, 204]

        # Verify progress shows not_found after abort
        progress_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
            headers=headers,
        )
        assert progress_response.status_code == 200
        assert progress_response.json()["status"] == "not_found"

    @pytest.mark.integration
    def test_abort_nonexistent_upload(self, integration_client, test_package):
        """Test aborting nonexistent upload returns appropriate error."""
        project, package = test_package

        # Get API key for auth
        headers = _bearer(_create_api_key(integration_client, "abort-nonexistent-key"))

        response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
            headers=headers,
        )

        # Should return 404 or 200 (idempotent delete)
        assert response.status_code in [200, 204, 404]


class TestUploadTimeout:
    """Tests for upload timeout handling."""

    @pytest.mark.integration
    def test_upload_with_short_timeout_succeeds_for_small_file(
        self, integration_client, test_package
    ):
        """Test small file upload succeeds with reasonable timeout."""
        project, package = test_package
        content = b"small timeout test"

        # httpx client should handle this quickly
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-small"
        )

        assert result["artifact_id"] is not None

    @pytest.mark.integration
    def test_upload_response_duration_under_timeout(
        self, integration_client, test_package, sized_content
    ):
        """Test upload completes within reasonable time."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=800)

        # Monotonic clock: immune to wall-clock jumps while the upload runs.
        start = time.perf_counter()
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-check"
        )
        duration = time.perf_counter() - start

        # 1MB should upload in well under 60 seconds on local
        assert duration < 60
        assert result["artifact_id"] is not None


class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_different_files(
        self, integration_client, test_package, sized_content
    ):
        """Test concurrent uploads of different files succeed."""
        from concurrent.futures import ThreadPoolExecutor, as_completed

        project, package = test_package

        # Get API key for auth
        headers = _bearer(_create_api_key(integration_client, "concurrent-diff-key"))

        # Target the same server as the shared client rather than a
        # hard-coded address, so the test follows the configured environment.
        base_url = _resolve_base_url(integration_client)

        num_uploads = 3
        results = []
        errors = []

        def upload_unique_file(idx):
            """Upload one distinct payload with a dedicated client.

            Outcomes are appended to ``results`` / ``errors`` instead of
            raised, so one failing worker does not hide the others.
            """
            try:
                from httpx import Client

                content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)

                with Client(base_url=base_url, timeout=30.0) as client:
                    files = {
                        "file": (
                            f"concurrent-{idx}.bin",
                            io.BytesIO(content),
                            "application/octet-stream",
                        )
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent-diff-{idx}"},
                        headers=headers,
                    )
                    if response.status_code == 200:
                        results.append((idx, response.json(), expected_hash))
                    else:
                        errors.append(
                            f"Upload {idx}: {response.status_code} - {response.text}"
                        )
            except Exception as e:
                errors.append(f"Upload {idx}: {e}")

        with ThreadPoolExecutor(max_workers=num_uploads) as executor:
            futures = [
                executor.submit(upload_unique_file, i) for i in range(num_uploads)
            ]
            for future in as_completed(futures):
                # Surfaces anything the worker's own handler did not catch.
                future.result()

        assert len(errors) == 0, f"Concurrent upload errors: {errors}"
        assert len(results) == num_uploads

        # Each upload should have unique artifact ID
        artifact_ids = {entry[1]["artifact_id"] for entry in results}
        assert len(artifact_ids) == num_uploads

        # Each should match expected hash
        for idx, result, expected_hash in results:
            assert result["artifact_id"] == expected_hash