""" Integration tests for concurrent upload and download operations. Tests cover: - Concurrent uploads of different files - Concurrent uploads of same file (deduplication race) - Concurrent downloads of same artifact - Concurrent downloads of different artifacts - Mixed concurrent uploads and downloads - Data corruption prevention under concurrency """ import pytest import io import os from concurrent.futures import ThreadPoolExecutor, as_completed from tests.factories import ( compute_sha256, upload_test_file, generate_content_with_hash, ) def get_api_key(integration_client): """Create an API key for concurrent test workers.""" import uuid response = integration_client.post( "/api/v1/auth/keys", json={"name": f"concurrent-test-{uuid.uuid4().hex[:8]}"}, ) if response.status_code == 200: return response.json()["key"] return None class TestConcurrentUploads: """Tests for concurrent upload operations.""" @pytest.mark.integration @pytest.mark.concurrent def test_2_concurrent_uploads_different_files(self, integration_client, test_package): """Test 2 concurrent uploads of different files.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" files_data = [ generate_content_with_hash(1024, seed=i) for i in range(2) ] results = [] errors = [] def upload_worker(idx, content, expected_hash): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": f"concurrent-{idx}"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: result = response.json() results.append((idx, result, expected_hash)) else: errors.append(f"Worker {idx}: Status {response.status_code}: {response.text}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=2) as executor: futures = [ executor.submit(upload_worker, i, content, hash) for i, (content, hash) in enumerate(files_data) ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == 2 # Verify each upload returned correct artifact_id for idx, result, expected_hash in results: assert result["artifact_id"] == expected_hash @pytest.mark.integration @pytest.mark.concurrent def test_5_concurrent_uploads_different_files(self, integration_client, test_package): """Test 5 concurrent uploads of different files.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" num_files = 5 files_data = [ generate_content_with_hash(2048, seed=100 + i) for i in range(num_files) ] results = [] errors = [] def upload_worker(idx, content, expected_hash): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": f"concurrent5-{idx}"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: result = response.json() results.append((idx, result, expected_hash)) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_files) as executor: futures = [ executor.submit(upload_worker, i, content, hash) for i, (content, hash) in enumerate(files_data) ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_files # Verify all uploads have unique artifact_ids artifact_ids = set(r[1]["artifact_id"] for r in results) assert len(artifact_ids) == num_files @pytest.mark.integration @pytest.mark.concurrent def test_10_concurrent_uploads_different_files(self, integration_client, test_package): """Test 10 concurrent uploads of different files.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" num_files = 10 files_data = [ generate_content_with_hash(1024, seed=200 + i) for i in range(num_files) ] results = [] errors = [] def upload_worker(idx, content, expected_hash): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": f"concurrent10-{idx}"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: result = response.json() results.append((idx, result, expected_hash)) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_files) as executor: futures = [ executor.submit(upload_worker, i, content, hash) for i, (content, hash) in enumerate(files_data) ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_files @pytest.mark.integration @pytest.mark.concurrent def test_concurrent_uploads_same_file_deduplication(self, integration_client, test_package): """Test concurrent uploads of same file handle deduplication correctly.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" content, expected_hash = generate_content_with_hash(4096, seed=999) num_concurrent = 5 results = [] errors = [] def upload_worker(idx): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": (f"same-{idx}.bin", io.BytesIO(content), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": f"dedup-{idx}"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: results.append(response.json()) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_concurrent) as executor: futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_concurrent # All should have same artifact_id artifact_ids = set(r["artifact_id"] for r in results) assert len(artifact_ids) == 1 assert expected_hash in artifact_ids # Verify final ref_count equals number of uploads response = integration_client.get(f"/api/v1/artifact/{expected_hash}") assert response.status_code == 200 assert response.json()["ref_count"] == num_concurrent @pytest.mark.integration @pytest.mark.concurrent def test_concurrent_uploads_to_different_packages(self, integration_client, test_project, unique_test_id): """Test concurrent uploads to different packages.""" project = test_project api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" num_packages = 3 package_names = [] # Create multiple packages for i in range(num_packages): pkg_name = f"pkg-{unique_test_id}-{i}" response = integration_client.post( f"/api/v1/project/{project}/packages", json={"name": pkg_name, "description": f"Package {i}"}, ) assert response.status_code == 200 package_names.append(pkg_name) files_data = [ generate_content_with_hash(1024, seed=300 + i) for i in range(num_packages) ] results = [] errors = [] def upload_worker(idx, package, content, expected_hash): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": "latest"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: result = response.json() results.append((package, result, expected_hash)) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_packages) as executor: futures = [ executor.submit(upload_worker, i, package_names[i], content, hash) for i, (content, hash) in enumerate(files_data) ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_packages class TestConcurrentDownloads: """Tests for concurrent download operations.""" @pytest.mark.integration @pytest.mark.concurrent def test_2_concurrent_downloads_same_artifact(self, integration_client, test_package): """Test 2 concurrent downloads of same artifact.""" project, package = test_package content, expected_hash = generate_content_with_hash(2048, seed=400) # Upload first upload_test_file(integration_client, project, package, content, version="download-test") results = [] errors = [] def download_worker(idx): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: response = client.get( f"/api/v1/project/{project}/{package}/+/download-test", params={"mode": "proxy"}, ) if response.status_code == 200: results.append((idx, response.content)) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=2) as executor: futures = [executor.submit(download_worker, i) for i in range(2)] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == 2 # All downloads should match original for idx, downloaded in results: assert downloaded == content @pytest.mark.integration @pytest.mark.concurrent def test_5_concurrent_downloads_same_artifact(self, integration_client, test_package): """Test 5 concurrent downloads of same artifact.""" project, package = test_package content, expected_hash = generate_content_with_hash(4096, seed=500) upload_test_file(integration_client, project, package, content, version="download5-test") num_downloads = 5 results = [] errors = [] def download_worker(idx): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: response = client.get( f"/api/v1/project/{project}/{package}/+/download5-test", params={"mode": "proxy"}, ) if response.status_code == 200: results.append((idx, response.content)) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_downloads) as executor: futures = [executor.submit(download_worker, i) for i in range(num_downloads)] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_downloads for idx, downloaded in results: assert downloaded == content @pytest.mark.integration @pytest.mark.concurrent def test_10_concurrent_downloads_same_artifact(self, integration_client, test_package): """Test 10 concurrent downloads of same artifact.""" project, package = test_package content, expected_hash = generate_content_with_hash(8192, seed=600) upload_test_file(integration_client, project, package, content, version="download10-test") num_downloads = 10 results = [] errors = [] def download_worker(idx): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: response = client.get( f"/api/v1/project/{project}/{package}/+/download10-test", params={"mode": "proxy"}, ) if response.status_code == 200: results.append((idx, response.content)) else: errors.append(f"Worker {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_downloads) as executor: futures = [executor.submit(download_worker, i) for i in range(num_downloads)] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_downloads for idx, downloaded in results: assert downloaded == content @pytest.mark.integration @pytest.mark.concurrent def test_concurrent_downloads_different_artifacts(self, integration_client, test_package): """Test concurrent downloads of different artifacts.""" project, package = test_package # Upload multiple files num_files = 5 uploads = [] for i in range(num_files): content, expected_hash = generate_content_with_hash(1024, seed=700 + i) upload_test_file( integration_client, project, package, content, version=f"multi-download-{i}" ) uploads.append((f"multi-download-{i}", content)) results = [] errors = [] def download_worker(tag, expected_content): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: response = client.get( f"/api/v1/project/{project}/{package}/+/{tag}", params={"mode": "proxy"}, ) if response.status_code == 200: results.append((tag, response.content, expected_content)) else: errors.append(f"Tag {tag}: Status {response.status_code}") except Exception as e: errors.append(f"Tag {tag}: {str(e)}") with ThreadPoolExecutor(max_workers=num_files) as executor: futures = [ executor.submit(download_worker, tag, content) for tag, content in uploads ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_files for tag, downloaded, expected in results: assert downloaded == expected, f"Content mismatch for {tag}" class TestMixedConcurrentOperations: """Tests for mixed concurrent upload and download operations.""" @pytest.mark.integration @pytest.mark.concurrent def test_upload_while_download_in_progress(self, integration_client, test_package): """Test uploading while a download is in progress.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" # Upload initial content content1, hash1 = generate_content_with_hash(10240, seed=800) # 10KB upload_test_file(integration_client, project, package, content1, version="initial") # New content for upload during download content2, hash2 = generate_content_with_hash(10240, seed=801) results = {"downloads": [], "uploads": []} errors = [] def download_worker(): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: response = client.get( f"/api/v1/project/{project}/{package}/+/initial", params={"mode": "proxy"}, ) if response.status_code == 200: results["downloads"].append(response.content) else: errors.append(f"Download: Status {response.status_code}") except Exception as e: errors.append(f"Download: {str(e)}") def upload_worker(): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": ("new.bin", io.BytesIO(content2), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": "during-download"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: results["uploads"].append(response.json()) else: errors.append(f"Upload: Status {response.status_code}") except Exception as e: errors.append(f"Upload: {str(e)}") with ThreadPoolExecutor(max_workers=2) as executor: futures = [ executor.submit(download_worker), executor.submit(upload_worker), ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results["downloads"]) == 1 assert len(results["uploads"]) == 1 # Verify download got correct content assert results["downloads"][0] == content1 # Verify upload succeeded assert results["uploads"][0]["artifact_id"] == hash2 @pytest.mark.integration @pytest.mark.concurrent def test_multiple_uploads_and_downloads_simultaneously(self, integration_client, test_package): """Test multiple uploads and downloads running simultaneously.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" # Pre-upload some files for downloading existing_files = [] for i in range(3): content, hash = generate_content_with_hash(2048, seed=900 + i) upload_test_file(integration_client, project, package, content, version=f"existing-{i}") existing_files.append((f"existing-{i}", content)) # New files for uploading new_files = [ generate_content_with_hash(2048, seed=910 + i) for i in range(3) ] results = {"downloads": [], "uploads": []} errors = [] def download_worker(tag, expected): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: response = client.get( f"/api/v1/project/{project}/{package}/+/{tag}", params={"mode": "proxy"}, ) if response.status_code == 200: results["downloads"].append((tag, response.content, expected)) else: errors.append(f"Download {tag}: Status {response.status_code}") except Exception as e: errors.append(f"Download {tag}: {str(e)}") def upload_worker(idx, content, expected_hash): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: files = { "file": (f"new-{idx}.bin", io.BytesIO(content), "application/octet-stream") } response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": f"new-{idx}"}, headers={"Authorization": f"Bearer {api_key}"}, ) if response.status_code == 200: results["uploads"].append((idx, response.json(), expected_hash)) else: errors.append(f"Upload {idx}: Status {response.status_code}") except Exception as e: errors.append(f"Upload {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=6) as executor: futures = [] # Submit downloads for tag, content in existing_files: futures.append(executor.submit(download_worker, tag, content)) # Submit uploads for i, (content, hash) in enumerate(new_files): futures.append(executor.submit(upload_worker, i, content, hash)) for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results["downloads"]) == 3 assert len(results["uploads"]) == 3 # Verify downloads for tag, downloaded, expected in results["downloads"]: assert downloaded == expected, f"Download mismatch for {tag}" # Verify uploads for idx, result, expected_hash in results["uploads"]: assert result["artifact_id"] == expected_hash @pytest.mark.integration @pytest.mark.concurrent def test_no_data_corruption_under_concurrency(self, integration_client, test_package): """Test that no data corruption occurs under concurrent operations.""" project, package = test_package api_key = get_api_key(integration_client) assert api_key, "Failed to create API key" # Create content with recognizable patterns num_files = 5 files_data = [] for i in range(num_files): # Each file has unique repeating pattern for easy corruption detection pattern = bytes([i] * 256) content = pattern * 40 # 10KB each hash = compute_sha256(content) files_data.append((content, hash)) results = [] errors = [] def upload_and_verify(idx, content, expected_hash): try: from httpx import Client base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=60.0) as client: # Upload files = { "file": (f"pattern-{idx}.bin", io.BytesIO(content), "application/octet-stream") } upload_resp = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data={"version": f"pattern-{idx}"}, headers={"Authorization": f"Bearer {api_key}"}, ) if upload_resp.status_code != 200: errors.append(f"Upload {idx}: Status {upload_resp.status_code}") return upload_result = upload_resp.json() if upload_result["artifact_id"] != expected_hash: errors.append(f"Upload {idx}: Hash mismatch") return # Immediately download and verify download_resp = client.get( f"/api/v1/project/{project}/{package}/+/pattern-{idx}", params={"mode": "proxy"}, ) if download_resp.status_code != 200: errors.append(f"Download {idx}: Status {download_resp.status_code}") return if download_resp.content != content: errors.append(f"Worker {idx}: DATA CORRUPTION DETECTED") return # Verify the downloaded content hash downloaded_hash = compute_sha256(download_resp.content) if downloaded_hash != expected_hash: errors.append(f"Worker {idx}: Hash verification failed") return results.append(idx) except Exception as e: errors.append(f"Worker {idx}: {str(e)}") with ThreadPoolExecutor(max_workers=num_files) as executor: futures = [ executor.submit(upload_and_verify, i, content, hash) for i, (content, hash) in enumerate(files_data) ] for future in as_completed(futures): pass assert len(errors) == 0, f"Errors: {errors}" assert len(results) == num_files