# backend/tests/integration/test_large_uploads.py
"""
|
||||
Integration tests for large file upload functionality.
|
||||
|
||||
Tests cover:
|
||||
- Large file uploads (100MB, 1GB)
|
||||
- Multipart upload behavior
|
||||
- Upload metrics (duration, throughput)
|
||||
- Memory efficiency during uploads
|
||||
- Upload progress tracking
|
||||
|
||||
Note: Large tests are marked with @pytest.mark.slow and will be skipped
|
||||
by default. Run with `pytest --run-slow` to include them.
|
||||
"""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
import io
|
||||
import time
|
||||
from tests.factories import (
|
||||
compute_sha256,
|
||||
upload_test_file,
|
||||
s3_object_exists,
|
||||
)
|
||||
from tests.conftest import (
|
||||
SIZE_1KB,
|
||||
SIZE_100KB,
|
||||
SIZE_1MB,
|
||||
SIZE_10MB,
|
||||
SIZE_100MB,
|
||||
SIZE_1GB,
|
||||
)
|
||||
|
||||
|
||||
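

# Hypothetical helper, not part of the original suite: several tests below
# repeat the same API-key bootstrap against /api/v1/auth/keys. A module-level
# helper along these lines could consolidate that duplication. Sketch only,
# assuming the endpoint behavior shown in the tests below.
def _create_api_key(client, name):
    """Create an API key via the auth endpoint and return the bearer token."""
    response = client.post("/api/v1/auth/keys", json={"name": name})
    assert response.status_code == 200
    return response.json()["key"]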


class TestUploadMetrics:
    """Tests for upload duration and throughput metrics."""

    @pytest.mark.integration
    def test_upload_response_includes_duration_ms(self, integration_client, test_package):
        """Test upload response includes duration_ms field."""
        project, package = test_package
        content = b"duration test content"

        result = upload_test_file(
            integration_client, project, package, content, tag="duration-test"
        )

        assert "duration_ms" in result
        assert result["duration_ms"] is not None
        assert result["duration_ms"] >= 0

    @pytest.mark.integration
    def test_upload_response_includes_throughput(self, integration_client, test_package):
        """Test upload response includes throughput_mbps field."""
        project, package = test_package
        content = b"throughput test content"

        result = upload_test_file(
            integration_client, project, package, content, tag="throughput-test"
        )

        # For small files throughput may be very high or None, so only
        # verify the field exists.
        assert "throughput_mbps" in result

    @pytest.mark.integration
    def test_upload_duration_reasonable(
        self, integration_client, test_package, sized_content
    ):
        """Test upload duration is reasonable for the file size."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=100)

        start = time.time()
        result = upload_test_file(
            integration_client, project, package, content, tag="duration-check"
        )
        actual_duration = (time.time() - start) * 1000  # ms

        # The reported duration should be close to the measured wall-clock
        # time; allow 1s of variance for network overhead.
        assert result["duration_ms"] is not None
        assert result["duration_ms"] <= actual_duration + 1000


class TestLargeFileUploads:
    """Tests for large file uploads using multipart."""

    @pytest.mark.integration
    def test_upload_10mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=200)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-10mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB
        assert result["duration_ms"] is not None
        assert result["throughput_mbps"] is not None

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.requires_direct_s3
    def test_upload_100mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 100MB file (triggers multipart upload)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=300)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-100mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100MB
        # Verify the object actually landed in S3.
        assert s3_object_exists(expected_hash)

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_1gb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 1GB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=400)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-1gb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1GB
        # Should have measurable throughput.
        assert result["throughput_mbps"] is not None
        assert result["throughput_mbps"] > 0

    @pytest.mark.integration
    def test_large_file_deduplication(
        self, integration_client, test_package, sized_content, unique_test_id
    ):
        """Test deduplication works for large files."""
        project, package = test_package
        # Use unique_test_id to ensure unique content per test run.
        seed = hash(unique_test_id) % 10000
        content, expected_hash = sized_content(SIZE_10MB, seed=seed)

        # First upload; this may already be deduplicated if a previous run
        # uploaded the same content, so don't assert on its dedup flag.
        result1 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-1"
        )
        assert result1["artifact_id"] == expected_hash

        # Second upload of the same content MUST be deduplicated.
        result2 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-2"
        )
        assert result2["artifact_id"] == expected_hash
        assert result2["deduplicated"] is True


class TestUploadProgress:
    """Tests for upload progress tracking endpoint."""

    @pytest.mark.integration
    def test_progress_endpoint_returns_not_found_for_invalid_id(
        self, integration_client, test_package
    ):
        """Test progress endpoint returns not_found status for invalid upload ID."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
        )

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "not_found"
        assert data["upload_id"] == "invalid-upload-id"

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_project(
        self, integration_client, unique_test_id
    ):
        """Test progress endpoint validates project exists."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_package(
        self, integration_client, test_project, unique_test_id
    ):
        """Test progress endpoint validates package exists."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
        )

        assert response.status_code == 404


class TestResumableUploadProgress:
    """Tests for progress tracking during resumable uploads."""

    @pytest.mark.integration
    def test_resumable_upload_init_and_progress(
        self, integration_client, test_package, sized_content
    ):
        """Test initializing a resumable upload and checking progress."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=600)

        # Get an API key for auth.
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "progress-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        # Initialize the resumable upload.
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "progress-test.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        # upload_id may be absent, e.g. if the server short-circuits
        # because the content already exists.
        if upload_id:
            # Check initial progress.
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert progress_response.status_code == 200
            progress = progress_response.json()
            assert progress["status"] == "in_progress"
            assert progress["bytes_uploaded"] == 0
            assert progress["bytes_total"] == SIZE_100KB

            # Abort to clean up.
            integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers={"Authorization": f"Bearer {api_key}"},
            )


class TestUploadSizeLimits:
    """Tests for upload size limit enforcement."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """Test empty files are rejected."""
        project, package = test_package

        files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )

        assert response.status_code in [400, 422]

    @pytest.mark.integration
    def test_minimum_size_accepted(self, integration_client, test_package):
        """Test a 1-byte file is accepted."""
        project, package = test_package
        content = b"X"

        result = upload_test_file(
            integration_client, project, package, content, tag="min-size"
        )

        assert result["size"] == 1

    @pytest.mark.integration
    def test_content_length_header_used_in_response(self, integration_client, test_package):
        """Test that the upload response size matches the uploaded content length."""
        project, package = test_package
        content = b"content length verification test"

        result = upload_test_file(
            integration_client, project, package, content, tag="content-length-test"
        )

        # Size in the response should match the actual content length.
        assert result["size"] == len(content)


class TestUploadErrorHandling:
    """Tests for upload error handling."""

    @pytest.mark.integration
    def test_upload_to_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Test upload to a nonexistent project returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_to_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to a nonexistent package returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_without_file_returns_422(self, integration_client, test_package):
        """Test upload without a file field returns 422."""
        project, package = test_package

        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data={"tag": "no-file"},
        )

        assert response.status_code == 422

    @pytest.mark.integration
    def test_upload_with_invalid_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with a malformed checksum header is rejected."""
        project, package = test_package
        content = b"checksum test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": "invalid-checksum"},
        )

        assert response.status_code == 400

    @pytest.mark.integration
    def test_upload_with_mismatched_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with a wrong (but well-formed) checksum is rejected."""
        project, package = test_package
        content = b"mismatch test"
        wrong_hash = "0" * 64  # valid hex length, guaranteed not to match

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )

        assert response.status_code == 422
        assert "verification failed" in response.json().get("detail", "").lower()


class TestResumableUploadCancellation:
    """Tests for resumable upload cancellation."""

    @pytest.mark.integration
    def test_abort_resumable_upload(self, integration_client, test_package, sized_content):
        """Test aborting a resumable upload cleans up properly."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=700)

        # Get an API key for auth.
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        # Initialize the resumable upload.
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "abort-test.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        if upload_id:
            # Abort the upload without uploading any parts.
            abort_response = integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert abort_response.status_code in [200, 204]

            # Verify progress shows not_found after the abort.
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert progress_response.status_code == 200
            assert progress_response.json()["status"] == "not_found"

    @pytest.mark.integration
    def test_abort_nonexistent_upload(self, integration_client, test_package):
        """Test aborting a nonexistent upload returns an appropriate status."""
        project, package = test_package

        # Get an API key for auth.
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-nonexistent-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
            headers={"Authorization": f"Bearer {api_key}"},
        )

        # Should return 404, or 200/204 if deletes are idempotent.
        assert response.status_code in [200, 204, 404]
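
    # Hedged sketch, not in the original suite: aborting the same upload
    # twice should degrade to the nonexistent-upload case above, assuming
    # deletes are idempotent as the previous test allows.
    @pytest.mark.integration
    def test_abort_twice(self, integration_client, test_package, sized_content):
        """Test aborting an already-aborted upload behaves like a missing one."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=710)

        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-twice-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "abort-twice.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        if upload_id:
            auth = {"Authorization": f"Bearer {api_key}"}
            first = integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}", headers=auth
            )
            assert first.status_code in [200, 204]
            second = integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}", headers=auth
            )
            assert second.status_code in [200, 204, 404]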


class TestUploadTimeout:
    """Tests for upload timeout handling."""

    @pytest.mark.integration
    def test_upload_with_short_timeout_succeeds_for_small_file(
        self, integration_client, test_package
    ):
        """Test a small file upload succeeds within a reasonable timeout."""
        project, package = test_package
        content = b"small timeout test"

        # The httpx client should handle this well within its timeout.
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-small"
        )

        assert result["artifact_id"] is not None

    @pytest.mark.integration
    def test_upload_response_duration_under_timeout(
        self, integration_client, test_package, sized_content
    ):
        """Test upload completes within a reasonable time."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=800)

        start = time.time()
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-check"
        )
        duration = time.time() - start

        # 1MB should upload in well under 60 seconds locally.
        assert duration < 60
        assert result["artifact_id"] is not None


class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_different_files(
        self, integration_client, test_package, sized_content
    ):
        """Test concurrent uploads of different files succeed."""
        from concurrent.futures import ThreadPoolExecutor, as_completed

        from httpx import Client

        project, package = test_package

        # Get an API key for auth.
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "concurrent-diff-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        num_uploads = 3
        results = []
        errors = []

        def upload_unique_file(idx):
            try:
                content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)

                # Each worker uses its own short-lived client.
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
                with Client(base_url=base_url, timeout=30.0) as client:
                    files = {
                        "file": (
                            f"concurrent-{idx}.bin",
                            io.BytesIO(content),
                            "application/octet-stream",
                        )
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent-diff-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results.append((idx, response.json(), expected_hash))
                    else:
                        errors.append(f"Upload {idx}: {response.status_code} - {response.text}")
            except Exception as e:
                errors.append(f"Upload {idx}: {e}")

        with ThreadPoolExecutor(max_workers=num_uploads) as executor:
            futures = [executor.submit(upload_unique_file, i) for i in range(num_uploads)]
            # Wait for completion; worker errors are collected in `errors`.
            for future in as_completed(futures):
                future.result()

        assert len(errors) == 0, f"Concurrent upload errors: {errors}"
        assert len(results) == num_uploads

        # Each upload should have a unique artifact ID...
        artifact_ids = {r[1]["artifact_id"] for r in results}
        assert len(artifact_ids) == num_uploads

        # ...and each should match its expected hash.
        for idx, result, expected_hash in results:
            assert result["artifact_id"] == expected_hash