- Remove Tag/TagHistory model tests from unit tests
- Update CacheSettings tests to remove allow_public_internet field
- Replace tag= with version= in upload_test_file calls
- Update test assertions to use versions instead of tags
- Remove tests for tag: prefix downloads (now uses version:)
- Update dependency tests for version-only schema

"""
|
|
Integration tests for large file upload functionality.
|
|
|
|
Tests cover:
|
|
- Large file uploads (100MB, 1GB)
|
|
- Multipart upload behavior
|
|
- Upload metrics (duration, throughput)
|
|
- Memory efficiency during uploads
|
|
- Upload progress tracking
|
|
|
|
Note: Large tests are marked with @pytest.mark.slow and will be skipped
|
|
by default. Run with `pytest --run-slow` to include them.
|
|
"""
|
|
|
|
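# How the `--run-slow` gate mentioned above is typically wired up in pytest.
# This is a minimal sketch of the conftest.py hooks (an illustration under
# assumptions, not necessarily this repo's actual conftest):
#
#     def pytest_addoption(parser):
#         parser.addoption("--run-slow", action="store_true", default=False,
#                          help="also run tests marked @pytest.mark.slow")
#
#     def pytest_collection_modifyitems(config, items):
#         if config.getoption("--run-slow"):
#             return
#         skip_slow = pytest.mark.skip(reason="needs --run-slow")
#         for item in items:
#             if "slow" in item.keywords:
#                 item.add_marker(skip_slow)
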
import io
import os
import time

import pytest

from tests.factories import (
    compute_sha256,
    s3_object_exists,
    upload_test_file,
)
from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_10MB,
    SIZE_100MB,
    SIZE_1GB,
)

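# Assumed contract of the upload_test_file() helper, inferred from the
# assertions in this module (the helper lives in tests.factories; its exact
# signature is an assumption): it POSTs the given bytes to
# /api/v1/project/{project}/{package}/upload with the given version= value
# and returns the parsed JSON response, which these tests expect to contain
# "artifact_id" (the SHA-256 of the content), "size", "deduplicated",
# "duration_ms", and "throughput_mbps".
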
class TestUploadMetrics:
    """Tests for upload duration and throughput metrics."""

    @pytest.mark.integration
    def test_upload_response_includes_duration_ms(self, integration_client, test_package):
        """Test upload response includes duration_ms field."""
        project, package = test_package
        content = b"duration test content"

        result = upload_test_file(
            integration_client, project, package, content, version="duration-test"
        )

        assert "duration_ms" in result
        assert result["duration_ms"] is not None
        assert result["duration_ms"] >= 0

    @pytest.mark.integration
    def test_upload_response_includes_throughput(self, integration_client, test_package):
        """Test upload response includes throughput_mbps field."""
        project, package = test_package
        content = b"throughput test content"

        result = upload_test_file(
            integration_client, project, package, content, version="throughput-test"
        )

        assert "throughput_mbps" in result
        # For small files throughput may be very high or None;
        # just verify the field exists.

    @pytest.mark.integration
    def test_upload_duration_reasonable(
        self, integration_client, test_package, sized_content
    ):
        """Test upload duration is reasonable for file size."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=100)

        start = time.time()
        result = upload_test_file(
            integration_client, project, package, content, version="duration-check"
        )
        actual_duration = (time.time() - start) * 1000  # ms

        # Reported duration should be close to actual
        assert result["duration_ms"] is not None
        # Allow some variance (network overhead)
        assert result["duration_ms"] <= actual_duration + 1000  # Within 1s

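# For reference, the duration/throughput fields asserted above are presumably
# derived along these lines (a sketch of the assumed server-side calculation,
# not its actual code; whether "mbps" means megabits or megabytes per second
# is not pinned down by these tests):
#
#     elapsed_s = duration_ms / 1000
#     throughput_mbps = (size_bytes * 8 / 1_000_000) / elapsed_s if elapsed_s else None
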
class TestLargeFileUploads:
    """Tests for large file uploads using multipart."""

    @pytest.mark.integration
    def test_upload_10mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=200)

        result = upload_test_file(
            integration_client, project, package, content, version="large-10mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB
        assert result["duration_ms"] is not None
        assert result["throughput_mbps"] is not None

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.requires_direct_s3
    def test_upload_100mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 100MB file (triggers multipart upload)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=300)

        result = upload_test_file(
            integration_client, project, package, content, version="large-100mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100MB
        # Verify the S3 object exists
        assert s3_object_exists(expected_hash)

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_1gb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 1GB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=400)

        result = upload_test_file(
            integration_client, project, package, content, version="large-1gb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1GB
        # Should have measurable throughput
        assert result["throughput_mbps"] is not None
        assert result["throughput_mbps"] > 0

    @pytest.mark.integration
    def test_large_file_deduplication(
        self, integration_client, test_package, sized_content, unique_test_id
    ):
        """Test deduplication works for large files."""
        project, package = test_package
        # Use unique_test_id to ensure unique content per test run
        seed = hash(unique_test_id) % 10000
        content, expected_hash = sized_content(SIZE_10MB, seed=seed)

        # First upload. Note: "deduplicated" may already be True here if a
        # previous test run uploaded the same content.
        result1 = upload_test_file(
            integration_client, project, package, content, version=f"dedup-{unique_test_id}-1"
        )
        assert result1["artifact_id"] == expected_hash

        # Second upload of the same content
        result2 = upload_test_file(
            integration_client, project, package, content, version=f"dedup-{unique_test_id}-2"
        )
        assert result2["artifact_id"] == expected_hash
        # Second upload MUST be deduplicated
        assert result2["deduplicated"] is True

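# The 100MB case above is expected to cross the server's multipart threshold.
# For illustration, with boto3 (an assumption about the backend; the actual
# client library and threshold are not visible from these tests) multipart
# behavior is driven by TransferConfig:
#
#     from boto3.s3.transfer import TransferConfig
#
#     config = TransferConfig(
#         multipart_threshold=8 * 1024 * 1024,  # boto3's default: 8 MiB
#         multipart_chunksize=8 * 1024 * 1024,
#     )
#     s3.upload_fileobj(fileobj, bucket, key, Config=config)
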
class TestUploadProgress:
    """Tests for upload progress tracking endpoint."""

    @pytest.mark.integration
    def test_progress_endpoint_returns_not_found_for_invalid_id(
        self, integration_client, test_package
    ):
        """Test progress endpoint returns not_found status for invalid upload ID."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
        )

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "not_found"
        assert data["upload_id"] == "invalid-upload-id"

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_project(
        self, integration_client, unique_test_id
    ):
        """Test progress endpoint validates project exists."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_package(
        self, integration_client, test_project, unique_test_id
    ):
        """Test progress endpoint validates package exists."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
        )

        assert response.status_code == 404

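# Progress payload shape as exercised by the assertions in this module
# (inferred from the tests themselves, not from an API spec):
#
#     {
#         "upload_id": "<id>",
#         "status": "in_progress" | "not_found",
#         "bytes_uploaded": 0,
#         "bytes_total": <total size in bytes>,
#     }
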
class TestResumableUploadProgress:
    """Tests for progress tracking during resumable uploads."""

    @pytest.mark.integration
    def test_resumable_upload_init_and_progress(
        self, integration_client, test_package, sized_content
    ):
        """Test initializing resumable upload and checking progress."""
        project, package = test_package
        _content, expected_hash = sized_content(SIZE_100KB, seed=600)

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "progress-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "progress-test.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        # upload_id may be absent (e.g. if the server short-circuits because
        # the content already exists), so only check progress when we got one.
        if upload_id:
            # Check initial progress
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert progress_response.status_code == 200
            progress = progress_response.json()
            assert progress["status"] == "in_progress"
            assert progress["bytes_uploaded"] == 0
            assert progress["bytes_total"] == SIZE_100KB

            # Abort to clean up
            integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers={"Authorization": f"Bearer {api_key}"},
            )

class TestUploadSizeLimits:
    """Tests for upload size limit enforcement."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """Test empty files are rejected."""
        project, package = test_package

        files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )

        assert response.status_code in [400, 422]

    @pytest.mark.integration
    def test_minimum_size_accepted(self, integration_client, test_package):
        """Test 1-byte file is accepted."""
        project, package = test_package
        content = b"X"

        result = upload_test_file(
            integration_client, project, package, content, version="min-size"
        )

        assert result["size"] == 1

    @pytest.mark.integration
    def test_content_length_header_used_in_response(self, integration_client, test_package):
        """Test that the upload response size matches the actual content length."""
        project, package = test_package
        content = b"content length verification test"

        result = upload_test_file(
            integration_client, project, package, content, version="content-length-test"
        )

        # Size in response should match actual content length
        assert result["size"] == len(content)

class TestUploadErrorHandling:
    """Tests for upload error handling."""

    @pytest.mark.integration
    def test_upload_to_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Test upload to nonexistent project returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_to_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to nonexistent package returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_without_file_returns_422(self, integration_client, test_package):
        """Test upload without file field returns 422."""
        project, package = test_package

        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data={"version": "no-file"},
        )

        assert response.status_code == 422

    @pytest.mark.integration
    def test_upload_with_invalid_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with invalid checksum header format is rejected."""
        project, package = test_package
        content = b"checksum test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": "invalid-checksum"},
        )

        assert response.status_code == 400

    @pytest.mark.integration
    def test_upload_with_mismatched_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with wrong checksum is rejected."""
        project, package = test_package
        content = b"mismatch test"
        wrong_hash = "0" * 64  # syntactically valid SHA-256, wrong value

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )

        assert response.status_code == 422
        assert "verification failed" in response.json().get("detail", "").lower()

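# For contrast with the rejection cases above, a correct checksum upload
# would presumably look like this, using compute_sha256 imported from
# tests.factories (a sketch; the helper's exact signature is assumed):
#
#     content = b"checksum test"
#     files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
#     response = integration_client.post(
#         f"/api/v1/project/{project}/{package}/upload",
#         files=files,
#         headers={"X-Checksum-SHA256": compute_sha256(content)},
#     )
#     assert response.status_code == 200
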
class TestResumableUploadCancellation:
    """Tests for resumable upload cancellation."""

    @pytest.mark.integration
    def test_abort_resumable_upload(self, integration_client, test_package, sized_content):
        """Test aborting a resumable upload cleans up properly."""
        project, package = test_package
        _content, expected_hash = sized_content(SIZE_100KB, seed=700)

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "abort-test.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        if upload_id:
            # Abort the upload (without uploading any parts)
            abort_response = integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert abort_response.status_code in [200, 204]

            # Verify progress shows not_found after abort
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert progress_response.status_code == 200
            assert progress_response.json()["status"] == "not_found"

    @pytest.mark.integration
    def test_abort_nonexistent_upload(self, integration_client, test_package):
        """Test aborting a nonexistent upload returns an appropriate response."""
        project, package = test_package

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-nonexistent-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
            headers={"Authorization": f"Bearer {api_key}"},
        )

        # Should return 404, or 200/204 if the delete is idempotent
        assert response.status_code in [200, 204, 404]

class TestUploadTimeout:
    """Tests for upload timeout handling."""

    @pytest.mark.integration
    def test_upload_with_short_timeout_succeeds_for_small_file(
        self, integration_client, test_package
    ):
        """Test small file upload succeeds with reasonable timeout."""
        project, package = test_package
        content = b"small timeout test"

        # httpx client should handle this quickly
        result = upload_test_file(
            integration_client, project, package, content, version="timeout-small"
        )

        assert result["artifact_id"] is not None

    @pytest.mark.integration
    def test_upload_response_duration_under_timeout(
        self, integration_client, test_package, sized_content
    ):
        """Test upload completes within reasonable time."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=800)

        start = time.time()
        result = upload_test_file(
            integration_client, project, package, content, version="timeout-check"
        )
        duration = time.time() - start

        # 1MB should upload in well under 60 seconds on local
        assert duration < 60
        assert result["artifact_id"] is not None

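# The client-side timeout itself is configured on the httpx client. A sketch
# of such a configuration (the integration_client fixture's actual settings
# live elsewhere and are an assumption here):
#
#     import httpx
#
#     client = httpx.Client(
#         base_url="http://localhost:8080",
#         timeout=httpx.Timeout(60.0, connect=5.0),  # 60s overall, 5s connect
#     )
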
class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_different_files(
        self, integration_client, test_package, sized_content
    ):
        """Test concurrent uploads of different files succeed."""
        from concurrent.futures import ThreadPoolExecutor, as_completed

        project, package = test_package

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "concurrent-diff-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        num_uploads = 3
        results = []
        errors = []

        def upload_unique_file(idx):
            try:
                from httpx import Client

                content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)

                # Each worker uses its own httpx client rather than sharing
                # the test fixture across threads.
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
                with Client(base_url=base_url, timeout=30.0) as client:
                    files = {
                        "file": (
                            f"concurrent-{idx}.bin",
                            io.BytesIO(content),
                            "application/octet-stream",
                        )
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"version": f"concurrent-diff-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results.append((idx, response.json(), expected_hash))
                    else:
                        errors.append(f"Upload {idx}: {response.status_code} - {response.text}")
            except Exception as e:
                errors.append(f"Upload {idx}: {e}")

        with ThreadPoolExecutor(max_workers=num_uploads) as executor:
            futures = [executor.submit(upload_unique_file, i) for i in range(num_uploads)]
            for future in as_completed(futures):
                future.result()  # wait; exceptions are captured in `errors` above

        assert len(errors) == 0, f"Concurrent upload errors: {errors}"
        assert len(results) == num_uploads

        # Each upload should have a unique artifact ID
        artifact_ids = {r[1]["artifact_id"] for r in results}
        assert len(artifact_ids) == num_uploads

        # Each should match its expected hash
        for idx, result, expected_hash in results:
            assert result["artifact_id"] == expected_hash
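
# Typical invocations (the file path and marker registration are assumptions
# about the repo layout, not taken from this module):
#
#     pytest -m integration tests/integration/test_large_uploads.py
#     pytest -m integration --run-slow    # also run @pytest.mark.slow tests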