Metadata database tracks all uploads with project, package, tag, and timestamp queryable via API
This commit is contained in:
502
backend/tests/integration/test_upload_download_api.py
Normal file
502
backend/tests/integration/test_upload_download_api.py
Normal file
@@ -0,0 +1,502 @@
|
||||
"""
|
||||
Integration tests for upload and download API endpoints.
|
||||
|
||||
Tests cover:
|
||||
- Upload functionality and deduplication
|
||||
- Download by tag and artifact ID
|
||||
- Concurrent upload handling
|
||||
- File size validation
|
||||
- Upload failure cleanup
|
||||
- S3 storage verification
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import io
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from tests.factories import (
|
||||
compute_sha256,
|
||||
upload_test_file,
|
||||
list_s3_objects_by_hash,
|
||||
s3_object_exists,
|
||||
)
|
||||
|
||||
|
||||
class TestUploadBasics:
|
||||
"""Tests for basic upload functionality."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_returns_artifact_id(self, integration_client, test_package):
|
||||
"""Test upload returns the artifact ID (SHA256 hash)."""
|
||||
project_name, package_name = test_package
|
||||
content = b"basic upload test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client, project_name, package_name, content, tag="v1"
|
||||
)
|
||||
|
||||
assert result["artifact_id"] == expected_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_response_has_upload_id(self, integration_client, test_package):
|
||||
"""Test upload response includes upload_id."""
|
||||
project_name, package_name = test_package
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client,
|
||||
project_name,
|
||||
package_name,
|
||||
b"upload id test",
|
||||
"uploadid.txt",
|
||||
)
|
||||
|
||||
assert "upload_id" in result
|
||||
assert result["upload_id"] is not None
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_response_has_content_type(self, integration_client, test_package):
|
||||
"""Test upload response includes content_type."""
|
||||
project_name, package_name = test_package
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client,
|
||||
project_name,
|
||||
package_name,
|
||||
b"content type test",
|
||||
"content.txt",
|
||||
)
|
||||
|
||||
assert "content_type" in result
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_response_has_original_name(self, integration_client, test_package):
|
||||
"""Test upload response includes original_name."""
|
||||
project_name, package_name = test_package
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client,
|
||||
project_name,
|
||||
package_name,
|
||||
b"original name test",
|
||||
"originalname.txt",
|
||||
)
|
||||
|
||||
assert "original_name" in result
|
||||
assert result["original_name"] == "originalname.txt"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_response_has_created_at(self, integration_client, test_package):
|
||||
"""Test upload response includes created_at."""
|
||||
project_name, package_name = test_package
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client,
|
||||
project_name,
|
||||
package_name,
|
||||
b"created at test",
|
||||
"createdat.txt",
|
||||
)
|
||||
|
||||
assert "created_at" in result
|
||||
assert result["created_at"] is not None
|
||||
|
||||
|
||||
class TestDuplicateUploads:
|
||||
"""Tests for duplicate upload deduplication behavior."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_same_file_twice_returns_same_artifact_id(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test uploading same file twice returns same artifact_id."""
|
||||
project, package = test_package
|
||||
content = b"content uploaded twice for same artifact test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# First upload
|
||||
result1 = upload_test_file(
|
||||
integration_client, project, package, content, tag="first"
|
||||
)
|
||||
assert result1["artifact_id"] == expected_hash
|
||||
|
||||
# Second upload
|
||||
result2 = upload_test_file(
|
||||
integration_client, project, package, content, tag="second"
|
||||
)
|
||||
assert result2["artifact_id"] == expected_hash
|
||||
assert result1["artifact_id"] == result2["artifact_id"]
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_same_file_twice_increments_ref_count(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test uploading same file twice increments ref_count to 2."""
|
||||
project, package = test_package
|
||||
content = b"content for ref count increment test"
|
||||
|
||||
# First upload
|
||||
result1 = upload_test_file(
|
||||
integration_client, project, package, content, tag="v1"
|
||||
)
|
||||
assert result1["ref_count"] == 1
|
||||
|
||||
# Second upload
|
||||
result2 = upload_test_file(
|
||||
integration_client, project, package, content, tag="v2"
|
||||
)
|
||||
assert result2["ref_count"] == 2
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_same_file_different_packages_shares_artifact(
|
||||
self, integration_client, test_project, unique_test_id
|
||||
):
|
||||
"""Test uploading same file to different packages shares artifact."""
|
||||
project = test_project
|
||||
content = f"content shared across packages {unique_test_id}".encode()
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# Create two packages
|
||||
pkg1 = f"package-a-{unique_test_id}"
|
||||
pkg2 = f"package-b-{unique_test_id}"
|
||||
|
||||
integration_client.post(
|
||||
f"/api/v1/project/{project}/packages",
|
||||
json={"name": pkg1, "description": "Package A"},
|
||||
)
|
||||
integration_client.post(
|
||||
f"/api/v1/project/{project}/packages",
|
||||
json={"name": pkg2, "description": "Package B"},
|
||||
)
|
||||
|
||||
# Upload to first package
|
||||
result1 = upload_test_file(integration_client, project, pkg1, content, tag="v1")
|
||||
assert result1["artifact_id"] == expected_hash
|
||||
assert result1["deduplicated"] is False
|
||||
|
||||
# Upload to second package
|
||||
result2 = upload_test_file(integration_client, project, pkg2, content, tag="v1")
|
||||
assert result2["artifact_id"] == expected_hash
|
||||
assert result2["deduplicated"] is True
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_same_file_different_filenames_shares_artifact(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test uploading same file with different filenames shares artifact."""
|
||||
project, package = test_package
|
||||
content = b"content with different filenames"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# Upload with filename1
|
||||
result1 = upload_test_file(
|
||||
integration_client,
|
||||
project,
|
||||
package,
|
||||
content,
|
||||
filename="file1.bin",
|
||||
tag="v1",
|
||||
)
|
||||
assert result1["artifact_id"] == expected_hash
|
||||
|
||||
# Upload with filename2
|
||||
result2 = upload_test_file(
|
||||
integration_client,
|
||||
project,
|
||||
package,
|
||||
content,
|
||||
filename="file2.bin",
|
||||
tag="v2",
|
||||
)
|
||||
assert result2["artifact_id"] == expected_hash
|
||||
assert result2["deduplicated"] is True
|
||||
|
||||
|
||||
class TestDownload:
|
||||
"""Tests for download functionality."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_download_by_tag(self, integration_client, test_package):
|
||||
"""Test downloading artifact by tag name."""
|
||||
project, package = test_package
|
||||
original_content = b"download by tag test"
|
||||
|
||||
upload_test_file(
|
||||
integration_client, project, package, original_content, tag="download-tag"
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/download-tag",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.content == original_content
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_download_by_artifact_id(self, integration_client, test_package):
|
||||
"""Test downloading artifact by artifact ID."""
|
||||
project, package = test_package
|
||||
original_content = b"download by id test"
|
||||
expected_hash = compute_sha256(original_content)
|
||||
|
||||
upload_test_file(integration_client, project, package, original_content)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/artifact:{expected_hash}",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.content == original_content
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_download_nonexistent_tag(self, integration_client, test_package):
|
||||
"""Test downloading nonexistent tag returns 404."""
|
||||
project, package = test_package
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_content_matches_original(self, integration_client, test_package):
|
||||
"""Test downloaded content matches original exactly."""
|
||||
project, package = test_package
|
||||
original_content = b"exact content verification test data 12345"
|
||||
|
||||
upload_test_file(
|
||||
integration_client, project, package, original_content, tag="verify"
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/verify", params={"mode": "proxy"}
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.content == original_content
|
||||
|
||||
|
||||
class TestConcurrentUploads:
|
||||
"""Tests for concurrent upload handling."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_concurrent_uploads_same_file(self, integration_client, test_package):
|
||||
"""Test concurrent uploads of same file handle deduplication correctly."""
|
||||
project, package = test_package
|
||||
content = b"content for concurrent upload test"
|
||||
expected_hash = compute_sha256(content)
|
||||
num_concurrent = 5
|
||||
|
||||
results = []
|
||||
errors = []
|
||||
|
||||
def upload_worker(tag_suffix):
|
||||
try:
|
||||
from httpx import Client
|
||||
|
||||
base_url = "http://localhost:8080"
|
||||
with Client(base_url=base_url, timeout=30.0) as client:
|
||||
files = {
|
||||
"file": (
|
||||
f"concurrent-{tag_suffix}.bin",
|
||||
io.BytesIO(content),
|
||||
"application/octet-stream",
|
||||
)
|
||||
}
|
||||
response = client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
data={"tag": f"concurrent-{tag_suffix}"},
|
||||
)
|
||||
if response.status_code == 200:
|
||||
results.append(response.json())
|
||||
else:
|
||||
errors.append(f"Status {response.status_code}: {response.text}")
|
||||
except Exception as e:
|
||||
errors.append(str(e))
|
||||
|
||||
with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
|
||||
futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
|
||||
for future in as_completed(futures):
|
||||
pass
|
||||
|
||||
assert len(errors) == 0, f"Errors during concurrent uploads: {errors}"
|
||||
assert len(results) == num_concurrent
|
||||
|
||||
# All should have same artifact_id
|
||||
artifact_ids = set(r["artifact_id"] for r in results)
|
||||
assert len(artifact_ids) == 1
|
||||
assert expected_hash in artifact_ids
|
||||
|
||||
# Verify final ref_count
|
||||
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert response.status_code == 200
|
||||
assert response.json()["ref_count"] == num_concurrent
|
||||
|
||||
|
||||
class TestFileSizeValidation:
|
||||
"""Tests for file size limits and empty file rejection."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_empty_file_rejected(self, integration_client, test_package):
|
||||
"""Test empty files are rejected with appropriate error."""
|
||||
project, package = test_package
|
||||
|
||||
files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
)
|
||||
|
||||
assert response.status_code in [422, 400]
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_small_valid_file_accepted(self, integration_client, test_package):
|
||||
"""Test small (1 byte) files are accepted."""
|
||||
project, package = test_package
|
||||
content = b"X"
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client, project, package, content, tag="tiny"
|
||||
)
|
||||
|
||||
assert result["artifact_id"] is not None
|
||||
assert result["size"] == 1
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_file_size_reported_correctly(
|
||||
self, integration_client, test_package, unique_test_id
|
||||
):
|
||||
"""Test file size is correctly reported in response."""
|
||||
project, package = test_package
|
||||
content = f"Test content for size check {unique_test_id}".encode()
|
||||
expected_size = len(content)
|
||||
|
||||
result = upload_test_file(
|
||||
integration_client, project, package, content, tag="size-test"
|
||||
)
|
||||
|
||||
assert result["size"] == expected_size
|
||||
|
||||
# Also verify via artifact endpoint
|
||||
artifact_response = integration_client.get(
|
||||
f"/api/v1/artifact/{result['artifact_id']}"
|
||||
)
|
||||
assert artifact_response.json()["size"] == expected_size
|
||||
|
||||
|
||||
class TestUploadFailureCleanup:
|
||||
"""Tests for cleanup when uploads fail."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_failure_invalid_project_no_orphaned_s3(
|
||||
self, integration_client, unique_test_id
|
||||
):
|
||||
"""Test upload to non-existent project doesn't leave orphaned S3 objects."""
|
||||
content = f"content for orphan s3 test {unique_test_id}".encode()
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload",
|
||||
files=files,
|
||||
data={"tag": "test"},
|
||||
)
|
||||
|
||||
assert response.status_code == 404
|
||||
|
||||
# Verify no S3 object was created
|
||||
assert not s3_object_exists(expected_hash), (
|
||||
"Orphaned S3 object found after failed upload"
|
||||
)
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_failure_invalid_package_no_orphaned_s3(
|
||||
self, integration_client, test_project, unique_test_id
|
||||
):
|
||||
"""Test upload to non-existent package doesn't leave orphaned S3 objects."""
|
||||
content = f"content for orphan s3 test pkg {unique_test_id}".encode()
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
|
||||
files=files,
|
||||
data={"tag": "test"},
|
||||
)
|
||||
|
||||
assert response.status_code == 404
|
||||
|
||||
assert not s3_object_exists(expected_hash), (
|
||||
"Orphaned S3 object found after failed upload"
|
||||
)
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_failure_no_orphaned_database_records(
|
||||
self, integration_client, test_project, unique_test_id
|
||||
):
|
||||
"""Test failed upload doesn't leave orphaned database records."""
|
||||
content = f"content for db orphan test {unique_test_id}".encode()
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
|
||||
files=files,
|
||||
data={"tag": "test"},
|
||||
)
|
||||
|
||||
assert response.status_code == 404
|
||||
|
||||
artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert artifact_response.status_code == 404, (
|
||||
"Orphaned artifact record found after failed upload"
|
||||
)
|
||||
|
||||
|
||||
class TestS3StorageVerification:
|
||||
"""Tests to verify S3 storage behavior."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_s3_single_object_after_duplicates(
|
||||
self, integration_client, test_package, unique_test_id
|
||||
):
|
||||
"""Test S3 bucket contains only one object after duplicate uploads."""
|
||||
project, package = test_package
|
||||
content = f"content for s3 object count test {unique_test_id}".encode()
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# Upload same content multiple times
|
||||
for tag in ["s3test1", "s3test2", "s3test3"]:
|
||||
upload_test_file(integration_client, project, package, content, tag=tag)
|
||||
|
||||
# Verify only one S3 object exists
|
||||
s3_objects = list_s3_objects_by_hash(expected_hash)
|
||||
assert len(s3_objects) == 1, (
|
||||
f"Expected 1 S3 object, found {len(s3_objects)}: {s3_objects}"
|
||||
)
|
||||
|
||||
# Verify object key follows expected pattern
|
||||
expected_key = (
|
||||
f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
|
||||
)
|
||||
assert s3_objects[0] == expected_key
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_artifact_table_single_row_after_duplicates(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test artifact table contains only one row after duplicate uploads."""
|
||||
project, package = test_package
|
||||
content = b"content for single row test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# Upload same content multiple times
|
||||
for tag in ["v1", "v2", "v3"]:
|
||||
upload_test_file(integration_client, project, package, content, tag=tag)
|
||||
|
||||
# Query artifact
|
||||
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert response.status_code == 200
|
||||
artifact = response.json()
|
||||
assert artifact["id"] == expected_hash
|
||||
assert artifact["ref_count"] == 3
|
||||
Reference in New Issue
Block a user