Add streaming download enhancements and tests (#42)
- Add conditional request support (If-None-Match, If-Modified-Since) returning 304 Not Modified
- Add caching headers: Cache-Control (immutable), Last-Modified
- Add 416 Range Not Satisfiable response for invalid range requests
- Add download completion logging with bytes transferred and throughput
- Add client disconnect handling during streaming downloads
- Add comprehensive streaming download tests
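For illustration, a client-side sketch of how these additions might be exercised end to end. This is not part of the commit; the base URL, project, package, and tag are placeholders, and the `requests` library stands in for any HTTP client:

import requests

# Placeholder endpoint; substitute a real project/package/tag.
url = "https://pkgserver.example/api/v1/project/demo/pkg/+/latest"

# Initial download: remember the validators the server now sends.
resp = requests.get(url, params={"mode": "proxy"})
etag = resp.headers.get("ETag")

# Revalidation: a matching ETag should come back as 304 with an empty body.
resp = requests.get(url, params={"mode": "proxy"}, headers={"If-None-Match": etag})
assert resp.status_code in (200, 304)

# Resume: ask for everything after the first KiB; expect 206 Partial Content,
# or 416 if the offset is past the end of the object.
resp = requests.get(url, params={"mode": "proxy"}, headers={"Range": "bytes=1024-"})
assert resp.status_code in (206, 416)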
@@ -24,6 +24,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added corruption detection tests: bit flip, truncation, appended content, size mismatch, missing S3 objects (#40)
- Added Digest header tests (RFC 3230) and verification mode tests (#40)
- Added integrity verification documentation (`docs/integrity-verification.md`) (#40)
- Added conditional request support for downloads (If-None-Match, If-Modified-Since) returning 304 Not Modified (#42)
- Added caching headers to downloads: Cache-Control (immutable), Last-Modified (#42)
- Added 416 Range Not Satisfiable response for invalid range requests (#42)
- Added download completion logging with bytes transferred and throughput (#42)
- Added client disconnect handling during streaming downloads (#42)
- Added streaming download tests: range requests, conditional requests, caching headers, download resume (#42)
- Added `package_versions` table for immutable version tracking separate from mutable tags (#56)
- Versions are set at upload time via explicit `version` parameter or auto-detected from filename/metadata
- Version detection priority: explicit parameter > package metadata > filename pattern
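The priority order stated above can be read as a simple fall-through. A minimal sketch; the function and parameter names are hypothetical, not part of the codebase:

from typing import Optional

def resolve_version(
    explicit: Optional[str],
    metadata_version: Optional[str],
    filename_version: Optional[str],
) -> Optional[str]:
    """Hypothetical illustration of the stated priority:
    explicit parameter > package metadata > filename pattern."""
    return explicit or metadata_version or filename_version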
@@ -2972,6 +2972,8 @@ def download_artifact(
    storage: S3Storage = Depends(get_storage),
    current_user: Optional[User] = Depends(get_current_user_optional),
    range: Optional[str] = Header(None),
    if_none_match: Optional[str] = Header(None, alias="If-None-Match"),
    if_modified_since: Optional[str] = Header(None, alias="If-Modified-Since"),
    mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
        default=None,
        description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)",
@@ -2988,6 +2990,15 @@ def download_artifact(
    """
    Download an artifact by reference (tag name, artifact:hash, tag:name).

    Supports conditional requests:
    - If-None-Match: Returns 304 Not Modified if ETag matches
    - If-Modified-Since: Returns 304 Not Modified if not modified since date

    Supports range requests for partial downloads and resume:
    - Range: bytes=0-1023 (first 1KB)
    - Range: bytes=-1024 (last 1KB)
    - Returns 206 Partial Content with Content-Range header

    Verification modes:
    - verify=false (default): No verification, maximum performance
    - verify=true&verify_mode=stream: Compute hash while streaming, verify after completion.
@@ -3000,6 +3011,9 @@ def download_artifact(
    - X-Content-Length: File size in bytes
    - ETag: Artifact ID (SHA256)
    - Digest: RFC 3230 format sha-256 hash
    - Last-Modified: Artifact creation timestamp
    - Cache-Control: Immutable caching for content-addressable storage
    - Accept-Ranges: bytes (advertises range request support)

    When verify=true:
    - X-Verified: 'true' if verified, 'false' if verification failed
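As an illustration of the response headers documented in this docstring, a client can cross-check a proxied download against them. A hedged sketch with a placeholder URL, again assuming the `requests` library:

import base64
import hashlib

import requests

url = "https://pkgserver.example/api/v1/project/demo/pkg/+/v1.0.0"  # placeholder
resp = requests.get(url, params={"mode": "proxy"})
body = resp.content

digest = hashlib.sha256(body)
assert resp.headers["X-Checksum-SHA256"] == digest.hexdigest()
assert resp.headers["ETag"].strip('"') == digest.hexdigest()
# RFC 3230 Digest: "sha-256=" followed by the base64 of the raw digest bytes.
assert resp.headers["Digest"] == "sha-256=" + base64.b64encode(digest.digest()).decode()
assert resp.headers["Accept-Ranges"] == "bytes"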
@@ -3024,6 +3038,52 @@ def download_artifact(

    filename = sanitize_filename(artifact.original_name or f"{artifact.id}")

    # Format Last-Modified header (RFC 7231 format)
    last_modified = None
    last_modified_str = None
    if artifact.created_at:
        last_modified = artifact.created_at
        if last_modified.tzinfo is None:
            last_modified = last_modified.replace(tzinfo=timezone.utc)
        last_modified_str = last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")

    # Handle conditional requests (If-None-Match, If-Modified-Since)
    # Return 304 Not Modified if content hasn't changed
    artifact_etag = f'"{artifact.id}"'

    if if_none_match:
        # Strip quotes and compare with artifact ETag
        client_etag = if_none_match.strip().strip('"')
        if client_etag == artifact.id or if_none_match == artifact_etag:
            return Response(
                status_code=304,
                headers={
                    "ETag": artifact_etag,
                    "Cache-Control": "public, max-age=31536000, immutable",
                    **({"Last-Modified": last_modified_str} if last_modified_str else {}),
                },
            )

    if if_modified_since and last_modified:
        try:
            # Parse If-Modified-Since header
            from email.utils import parsedate_to_datetime
            client_date = parsedate_to_datetime(if_modified_since)
            if client_date.tzinfo is None:
                client_date = client_date.replace(tzinfo=timezone.utc)
            # If artifact hasn't been modified since client's date, return 304
            if last_modified <= client_date:
                return Response(
                    status_code=304,
                    headers={
                        "ETag": artifact_etag,
                        "Cache-Control": "public, max-age=31536000, immutable",
                        **({"Last-Modified": last_modified_str} if last_modified_str else {}),
                    },
                )
        except (ValueError, TypeError):
            pass  # Invalid date format, ignore and continue with download

    # Audit log download
    user_id = get_user_id(request)
    _log_audit(
@@ -3041,22 +3101,28 @@ def download_artifact(
    )
    db.commit()

    # Build common checksum headers (always included)
    checksum_headers = {
    # Build common headers (always included)
    common_headers = {
        "X-Checksum-SHA256": artifact.id,
        "X-Content-Length": str(artifact.size),
        "ETag": f'"{artifact.id}"',
        "ETag": artifact_etag,
        # Cache-Control: content-addressable storage is immutable
        "Cache-Control": "public, max-age=31536000, immutable",
    }
    # Add Last-Modified header
    if last_modified_str:
        common_headers["Last-Modified"] = last_modified_str

    # Add RFC 3230 Digest header
    try:
        digest_base64 = sha256_to_base64(artifact.id)
        checksum_headers["Digest"] = f"sha-256={digest_base64}"
        common_headers["Digest"] = f"sha-256={digest_base64}"
    except Exception:
        pass  # Skip if conversion fails

    # Add MD5 checksum if available
    if artifact.checksum_md5:
        checksum_headers["X-Checksum-MD5"] = artifact.checksum_md5
        common_headers["X-Checksum-MD5"] = artifact.checksum_md5

    # Determine download mode (query param overrides server default)
    download_mode = mode or settings.download_mode
@@ -3096,15 +3162,29 @@ def download_artifact(
    # Proxy mode (default fallback) - stream through backend
    # Handle range requests (verification not supported for partial downloads)
    if range:
        stream, content_length, content_range = storage.get_stream(
            artifact.s3_key, range
        )
        try:
            stream, content_length, content_range = storage.get_stream(
                artifact.s3_key, range
            )
        except Exception as e:
            # S3 returns InvalidRange error for unsatisfiable ranges
            error_str = str(e).lower()
            if "invalidrange" in error_str or "range" in error_str:
                raise HTTPException(
                    status_code=416,
                    detail="Range Not Satisfiable",
                    headers={
                        "Content-Range": f"bytes */{artifact.size}",
                        "Accept-Ranges": "bytes",
                    },
                )
            raise

        headers = {
            "Content-Disposition": build_content_disposition(filename),
            "Accept-Ranges": "bytes",
            "Content-Length": str(content_length),
            **checksum_headers,
            **common_headers,
        }
        if content_range:
            headers["Content-Range"] = content_range
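A sketch of how a resuming client might react to the new 416 response (restart from byte zero instead of appending). The URL and destination path are placeholders and `requests` is assumed; this is illustrative, not part of the commit:

import os

import requests

def resume_download(url: str, dest: str) -> None:
    """Resume into dest if it exists; fall back to a full download on 416."""
    offset = os.path.getsize(dest) if os.path.exists(dest) else 0
    headers = {"Range": f"bytes={offset}-"} if offset else {}
    resp = requests.get(url, params={"mode": "proxy"}, headers=headers, stream=True)
    if resp.status_code == 416:
        # Saved offset is no longer satisfiable; Content-Range: bytes */<size>
        # reports the real object size. Start over from the beginning.
        offset = 0
        resp = requests.get(url, params={"mode": "proxy"}, stream=True)
    resp.raise_for_status()
    with open(dest, "ab" if offset else "wb") as fh:
        for chunk in resp.iter_content(chunk_size=64 * 1024):
            fh.write(chunk)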
@@ -3121,7 +3201,7 @@ def download_artifact(
    base_headers = {
        "Content-Disposition": build_content_disposition(filename),
        "Accept-Ranges": "bytes",
        **checksum_headers,
        **common_headers,
    }

    # Pre-verification mode: verify before streaming
@@ -3189,11 +3269,42 @@ def download_artifact(
        },
    )

    # No verification - direct streaming
    # No verification - direct streaming with completion logging
    stream, content_length, _ = storage.get_stream(artifact.s3_key)

    def logged_stream():
        """Generator that yields chunks and logs completion/disconnection."""
        import time
        start_time = time.time()
        bytes_sent = 0
        try:
            for chunk in stream:
                bytes_sent += len(chunk)
                yield chunk
            # Download completed successfully
            duration = time.time() - start_time
            throughput_mbps = (bytes_sent / (1024 * 1024)) / duration if duration > 0 else 0
            logger.info(
                f"Download completed: artifact={artifact.id[:16]}... "
                f"bytes={bytes_sent} duration={duration:.2f}s throughput={throughput_mbps:.2f}MB/s"
            )
        except GeneratorExit:
            # Client disconnected before download completed
            duration = time.time() - start_time
            logger.warning(
                f"Download interrupted: artifact={artifact.id[:16]}... "
                f"bytes_sent={bytes_sent}/{content_length} duration={duration:.2f}s"
            )
        except Exception as e:
            duration = time.time() - start_time
            logger.error(
                f"Download error: artifact={artifact.id[:16]}... "
                f"bytes_sent={bytes_sent} duration={duration:.2f}s error={e}"
            )
            raise

    return StreamingResponse(
        stream,
        logged_stream(),
        media_type=artifact.content_type or "application/octet-stream",
        headers={
            **base_headers,
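The disconnect handling above relies on the generator protocol: when the client goes away and the response iterator is closed, GeneratorExit is raised inside the generator at the suspended yield. A standalone sketch of that mechanism, independent of FastAPI (assumed behaviour, names are illustrative):

def logging_wrapper(chunks):
    """Yield chunks, reporting how much was sent if the consumer stops early."""
    sent = 0
    try:
        for chunk in chunks:
            sent += len(chunk)
            yield chunk
        print(f"completed: {sent} bytes")
    except GeneratorExit:
        print(f"interrupted after {sent} bytes")
        raise

gen = logging_wrapper(iter([b"x" * 10, b"y" * 10]))
next(gen)    # consumer pulls one chunk
gen.close()  # simulated disconnect: close() raises GeneratorExit at the yield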
backend/tests/integration/test_streaming_download.py | 535 lines (new file)
@@ -0,0 +1,535 @@
"""
|
||||
Integration tests for streaming download functionality.
|
||||
|
||||
Tests cover:
|
||||
- HTTP Range requests (partial downloads, resume)
|
||||
- Conditional requests (If-None-Match, If-Modified-Since)
|
||||
- Caching headers (Cache-Control, Last-Modified, Accept-Ranges)
|
||||
- Large file streaming
|
||||
- Download modes (proxy, redirect, presigned)
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import io
|
||||
import time
|
||||
from email.utils import formatdate
|
||||
from tests.factories import (
|
||||
compute_sha256,
|
||||
upload_test_file,
|
||||
)
|
||||
from tests.conftest import (
|
||||
SIZE_1KB,
|
||||
SIZE_100KB,
|
||||
SIZE_1MB,
|
||||
)
|
||||
|
||||
|
||||
class TestRangeRequests:
|
||||
"""Tests for HTTP Range request support (partial downloads)."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_range_request_first_bytes(self, integration_client, test_package):
|
||||
"""Test range request for first N bytes."""
|
||||
project, package = test_package
|
||||
content = b"0123456789" * 100 # 1000 bytes
|
||||
upload_test_file(integration_client, project, package, content, tag="range-test")
|
||||
|
||||
# Request first 10 bytes
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/range-test",
|
||||
params={"mode": "proxy"},
|
||||
headers={"Range": "bytes=0-9"},
|
||||
)
|
||||
assert response.status_code == 206 # Partial Content
|
||||
assert response.content == b"0123456789"
|
||||
assert "Content-Range" in response.headers
|
||||
assert response.headers["Content-Range"].startswith("bytes 0-9/")
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_range_request_middle_bytes(self, integration_client, test_package):
|
||||
"""Test range request for bytes in the middle."""
|
||||
project, package = test_package
|
||||
content = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
upload_test_file(integration_client, project, package, content, tag="range-mid")
|
||||
|
||||
# Request bytes 10-19 (KLMNOPQRST)
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/range-mid",
|
||||
params={"mode": "proxy"},
|
||||
headers={"Range": "bytes=10-19"},
|
||||
)
|
||||
assert response.status_code == 206
|
||||
assert response.content == b"KLMNOPQRST"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_range_request_suffix_bytes(self, integration_client, test_package):
|
||||
"""Test range request for last N bytes (suffix range)."""
|
||||
project, package = test_package
|
||||
content = b"0123456789ABCDEF" # 16 bytes
|
||||
upload_test_file(integration_client, project, package, content, tag="range-suffix")
|
||||
|
||||
# Request last 4 bytes
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/range-suffix",
|
||||
params={"mode": "proxy"},
|
||||
headers={"Range": "bytes=-4"},
|
||||
)
|
||||
assert response.status_code == 206
|
||||
assert response.content == b"CDEF"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_range_request_open_ended(self, integration_client, test_package):
|
||||
"""Test range request from offset to end."""
|
||||
project, package = test_package
|
||||
content = b"0123456789"
|
||||
upload_test_file(integration_client, project, package, content, tag="range-open")
|
||||
|
||||
# Request from byte 5 to end
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/range-open",
|
||||
params={"mode": "proxy"},
|
||||
headers={"Range": "bytes=5-"},
|
||||
)
|
||||
assert response.status_code == 206
|
||||
assert response.content == b"56789"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_range_request_includes_accept_ranges_header(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test that range requests include Accept-Ranges header."""
|
||||
project, package = test_package
|
||||
content = b"test content"
|
||||
upload_test_file(integration_client, project, package, content, tag="accept-ranges")
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/accept-ranges",
|
||||
params={"mode": "proxy"},
|
||||
headers={"Range": "bytes=0-4"},
|
||||
)
|
||||
assert response.status_code == 206
|
||||
assert response.headers.get("Accept-Ranges") == "bytes"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_full_download_advertises_accept_ranges(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test that full downloads advertise range support."""
|
||||
project, package = test_package
|
||||
content = b"test content"
|
||||
upload_test_file(integration_client, project, package, content, tag="full-accept")
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/full-accept",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.headers.get("Accept-Ranges") == "bytes"
|
||||
|
||||
|
||||
class TestConditionalRequests:
    """Tests for conditional request handling (304 Not Modified)."""

    @pytest.mark.integration
    def test_if_none_match_returns_304(self, integration_client, test_package):
        """Test If-None-Match with matching ETag returns 304."""
        project, package = test_package
        content = b"conditional request test content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="cond-etag")

        # Request with matching ETag
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-etag",
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert response.content == b""  # No body for 304

    @pytest.mark.integration
    def test_if_none_match_without_quotes(self, integration_client, test_package):
        """Test If-None-Match works with or without quotes."""
        project, package = test_package
        content = b"etag no quotes test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="cond-noquote")

        # Request with ETag without quotes
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-noquote",
            params={"mode": "proxy"},
            headers={"If-None-Match": expected_hash},
        )
        assert response.status_code == 304

    @pytest.mark.integration
    def test_if_none_match_mismatch_returns_200(self, integration_client, test_package):
        """Test If-None-Match with non-matching ETag returns 200."""
        project, package = test_package
        content = b"etag mismatch test"
        upload_test_file(integration_client, project, package, content, tag="cond-mismatch")

        # Request with different ETag
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-mismatch",
            params={"mode": "proxy"},
            headers={"If-None-Match": '"different-etag-value"'},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_if_modified_since_returns_304(self, integration_client, test_package):
        """Test If-Modified-Since with future date returns 304."""
        project, package = test_package
        content = b"modified since test"
        upload_test_file(integration_client, project, package, content, tag="cond-modified")

        # Request with future date (artifact was definitely created before this)
        future_date = formatdate(time.time() + 86400, usegmt=True)  # Tomorrow
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-modified",
            params={"mode": "proxy"},
            headers={"If-Modified-Since": future_date},
        )
        assert response.status_code == 304

    @pytest.mark.integration
    def test_if_modified_since_old_date_returns_200(
        self, integration_client, test_package
    ):
        """Test If-Modified-Since with old date returns 200."""
        project, package = test_package
        content = b"old date test"
        upload_test_file(integration_client, project, package, content, tag="cond-old")

        # Request with old date (2020-01-01)
        old_date = "Wed, 01 Jan 2020 00:00:00 GMT"
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-old",
            params={"mode": "proxy"},
            headers={"If-Modified-Since": old_date},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_304_includes_etag(self, integration_client, test_package):
        """Test 304 response includes ETag header."""
        project, package = test_package
        content = b"304 etag test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="304-etag")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/304-etag",
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert response.headers.get("ETag") == f'"{expected_hash}"'

    @pytest.mark.integration
    def test_304_includes_cache_control(self, integration_client, test_package):
        """Test 304 response includes Cache-Control header."""
        project, package = test_package
        content = b"304 cache test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="304-cache")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/304-cache",
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert "immutable" in response.headers.get("Cache-Control", "")

class TestCachingHeaders:
    """Tests for caching headers on download responses."""

    @pytest.mark.integration
    def test_download_includes_cache_control(self, integration_client, test_package):
        """Test download response includes Cache-Control header."""
        project, package = test_package
        content = b"cache control test"
        upload_test_file(integration_client, project, package, content, tag="cache-ctl")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cache-ctl",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        cache_control = response.headers.get("Cache-Control", "")
        assert "public" in cache_control
        assert "immutable" in cache_control
        assert "max-age" in cache_control

    @pytest.mark.integration
    def test_download_includes_last_modified(self, integration_client, test_package):
        """Test download response includes Last-Modified header."""
        project, package = test_package
        content = b"last modified test"
        upload_test_file(integration_client, project, package, content, tag="last-mod")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/last-mod",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "Last-Modified" in response.headers
        # Should be in RFC 7231 format
        last_modified = response.headers["Last-Modified"]
        assert "GMT" in last_modified

    @pytest.mark.integration
    def test_download_includes_etag(self, integration_client, test_package):
        """Test download response includes ETag header."""
        project, package = test_package
        content = b"etag header test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="etag-hdr")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/etag-hdr",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.headers.get("ETag") == f'"{expected_hash}"'

class TestDownloadResume:
    """Tests for download resume functionality using range requests."""

    @pytest.mark.integration
    def test_resume_download_after_partial(self, integration_client, test_package):
        """Test resuming download from where it left off."""
        project, package = test_package
        content = b"ABCDEFGHIJ" * 100  # 1000 bytes
        upload_test_file(integration_client, project, package, content, tag="resume-test")

        # Simulate partial download (first 500 bytes)
        response1 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-test",
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-499"},
        )
        assert response1.status_code == 206
        first_half = response1.content
        assert len(first_half) == 500

        # Resume from byte 500
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-test",
            params={"mode": "proxy"},
            headers={"Range": "bytes=500-"},
        )
        assert response2.status_code == 206
        second_half = response2.content
        assert len(second_half) == 500

        # Combine and verify
        combined = first_half + second_half
        assert combined == content

    @pytest.mark.integration
    def test_resume_with_etag_verification(self, integration_client, test_package):
        """Test that resumed download can verify content hasn't changed."""
        project, package = test_package
        content = b"resume etag verification test content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="resume-etag")

        # Get ETag from first request
        response1 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-etag",
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-9"},
        )
        assert response1.status_code == 206
        etag = response1.headers.get("ETag")
        assert etag == f'"{expected_hash}"'

        # Resume with If-Match to ensure content hasn't changed
        # (Note: If-Match would fail and return 412 if content changed)
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-etag",
            params={"mode": "proxy"},
            headers={"Range": "bytes=10-"},
        )
        assert response2.status_code == 206
        # ETag should be the same
        assert response2.headers.get("ETag") == etag

class TestLargeFileStreaming:
    """Tests for streaming large files."""

    @pytest.mark.integration
    def test_stream_1mb_file(self, integration_client, test_package, sized_content):
        """Test streaming a 1MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1MB, seed=500)

        upload_test_file(integration_client, project, package, content, tag="stream-1mb")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/stream-1mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_1MB
        assert compute_sha256(response.content) == expected_hash

    @pytest.mark.integration
    def test_stream_large_file_has_correct_headers(
        self, integration_client, test_package, sized_content
    ):
        """Test that large file streaming has correct headers."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=501)

        upload_test_file(integration_client, project, package, content, tag="stream-hdr")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/stream-hdr",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert int(response.headers.get("Content-Length", 0)) == SIZE_100KB
        assert response.headers.get("X-Checksum-SHA256") == expected_hash
        assert response.headers.get("Accept-Ranges") == "bytes"

    @pytest.mark.integration
    def test_range_request_on_large_file(
        self, integration_client, test_package, sized_content
    ):
        """Test range request on a larger file."""
        project, package = test_package
        content, _ = sized_content(SIZE_100KB, seed=502)

        upload_test_file(integration_client, project, package, content, tag="range-large")

        # Request a slice from the middle
        start = 50000
        end = 50999
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/range-large",
            params={"mode": "proxy"},
            headers={"Range": f"bytes={start}-{end}"},
        )
        assert response.status_code == 206
        assert len(response.content) == 1000
        assert response.content == content[start : end + 1]

class TestDownloadModes:
    """Tests for different download modes."""

    @pytest.mark.integration
    def test_proxy_mode_streams_content(self, integration_client, test_package):
        """Test proxy mode streams content through backend."""
        project, package = test_package
        content = b"proxy mode test content"
        upload_test_file(integration_client, project, package, content, tag="mode-proxy")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/mode-proxy",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_presigned_mode_returns_url(self, integration_client, test_package):
        """Test presigned mode returns JSON with URL."""
        project, package = test_package
        content = b"presigned mode test"
        upload_test_file(integration_client, project, package, content, tag="mode-presign")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/mode-presign",
            params={"mode": "presigned"},
        )
        assert response.status_code == 200
        data = response.json()
        assert "url" in data
        assert "expires_at" in data
        assert data["url"].startswith("http")

    @pytest.mark.integration
    def test_redirect_mode_returns_302(self, integration_client, test_package):
        """Test redirect mode returns 302 to presigned URL."""
        project, package = test_package
        content = b"redirect mode test"
        upload_test_file(integration_client, project, package, content, tag="mode-redir")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/mode-redir",
            params={"mode": "redirect"},
            follow_redirects=False,
        )
        assert response.status_code == 302
        assert "Location" in response.headers

class TestIntegrityDuringStreaming:
    """Tests for data integrity during streaming downloads."""

    @pytest.mark.integration
    def test_checksum_header_matches_content(self, integration_client, test_package):
        """Test X-Checksum-SHA256 header matches actual downloaded content."""
        project, package = test_package
        content = b"integrity check content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="integrity")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/integrity",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        header_hash = response.headers.get("X-Checksum-SHA256")
        actual_hash = compute_sha256(response.content)

        assert header_hash == expected_hash
        assert actual_hash == expected_hash
        assert header_hash == actual_hash

    @pytest.mark.integration
    def test_etag_matches_content_hash(self, integration_client, test_package):
        """Test ETag header matches content hash."""
        project, package = test_package
        content = b"etag integrity test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="etag-int")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/etag-int",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        etag = response.headers.get("ETag", "").strip('"')
        actual_hash = compute_sha256(response.content)

        assert etag == expected_hash
        assert actual_hash == expected_hash

    @pytest.mark.integration
    def test_digest_header_present(self, integration_client, test_package):
        """Test Digest header is present in RFC 3230 format."""
        project, package = test_package
        content = b"digest header test"
        upload_test_file(integration_client, project, package, content, tag="digest")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/digest",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "Digest" in response.headers
        assert response.headers["Digest"].startswith("sha-256=")