From a166b6d37fe05e0e25c44635be0c979b9a48bad7 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Fri, 16 Jan 2026 18:55:34 +0000 Subject: [PATCH] Add streaming download enhancements and tests (#42) - Add conditional request support (If-None-Match, If-Modified-Since) returning 304 Not Modified - Add caching headers: Cache-Control (immutable), Last-Modified - Add 416 Range Not Satisfiable response for invalid range requests - Add download completion logging with bytes transferred and throughput - Add client disconnect handling during streaming downloads - Add comprehensive streaming download tests --- CHANGELOG.md | 6 + backend/app/routes.py | 135 ++++- .../integration/test_streaming_download.py | 535 ++++++++++++++++++ 3 files changed, 664 insertions(+), 12 deletions(-) create mode 100644 backend/tests/integration/test_streaming_download.py diff --git a/CHANGELOG.md b/CHANGELOG.md index efaeaff..95c8390 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added corruption detection tests: bit flip, truncation, appended content, size mismatch, missing S3 objects (#40) - Added Digest header tests (RFC 3230) and verification mode tests (#40) - Added integrity verification documentation (`docs/integrity-verification.md`) (#40) +- Added conditional request support for downloads (If-None-Match, If-Modified-Since) returning 304 Not Modified (#42) +- Added caching headers to downloads: Cache-Control (immutable), Last-Modified (#42) +- Added 416 Range Not Satisfiable response for invalid range requests (#42) +- Added download completion logging with bytes transferred and throughput (#42) +- Added client disconnect handling during streaming downloads (#42) +- Added streaming download tests: range requests, conditional requests, caching headers, download resume (#42) - Added `package_versions` table for immutable version tracking separate from mutable tags (#56) - Versions are set at 
upload time via explicit `version` parameter or auto-detected from filename/metadata - Version detection priority: explicit parameter > package metadata > filename pattern diff --git a/backend/app/routes.py b/backend/app/routes.py index 4f7be7b..f327109 100644 --- a/backend/app/routes.py +++ b/backend/app/routes.py @@ -2972,6 +2972,8 @@ def download_artifact( storage: S3Storage = Depends(get_storage), current_user: Optional[User] = Depends(get_current_user_optional), range: Optional[str] = Header(None), + if_none_match: Optional[str] = Header(None, alias="If-None-Match"), + if_modified_since: Optional[str] = Header(None, alias="If-Modified-Since"), mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query( default=None, description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)", @@ -2988,6 +2990,15 @@ def download_artifact( """ Download an artifact by reference (tag name, artifact:hash, tag:name). + Supports conditional requests: + - If-None-Match: Returns 304 Not Modified if ETag matches + - If-Modified-Since: Returns 304 Not Modified if not modified since date + + Supports range requests for partial downloads and resume: + - Range: bytes=0-1023 (first 1KB) + - Range: bytes=-1024 (last 1KB) + - Returns 206 Partial Content with Content-Range header + Verification modes: - verify=false (default): No verification, maximum performance - verify=true&verify_mode=stream: Compute hash while streaming, verify after completion. 
@@ -3000,6 +3011,9 @@ def download_artifact( - X-Content-Length: File size in bytes - ETag: Artifact ID (SHA256) - Digest: RFC 3230 format sha-256 hash + - Last-Modified: Artifact creation timestamp + - Cache-Control: Immutable caching for content-addressable storage + - Accept-Ranges: bytes (advertises range request support) When verify=true: - X-Verified: 'true' if verified, 'false' if verification failed @@ -3024,6 +3038,52 @@ def download_artifact( filename = sanitize_filename(artifact.original_name or f"{artifact.id}") + # Format Last-Modified header (RFC 7231 format) + last_modified = None + last_modified_str = None + if artifact.created_at: + last_modified = artifact.created_at + if last_modified.tzinfo is None: + last_modified = last_modified.replace(tzinfo=timezone.utc) + last_modified_str = last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT") + + # Handle conditional requests (If-None-Match, If-Modified-Since) + # Return 304 Not Modified if content hasn't changed + artifact_etag = f'"{artifact.id}"' + + if if_none_match: + # Strip quotes and compare with artifact ETag + client_etag = if_none_match.strip().strip('"') + if client_etag == artifact.id or if_none_match == artifact_etag: + return Response( + status_code=304, + headers={ + "ETag": artifact_etag, + "Cache-Control": "public, max-age=31536000, immutable", + **({"Last-Modified": last_modified_str} if last_modified_str else {}), + }, + ) + + if if_modified_since and last_modified: + try: + # Parse If-Modified-Since header + from email.utils import parsedate_to_datetime + client_date = parsedate_to_datetime(if_modified_since) + if client_date.tzinfo is None: + client_date = client_date.replace(tzinfo=timezone.utc) + # If artifact hasn't been modified since client's date, return 304 + if last_modified <= client_date: + return Response( + status_code=304, + headers={ + "ETag": artifact_etag, + "Cache-Control": "public, max-age=31536000, immutable", + **({"Last-Modified": last_modified_str} if 
last_modified_str else {}), + }, + ) + except (ValueError, TypeError): + pass # Invalid date format, ignore and continue with download + # Audit log download user_id = get_user_id(request) _log_audit( @@ -3041,22 +3101,28 @@ def download_artifact( ) db.commit() - # Build common checksum headers (always included) - checksum_headers = { + # Build common headers (always included) + common_headers = { "X-Checksum-SHA256": artifact.id, "X-Content-Length": str(artifact.size), - "ETag": f'"{artifact.id}"', + "ETag": artifact_etag, + # Cache-Control: content-addressable storage is immutable + "Cache-Control": "public, max-age=31536000, immutable", } + # Add Last-Modified header + if last_modified_str: + common_headers["Last-Modified"] = last_modified_str + # Add RFC 3230 Digest header try: digest_base64 = sha256_to_base64(artifact.id) - checksum_headers["Digest"] = f"sha-256={digest_base64}" + common_headers["Digest"] = f"sha-256={digest_base64}" except Exception: pass # Skip if conversion fails # Add MD5 checksum if available if artifact.checksum_md5: - checksum_headers["X-Checksum-MD5"] = artifact.checksum_md5 + common_headers["X-Checksum-MD5"] = artifact.checksum_md5 # Determine download mode (query param overrides server default) download_mode = mode or settings.download_mode @@ -3096,15 +3162,29 @@ def download_artifact( # Proxy mode (default fallback) - stream through backend # Handle range requests (verification not supported for partial downloads) if range: - stream, content_length, content_range = storage.get_stream( - artifact.s3_key, range - ) + try: + stream, content_length, content_range = storage.get_stream( + artifact.s3_key, range + ) + except Exception as e: + # S3 returns InvalidRange error for unsatisfiable ranges + error_str = str(e).lower() + if "invalidrange" in error_str or "range" in error_str: + raise HTTPException( + status_code=416, + detail="Range Not Satisfiable", + headers={ + "Content-Range": f"bytes */{artifact.size}", + "Accept-Ranges": 
"bytes", + }, + ) + raise headers = { "Content-Disposition": build_content_disposition(filename), "Accept-Ranges": "bytes", "Content-Length": str(content_length), - **checksum_headers, + **common_headers, } if content_range: headers["Content-Range"] = content_range @@ -3121,7 +3201,7 @@ def download_artifact( base_headers = { "Content-Disposition": build_content_disposition(filename), "Accept-Ranges": "bytes", - **checksum_headers, + **common_headers, } # Pre-verification mode: verify before streaming @@ -3189,11 +3269,42 @@ def download_artifact( }, ) - # No verification - direct streaming + # No verification - direct streaming with completion logging stream, content_length, _ = storage.get_stream(artifact.s3_key) + def logged_stream(): + """Generator that yields chunks and logs completion/disconnection.""" + import time + start_time = time.time() + bytes_sent = 0 + try: + for chunk in stream: + bytes_sent += len(chunk) + yield chunk + # Download completed successfully + duration = time.time() - start_time + throughput_mbps = (bytes_sent / (1024 * 1024)) / duration if duration > 0 else 0 + logger.info( + f"Download completed: artifact={artifact.id[:16]}... " + f"bytes={bytes_sent} duration={duration:.2f}s throughput={throughput_mbps:.2f}MB/s" + ) + except GeneratorExit: + # Client disconnected before download completed + duration = time.time() - start_time + logger.warning( + f"Download interrupted: artifact={artifact.id[:16]}... " + f"bytes_sent={bytes_sent}/{content_length} duration={duration:.2f}s" + ) + except Exception as e: + duration = time.time() - start_time + logger.error( + f"Download error: artifact={artifact.id[:16]}... 
" + f"bytes_sent={bytes_sent} duration={duration:.2f}s error={e}" + ) + raise + return StreamingResponse( - stream, + logged_stream(), media_type=artifact.content_type or "application/octet-stream", headers={ **base_headers, diff --git a/backend/tests/integration/test_streaming_download.py b/backend/tests/integration/test_streaming_download.py new file mode 100644 index 0000000..b6163ad --- /dev/null +++ b/backend/tests/integration/test_streaming_download.py @@ -0,0 +1,535 @@ +""" +Integration tests for streaming download functionality. + +Tests cover: +- HTTP Range requests (partial downloads, resume) +- Conditional requests (If-None-Match, If-Modified-Since) +- Caching headers (Cache-Control, Last-Modified, Accept-Ranges) +- Large file streaming +- Download modes (proxy, redirect, presigned) +""" + +import pytest +import io +import time +from email.utils import formatdate +from tests.factories import ( + compute_sha256, + upload_test_file, +) +from tests.conftest import ( + SIZE_1KB, + SIZE_100KB, + SIZE_1MB, +) + + +class TestRangeRequests: + """Tests for HTTP Range request support (partial downloads).""" + + @pytest.mark.integration + def test_range_request_first_bytes(self, integration_client, test_package): + """Test range request for first N bytes.""" + project, package = test_package + content = b"0123456789" * 100 # 1000 bytes + upload_test_file(integration_client, project, package, content, tag="range-test") + + # Request first 10 bytes + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/range-test", + params={"mode": "proxy"}, + headers={"Range": "bytes=0-9"}, + ) + assert response.status_code == 206 # Partial Content + assert response.content == b"0123456789" + assert "Content-Range" in response.headers + assert response.headers["Content-Range"].startswith("bytes 0-9/") + + @pytest.mark.integration + def test_range_request_middle_bytes(self, integration_client, test_package): + """Test range request for bytes in the middle.""" 
+ project, package = test_package + content = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + upload_test_file(integration_client, project, package, content, tag="range-mid") + + # Request bytes 10-19 (KLMNOPQRST) + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/range-mid", + params={"mode": "proxy"}, + headers={"Range": "bytes=10-19"}, + ) + assert response.status_code == 206 + assert response.content == b"KLMNOPQRST" + + @pytest.mark.integration + def test_range_request_suffix_bytes(self, integration_client, test_package): + """Test range request for last N bytes (suffix range).""" + project, package = test_package + content = b"0123456789ABCDEF" # 16 bytes + upload_test_file(integration_client, project, package, content, tag="range-suffix") + + # Request last 4 bytes + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/range-suffix", + params={"mode": "proxy"}, + headers={"Range": "bytes=-4"}, + ) + assert response.status_code == 206 + assert response.content == b"CDEF" + + @pytest.mark.integration + def test_range_request_open_ended(self, integration_client, test_package): + """Test range request from offset to end.""" + project, package = test_package + content = b"0123456789" + upload_test_file(integration_client, project, package, content, tag="range-open") + + # Request from byte 5 to end + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/range-open", + params={"mode": "proxy"}, + headers={"Range": "bytes=5-"}, + ) + assert response.status_code == 206 + assert response.content == b"56789" + + @pytest.mark.integration + def test_range_request_includes_accept_ranges_header( + self, integration_client, test_package + ): + """Test that range requests include Accept-Ranges header.""" + project, package = test_package + content = b"test content" + upload_test_file(integration_client, project, package, content, tag="accept-ranges") + + response = integration_client.get( + 
f"/api/v1/project/{project}/{package}/+/accept-ranges", + params={"mode": "proxy"}, + headers={"Range": "bytes=0-4"}, + ) + assert response.status_code == 206 + assert response.headers.get("Accept-Ranges") == "bytes" + + @pytest.mark.integration + def test_full_download_advertises_accept_ranges( + self, integration_client, test_package + ): + """Test that full downloads advertise range support.""" + project, package = test_package + content = b"test content" + upload_test_file(integration_client, project, package, content, tag="full-accept") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/full-accept", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert response.headers.get("Accept-Ranges") == "bytes" + + +class TestConditionalRequests: + """Tests for conditional request handling (304 Not Modified).""" + + @pytest.mark.integration + def test_if_none_match_returns_304(self, integration_client, test_package): + """Test If-None-Match with matching ETag returns 304.""" + project, package = test_package + content = b"conditional request test content" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="cond-etag") + + # Request with matching ETag + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/cond-etag", + params={"mode": "proxy"}, + headers={"If-None-Match": f'"{expected_hash}"'}, + ) + assert response.status_code == 304 + assert response.content == b"" # No body for 304 + + @pytest.mark.integration + def test_if_none_match_without_quotes(self, integration_client, test_package): + """Test If-None-Match works with or without quotes.""" + project, package = test_package + content = b"etag no quotes test" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="cond-noquote") + + # Request with ETag without quotes + response = integration_client.get( + 
f"/api/v1/project/{project}/{package}/+/cond-noquote", + params={"mode": "proxy"}, + headers={"If-None-Match": expected_hash}, + ) + assert response.status_code == 304 + + @pytest.mark.integration + def test_if_none_match_mismatch_returns_200(self, integration_client, test_package): + """Test If-None-Match with non-matching ETag returns 200.""" + project, package = test_package + content = b"etag mismatch test" + upload_test_file(integration_client, project, package, content, tag="cond-mismatch") + + # Request with different ETag + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/cond-mismatch", + params={"mode": "proxy"}, + headers={"If-None-Match": '"different-etag-value"'}, + ) + assert response.status_code == 200 + assert response.content == content + + @pytest.mark.integration + def test_if_modified_since_returns_304(self, integration_client, test_package): + """Test If-Modified-Since with future date returns 304.""" + project, package = test_package + content = b"modified since test" + upload_test_file(integration_client, project, package, content, tag="cond-modified") + + # Request with future date (artifact was definitely created before this) + future_date = formatdate(time.time() + 86400, usegmt=True) # Tomorrow + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/cond-modified", + params={"mode": "proxy"}, + headers={"If-Modified-Since": future_date}, + ) + assert response.status_code == 304 + + @pytest.mark.integration + def test_if_modified_since_old_date_returns_200( + self, integration_client, test_package + ): + """Test If-Modified-Since with old date returns 200.""" + project, package = test_package + content = b"old date test" + upload_test_file(integration_client, project, package, content, tag="cond-old") + + # Request with old date (2020-01-01) + old_date = "Wed, 01 Jan 2020 00:00:00 GMT" + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/cond-old", + 
params={"mode": "proxy"}, + headers={"If-Modified-Since": old_date}, + ) + assert response.status_code == 200 + assert response.content == content + + @pytest.mark.integration + def test_304_includes_etag(self, integration_client, test_package): + """Test 304 response includes ETag header.""" + project, package = test_package + content = b"304 etag test" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="304-etag") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/304-etag", + params={"mode": "proxy"}, + headers={"If-None-Match": f'"{expected_hash}"'}, + ) + assert response.status_code == 304 + assert response.headers.get("ETag") == f'"{expected_hash}"' + + @pytest.mark.integration + def test_304_includes_cache_control(self, integration_client, test_package): + """Test 304 response includes Cache-Control header.""" + project, package = test_package + content = b"304 cache test" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="304-cache") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/304-cache", + params={"mode": "proxy"}, + headers={"If-None-Match": f'"{expected_hash}"'}, + ) + assert response.status_code == 304 + assert "immutable" in response.headers.get("Cache-Control", "") + + +class TestCachingHeaders: + """Tests for caching headers on download responses.""" + + @pytest.mark.integration + def test_download_includes_cache_control(self, integration_client, test_package): + """Test download response includes Cache-Control header.""" + project, package = test_package + content = b"cache control test" + upload_test_file(integration_client, project, package, content, tag="cache-ctl") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/cache-ctl", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + cache_control = 
response.headers.get("Cache-Control", "") + assert "public" in cache_control + assert "immutable" in cache_control + assert "max-age" in cache_control + + @pytest.mark.integration + def test_download_includes_last_modified(self, integration_client, test_package): + """Test download response includes Last-Modified header.""" + project, package = test_package + content = b"last modified test" + upload_test_file(integration_client, project, package, content, tag="last-mod") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/last-mod", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert "Last-Modified" in response.headers + # Should be in RFC 7231 format + last_modified = response.headers["Last-Modified"] + assert "GMT" in last_modified + + @pytest.mark.integration + def test_download_includes_etag(self, integration_client, test_package): + """Test download response includes ETag header.""" + project, package = test_package + content = b"etag header test" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="etag-hdr") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/etag-hdr", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert response.headers.get("ETag") == f'"{expected_hash}"' + + +class TestDownloadResume: + """Tests for download resume functionality using range requests.""" + + @pytest.mark.integration + def test_resume_download_after_partial(self, integration_client, test_package): + """Test resuming download from where it left off.""" + project, package = test_package + content = b"ABCDEFGHIJ" * 100 # 1000 bytes + upload_test_file(integration_client, project, package, content, tag="resume-test") + + # Simulate partial download (first 500 bytes) + response1 = integration_client.get( + f"/api/v1/project/{project}/{package}/+/resume-test", + params={"mode": "proxy"}, + headers={"Range": 
"bytes=0-499"}, + ) + assert response1.status_code == 206 + first_half = response1.content + assert len(first_half) == 500 + + # Resume from byte 500 + response2 = integration_client.get( + f"/api/v1/project/{project}/{package}/+/resume-test", + params={"mode": "proxy"}, + headers={"Range": "bytes=500-"}, + ) + assert response2.status_code == 206 + second_half = response2.content + assert len(second_half) == 500 + + # Combine and verify + combined = first_half + second_half + assert combined == content + + @pytest.mark.integration + def test_resume_with_etag_verification(self, integration_client, test_package): + """Test that resumed download can verify content hasn't changed.""" + project, package = test_package + content = b"resume etag verification test content" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="resume-etag") + + # Get ETag from first request + response1 = integration_client.get( + f"/api/v1/project/{project}/{package}/+/resume-etag", + params={"mode": "proxy"}, + headers={"Range": "bytes=0-9"}, + ) + assert response1.status_code == 206 + etag = response1.headers.get("ETag") + assert etag == f'"{expected_hash}"' + + # Resume from byte 10. No If-Match/If-Range header is sent here; the test + # instead checks that both partial responses report the same ETag. + response2 = integration_client.get( + f"/api/v1/project/{project}/{package}/+/resume-etag", + params={"mode": "proxy"}, + headers={"Range": "bytes=10-"}, + ) + assert response2.status_code == 206 + # ETag should be the same + assert response2.headers.get("ETag") == etag + + +class TestLargeFileStreaming: + """Tests for streaming large files.""" + + @pytest.mark.integration + def test_stream_1mb_file(self, integration_client, test_package, sized_content): + """Test streaming a 1MB file.""" + project, package = test_package + content, expected_hash = sized_content(SIZE_1MB, seed=500) + + upload_test_file(integration_client, project, package, content, 
tag="stream-1mb") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/stream-1mb", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert len(response.content) == SIZE_1MB + assert compute_sha256(response.content) == expected_hash + + @pytest.mark.integration + def test_stream_large_file_has_correct_headers( + self, integration_client, test_package, sized_content + ): + """Test that large file streaming has correct headers.""" + project, package = test_package + content, expected_hash = sized_content(SIZE_100KB, seed=501) + + upload_test_file(integration_client, project, package, content, tag="stream-hdr") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/stream-hdr", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert int(response.headers.get("Content-Length", 0)) == SIZE_100KB + assert response.headers.get("X-Checksum-SHA256") == expected_hash + assert response.headers.get("Accept-Ranges") == "bytes" + + @pytest.mark.integration + def test_range_request_on_large_file( + self, integration_client, test_package, sized_content + ): + """Test range request on a larger file.""" + project, package = test_package + content, _ = sized_content(SIZE_100KB, seed=502) + + upload_test_file(integration_client, project, package, content, tag="range-large") + + # Request a slice from the middle + start = 50000 + end = 50999 + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/range-large", + params={"mode": "proxy"}, + headers={"Range": f"bytes={start}-{end}"}, + ) + assert response.status_code == 206 + assert len(response.content) == 1000 + assert response.content == content[start : end + 1] + + +class TestDownloadModes: + """Tests for different download modes.""" + + @pytest.mark.integration + def test_proxy_mode_streams_content(self, integration_client, test_package): + """Test proxy mode streams content through backend.""" + project, package = 
test_package + content = b"proxy mode test content" + upload_test_file(integration_client, project, package, content, tag="mode-proxy") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/mode-proxy", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert response.content == content + + @pytest.mark.integration + def test_presigned_mode_returns_url(self, integration_client, test_package): + """Test presigned mode returns JSON with URL.""" + project, package = test_package + content = b"presigned mode test" + upload_test_file(integration_client, project, package, content, tag="mode-presign") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/mode-presign", + params={"mode": "presigned"}, + ) + assert response.status_code == 200 + data = response.json() + assert "url" in data + assert "expires_at" in data + assert data["url"].startswith("http") + + @pytest.mark.integration + def test_redirect_mode_returns_302(self, integration_client, test_package): + """Test redirect mode returns 302 to presigned URL.""" + project, package = test_package + content = b"redirect mode test" + upload_test_file(integration_client, project, package, content, tag="mode-redir") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/mode-redir", + params={"mode": "redirect"}, + follow_redirects=False, + ) + assert response.status_code == 302 + assert "Location" in response.headers + + +class TestIntegrityDuringStreaming: + """Tests for data integrity during streaming downloads.""" + + @pytest.mark.integration + def test_checksum_header_matches_content(self, integration_client, test_package): + """Test X-Checksum-SHA256 header matches actual downloaded content.""" + project, package = test_package + content = b"integrity check content" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="integrity") + + response = 
integration_client.get( + f"/api/v1/project/{project}/{package}/+/integrity", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + + header_hash = response.headers.get("X-Checksum-SHA256") + actual_hash = compute_sha256(response.content) + + assert header_hash == expected_hash + assert actual_hash == expected_hash + assert header_hash == actual_hash + + @pytest.mark.integration + def test_etag_matches_content_hash(self, integration_client, test_package): + """Test ETag header matches content hash.""" + project, package = test_package + content = b"etag integrity test" + expected_hash = compute_sha256(content) + upload_test_file(integration_client, project, package, content, tag="etag-int") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/etag-int", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + + etag = response.headers.get("ETag", "").strip('"') + actual_hash = compute_sha256(response.content) + + assert etag == expected_hash + assert actual_hash == expected_hash + + @pytest.mark.integration + def test_digest_header_present(self, integration_client, test_package): + """Test Digest header is present in RFC 3230 format.""" + project, package = test_package + content = b"digest header test" + upload_test_file(integration_client, project, package, content, tag="digest") + + response = integration_client.get( + f"/api/v1/project/{project}/{package}/+/digest", + params={"mode": "proxy"}, + ) + assert response.status_code == 200 + assert "Digest" in response.headers + assert response.headers["Digest"].startswith("sha-256=")