"""
Integration tests for streaming download functionality.

Tests cover:
- HTTP Range requests (partial downloads, resume)
- Conditional requests (If-None-Match, If-Modified-Since)
- Caching headers (Cache-Control, Last-Modified, Accept-Ranges)
- Large file streaming
- Download modes (proxy, redirect, presigned)
"""

import io
import time
from email.utils import formatdate

import pytest

from tests.factories import (
    compute_sha256,
    upload_test_file,
)
from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
)


def _artifact_url(project, package, version):
    """Return the download endpoint path for one uploaded version."""
    return f"/api/v1/project/{project}/{package}/+/{version}"


def _download(
    client, project, package, version, *, mode="proxy", headers=None, **request_kwargs
):
    """Issue a GET against the download endpoint for an uploaded version.

    Args:
        client: HTTP client exposing ``get`` (the ``integration_client`` fixture).
        project: Project name, as unpacked from the ``test_package`` fixture.
        package: Package name, as unpacked from the ``test_package`` fixture.
        version: Version label the content was uploaded under.
        mode: Value sent as the ``mode`` query parameter.
        headers: Optional request headers. When ``None`` the ``headers``
            argument is not passed to ``client.get`` at all.
        **request_kwargs: Extra keyword arguments forwarded to ``client.get``
            unchanged (for example ``follow_redirects=False``).

    Returns:
        The response object returned by ``client.get``.
    """
    if headers is not None:
        request_kwargs["headers"] = headers
    return client.get(
        _artifact_url(project, package, version),
        params={"mode": mode},
        **request_kwargs,
    )


class TestRangeRequests:
    """Tests for HTTP Range request support (partial downloads)."""

    @pytest.mark.integration
    def test_range_request_first_bytes(self, integration_client, test_package):
        """Test range request for first N bytes."""
        project, package = test_package
        version = "range-test"
        content = b"0123456789" * 100  # 1000 bytes
        upload_test_file(integration_client, project, package, content, version=version)

        # Request first 10 bytes
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=0-9"},
        )
        assert response.status_code == 206  # Partial Content
        assert response.content == b"0123456789"
        assert "Content-Range" in response.headers
        assert response.headers["Content-Range"].startswith("bytes 0-9/")

    @pytest.mark.integration
    def test_range_request_middle_bytes(self, integration_client, test_package):
        """Test range request for bytes in the middle."""
        project, package = test_package
        version = "range-mid"
        content = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        upload_test_file(integration_client, project, package, content, version=version)

        # Request bytes 10-19 (KLMNOPQRST)
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=10-19"},
        )
        assert response.status_code == 206
        assert response.content == b"KLMNOPQRST"

    @pytest.mark.integration
    def test_range_request_suffix_bytes(self, integration_client, test_package):
        """Test range request for last N bytes (suffix range)."""
        project, package = test_package
        version = "range-suffix"
        content = b"0123456789ABCDEF"  # 16 bytes
        upload_test_file(integration_client, project, package, content, version=version)

        # Request last 4 bytes
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=-4"},
        )
        assert response.status_code == 206
        assert response.content == b"CDEF"

    @pytest.mark.integration
    def test_range_request_open_ended(self, integration_client, test_package):
        """Test range request from offset to end."""
        project, package = test_package
        version = "range-open"
        content = b"0123456789"
        upload_test_file(integration_client, project, package, content, version=version)

        # Request from byte 5 to end
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=5-"},
        )
        assert response.status_code == 206
        assert response.content == b"56789"

    @pytest.mark.integration
    def test_range_request_includes_accept_ranges_header(
        self, integration_client, test_package
    ):
        """Test that range requests include Accept-Ranges header."""
        project, package = test_package
        version = "accept-ranges"
        content = b"test content"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=0-4"},
        )
        assert response.status_code == 206
        assert response.headers.get("Accept-Ranges") == "bytes"

    @pytest.mark.integration
    def test_full_download_advertises_accept_ranges(
        self, integration_client, test_package
    ):
        """Test that full downloads advertise range support."""
        project, package = test_package
        version = "full-accept"
        content = b"test content"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        assert response.headers.get("Accept-Ranges") == "bytes"


class TestConditionalRequests:
    """Tests for conditional request handling (304 Not Modified)."""

    @pytest.mark.integration
    def test_if_none_match_returns_304(self, integration_client, test_package):
        """Test If-None-Match with matching ETag returns 304."""
        project, package = test_package
        version = "cond-etag"
        content = b"conditional request test content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        # Request with matching ETag
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert response.content == b""  # No body for 304

    @pytest.mark.integration
    def test_if_none_match_without_quotes(self, integration_client, test_package):
        """Test If-None-Match works with or without quotes."""
        project, package = test_package
        version = "cond-noquote"
        content = b"etag no quotes test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        # Request with ETag without quotes
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-None-Match": expected_hash},
        )
        assert response.status_code == 304

    @pytest.mark.integration
    def test_if_none_match_mismatch_returns_200(self, integration_client, test_package):
        """Test If-None-Match with non-matching ETag returns 200."""
        project, package = test_package
        version = "cond-mismatch"
        content = b"etag mismatch test"
        upload_test_file(integration_client, project, package, content, version=version)

        # Request with different ETag
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-None-Match": '"different-etag-value"'},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_if_modified_since_returns_304(self, integration_client, test_package):
        """Test If-Modified-Since with future date returns 304."""
        project, package = test_package
        version = "cond-modified"
        content = b"modified since test"
        upload_test_file(integration_client, project, package, content, version=version)

        # Request with future date (artifact was definitely created before this)
        future_date = formatdate(time.time() + 86400, usegmt=True)  # Tomorrow
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-Modified-Since": future_date},
        )
        assert response.status_code == 304

    @pytest.mark.integration
    def test_if_modified_since_old_date_returns_200(
        self, integration_client, test_package
    ):
        """Test If-Modified-Since with old date returns 200."""
        project, package = test_package
        version = "cond-old"
        content = b"old date test"
        upload_test_file(integration_client, project, package, content, version=version)

        # Request with old date (2020-01-01)
        old_date = "Wed, 01 Jan 2020 00:00:00 GMT"
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-Modified-Since": old_date},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_304_includes_etag(self, integration_client, test_package):
        """Test 304 response includes ETag header."""
        project, package = test_package
        version = "304-etag"
        content = b"304 etag test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert response.headers.get("ETag") == f'"{expected_hash}"'

    @pytest.mark.integration
    def test_304_includes_cache_control(self, integration_client, test_package):
        """Test 304 response includes Cache-Control header."""
        project, package = test_package
        version = "304-cache"
        content = b"304 cache test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert "immutable" in response.headers.get("Cache-Control", "")


class TestCachingHeaders:
    """Tests for caching headers on download responses."""

    @pytest.mark.integration
    def test_download_includes_cache_control(self, integration_client, test_package):
        """Test download response includes Cache-Control header."""
        project, package = test_package
        version = "cache-ctl"
        content = b"cache control test"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        cache_control = response.headers.get("Cache-Control", "")
        assert "public" in cache_control
        assert "immutable" in cache_control
        assert "max-age" in cache_control

    @pytest.mark.integration
    def test_download_includes_last_modified(self, integration_client, test_package):
        """Test download response includes Last-Modified header."""
        project, package = test_package
        version = "last-mod"
        content = b"last modified test"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        assert "Last-Modified" in response.headers
        # Should be in RFC 7231 format; only the "GMT" zone marker is checked here
        last_modified = response.headers["Last-Modified"]
        assert "GMT" in last_modified

    @pytest.mark.integration
    def test_download_includes_etag(self, integration_client, test_package):
        """Test download response includes ETag header."""
        project, package = test_package
        version = "etag-hdr"
        content = b"etag header test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        assert response.headers.get("ETag") == f'"{expected_hash}"'


class TestDownloadResume:
    """Tests for download resume functionality using range requests."""

    @pytest.mark.integration
    def test_resume_download_after_partial(self, integration_client, test_package):
        """Test resuming download from where it left off."""
        project, package = test_package
        version = "resume-test"
        content = b"ABCDEFGHIJ" * 100  # 1000 bytes
        upload_test_file(integration_client, project, package, content, version=version)

        # Simulate partial download (first 500 bytes)
        response1 = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=0-499"},
        )
        assert response1.status_code == 206
        first_half = response1.content
        assert len(first_half) == 500

        # Resume from byte 500
        response2 = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=500-"},
        )
        assert response2.status_code == 206
        second_half = response2.content
        assert len(second_half) == 500

        # Combine and verify
        combined = first_half + second_half
        assert combined == content

    @pytest.mark.integration
    def test_resume_with_etag_verification(self, integration_client, test_package):
        """Test that resumed download can verify content hasn't changed."""
        project, package = test_package
        version = "resume-etag"
        content = b"resume etag verification test content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        # Get ETag from first request
        response1 = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=0-9"},
        )
        assert response1.status_code == 206
        etag = response1.headers.get("ETag")
        assert etag == f'"{expected_hash}"'

        # Resume from byte 10. No If-Match / If-Range precondition is sent with
        # this request; the check is client-side: the resumed response must
        # report the same ETag as the first partial response.
        # NOTE(review): sending the validator (e.g. If-Range) would exercise the
        # server-side precondition path -- confirm the server supports it first.
        response2 = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": "bytes=10-"},
        )
        assert response2.status_code == 206
        # ETag should be the same
        assert response2.headers.get("ETag") == etag


class TestLargeFileStreaming:
    """Tests for streaming large files."""

    @pytest.mark.integration
    def test_stream_1mb_file(self, integration_client, test_package, sized_content):
        """Test streaming a 1MB file."""
        project, package = test_package
        version = "stream-1mb"
        content, expected_hash = sized_content(SIZE_1MB, seed=500)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        assert len(response.content) == SIZE_1MB
        assert compute_sha256(response.content) == expected_hash

    @pytest.mark.integration
    def test_stream_large_file_has_correct_headers(
        self, integration_client, test_package, sized_content
    ):
        """Test that large file streaming has correct headers."""
        project, package = test_package
        version = "stream-hdr"
        content, expected_hash = sized_content(SIZE_100KB, seed=501)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        # A missing Content-Length falls back to 0 and fails the comparison
        assert int(response.headers.get("Content-Length", 0)) == SIZE_100KB
        assert response.headers.get("X-Checksum-SHA256") == expected_hash
        assert response.headers.get("Accept-Ranges") == "bytes"

    @pytest.mark.integration
    def test_range_request_on_large_file(
        self, integration_client, test_package, sized_content
    ):
        """Test range request on a larger file."""
        project, package = test_package
        version = "range-large"
        content, _ = sized_content(SIZE_100KB, seed=502)
        upload_test_file(integration_client, project, package, content, version=version)

        # Request a slice from the middle (Range end offsets are inclusive)
        start = 50000
        end = 50999
        response = _download(
            integration_client,
            project,
            package,
            version,
            headers={"Range": f"bytes={start}-{end}"},
        )
        assert response.status_code == 206
        assert len(response.content) == 1000
        assert response.content == content[start : end + 1]


class TestDownloadModes:
    """Tests for different download modes."""

    @pytest.mark.integration
    def test_proxy_mode_streams_content(self, integration_client, test_package):
        """Test proxy mode streams content through backend."""
        project, package = test_package
        version = "mode-proxy"
        content = b"proxy mode test content"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(
            integration_client, project, package, version, mode="proxy"
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_presigned_mode_returns_url(self, integration_client, test_package):
        """Test presigned mode returns JSON with URL."""
        project, package = test_package
        version = "mode-presign"
        content = b"presigned mode test"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(
            integration_client, project, package, version, mode="presigned"
        )
        assert response.status_code == 200
        data = response.json()
        assert "url" in data
        assert "expires_at" in data
        assert data["url"].startswith("http")

    @pytest.mark.integration
    def test_redirect_mode_returns_302(self, integration_client, test_package):
        """Test redirect mode returns 302 to presigned URL."""
        project, package = test_package
        version = "mode-redir"
        content = b"redirect mode test"
        upload_test_file(integration_client, project, package, content, version=version)

        # Do not follow the redirect so the 302 itself can be inspected
        response = _download(
            integration_client,
            project,
            package,
            version,
            mode="redirect",
            follow_redirects=False,
        )
        assert response.status_code == 302
        assert "Location" in response.headers


class TestIntegrityDuringStreaming:
    """Tests for data integrity during streaming downloads."""

    @pytest.mark.integration
    def test_checksum_header_matches_content(self, integration_client, test_package):
        """Test X-Checksum-SHA256 header matches actual downloaded content."""
        project, package = test_package
        version = "integrity"
        content = b"integrity check content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200

        header_hash = response.headers.get("X-Checksum-SHA256")
        actual_hash = compute_sha256(response.content)

        assert header_hash == expected_hash
        assert actual_hash == expected_hash
        assert header_hash == actual_hash

    @pytest.mark.integration
    def test_etag_matches_content_hash(self, integration_client, test_package):
        """Test ETag header matches content hash."""
        project, package = test_package
        version = "etag-int"
        content = b"etag integrity test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200

        # Strip the surrounding quotes before comparing with the bare hash
        etag = response.headers.get("ETag", "").strip('"')
        actual_hash = compute_sha256(response.content)

        assert etag == expected_hash
        assert actual_hash == expected_hash

    @pytest.mark.integration
    def test_digest_header_present(self, integration_client, test_package):
        """Test Digest header is present in RFC 3230 format."""
        project, package = test_package
        version = "digest"
        content = b"digest header test"
        upload_test_file(integration_client, project, package, content, version=version)

        response = _download(integration_client, project, package, version)
        assert response.status_code == 200
        assert "Digest" in response.headers
        assert response.headers["Digest"].startswith("sha-256=")