Merge branch 'feature/upload-download-tests' into 'main'

Add comprehensive upload/download tests and streaming enhancements (#38, #40, #42, #43). Closes #38, #40, #42, and #43. See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!34
Add comprehensive upload/download tests and streaming enhancements (#38, #40, #42, #43)
2026-01-21 09:35:12 -06:00 · 2026-01-21 09:35:12 -06:00
23 changed files with 5385 additions and 405 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -8,6 +8,11 @@ variables:
  PROSPER_VERSION: v0.64.1
  # Use internal PyPI proxy instead of public internet
  PIP_INDEX_URL: https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple
  # Environment URLs (used by deploy and test jobs)
  STAGE_URL: https://orchard-stage.common.global.bsf.tools
  PROD_URL: https://orchard.common.global.bsf.tools
  # Shared pip cache directory
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
 # Prevent duplicate pipelines for MRs
 workflow:
@@ -29,11 +34,47 @@ kics:
  variables:
    KICS_CONFIG: kics.config
-# Post-deployment integration tests template
+# Full integration test suite template (for feature/stage deployments)
 # Runs the complete pytest integration test suite against the deployed environment
 .integration_test_template: &integration_test_template
  stage: deploy  # Runs in deploy stage, but after deployment due to 'needs'
  image: deps.global.bsf.tools/docker/python:3.12-slim
-  timeout: 10m
+  timeout: 20m  # Full suite takes longer than smoke tests
  interruptible: true  # Cancel if new pipeline starts
  retry: 1  # Retry once on failure (network flakiness)
  # Per-branch pip cache. The cached path matches the global PIP_CACHE_DIR
  # ($CI_PROJECT_DIR/.pip-cache), so pip reads and writes it directly.
  cache:
    key: pip-$CI_COMMIT_REF_SLUG
    paths:
      - .pip-cache/
    policy: pull-push
  # Install the backend requirements plus the test tooling, both through the
  # index named by PIP_INDEX_URL.
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt
    - pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio httpx
  script:
    - cd backend
    # Run full integration test suite, excluding:
    # - large/slow tests
    # - requires_direct_s3 tests (can't access MinIO from outside K8s cluster)
    # ORCHARD_TEST_URL tells the tests which server to connect to
    # Note: Auth tests work because dev/stage deployments have relaxed rate limits
    - |
      python -m pytest tests/integration/ -v \
        --junitxml=integration-report.xml \
        -m "not large and not slow and not requires_direct_s3" \
        --tb=short
  # The report is written inside backend/ (the script cd's there first) and is
  # uploaded even when the tests fail (when: always).
  artifacts:
    when: always
    expire_in: 1 week
    paths:
      - backend/integration-report.xml
    reports:
      junit: backend/integration-report.xml
 # Lightweight smoke test template (for production - no test data creation)
 .smoke_test_template: &smoke_test_template
  stage: deploy
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
@@ -43,12 +84,12 @@ kics:
      import os
      import sys
-      BASE_URL = os.environ.get("BASE_URL")
+      BASE_URL = os.environ.get("ORCHARD_TEST_URL")
      if not BASE_URL:
-          print("ERROR: BASE_URL not set")
+          print("ERROR: ORCHARD_TEST_URL not set")
          sys.exit(1)
-      print(f"Running integration tests against {BASE_URL}")
+      print(f"Running smoke tests against {BASE_URL}")
      client = httpx.Client(base_url=BASE_URL, timeout=30.0)
      errors = []
@@ -86,38 +127,37 @@ kics:
              print(f"  FAIL: {e}")
          sys.exit(1)
      else:
-          print("SUCCESS: All integration tests passed!")
+          print("SUCCESS: All smoke tests passed!")
          sys.exit(0)
      PYTEST_SCRIPT
-# Integration tests for stage deployment
+# Integration tests for stage deployment (full suite)
 integration_test_stage:
  <<: *integration_test_template
  needs: [deploy_stage]
  variables:
-    BASE_URL: https://orchard-stage.common.global.bsf.tools
+    ORCHARD_TEST_URL: $STAGE_URL
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
-# Integration tests for feature deployment
+# Integration tests for feature deployment (full suite)
 integration_test_feature:
  <<: *integration_test_template
  needs: [deploy_feature]
  variables:
-    BASE_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
+    ORCHARD_TEST_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success
-# Run Python backend tests
+# Run Python backend unit tests
-python_tests:
+python_unit_tests:
  stage: test
  needs: []  # Run in parallel with build
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 15m
-  variables:
-    PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
+  # PIP_CACHE_DIR is defined once in the global variables block
+  interruptible: true  # Cancel if new pipeline starts
  cache:
    key: pip-$CI_COMMIT_REF_SLUG
    paths:
@@ -128,7 +168,7 @@ python_tests:
    - pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio pytest-cov httpx
  script:
    - cd backend
-    # Only run unit tests - integration tests require Docker Compose services
+    # Run unit tests (integration tests run post-deployment against live environment)
    - python -m pytest tests/unit/ -v --cov=app --cov-report=term --cov-report=xml:coverage.xml --cov-report=html:coverage_html --junitxml=pytest-report.xml
  artifacts:
    when: always
@@ -150,6 +190,7 @@ frontend_tests:
  needs: []  # Run in parallel with build
  image: deps.global.bsf.tools/docker/node:20-alpine
  timeout: 15m
  interruptible: true  # Cancel if new pipeline starts
  cache:
    key: npm-$CI_COMMIT_REF_SLUG
    paths:
@@ -175,7 +216,7 @@ frontend_tests:
 # Shared deploy configuration
 .deploy_template: &deploy_template
  stage: deploy
-  needs: [build_image, test_image, kics, hadolint, python_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
+  needs: [build_image, test_image, kics, hadolint, python_unit_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
 .helm_setup: &helm_setup
@@ -184,47 +225,21 @@ frontend_tests:
  # OCI-based charts from internal registry - no repo add needed
  - helm dependency update
 # Simplified deployment verification - just health check
 # Full API/frontend checks are done by integration tests post-deployment
 .verify_deployment: &verify_deployment |
+  # Poll the health endpoint up to 30 times, 10s apart (each request capped
+  # at 10s). Exits the job script with 0 on the first success, 1 otherwise.
  echo "=== Waiting for health endpoint (certs may take a few minutes) ==="
  for i in $(seq 1 30); do
    if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then
      echo "Health check passed!"
-      break
+      echo "Deployment URL: $BASE_URL"
      exit 0
    fi
    echo "Attempt $i/30 - waiting 10s..."
    sleep 10
  done
+  echo "Health check failed after 30 attempts"
+  exit 1
-
-  # Verify health endpoint
-  echo ""
-  echo "=== Health Check ==="
-  curl -sf "$BASE_URL/health" || { echo "Health check failed"; exit 1; }
-  echo ""
-  # Verify API is responding
-  echo ""
-  echo "=== API Check (GET /api/v1/projects) ==="
-  HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" "$BASE_URL/api/v1/projects")
-  if [ "$HTTP_CODE" = "200" ]; then
-    echo "API responding: HTTP $HTTP_CODE"
-  else
-    echo "API check failed: HTTP $HTTP_CODE"
-    exit 1
-  fi
-  # Verify frontend is served
-  echo ""
-  echo "=== Frontend Check ==="
-  if curl -sf "$BASE_URL/" | grep -q "</html>"; then
-    echo "Frontend is being served"
-  else
-    echo "Frontend check failed"
-    exit 1
-  fi
-  echo ""
-  echo "=== All checks passed! ==="
-  echo "Deployment URL: $BASE_URL"
 # Deploy to stage (main branch)
 deploy_stage:
@@ -232,7 +247,7 @@ deploy_stage:
  variables:
    NAMESPACE: orch-stage-namespace
    VALUES_FILE: helm/orchard/values-stage.yaml
-    BASE_URL: https://orchard-stage.common.global.bsf.tools
+    BASE_URL: $STAGE_URL
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
    - *helm_setup
@@ -251,7 +266,7 @@ deploy_stage:
    - *verify_deployment
  environment:
    name: stage
-    url: https://orchard-stage.common.global.bsf.tools
+    url: $STAGE_URL
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  rules:
@@ -297,10 +312,12 @@ deploy_feature:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success
-# Cleanup feature branch deployment
+# Cleanup feature branch deployment (standalone - doesn't need deploy dependencies)
 cleanup_feature:
-  <<: *deploy_template
+  stage: deploy
  needs: []
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  timeout: 5m
  variables:
    NAMESPACE: orch-dev-namespace
    GIT_STRATEGY: none  # No source needed, branch may be deleted
@@ -329,7 +346,7 @@ deploy_prod:
  variables:
    NAMESPACE: orch-prod-namespace
    VALUES_FILE: helm/orchard/values-prod.yaml
-    BASE_URL: https://orchard.common.global.bsf.tools
+    BASE_URL: $PROD_URL
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
    - *helm_setup
@@ -348,7 +365,7 @@ deploy_prod:
    - *verify_deployment
  environment:
    name: production
-    url: https://orchard.common.global.bsf.tools
+    url: $PROD_URL
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
  rules:
@@ -357,12 +374,12 @@ deploy_prod:
      when: manual  # Require manual approval for prod
  allow_failure: false
-# Integration tests for production deployment
+# Smoke tests for production deployment (read-only, no test data creation)
-integration_test_prod:
+smoke_test_prod:
-  <<: *integration_test_template
+  <<: *smoke_test_template
  needs: [deploy_prod]
  variables:
-    BASE_URL: https://orchard.common.global.bsf.tools
+    ORCHARD_TEST_URL: $PROD_URL
  rules:
    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
      when: on_success
--- a/.gitleaksignore
+++ b/.gitleaksignore
@@ -11,3 +11,7 @@ bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:gene
 90bb2a3a393d2361dc3136ee8d761debb0726d8a:backend/tests/unit/test_storage.py:generic-api-key:381
 37666e41a72d2a4f34447c0d1a8728e1d7271d24:backend/tests/unit/test_storage.py:generic-api-key:381
 0cc4f253621a9601c5193f6ae1e7ae33f0e7fc9b:backend/tests/unit/test_storage.py:generic-api-key:381
 35fda65d381acc5ab59bc592ee3013f75906c197:backend/tests/unit/test_storage.py:generic-api-key:381
 08dce6cbb836b687002751fed4159bfc2da61f8b:backend/tests/unit/test_storage.py:generic-api-key:381
 617bcbe89cff9a009d77e4f1f1864efed1820e63:backend/tests/unit/test_storage.py:generic-api-key:381
 1cbd33544388e0fe6db752fa8886fab33cf9ce7c:backend/tests/unit/test_storage.py:generic-api-key:381
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Added
 - Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
 - Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
 - Added data integrity tests (binary, text, unicode, compressed content) (#38)
 - Added chunk boundary tests for edge cases (#38)
 - Added `@pytest.mark.large` and `@pytest.mark.concurrent` test markers (#38)
 - Added `generate_content()` and `generate_content_with_hash()` test helpers (#38)
 - Added `sized_content` fixture for generating test content of specific sizes (#38)
 - Added upload API tests: upload without tag, artifact creation verification, S3 object creation (#38)
 - Added download API tests: tag: prefix resolution, 404 for nonexistent project/package/artifact (#38)
 - Added download header tests: Content-Type, Content-Length, Content-Disposition, ETag, X-Checksum-SHA256 (#38)
 - Added error handling tests: timeout behavior, checksum validation, resource cleanup, graceful error responses (#38)
 - Added version API tests: version creation, auto-detection, listing, download by version prefix (#38)
 - Added integrity verification tests: round-trip hash verification, client-side verification workflow, size variants (1KB-10MB) (#40)
 - Added consistency check endpoint tests with response format validation (#40)
 - Added corruption detection tests: bit flip, truncation, appended content, size mismatch, missing S3 objects (#40)
 - Added Digest header tests (RFC 3230) and verification mode tests (#40)
 - Added integrity verification documentation (`docs/integrity-verification.md`) (#40)
 - Added conditional request support for downloads (If-None-Match, If-Modified-Since) returning 304 Not Modified (#42)
 - Added caching headers to downloads: Cache-Control (immutable), Last-Modified (#42)
 - Added 416 Range Not Satisfiable response for invalid range requests (#42)
 - Added download completion logging with bytes transferred and throughput (#42)
 - Added client disconnect handling during streaming downloads (#42)
 - Added streaming download tests: range requests, conditional requests, caching headers, download resume (#42)
 - Added upload duration and throughput metrics (`duration_ms`, `throughput_mbps`) to upload response (#43)
 - Added upload progress logging for large files (hash computation and multipart upload phases) (#43)
 - Added client disconnect handling during uploads with proper cleanup (#43)
 - Added upload progress tracking endpoint `GET /upload/{upload_id}/progress` for resumable uploads (#43)
 - Added large file upload tests (10MB, 100MB, 1GB) with multipart upload verification (#43)
 - Added upload cancellation and timeout handling tests (#43)
 - Added comprehensive API documentation for upload endpoints with curl, Python, and JavaScript examples (#43)
 - Added `package_versions` table for immutable version tracking separate from mutable tags (#56)
  - Versions are set at upload time via explicit `version` parameter or auto-detected from filename/metadata
  - Version detection priority: explicit parameter > package metadata > filename pattern
@@ -31,6 +61,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added internal proxy configuration for npm, pip, helm, and apt (#51)
 ### Changed
 - CI integration tests now run full pytest suite (~350 tests) against deployed environment instead of 3 smoke tests
 - CI production deployment uses lightweight smoke tests only (no test data creation in prod)
 - CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests
 - Simplified deploy verification to health check only (full checks done by integration tests)
 - Extracted environment URLs to global variables for maintainability
 - Made `cleanup_feature` job standalone (no longer inherits deploy template dependencies)
 - Renamed `integration_test_prod` to `smoke_test_prod` for clarity
 - Updated download ref resolution to check versions before tags (version → tag → artifact ID) (#56)
 - Deploy jobs now require all security scans to pass before deployment (added test_image, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis to dependencies) (#63)
 - Increased deploy job timeout from 5m to 10m (#63)
@@ -44,6 +81,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Improved pod naming: Orchard pods now named `orchard-{env}-server-*` for clarity (#51)
 ### Fixed
 - Fixed CI integration test rate limiting: added configurable `ORCHARD_LOGIN_RATE_LIMIT` env var, relaxed to 1000/minute for dev/stage
 - Fixed duplicate `TestSecurityEdgeCases` class definition in test_auth_api.py
 - Fixed integration tests auth: session-scoped client, configurable credentials via env vars, fail-fast on auth errors
 - Fixed 413 Request Entity Too Large errors on uploads by adding `proxy-body-size: "0"` nginx annotation to Orchard ingress
 - Fixed CI tests that require direct S3 access: added `@pytest.mark.requires_direct_s3` marker and excluded from CI
 - Fixed ref_count triggers not being created: added auto-migration for tags ref_count trigger functions
 - Fixed Content-Disposition header encoding for non-ASCII filenames using RFC 5987 (#38)
 - Fixed deploy jobs running even when tests or security scans fail (changed rules from `when: always` to `when: on_success`) (#63)
 - Fixed python_tests job not using internal PyPI proxy (#63)
 - Fixed `cleanup_feature` job failing when branch is deleted (`GIT_STRATEGY: none`) (#51)
@@ -53,6 +97,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed deploy jobs running when secrets scan fails (added `secrets` to deploy dependencies)
 - Fixed dev environment memory requests to equal limits per cluster Kyverno policy
 - Fixed init containers missing resource limits (Kyverno policy compliance)
 - Fixed Python SyntaxWarning for invalid escape sequence in database migration regex pattern
 ### Removed
 - Removed unused `store_streaming()` method from storage.py (#51)
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -170,6 +170,62 @@ def _run_migrations():
            END IF;
        END $$;
        """,
        # Create ref_count trigger functions for tags (ensures triggers exist even if initial migration wasn't run)
        """
        CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
        RETURNS TRIGGER AS $$
        BEGIN
            UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;
        """,
        """
        CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
        RETURNS TRIGGER AS $$
        BEGIN
            UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
            RETURN OLD;
        END;
        $$ LANGUAGE plpgsql;
        """,
        """
        CREATE OR REPLACE FUNCTION update_artifact_ref_count()
        RETURNS TRIGGER AS $$
        BEGIN
            IF OLD.artifact_id != NEW.artifact_id THEN
                UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
                UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
            END IF;
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;
        """,
        # Create triggers for tags ref_count management
        """
        DO $$
        BEGIN
            -- Drop and recreate triggers to ensure they're current
            DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
            CREATE TRIGGER tags_ref_count_insert_trigger
                AFTER INSERT ON tags
                FOR EACH ROW
                EXECUTE FUNCTION increment_artifact_ref_count();
            DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
            CREATE TRIGGER tags_ref_count_delete_trigger
                AFTER DELETE ON tags
                FOR EACH ROW
                EXECUTE FUNCTION decrement_artifact_ref_count();
            DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
            CREATE TRIGGER tags_ref_count_update_trigger
                AFTER UPDATE ON tags
                FOR EACH ROW
                WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
                EXECUTE FUNCTION update_artifact_ref_count();
        END $$;
        """,
        # Create ref_count trigger functions for package_versions
        """
        CREATE OR REPLACE FUNCTION increment_version_ref_count()
@@ -210,7 +266,7 @@ def _run_migrations():
        END $$;
        """,
        # Migrate existing semver tags to package_versions
-        """
+        r"""
        DO $$
        BEGIN
            IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
--- a/backend/app/routes.py
+++ b/backend/app/routes.py
@@ -82,6 +82,7 @@ from .schemas import (
    ResumableUploadCompleteRequest,
    ResumableUploadCompleteResponse,
    ResumableUploadStatusResponse,
    UploadProgressResponse,
    GlobalSearchResponse,
    SearchResultProject,
    SearchResultPackage,
@@ -143,6 +144,31 @@ def sanitize_filename(filename: str) -> str:
    return re.sub(r'[\r\n"]', "", filename)
 def build_content_disposition(filename: str) -> str:
    """Return the Content-Disposition value for serving ``filename`` as an attachment.

    The name is first passed through ``sanitize_filename``. A pure-ASCII
    result is emitted as a single quoted ``filename`` parameter. Otherwise the
    value carries two parameters: an ASCII ``filename`` fallback in which each
    non-ASCII character is replaced by ``?``, and an RFC 5987 ``filename*``
    parameter holding the UTF-8, percent-encoded name.
    """
    from urllib.parse import quote

    clean_name = sanitize_filename(filename)

    if clean_name.isascii():
        # Every character fits in ASCII, so the simple quoted form is enough.
        return f'attachment; filename="{clean_name}"'

    # Fallback for clients that ignore filename*: non-ASCII degrades to '?'.
    fallback = clean_name.encode('ascii', errors='replace').decode('ascii')
    # RFC 5987 shape is charset'language'value; the language part is left empty.
    # safe='' makes quote() percent-encode every reserved character too.
    percent_encoded = quote(clean_name, safe='')
    return f'attachment; filename="{fallback}"; filename*=UTF-8\'\'{percent_encoded}'
 def get_user_id_from_request(
    request: Request,
    db: Session,
@@ -2258,10 +2284,56 @@ def upload_artifact(
    """
    Upload an artifact to a package.
-    Headers:
+    **Size Limits:**
-    - X-Checksum-SHA256: Optional client-provided SHA256 for verification
+    - Minimum: 1 byte (empty files rejected)
-    - User-Agent: Captured for audit purposes
+    - Maximum: 10GB (configurable via ORCHARD_MAX_FILE_SIZE)
-    - Authorization: Bearer <api-key> for authentication
+    - Files > 100MB automatically use S3 multipart upload
    **Headers:**
    - `X-Checksum-SHA256`: Optional SHA256 hash for server-side verification
    - `Content-Length`: File size (required for early rejection of oversized files)
    - `Authorization`: Bearer <api-key> for authentication
    **Deduplication:**
    Content-addressable storage automatically deduplicates identical files.
    If the same content is uploaded multiple times, only one copy is stored.
    **Response Metrics:**
    - `duration_ms`: Upload duration in milliseconds
    - `throughput_mbps`: Upload throughput in MB/s
    - `deduplicated`: True if content already existed
    **Example (curl):**
    ```bash
    curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload" \\
      -H "Authorization: Bearer <api-key>" \\
      -F "file=@myfile.tar.gz" \\
      -F "tag=v1.0.0"
    ```
    **Example (Python requests):**
    ```python
    import requests
    with open('myfile.tar.gz', 'rb') as f:
        response = requests.post(
            'http://localhost:8080/api/v1/project/myproject/mypackage/upload',
            files={'file': f},
            data={'tag': 'v1.0.0'},
            headers={'Authorization': 'Bearer <api-key>'}
        )
    ```
    **Example (JavaScript fetch):**
    ```javascript
    const formData = new FormData();
    formData.append('file', fileInput.files[0]);
    formData.append('tag', 'v1.0.0');
    const response = await fetch('/api/v1/project/myproject/mypackage/upload', {
        method: 'POST',
        headers: { 'Authorization': 'Bearer <api-key>' },
        body: formData
    });
    ```
    """
    start_time = time.time()
    settings = get_settings()
@@ -2363,6 +2435,30 @@ def upload_artifact(
    except StorageError as e:
        logger.error(f"Storage error during upload: {e}")
        raise HTTPException(status_code=500, detail="Internal storage error")
    except (ConnectionResetError, BrokenPipeError) as e:
        # Client disconnected during upload
        logger.warning(
            f"Client disconnected during upload: project={project_name} "
            f"package={package_name} filename={file.filename} error={e}"
        )
        raise HTTPException(
            status_code=499,  # Client Closed Request (nginx convention)
            detail="Client disconnected during upload",
        )
    except Exception as e:
        # Catch-all for unexpected errors including client disconnects
        error_str = str(e).lower()
        if "connection" in error_str or "broken pipe" in error_str or "reset" in error_str:
            logger.warning(
                f"Client connection error during upload: project={project_name} "
                f"package={package_name} filename={file.filename} error={e}"
            )
            raise HTTPException(
                status_code=499,
                detail="Client connection error during upload",
            )
        logger.error(f"Unexpected error during upload: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during upload")
    # Verify client-provided checksum if present
    checksum_verified = True
@@ -2555,6 +2651,12 @@ def upload_artifact(
            detail="Failed to save upload record. Please retry.",
        )
    # Calculate throughput
    throughput_mbps = None
    if duration_ms > 0:
        duration_seconds = duration_ms / 1000.0
        throughput_mbps = round((storage_result.size / (1024 * 1024)) / duration_seconds, 2)
    return UploadResponse(
        artifact_id=storage_result.sha256,
        sha256=storage_result.sha256,
@@ -2574,6 +2676,8 @@ def upload_artifact(
        content_type=artifact.content_type,
        original_name=artifact.original_name,
        created_at=artifact.created_at,
        duration_ms=duration_ms,
        throughput_mbps=throughput_mbps,
    )
@@ -2591,8 +2695,46 @@ def init_resumable_upload(
    storage: S3Storage = Depends(get_storage),
 ):
    """
-    Initialize a resumable upload session.
+    Initialize a resumable upload session for large files.
-    Client must provide the SHA256 hash of the file in advance.
+
    Resumable uploads allow uploading large files in chunks, with the ability
    to resume after interruption. The client must compute the SHA256 hash
    of the entire file before starting.
    **Workflow:**
    1. POST /upload/init - Initialize upload session (this endpoint)
    2. PUT /upload/{upload_id}/part/{part_number} - Upload each part
    3. GET /upload/{upload_id}/progress - Check upload progress (optional)
    4. POST /upload/{upload_id}/complete - Finalize upload
    5. DELETE /upload/{upload_id} - Abort upload (if needed)
    **Chunk Size:**
    Use the `chunk_size` returned in the response (10MB default).
    Each part except the last must be exactly this size.
    **Deduplication:**
    If the expected_hash already exists in storage, the response will include
    `already_exists: true` and no upload session is created.
    **Example (curl):**
    ```bash
    # Step 1: Initialize
    curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/init" \\
      -H "Authorization: Bearer <api-key>" \\
      -H "Content-Type: application/json" \\
      -d '{"expected_hash": "<sha256>", "filename": "large.tar.gz", "size": 104857600}'
    # Step 2: Upload parts
    curl -X PUT "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/part/1" \\
      -H "Authorization: Bearer <api-key>" \\
      --data-binary @part1.bin
    # Step 3: Complete
    curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/complete" \\
      -H "Authorization: Bearer <api-key>" \\
      -H "Content-Type: application/json" \\
      -d '{"tag": "v1.0.0"}'
    ```
    """
    user_id = get_user_id(request)
@@ -2686,6 +2828,10 @@ def init_resumable_upload(
    # Initialize resumable upload
    session = storage.initiate_resumable_upload(init_request.expected_hash)
    # Set expected size for progress tracking
    if session["upload_id"] and init_request.size:
        storage.set_upload_expected_size(session["upload_id"], init_request.size)
    return ResumableUploadInitResponse(
        upload_id=session["upload_id"],
        already_exists=False,
@@ -2752,6 +2898,64 @@ def upload_part(
        raise HTTPException(status_code=404, detail=str(e))
@router.get(
    "/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/progress",
    response_model=UploadProgressResponse,
)
def get_upload_progress(
    project_name: str,
    package_name: str,
    upload_id: str,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Report the progress of an in-flight resumable upload.

    Responds with 404 when the project, or the package within it, does not
    exist. When storage has no progress record for ``upload_id`` the response
    is a normal payload with ``status="not_found"`` and ``bytes_uploaded=0``
    rather than an error, so clients can keep polling. Otherwise the stored
    progress fields (bytes, parts, percent complete, elapsed time, throughput)
    are returned, with ``started_at`` converted to a UTC datetime.
    """
    # The project must exist before the package lookup can be scoped to it.
    project_row = db.query(Project).filter(Project.name == project_name).first()
    if not project_row:
        raise HTTPException(status_code=404, detail="Project not found")

    package_lookup = db.query(Package).filter(
        Package.project_id == project_row.id, Package.name == package_name
    )
    package_row = package_lookup.first()
    if not package_row:
        raise HTTPException(status_code=404, detail="Package not found")

    snapshot = storage.get_upload_progress(upload_id)
    if not snapshot:
        # Return not_found status instead of 404 to allow polling
        return UploadProgressResponse(
            upload_id=upload_id,
            status="not_found",
            bytes_uploaded=0,
        )

    from datetime import datetime, timezone

    # started_at is passed to datetime.fromtimestamp, i.e. treated as epoch
    # seconds; a missing or falsy value leaves the field unset.
    start_ts = snapshot.get("started_at")
    started_at_dt = (
        datetime.fromtimestamp(start_ts, tz=timezone.utc) if start_ts else None
    )

    return UploadProgressResponse(
        upload_id=upload_id,
        status=snapshot.get("status", "in_progress"),
        bytes_uploaded=snapshot.get("bytes_uploaded", 0),
        bytes_total=snapshot.get("bytes_total"),
        percent_complete=snapshot.get("percent_complete"),
        parts_uploaded=snapshot.get("parts_uploaded", 0),
        parts_total=snapshot.get("parts_total"),
        started_at=started_at_dt,
        elapsed_seconds=snapshot.get("elapsed_seconds"),
        throughput_mbps=snapshot.get("throughput_mbps"),
    )
@router.post(
    "/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete"
 )
@@ -2947,6 +3151,8 @@ def download_artifact(
    storage: S3Storage = Depends(get_storage),
    current_user: Optional[User] = Depends(get_current_user_optional),
    range: Optional[str] = Header(None),
    if_none_match: Optional[str] = Header(None, alias="If-None-Match"),
    if_modified_since: Optional[str] = Header(None, alias="If-Modified-Since"),
    mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
        default=None,
        description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)",
@@ -2963,6 +3169,15 @@ def download_artifact(
    """
    Download an artifact by reference (tag name, artifact:hash, tag:name).
    Supports conditional requests:
    - If-None-Match: Returns 304 Not Modified if ETag matches
    - If-Modified-Since: Returns 304 Not Modified if not modified since date
    Supports range requests for partial downloads and resume:
    - Range: bytes=0-1023 (first 1KB)
    - Range: bytes=-1024 (last 1KB)
    - Returns 206 Partial Content with Content-Range header
    Verification modes:
    - verify=false (default): No verification, maximum performance
    - verify=true&verify_mode=stream: Compute hash while streaming, verify after completion.
@@ -2975,6 +3190,9 @@ def download_artifact(
    - X-Content-Length: File size in bytes
    - ETag: Artifact ID (SHA256)
    - Digest: RFC 3230 format sha-256 hash
    - Last-Modified: Artifact creation timestamp
    - Cache-Control: Immutable caching for content-addressable storage
    - Accept-Ranges: bytes (advertises range request support)
    When verify=true:
    - X-Verified: 'true' if verified, 'false' if verification failed
@@ -2999,6 +3217,52 @@ def download_artifact(
    filename = sanitize_filename(artifact.original_name or f"{artifact.id}")
    # Format Last-Modified header (RFC 7231 format)
    last_modified = None
    last_modified_str = None
    if artifact.created_at:
        last_modified = artifact.created_at
        if last_modified.tzinfo is None:
            last_modified = last_modified.replace(tzinfo=timezone.utc)
        last_modified_str = last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")
    # Handle conditional requests (If-None-Match, If-Modified-Since)
    # Return 304 Not Modified if content hasn't changed
    artifact_etag = f'"{artifact.id}"'
    if if_none_match:
        # Strip quotes and compare with artifact ETag
        client_etag = if_none_match.strip().strip('"')
        if client_etag == artifact.id or if_none_match == artifact_etag:
            return Response(
                status_code=304,
                headers={
                    "ETag": artifact_etag,
                    "Cache-Control": "public, max-age=31536000, immutable",
                    **({"Last-Modified": last_modified_str} if last_modified_str else {}),
                },
            )
    if if_modified_since and last_modified:
        try:
            # Parse If-Modified-Since header
            from email.utils import parsedate_to_datetime
            client_date = parsedate_to_datetime(if_modified_since)
            if client_date.tzinfo is None:
                client_date = client_date.replace(tzinfo=timezone.utc)
            # If artifact hasn't been modified since client's date, return 304
            if last_modified <= client_date:
                return Response(
                    status_code=304,
                    headers={
                        "ETag": artifact_etag,
                        "Cache-Control": "public, max-age=31536000, immutable",
                        **({"Last-Modified": last_modified_str} if last_modified_str else {}),
                    },
                )
        except (ValueError, TypeError):
            pass  # Invalid date format, ignore and continue with download
    # Audit log download
    user_id = get_user_id(request)
    _log_audit(
@@ -3016,22 +3280,28 @@ def download_artifact(
    )
    db.commit()
-    # Build common checksum headers (always included)
+    # Build common headers (always included)
-    checksum_headers = {
+    common_headers = {
        "X-Checksum-SHA256": artifact.id,
        "X-Content-Length": str(artifact.size),
-        "ETag": f'"{artifact.id}"',
+        "ETag": artifact_etag,
        # Cache-Control: content-addressable storage is immutable
        "Cache-Control": "public, max-age=31536000, immutable",
    }
    # Add Last-Modified header
    if last_modified_str:
        common_headers["Last-Modified"] = last_modified_str
    # Add RFC 3230 Digest header
    try:
        digest_base64 = sha256_to_base64(artifact.id)
-        checksum_headers["Digest"] = f"sha-256={digest_base64}"
+        common_headers["Digest"] = f"sha-256={digest_base64}"
    except Exception:
        pass  # Skip if conversion fails
    # Add MD5 checksum if available
    if artifact.checksum_md5:
-        checksum_headers["X-Checksum-MD5"] = artifact.checksum_md5
+        common_headers["X-Checksum-MD5"] = artifact.checksum_md5
    # Determine download mode (query param overrides server default)
    download_mode = mode or settings.download_mode
@@ -3071,15 +3341,29 @@ def download_artifact(
    # Proxy mode (default fallback) - stream through backend
    # Handle range requests (verification not supported for partial downloads)
    if range:
-        stream, content_length, content_range = storage.get_stream(
+        try:
-            artifact.s3_key, range
+            stream, content_length, content_range = storage.get_stream(
-        )
+                artifact.s3_key, range
            )
        except Exception as e:
            # S3 returns InvalidRange error for unsatisfiable ranges
            error_str = str(e).lower()
            if "invalidrange" in error_str or "range" in error_str:
                raise HTTPException(
                    status_code=416,
                    detail="Range Not Satisfiable",
                    headers={
                        "Content-Range": f"bytes */{artifact.size}",
                        "Accept-Ranges": "bytes",
                    },
                )
            raise
        headers = {
-            "Content-Disposition": f'attachment; filename="{filename}"',
+            "Content-Disposition": build_content_disposition(filename),
            "Accept-Ranges": "bytes",
            "Content-Length": str(content_length),
-            **checksum_headers,
+            **common_headers,
        }
        if content_range:
            headers["Content-Range"] = content_range
@@ -3094,9 +3378,9 @@ def download_artifact(
    # Full download with optional verification
    base_headers = {
-        "Content-Disposition": f'attachment; filename="{filename}"',
+        "Content-Disposition": build_content_disposition(filename),
        "Accept-Ranges": "bytes",
-        **checksum_headers,
+        **common_headers,
    }
    # Pre-verification mode: verify before streaming
@@ -3164,11 +3448,42 @@ def download_artifact(
            },
        )
-    # No verification - direct streaming
+    # No verification - direct streaming with completion logging
    stream, content_length, _ = storage.get_stream(artifact.s3_key)
    def logged_stream():
        """
        Yield the storage stream's chunks unchanged and log how the transfer ended.

        Wraps the enclosing function's `stream` so the no-verification download
        path records one of three outcomes:
        - stream exhausted: info log with byte count, duration and throughput (MB/s)
        - GeneratorExit (generator closed before exhaustion, i.e. the client
          went away): warning log with bytes sent so far vs. content_length
        - any other exception: error log, then the exception is re-raised

        Durations are measured with time.monotonic() so a wall-clock adjustment
        during a long download cannot yield a negative or inflated duration.
        """
        import time
        start_time = time.monotonic()
        bytes_sent = 0
        try:
            for chunk in stream:
                bytes_sent += len(chunk)
                yield chunk
            # Download completed successfully
            duration = time.monotonic() - start_time
            throughput_mbps = (bytes_sent / (1024 * 1024)) / duration if duration > 0 else 0
            logger.info(
                f"Download completed: artifact={artifact.id[:16]}... "
                f"bytes={bytes_sent} duration={duration:.2f}s throughput={throughput_mbps:.2f}MB/s"
            )
        except GeneratorExit:
            # Client disconnected before download completed.
            # Falling through (no re-raise) lets the generator finish closing cleanly.
            duration = time.monotonic() - start_time
            logger.warning(
                f"Download interrupted: artifact={artifact.id[:16]}... "
                f"bytes_sent={bytes_sent}/{content_length} duration={duration:.2f}s"
            )
        except Exception as e:
            duration = time.monotonic() - start_time
            logger.error(
                f"Download error: artifact={artifact.id[:16]}... "
                f"bytes_sent={bytes_sent} duration={duration:.2f}s error={e}"
            )
            raise
    return StreamingResponse(
-        stream,
+        logged_stream(),
        media_type=artifact.content_type or "application/octet-stream",
        headers={
            **base_headers,
@@ -3276,7 +3591,7 @@ def head_artifact(
    # Build headers with checksum information
    headers = {
-        "Content-Disposition": f'attachment; filename="{filename}"',
+        "Content-Disposition": build_content_disposition(filename),
        "Accept-Ranges": "bytes",
        "Content-Length": str(artifact.size),
        "X-Artifact-Id": artifact.id,
--- a/backend/app/schemas.py
+++ b/backend/app/schemas.py
@@ -412,6 +412,9 @@ class UploadResponse(BaseModel):
    content_type: Optional[str] = None
    original_name: Optional[str] = None
    created_at: Optional[datetime] = None
    # Upload metrics (Issue #43)
    duration_ms: Optional[int] = None  # Upload duration in milliseconds
    throughput_mbps: Optional[float] = None  # Upload throughput in MB/s
 # Resumable upload schemas
@@ -478,6 +481,21 @@ class ResumableUploadStatusResponse(BaseModel):
    total_uploaded_bytes: int
 class UploadProgressResponse(BaseModel):
    """
    Progress information for an in-flight upload.

    Only upload_id and status are required; the counters default to 0 and
    every other field defaults to None.

    NOTE(review): the field names match the keys of the dict returned by
    S3Storage.get_upload_progress(); this model appears to be filled from
    that dict - confirm against the progress route.
    """
    # Identifier of the upload session being reported on
    upload_id: str
    # One of: 'in_progress', 'completed', 'failed', 'not_found'
    status: str  # 'in_progress', 'completed', 'failed', 'not_found'
    # Bytes received so far
    bytes_uploaded: int = 0
    # Expected total size in bytes, when known
    bytes_total: Optional[int] = None
    # Percentage of bytes_total received, when bytes_total is known
    percent_complete: Optional[float] = None
    # Number of parts received so far
    parts_uploaded: int = 0
    # Expected number of parts, when known
    parts_total: Optional[int] = None
    # When the upload session started
    started_at: Optional[datetime] = None
    # Seconds elapsed since started_at
    elapsed_seconds: Optional[float] = None
    # Average throughput so far
    throughput_mbps: Optional[float] = None
 # Consumer schemas
 class ConsumerResponse(BaseModel):
    id: UUID
--- a/backend/app/storage.py
+++ b/backend/app/storage.py
@@ -378,10 +378,16 @@ class S3Storage:
        """
        # First pass: compute all hashes by streaming through file
        try:
            import time
            sha256_hasher = hashlib.sha256()
            md5_hasher = hashlib.md5()
            sha1_hasher = hashlib.sha1()
            size = 0
            hash_start_time = time.time()
            last_log_time = hash_start_time
            log_interval_seconds = 5  # Log progress every 5 seconds
            logger.info(f"Computing hashes for large file: expected_size={content_length}")
            # Read file in chunks to compute hashes
            while True:
@@ -393,6 +399,18 @@ class S3Storage:
                sha1_hasher.update(chunk)
                size += len(chunk)
                # Log hash computation progress periodically
                current_time = time.time()
                if current_time - last_log_time >= log_interval_seconds:
                    elapsed = current_time - hash_start_time
                    percent = (size / content_length) * 100 if content_length > 0 else 0
                    throughput = (size / (1024 * 1024)) / elapsed if elapsed > 0 else 0
                    logger.info(
                        f"Hash computation progress: bytes={size}/{content_length} ({percent:.1f}%) "
                        f"throughput={throughput:.2f}MB/s"
                    )
                    last_log_time = current_time
                # Enforce file size limit during streaming (protection against spoofing)
                if size > settings.max_file_size:
                    raise FileSizeExceededError(
@@ -405,6 +423,14 @@ class S3Storage:
            sha256_hash = sha256_hasher.hexdigest()
            md5_hash = md5_hasher.hexdigest()
            sha1_hash = sha1_hasher.hexdigest()
            # Log hash computation completion
            hash_elapsed = time.time() - hash_start_time
            hash_throughput = (size / (1024 * 1024)) / hash_elapsed if hash_elapsed > 0 else 0
            logger.info(
                f"Hash computation completed: hash={sha256_hash[:16]}... "
                f"size={size} duration={hash_elapsed:.2f}s throughput={hash_throughput:.2f}MB/s"
            )
        except (HashComputationError, FileSizeExceededError):
            raise
        except Exception as e:
@@ -458,8 +484,19 @@ class S3Storage:
        upload_id = mpu["UploadId"]
        try:
            import time
            parts = []
            part_number = 1
            bytes_uploaded = 0
            upload_start_time = time.time()
            last_log_time = upload_start_time
            log_interval_seconds = 5  # Log progress every 5 seconds
            total_parts = (content_length + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE
            logger.info(
                f"Starting multipart upload: hash={sha256_hash[:16]}... "
                f"size={content_length} parts={total_parts}"
            )
            while True:
                chunk = file.read(MULTIPART_CHUNK_SIZE)
@@ -479,8 +516,32 @@ class S3Storage:
                        "ETag": response["ETag"],
                    }
                )
                bytes_uploaded += len(chunk)
                # Log progress periodically
                current_time = time.time()
                if current_time - last_log_time >= log_interval_seconds:
                    elapsed = current_time - upload_start_time
                    percent = (bytes_uploaded / content_length) * 100
                    throughput = (bytes_uploaded / (1024 * 1024)) / elapsed if elapsed > 0 else 0
                    logger.info(
                        f"Upload progress: hash={sha256_hash[:16]}... "
                        f"part={part_number}/{total_parts} "
                        f"bytes={bytes_uploaded}/{content_length} ({percent:.1f}%) "
                        f"throughput={throughput:.2f}MB/s"
                    )
                    last_log_time = current_time
                part_number += 1
            # Log completion
            total_elapsed = time.time() - upload_start_time
            final_throughput = (content_length / (1024 * 1024)) / total_elapsed if total_elapsed > 0 else 0
            logger.info(
                f"Multipart upload completed: hash={sha256_hash[:16]}... "
                f"size={content_length} duration={total_elapsed:.2f}s throughput={final_throughput:.2f}MB/s"
            )
            # Complete multipart upload
            complete_response = self.client.complete_multipart_upload(
                Bucket=self.bucket,
@@ -502,12 +563,28 @@ class S3Storage:
        except Exception as e:
            # Abort multipart upload on failure
-            logger.error(f"Multipart upload failed: {e}")
+            error_str = str(e).lower()
-            self.client.abort_multipart_upload(
+            is_client_disconnect = (
-                Bucket=self.bucket,
+                isinstance(e, (ConnectionResetError, BrokenPipeError)) or
-                Key=s3_key,
+                "connection" in error_str or "broken pipe" in error_str or "reset" in error_str
                UploadId=upload_id,
            )
            if is_client_disconnect:
                logger.warning(
                    f"Multipart upload aborted (client disconnect): hash={sha256_hash[:16]}... "
                    f"parts_uploaded={len(parts)} bytes_uploaded={bytes_uploaded}"
                )
            else:
                logger.error(f"Multipart upload failed: hash={sha256_hash[:16]}... error={e}")
            try:
                self.client.abort_multipart_upload(
                    Bucket=self.bucket,
                    Key=s3_key,
                    UploadId=upload_id,
                )
                logger.info(f"Multipart upload aborted and cleaned up: upload_id={upload_id[:16]}...")
            except Exception as abort_error:
                logger.error(f"Failed to abort multipart upload: {abort_error}")
            raise
    def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
@@ -529,12 +606,17 @@ class S3Storage:
        mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
        upload_id = mpu["UploadId"]
        import time
        session = {
            "upload_id": upload_id,
            "s3_key": s3_key,
            "already_exists": False,
            "parts": [],
            "expected_hash": expected_hash,
            "started_at": time.time(),
            "bytes_uploaded": 0,
            "expected_size": None,  # Set when init provides size
            "status": "in_progress",
        }
        self._active_uploads[upload_id] = session
        return session
@@ -561,10 +643,57 @@ class S3Storage:
        part_info = {
            "PartNumber": part_number,
            "ETag": response["ETag"],
            "size": len(data),
        }
        session["parts"].append(part_info)
        session["bytes_uploaded"] = session.get("bytes_uploaded", 0) + len(data)
        return part_info
    def get_upload_progress(self, upload_id: str) -> Optional[Dict[str, Any]]:
        """
        Report how far a tracked resumable upload has progressed.

        Looks the session up in self._active_uploads and derives the
        percentage, expected part count, elapsed time and throughput from
        the values recorded on it.

        Returns:
            A progress dict, or None when nothing is tracked under upload_id.
            Derived fields stay None when their inputs (expected size,
            start time) are not available.
        """
        import time

        tracked = self._active_uploads.get(upload_id)
        if not tracked:
            return None

        uploaded = tracked.get("bytes_uploaded", 0)
        total = tracked.get("expected_size")
        began = tracked.get("started_at")

        # Size-derived figures need a known, positive expected size
        percent = None
        part_count = None
        if total and total > 0:
            percent = round((uploaded / total) * 100, 2)
            # Ceiling division: a trailing partial chunk counts as a part
            part_count = (total + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE

        # Time-derived figures need a recorded start time
        elapsed_rounded = None
        rate = None
        if began:
            elapsed = time.time() - began
            elapsed_rounded = round(elapsed, 2)
            if elapsed > 0 and uploaded > 0:
                rate = round((uploaded / (1024 * 1024)) / elapsed, 2)

        return {
            "upload_id": upload_id,
            "status": tracked.get("status", "in_progress"),
            "bytes_uploaded": uploaded,
            "bytes_total": total,
            "parts_uploaded": len(tracked.get("parts", [])),
            "parts_total": part_count,
            "started_at": began,
            "elapsed_seconds": elapsed_rounded,
            "percent_complete": percent,
            "throughput_mbps": rate,
        }
    def set_upload_expected_size(self, upload_id: str, size: int):
        """
        Record the total size expected for a tracked upload.

        The value is stored on the session under "expected_size" for
        progress tracking. Upload IDs with no tracked session are ignored.
        """
        tracked = self._active_uploads.get(upload_id)
        if not tracked:
            return
        tracked["expected_size"] = size
    def complete_resumable_upload(self, upload_id: str) -> Tuple[str, str]:
        """
        Complete a resumable upload.
--- a/backend/pytest.ini
+++ b/backend/pytest.ini
@@ -12,6 +12,8 @@ markers =
    unit: Unit tests (no external dependencies)
    integration: Integration tests (require database/storage)
    slow: Slow tests (skip with -m "not slow")
    large: Large file tests (100MB+, skip with -m "not large")
    concurrent: Concurrent operation tests
 # Coverage configuration
 [coverage:run]
--- a/backend/tests/conftest.py
+++ b/backend/tests/conftest.py
@@ -9,6 +9,37 @@ This module provides:
 import os
 import pytest
 # =============================================================================
 # Pytest Markers
 # =============================================================================
 def pytest_configure(config):
    """
    Pytest hook: declare this suite's custom markers.

    Each entry is added to the "markers" ini option in the order listed.
    """
    marker_lines = (
        "auth_intensive: marks tests that make many login requests (excluded from CI integration tests due to rate limiting)",
        "integration: marks tests as integration tests",
        "large: marks tests that handle large files (slow)",
        "slow: marks tests as slow running",
        "requires_direct_s3: marks tests that require direct S3/MinIO access (skipped in CI where S3 is not directly accessible)",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)
 import io
 from typing import Generator
 from unittest.mock import MagicMock
@@ -32,6 +63,8 @@ from tests.factories import (
    compute_md5,
    compute_sha1,
    upload_test_file,
    generate_content,
    generate_content_with_hash,
    TEST_CONTENT_HELLO,
    TEST_HASH_HELLO,
    TEST_MD5_HELLO,
@@ -179,29 +212,64 @@ def test_app():
 # =============================================================================
-@pytest.fixture
+@pytest.fixture(scope="session")
 def integration_client():
    """
    Create an authenticated test client for integration tests.
-    Uses the real database and MinIO from docker-compose.local.yml.
+    Uses the real database and MinIO from docker-compose.local.yml or deployed environment.
-    Authenticates as admin for write operations.
+    Authenticates as admin for write operations. Session-scoped to reuse login across tests.
    Environment variables:
        ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
        ORCHARD_TEST_USERNAME: Admin username for authentication (default: admin)
        ORCHARD_TEST_PASSWORD: Admin password for authentication (default: changeme123)
    """
-    from httpx import Client
+    import httpx
-    # Connect to the running orchard-server container
+    # Connect to the running orchard-server container or deployed environment
    base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
    username = os.environ.get("ORCHARD_TEST_USERNAME", "admin")
    password = os.environ.get("ORCHARD_TEST_PASSWORD", "changeme123")
-    with Client(base_url=base_url, timeout=30.0) as client:
+    with httpx.Client(base_url=base_url, timeout=30.0) as client:
        # Login as admin to enable write operations
        login_response = client.post(
            "/api/v1/auth/login",
-            json={"username": "admin", "password": "changeme123"},
+            json={"username": username, "password": password},
        )
        # If login fails, tests will fail - that's expected if auth is broken
        if login_response.status_code != 200:
-            # Try to continue without auth for backward compatibility
+            pytest.fail(
-            pass
+                f"Authentication failed against {base_url}: {login_response.status_code} - {login_response.text}. "
                f"Set ORCHARD_TEST_USERNAME and ORCHARD_TEST_PASSWORD environment variables if using non-default credentials."
            )
        # Verify cookie was set
        if not client.cookies:
            pytest.fail(
                f"Login succeeded but no session cookie was set. Response headers: {login_response.headers}"
            )
        yield client
@pytest.fixture
 def auth_client():
    """
    Create a function-scoped test client for authentication tests.
    Unlike integration_client (session-scoped), this creates a fresh client
    for each test. Use this for tests that manipulate authentication state
    (login, logout, cookie clearing) to avoid polluting other tests.
    Environment variables:
        ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
    """
    import httpx
    base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
    with httpx.Client(base_url=base_url, timeout=30.0) as client:
        yield client
@@ -271,3 +339,41 @@ def test_content():
    content = f"test-content-{uuid.uuid4().hex}".encode()
    sha256 = compute_sha256(content)
    return (content, sha256)
@pytest.fixture
 def sized_content():
    """
    Factory fixture for generating content of specific sizes.
    Usage:
        def test_example(sized_content):
            content, hash = sized_content(1024)  # 1KB
            content, hash = sized_content(1024 * 1024)  # 1MB
    """
    def _generate(size: int, seed: int = None):
        return generate_content_with_hash(size, seed)
    return _generate
 # =============================================================================
 # Size Constants for Tests
 # =============================================================================
 # Byte counts for boundary testing, built up from the KB/MB/GB base units
 SIZE_1B = 1
 SIZE_1KB = 1 << 10
 SIZE_10KB = 10 * SIZE_1KB
 SIZE_100KB = 100 * SIZE_1KB
 SIZE_1MB = 1 << 20
 SIZE_5MB = 5 * SIZE_1MB
 SIZE_10MB = 10 * SIZE_1MB
 SIZE_50MB = 50 * SIZE_1MB
 SIZE_100MB = 100 * SIZE_1MB
 SIZE_250MB = 250 * SIZE_1MB
 SIZE_500MB = 500 * SIZE_1MB
 SIZE_1GB = 1 << 30
 # Chunk size boundaries (based on typical S3 multipart chunk sizes)
 CHUNK_SIZE = 64 * SIZE_1KB  # 64KB typical chunk
 MULTIPART_THRESHOLD = 100 * SIZE_1MB  # 100MB multipart threshold
--- a/backend/tests/factories.py
+++ b/backend/tests/factories.py
@@ -130,6 +130,41 @@ def upload_test_file(
    return response.json()
 def generate_content(size: int, seed: Optional[int] = None) -> bytes:
    """
    Generate deterministic or random content of a specified size.
    Args:
        size: Size of content in bytes
        seed: Optional seed for reproducible content (None for random)
    Returns:
        Bytes of the specified size
    """
    if size == 0:
        return b""
    if seed is not None:
        import random
        rng = random.Random(seed)
        return bytes(rng.randint(0, 255) for _ in range(size))
    return os.urandom(size)
 def generate_content_with_hash(size: int, seed: Optional[int] = None) -> tuple[bytes, str]:
    """
    Build test content and pair it with its SHA256 hash.

    Args:
        size: Size of content in bytes
        seed: Optional seed for reproducible content

    Returns:
        Tuple of (content_bytes, sha256_hash), where the hash is
        compute_sha256 applied to the generated content
    """
    payload = generate_content(size, seed)
    digest = compute_sha256(payload)
    return payload, digest
 # =============================================================================
 # Project/Package Factories
 # =============================================================================
--- a/backend/tests/integration/test_auth_api.py
+++ b/backend/tests/integration/test_auth_api.py
@@ -1,16 +1,25 @@
-"""Integration tests for authentication API endpoints."""
+"""Integration tests for authentication API endpoints.
 Note: These tests are marked as auth_intensive because they make many login
 requests. Dev/stage deployments have relaxed rate limits (1000/minute) to
 allow these tests to run. Production uses strict rate limits (5/minute).
 """
 import pytest
 from uuid import uuid4
 # Mark all tests in this module as auth_intensive (informational, not excluded from CI)
 pytestmark = pytest.mark.auth_intensive
 class TestAuthLogin:
    """Tests for login endpoint."""
    @pytest.mark.integration
-    def test_login_success(self, integration_client):
+    def test_login_success(self, auth_client):
        """Test successful login with default admin credentials."""
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
@@ -21,9 +30,9 @@ class TestAuthLogin:
        assert "orchard_session" in response.cookies
    @pytest.mark.integration
-    def test_login_invalid_password(self, integration_client):
+    def test_login_invalid_password(self, auth_client):
        """Test login with wrong password."""
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "wrongpassword"},
        )
@@ -31,9 +40,9 @@ class TestAuthLogin:
        assert "Invalid username or password" in response.json()["detail"]
    @pytest.mark.integration
-    def test_login_nonexistent_user(self, integration_client):
+    def test_login_nonexistent_user(self, auth_client):
        """Test login with non-existent user."""
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": "nonexistent", "password": "password"},
        )
@@ -44,24 +53,24 @@ class TestAuthLogout:
    """Tests for logout endpoint."""
    @pytest.mark.integration
-    def test_logout_success(self, integration_client):
+    def test_logout_success(self, auth_client):
        """Test successful logout."""
        # First login
-        login_response = integration_client.post(
+        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        assert login_response.status_code == 200
        # Then logout
-        logout_response = integration_client.post("/api/v1/auth/logout")
+        logout_response = auth_client.post("/api/v1/auth/logout")
        assert logout_response.status_code == 200
        assert "Logged out successfully" in logout_response.json()["message"]
    @pytest.mark.integration
-    def test_logout_without_session(self, integration_client):
+    def test_logout_without_session(self, auth_client):
        """Test logout without being logged in."""
-        response = integration_client.post("/api/v1/auth/logout")
+        response = auth_client.post("/api/v1/auth/logout")
        # Should succeed even without session
        assert response.status_code == 200
@@ -70,15 +79,15 @@ class TestAuthMe:
    """Tests for get current user endpoint."""
    @pytest.mark.integration
-    def test_get_me_authenticated(self, integration_client):
+    def test_get_me_authenticated(self, auth_client):
        """Test getting current user when authenticated."""
        # Login first
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        response = integration_client.get("/api/v1/auth/me")
+        response = auth_client.get("/api/v1/auth/me")
        assert response.status_code == 200
        data = response.json()
        assert data["username"] == "admin"
@@ -87,67 +96,88 @@ class TestAuthMe:
        assert "created_at" in data
    @pytest.mark.integration
-    def test_get_me_unauthenticated(self, integration_client):
+    def test_get_me_unauthenticated(self, auth_client):
        """Test getting current user without authentication."""
        # Clear any existing cookies
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        response = integration_client.get("/api/v1/auth/me")
+        response = auth_client.get("/api/v1/auth/me")
        assert response.status_code == 401
        assert "Not authenticated" in response.json()["detail"]
 class TestAuthChangePassword:
-    """Tests for change password endpoint."""
+    """Tests for change password endpoint.
    Note: These tests use dedicated test users instead of admin to avoid
    invalidating the integration_client session (which uses admin).
    """
    @pytest.mark.integration
-    def test_change_password_success(self, integration_client):
+    def test_change_password_success(self, auth_client):
        """Test successful password change."""
-        # Login first
+        # Login as admin to create a test user
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"pwchange_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "oldpassword123"},
        )
        # Login as test user
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "oldpassword123"},
        )
        # Change password
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/change-password",
-            json={"current_password": "changeme123", "new_password": "newpassword123"},
+            json={"current_password": "oldpassword123", "new_password": "newpassword123"},
        )
        assert response.status_code == 200
        # Verify old password no longer works
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/login",
-            json={"username": "admin", "password": "changeme123"},
+            json={"username": test_username, "password": "oldpassword123"},
        )
        assert response.status_code == 401
        # Verify new password works
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/login",
-            json={"username": "admin", "password": "newpassword123"},
+            json={"username": test_username, "password": "newpassword123"},
        )
        assert response.status_code == 200
        # Reset password back to original for other tests
        reset_response = integration_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "newpassword123", "new_password": "changeme123"},
        )
        assert reset_response.status_code == 200, "Failed to reset admin password back to default"
    @pytest.mark.integration
-    def test_change_password_wrong_current(self, integration_client):
+    def test_change_password_wrong_current(self, auth_client):
        """Test password change with wrong current password."""
-        # Login first
+        # Login as admin to create a test user
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"pwwrong_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
-        response = integration_client.post(
+        # Login as test user
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        response = auth_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "wrongpassword", "new_password": "newpassword"},
        )
@@ -159,16 +189,16 @@ class TestAPIKeys:
    """Tests for API key management endpoints."""
    @pytest.mark.integration
-    def test_create_and_list_api_key(self, integration_client):
+    def test_create_and_list_api_key(self, auth_client):
        """Test creating and listing API keys."""
        # Login first
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        # Create API key
-        create_response = integration_client.post(
+        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "test-key", "description": "Test API key"},
        )
@@ -182,23 +212,23 @@ class TestAPIKeys:
        api_key = data["key"]
        # List API keys
-        list_response = integration_client.get("/api/v1/auth/keys")
+        list_response = auth_client.get("/api/v1/auth/keys")
        assert list_response.status_code == 200
        keys = list_response.json()
        assert any(k["id"] == key_id for k in keys)
        # Clean up - delete the key
-        integration_client.delete(f"/api/v1/auth/keys/{key_id}")
+        auth_client.delete(f"/api/v1/auth/keys/{key_id}")
    @pytest.mark.integration
-    def test_use_api_key_for_auth(self, integration_client):
+    def test_use_api_key_for_auth(self, auth_client):
        """Test using API key for authentication."""
        # Login and create API key
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        create_response = integration_client.post(
+        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "auth-test-key"},
        )
@@ -206,8 +236,8 @@ class TestAPIKeys:
        key_id = create_response.json()["id"]
        # Clear cookies and use API key
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        response = integration_client.get(
+        response = auth_client.get(
            "/api/v1/auth/me",
            headers={"Authorization": f"Bearer {api_key}"},
        )
@@ -215,21 +245,21 @@ class TestAPIKeys:
        assert response.json()["username"] == "admin"
        # Clean up
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        integration_client.delete(f"/api/v1/auth/keys/{key_id}")
+        auth_client.delete(f"/api/v1/auth/keys/{key_id}")
    @pytest.mark.integration
-    def test_delete_api_key(self, integration_client):
+    def test_delete_api_key(self, auth_client):
        """Test revoking an API key."""
        # Login and create API key
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        create_response = integration_client.post(
+        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "delete-test-key"},
        )
@@ -237,12 +267,12 @@ class TestAPIKeys:
        api_key = create_response.json()["key"]
        # Delete the key
-        delete_response = integration_client.delete(f"/api/v1/auth/keys/{key_id}")
+        delete_response = auth_client.delete(f"/api/v1/auth/keys/{key_id}")
        assert delete_response.status_code == 200
        # Verify key no longer works
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        response = integration_client.get(
+        response = auth_client.get(
            "/api/v1/auth/me",
            headers={"Authorization": f"Bearer {api_key}"},
        )
@@ -253,32 +283,32 @@ class TestAdminUserManagement:
    """Tests for admin user management endpoints."""
    @pytest.mark.integration
-    def test_list_users(self, integration_client):
+    def test_list_users(self, auth_client):
        """Test listing users as admin."""
        # Login as admin
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        response = integration_client.get("/api/v1/admin/users")
+        response = auth_client.get("/api/v1/admin/users")
        assert response.status_code == 200
        users = response.json()
        assert len(users) >= 1
        assert any(u["username"] == "admin" for u in users)
    @pytest.mark.integration
-    def test_create_user(self, integration_client):
+    def test_create_user(self, auth_client):
        """Test creating a new user as admin."""
        # Login as admin
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        # Create new user
        test_username = f"testuser_{uuid4().hex[:8]}"
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/admin/users",
            json={
                "username": test_username,
@@ -293,31 +323,31 @@ class TestAdminUserManagement:
        assert data["is_admin"] is False
        # Verify new user can login
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        login_response = integration_client.post(
+        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "testpassword"},
        )
        assert login_response.status_code == 200
    @pytest.mark.integration
-    def test_update_user(self, integration_client):
+    def test_update_user(self, auth_client):
        """Test updating a user as admin."""
        # Login as admin
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        # Create a test user
        test_username = f"updateuser_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password"},
        )
        # Update the user
-        response = integration_client.put(
+        response = auth_client.put(
            f"/api/v1/admin/users/{test_username}",
            json={"email": "updated@example.com", "is_admin": True},
        )
@@ -327,59 +357,59 @@ class TestAdminUserManagement:
        assert data["is_admin"] is True
    @pytest.mark.integration
-    def test_reset_user_password(self, integration_client):
+    def test_reset_user_password(self, auth_client):
        """Test resetting a user's password as admin."""
        # Login as admin
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        # Create a test user
        test_username = f"resetuser_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "oldpassword"},
        )
        # Reset password
-        response = integration_client.post(
+        response = auth_client.post(
            f"/api/v1/admin/users/{test_username}/reset-password",
            json={"new_password": "newpassword"},
        )
        assert response.status_code == 200
        # Verify new password works
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        login_response = integration_client.post(
+        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "newpassword"},
        )
        assert login_response.status_code == 200
    @pytest.mark.integration
-    def test_non_admin_cannot_access_admin_endpoints(self, integration_client):
+    def test_non_admin_cannot_access_admin_endpoints(self, auth_client):
        """Test that non-admin users cannot access admin endpoints."""
        # Login as admin and create non-admin user
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"nonadmin_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password", "is_admin": False},
        )
        # Login as non-admin
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password"},
        )
        # Try to access admin endpoints
-        response = integration_client.get("/api/v1/admin/users")
+        response = auth_client.get("/api/v1/admin/users")
        assert response.status_code == 403
        assert "Admin privileges required" in response.json()["detail"]
@@ -388,28 +418,28 @@ class TestSecurityEdgeCases:
    """Tests for security edge cases and validation."""
    @pytest.mark.integration
-    def test_login_inactive_user(self, integration_client):
+    def test_login_inactive_user(self, auth_client):
        """Test that inactive users cannot login."""
        # Login as admin and create a user
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"inactive_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        # Deactivate the user
-        integration_client.put(
+        auth_client.put(
            f"/api/v1/admin/users/{test_username}",
            json={"is_active": False},
        )
        # Try to login as inactive user
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
@@ -417,14 +447,14 @@ class TestSecurityEdgeCases:
        assert "Invalid username or password" in response.json()["detail"]
    @pytest.mark.integration
-    def test_password_too_short_on_create(self, integration_client):
+    def test_password_too_short_on_create(self, auth_client):
        """Test that short passwords are rejected when creating users."""
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        response = integration_client.post(
+        response = auth_client.post(
            "/api/v1/admin/users",
            json={"username": f"shortpw_{uuid4().hex[:8]}", "password": "short"},
        )
@@ -432,36 +462,49 @@ class TestSecurityEdgeCases:
        assert "at least 8 characters" in response.json()["detail"]
    @pytest.mark.integration
-    def test_password_too_short_on_change(self, integration_client):
+    def test_password_too_short_on_change(self, auth_client):
        """Test that short passwords are rejected when changing password."""
-        integration_client.post(
+        # Create test user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"shortchange_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
-        response = integration_client.post(
+        # Login as test user
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        response = auth_client.post(
            "/api/v1/auth/change-password",
-            json={"current_password": "changeme123", "new_password": "short"},
+            json={"current_password": "password123", "new_password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]
    @pytest.mark.integration
-    def test_password_too_short_on_reset(self, integration_client):
+    def test_password_too_short_on_reset(self, auth_client):
        """Test that short passwords are rejected when resetting password."""
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        # Create a test user first
        test_username = f"resetshort_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
-        response = integration_client.post(
+        response = auth_client.post(
            f"/api/v1/admin/users/{test_username}/reset-password",
            json={"new_password": "short"},
        )
@@ -469,23 +512,23 @@ class TestSecurityEdgeCases:
        assert "at least 8 characters" in response.json()["detail"]
    @pytest.mark.integration
-    def test_duplicate_username_rejected(self, integration_client):
+    def test_duplicate_username_rejected(self, auth_client):
        """Test that duplicate usernames are rejected."""
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"duplicate_{uuid4().hex[:8]}"
        # Create user first time
-        response1 = integration_client.post(
+        response1 = auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        assert response1.status_code == 200
        # Try to create same username again
-        response2 = integration_client.post(
+        response2 = auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password456"},
        )
@@ -493,14 +536,14 @@ class TestSecurityEdgeCases:
        assert "already exists" in response2.json()["detail"]
    @pytest.mark.integration
-    def test_cannot_delete_other_users_api_key(self, integration_client):
+    def test_cannot_delete_other_users_api_key(self, auth_client):
        """Test that users cannot delete API keys owned by other users."""
        # Login as admin and create an API key
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        create_response = integration_client.post(
+        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "admin-key"},
        )
@@ -508,253 +551,65 @@ class TestSecurityEdgeCases:
        # Create a non-admin user
        test_username = f"nonadmin_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        # Login as non-admin
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        # Try to delete admin's API key
-        response = integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
+        response = auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
        assert response.status_code == 403
        assert "Cannot delete another user's API key" in response.json()["detail"]
        # Cleanup: login as admin and delete the key
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
-        integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
+        auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
    @pytest.mark.integration
-    def test_sessions_invalidated_on_password_change(self, integration_client):
+    def test_sessions_invalidated_on_password_change(self, auth_client):
        """Test that all sessions are invalidated when password is changed."""
        # Create a test user
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"sessiontest_{uuid4().hex[:8]}"
-        integration_client.post(
+        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        # Login as test user
-        integration_client.cookies.clear()
+        auth_client.cookies.clear()
-        login_response = integration_client.post(
+        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        assert login_response.status_code == 200
        # Verify session works
-        me_response = integration_client.get("/api/v1/auth/me")
+        me_response = auth_client.get("/api/v1/auth/me")
        assert me_response.status_code == 200
        # Change password
-        integration_client.post(
+        auth_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "password123", "new_password": "newpassword123"},
        )
        # Old session should be invalidated - try to access /me
        # (note: the change-password call itself may have cleared the session cookie)
-        me_response2 = integration_client.get("/api/v1/auth/me")
+        me_response2 = auth_client.get("/api/v1/auth/me")
        # This should fail because all sessions were invalidated
        assert me_response2.status_code == 401
 class TestSecurityEdgeCases:
    """Tests for security edge cases and validation."""
    @pytest.mark.integration
    def test_login_inactive_user(self, integration_client):
        """Test that inactive users cannot login."""
        # Login as admin and create a user
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"inactive_{uuid4().hex[:8]}"
        integration_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        # Deactivate the user
        integration_client.put(
            f"/api/v1/admin/users/{test_username}",
            json={"is_active": False},
        )
        # Try to login as inactive user
        integration_client.cookies.clear()
        response = integration_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        assert response.status_code == 401
        assert "Invalid username or password" in response.json()["detail"]
    @pytest.mark.integration
    def test_password_too_short_on_create(self, integration_client):
        """Test that short passwords are rejected when creating users."""
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        response = integration_client.post(
            "/api/v1/admin/users",
            json={"username": f"shortpw_{uuid4().hex[:8]}", "password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]
    @pytest.mark.integration
    def test_password_too_short_on_change(self, integration_client):
        """Test that short passwords are rejected when changing password."""
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        response = integration_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "changeme123", "new_password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]
    @pytest.mark.integration
    def test_password_too_short_on_reset(self, integration_client):
        """Test that short passwords are rejected when resetting password."""
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        # Create a test user first
        test_username = f"resetshort_{uuid4().hex[:8]}"
        integration_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        response = integration_client.post(
            f"/api/v1/admin/users/{test_username}/reset-password",
            json={"new_password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]
    @pytest.mark.integration
    def test_duplicate_username_rejected(self, integration_client):
        """Test that duplicate usernames are rejected."""
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"duplicate_{uuid4().hex[:8]}"
        # Create user first time
        response1 = integration_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        assert response1.status_code == 200
        # Try to create same username again
        response2 = integration_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password456"},
        )
        assert response2.status_code == 409
        assert "already exists" in response2.json()["detail"]
    @pytest.mark.integration
    def test_cannot_delete_other_users_api_key(self, integration_client):
        """Test that users cannot delete API keys owned by other users."""
        # Login as admin and create an API key
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        create_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "admin-key"},
        )
        admin_key_id = create_response.json()["id"]
        # Create a non-admin user
        test_username = f"nonadmin_{uuid4().hex[:8]}"
        integration_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        # Login as non-admin
        integration_client.cookies.clear()
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        # Try to delete admin's API key
        response = integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
        assert response.status_code == 403
        assert "Cannot delete another user's API key" in response.json()["detail"]
        # Cleanup: login as admin and delete the key
        integration_client.cookies.clear()
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
    @pytest.mark.integration
    def test_sessions_invalidated_on_password_change(self, integration_client):
        """Test that all sessions are invalidated when password is changed."""
        # Create a test user
        integration_client.post(
            "/api/v1/auth/login",
            json={"username": "admin", "password": "changeme123"},
        )
        test_username = f"sessiontest_{uuid4().hex[:8]}"
        integration_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        # Login as test user
        integration_client.cookies.clear()
        login_response = integration_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        assert login_response.status_code == 200
        # Verify session works
        me_response = integration_client.get("/api/v1/auth/me")
        assert me_response.status_code == 200
        # Change password
        integration_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "password123", "new_password": "newpassword123"},
        )
        # Old session should be invalidated - try to access /me
        # (note: the change-password call itself may have cleared the session cookie)
        me_response2 = integration_client.get("/api/v1/auth/me")
        # This should fail because all sessions were invalidated
        assert me_response2.status_code == 401
--- a/backend/tests/integration/test_concurrent_operations.py
+++ b/backend/tests/integration/test_concurrent_operations.py
@@ -0,0 +1,737 @@
 """
 Integration tests for concurrent upload and download operations.
 Tests cover:
 - Concurrent uploads of different files
 - Concurrent uploads of same file (deduplication race)
 - Concurrent downloads of same artifact
 - Concurrent downloads of different artifacts
 - Mixed concurrent uploads and downloads
 - Data corruption prevention under concurrency
 """
 import pytest
 import io
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tests.factories import (
    compute_sha256,
    upload_test_file,
    generate_content_with_hash,
 )
 def get_api_key(integration_client):
    """Request a fresh API key through the given client.

    Posts to ``/api/v1/auth/keys`` with a name of the form
    ``concurrent-test-<8 hex chars>``.

    Returns:
        The ``"key"`` field of the JSON response when the server answers
        with HTTP 200, otherwise ``None``.
    """
    from uuid import uuid4

    key_name = f"concurrent-test-{uuid4().hex[:8]}"
    reply = integration_client.post("/api/v1/auth/keys", json={"name": key_name})
    return reply.json()["key"] if reply.status_code == 200 else None
 class TestConcurrentUploads:
    """Tests for concurrent upload operations.

    Each test builds a list of upload jobs and hands it to
    ``_upload_concurrently``, which runs one thread per job and opens a
    separate httpx client (authenticated with a bearer API key) in each
    thread.
    """

    @staticmethod
    def _upload_concurrently(api_key, jobs):
        """Run every upload job at the same time and collect the outcomes.

        Args:
            api_key: API key sent as ``Authorization: Bearer <key>``.
            jobs: List of ``(idx, project, package, filename, content, tag)``
                tuples, one per upload.

        Returns:
            ``(results, errors)``. ``results`` holds ``(idx, response_json)``
            for every upload answered with HTTP 200; ``errors`` holds one
            descriptive string per failed upload. Both follow job order.
        """
        # Imported here because this module does not import httpx at the top.
        from httpx import Client

        base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

        def upload_one(job):
            idx, project, package, filename, content, tag = job
            try:
                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files={
                            "file": (filename, io.BytesIO(content), "application/octet-stream")
                        },
                        data={"tag": tag},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        return (idx, response.json()), None
                    return None, (
                        f"Worker {idx}: Status {response.status_code}: {response.text}"
                    )
            except Exception as e:
                # Reported rather than raised so one broken worker does not
                # hide the outcome of the others.
                return None, f"Worker {idx}: {str(e)}"

        results = []
        errors = []
        # One thread per job so that all uploads are in flight together.
        with ThreadPoolExecutor(max_workers=max(1, len(jobs))) as executor:
            for result, error in executor.map(upload_one, jobs):
                if error is None:
                    results.append(result)
                else:
                    errors.append(error)
        return results, errors

    @staticmethod
    def _distinct_file_jobs(project, package, files_data, tag_prefix):
        """Build one job per ``(content, hash)`` pair, tagged ``<tag_prefix>-<idx>``."""
        return [
            (idx, project, package, f"file-{idx}.bin", content, f"{tag_prefix}-{idx}")
            for idx, (content, _digest) in enumerate(files_data)
        ]

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_2_concurrent_uploads_different_files(self, integration_client, test_package):
        """Test 2 concurrent uploads of different files."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        files_data = [
            generate_content_with_hash(1024, seed=i) for i in range(2)
        ]
        jobs = self._distinct_file_jobs(project, package, files_data, "concurrent")
        results, errors = self._upload_concurrently(api_key, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == 2
        # Verify each upload returned correct artifact_id
        for idx, result in results:
            assert result["artifact_id"] == files_data[idx][1]

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_5_concurrent_uploads_different_files(self, integration_client, test_package):
        """Test 5 concurrent uploads of different files."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        num_files = 5
        files_data = [
            generate_content_with_hash(2048, seed=100 + i) for i in range(num_files)
        ]
        jobs = self._distinct_file_jobs(project, package, files_data, "concurrent5")
        results, errors = self._upload_concurrently(api_key, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files
        # Verify all uploads have unique artifact_ids
        artifact_ids = set(result["artifact_id"] for _idx, result in results)
        assert len(artifact_ids) == num_files
        # Verify each upload returned the artifact_id of its own content
        for idx, result in results:
            assert result["artifact_id"] == files_data[idx][1]

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_10_concurrent_uploads_different_files(self, integration_client, test_package):
        """Test 10 concurrent uploads of different files."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        num_files = 10
        files_data = [
            generate_content_with_hash(1024, seed=200 + i) for i in range(num_files)
        ]
        jobs = self._distinct_file_jobs(project, package, files_data, "concurrent10")
        results, errors = self._upload_concurrently(api_key, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files
        # Verify each upload returned the artifact_id of its own content
        for idx, result in results:
            assert result["artifact_id"] == files_data[idx][1]

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_concurrent_uploads_same_file_deduplication(self, integration_client, test_package):
        """Test concurrent uploads of same file handle deduplication correctly."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        content, expected_hash = generate_content_with_hash(4096, seed=999)
        num_concurrent = 5
        # Same bytes in every job; only the filename and tag differ.
        jobs = [
            (idx, project, package, f"same-{idx}.bin", content, f"dedup-{idx}")
            for idx in range(num_concurrent)
        ]
        results, errors = self._upload_concurrently(api_key, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_concurrent
        # All should have same artifact_id
        artifact_ids = set(result["artifact_id"] for _idx, result in results)
        assert len(artifact_ids) == 1
        assert expected_hash in artifact_ids
        # Verify final ref_count equals number of uploads
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == num_concurrent

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_concurrent_uploads_to_different_packages(self, integration_client, test_project, unique_test_id):
        """Test concurrent uploads to different packages."""
        project = test_project
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        num_packages = 3
        package_names = []
        # Create multiple packages
        for i in range(num_packages):
            pkg_name = f"pkg-{unique_test_id}-{i}"
            response = integration_client.post(
                f"/api/v1/project/{project}/packages",
                json={"name": pkg_name, "description": f"Package {i}"},
            )
            assert response.status_code == 200
            package_names.append(pkg_name)
        files_data = [
            generate_content_with_hash(1024, seed=300 + i) for i in range(num_packages)
        ]
        # One upload per package, all under the same tag name.
        jobs = [
            (idx, project, package_names[idx], f"file-{idx}.bin", content, "latest")
            for idx, (content, _digest) in enumerate(files_data)
        ]
        results, errors = self._upload_concurrently(api_key, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_packages
        # Verify each package received the artifact_id of its own content
        for idx, result in results:
            assert result["artifact_id"] == files_data[idx][1]
 class TestConcurrentDownloads:
    """Tests for concurrent download operations.

    Each test uploads its fixtures through ``integration_client`` first and
    then fetches them with ``_download_concurrently``, which runs one thread
    per download and opens a separate httpx client in each thread.
    """

    @staticmethod
    def _download_concurrently(project, package, jobs):
        """Fetch every requested tag at the same time and collect the outcomes.

        Args:
            project: Project name used in the download URL.
            package: Package name used in the download URL.
            jobs: List of ``(label, tag)`` tuples. ``label`` prefixes any
                error message produced for that download.

        Returns:
            ``(results, errors)``. ``results`` holds ``(tag, body_bytes)``
            for every download answered with HTTP 200; ``errors`` holds one
            descriptive string per failed download. Both follow job order.
        """
        # Imported here because this module does not import httpx at the top.
        from httpx import Client

        base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

        def download_one(job):
            label, tag = job
            try:
                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/{tag}",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        return (tag, response.content), None
                    return None, f"{label}: Status {response.status_code}"
            except Exception as e:
                # Reported rather than raised so one broken worker does not
                # hide the outcome of the others.
                return None, f"{label}: {str(e)}"

        results = []
        errors = []
        # One thread per job so that all downloads are in flight together.
        with ThreadPoolExecutor(max_workers=max(1, len(jobs))) as executor:
            for result, error in executor.map(download_one, jobs):
                if error is None:
                    results.append(result)
                else:
                    errors.append(error)
        return results, errors

    def _assert_same_artifact_downloads(self, project, package, tag, content, count):
        """Download ``tag`` ``count`` times concurrently; every body must equal ``content``."""
        jobs = [(f"Worker {idx}", tag) for idx in range(count)]
        results, errors = self._download_concurrently(project, package, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == count
        # All downloads should match original
        for _tag, downloaded in results:
            assert downloaded == content

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_2_concurrent_downloads_same_artifact(self, integration_client, test_package):
        """Test 2 concurrent downloads of same artifact."""
        project, package = test_package
        content, _expected_hash = generate_content_with_hash(2048, seed=400)
        # Upload first
        upload_test_file(integration_client, project, package, content, tag="download-test")
        self._assert_same_artifact_downloads(project, package, "download-test", content, 2)

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_5_concurrent_downloads_same_artifact(self, integration_client, test_package):
        """Test 5 concurrent downloads of same artifact."""
        project, package = test_package
        content, _expected_hash = generate_content_with_hash(4096, seed=500)
        upload_test_file(integration_client, project, package, content, tag="download5-test")
        self._assert_same_artifact_downloads(project, package, "download5-test", content, 5)

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_10_concurrent_downloads_same_artifact(self, integration_client, test_package):
        """Test 10 concurrent downloads of same artifact."""
        project, package = test_package
        content, _expected_hash = generate_content_with_hash(8192, seed=600)
        upload_test_file(integration_client, project, package, content, tag="download10-test")
        self._assert_same_artifact_downloads(project, package, "download10-test", content, 10)

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_concurrent_downloads_different_artifacts(self, integration_client, test_package):
        """Test concurrent downloads of different artifacts."""
        project, package = test_package
        # Upload multiple files
        num_files = 5
        expected_by_tag = {}
        for i in range(num_files):
            content, _expected_hash = generate_content_with_hash(1024, seed=700 + i)
            tag = f"multi-download-{i}"
            upload_test_file(
                integration_client, project, package, content,
                tag=tag
            )
            expected_by_tag[tag] = content
        jobs = [(f"Tag {tag}", tag) for tag in expected_by_tag]
        results, errors = self._download_concurrently(project, package, jobs)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files
        for tag, downloaded in results:
            assert downloaded == expected_by_tag[tag], f"Content mismatch for {tag}"
 class TestMixedConcurrentOperations:
    """Tests for mixed concurrent upload and download operations.

    Workers run on a ``ThreadPoolExecutor`` and report back through their
    return value (``(payload, error)`` or an error string), which each test
    reads from the futures once the pool has shut down.
    """

    @staticmethod
    def _client():
        """Open a fresh httpx client pointed at the server under test."""
        # Imported here because this module does not import httpx at the top.
        from httpx import Client

        base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
        return Client(base_url=base_url, timeout=60.0)

    @staticmethod
    def _post_upload(client, api_key, project, package, filename, content, tag):
        """POST ``content`` as a multipart file upload and return the response."""
        return client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={
                "file": (filename, io.BytesIO(content), "application/octet-stream")
            },
            data={"tag": tag},
            headers={"Authorization": f"Bearer {api_key}"},
        )

    @staticmethod
    def _get_download(client, project, package, tag):
        """GET the artifact behind ``tag`` in proxy mode and return the response."""
        return client.get(
            f"/api/v1/project/{project}/{package}/+/{tag}",
            params={"mode": "proxy"},
        )

    def _download(self, label, project, package, tag):
        """Download ``tag`` with its own client.

        Returns:
            ``(body_bytes, None)`` on HTTP 200, otherwise ``(None, error)``
            where ``error`` is a string prefixed with ``label``.
        """
        try:
            with self._client() as client:
                response = self._get_download(client, project, package, tag)
                if response.status_code == 200:
                    return response.content, None
                return None, f"{label}: Status {response.status_code}"
        except Exception as e:
            return None, f"{label}: {str(e)}"

    def _upload(self, label, api_key, project, package, filename, content, tag):
        """Upload ``content`` with its own client.

        Returns:
            ``(response_json, None)`` on HTTP 200, otherwise ``(None, error)``
            where ``error`` is a string prefixed with ``label``.
        """
        try:
            with self._client() as client:
                response = self._post_upload(
                    client, api_key, project, package, filename, content, tag
                )
                if response.status_code == 200:
                    return response.json(), None
                return None, f"{label}: Status {response.status_code}"
        except Exception as e:
            return None, f"{label}: {str(e)}"

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_upload_while_download_in_progress(self, integration_client, test_package):
        """Test uploading while a download is in progress."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        # Upload initial content
        content1, _hash1 = generate_content_with_hash(10240, seed=800)  # 10KB
        upload_test_file(integration_client, project, package, content1, tag="initial")
        # New content for upload during download
        content2, hash2 = generate_content_with_hash(10240, seed=801)
        with ThreadPoolExecutor(max_workers=2) as executor:
            download_future = executor.submit(
                self._download, "Download", project, package, "initial"
            )
            upload_future = executor.submit(
                self._upload, "Upload", api_key, project, package,
                "new.bin", content2, "during-download",
            )
        # Leaving the ``with`` block waits for both workers to finish.
        downloaded, download_error = download_future.result()
        uploaded, upload_error = upload_future.result()
        errors = [e for e in (download_error, upload_error) if e is not None]
        assert len(errors) == 0, f"Errors: {errors}"
        # Verify download got correct content
        assert downloaded == content1
        # Verify upload succeeded
        assert uploaded["artifact_id"] == hash2

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_multiple_uploads_and_downloads_simultaneously(self, integration_client, test_package):
        """Test multiple uploads and downloads running simultaneously."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        # Pre-upload some files for downloading
        existing_files = []
        for i in range(3):
            content, _digest = generate_content_with_hash(2048, seed=900 + i)
            upload_test_file(integration_client, project, package, content, tag=f"existing-{i}")
            existing_files.append((f"existing-{i}", content))
        # New files for uploading
        new_files = [
            generate_content_with_hash(2048, seed=910 + i) for i in range(3)
        ]
        with ThreadPoolExecutor(max_workers=6) as executor:
            # Submit downloads
            download_futures = [
                (
                    tag,
                    expected,
                    executor.submit(self._download, f"Download {tag}", project, package, tag),
                )
                for tag, expected in existing_files
            ]
            # Submit uploads
            upload_futures = [
                (
                    expected_hash,
                    executor.submit(
                        self._upload, f"Upload {idx}", api_key, project, package,
                        f"new-{idx}.bin", content, f"new-{idx}",
                    ),
                )
                for idx, (content, expected_hash) in enumerate(new_files)
            ]
        errors = []
        downloads = []
        for tag, expected, future in download_futures:
            body, error = future.result()
            if error is None:
                downloads.append((tag, body, expected))
            else:
                errors.append(error)
        uploads = []
        for expected_hash, future in upload_futures:
            payload, error = future.result()
            if error is None:
                uploads.append((payload, expected_hash))
            else:
                errors.append(error)
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(downloads) == 3
        assert len(uploads) == 3
        # Verify downloads
        for tag, downloaded, expected in downloads:
            assert downloaded == expected, f"Download mismatch for {tag}"
        # Verify uploads
        for payload, expected_hash in uploads:
            assert payload["artifact_id"] == expected_hash

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_no_data_corruption_under_concurrency(self, integration_client, test_package):
        """Test that no data corruption occurs under concurrent operations."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"
        # Create content with recognizable patterns
        num_files = 5
        files_data = []
        for i in range(num_files):
            # Each file has unique repeating pattern for easy corruption detection
            pattern = bytes([i] * 256)
            content = pattern * 40  # 10KB each
            files_data.append((content, compute_sha256(content)))

        def upload_and_verify(idx, content, expected_hash):
            """Upload then re-download on one client; return an error string or None."""
            try:
                with self._client() as client:
                    # Upload
                    upload_resp = self._post_upload(
                        client, api_key, project, package,
                        f"pattern-{idx}.bin", content, f"pattern-{idx}",
                    )
                    if upload_resp.status_code != 200:
                        return f"Upload {idx}: Status {upload_resp.status_code}"
                    if upload_resp.json()["artifact_id"] != expected_hash:
                        return f"Upload {idx}: Hash mismatch"
                    # Immediately download and verify
                    download_resp = self._get_download(
                        client, project, package, f"pattern-{idx}"
                    )
                    if download_resp.status_code != 200:
                        return f"Download {idx}: Status {download_resp.status_code}"
                    if download_resp.content != content:
                        return f"Worker {idx}: DATA CORRUPTION DETECTED"
                    # Verify the downloaded content hash
                    if compute_sha256(download_resp.content) != expected_hash:
                        return f"Worker {idx}: Hash verification failed"
                    return None
            except Exception as e:
                return f"Worker {idx}: {str(e)}"

        with ThreadPoolExecutor(max_workers=num_files) as executor:
            futures = [
                executor.submit(upload_and_verify, idx, content, expected_hash)
                for idx, (content, expected_hash) in enumerate(files_data)
            ]
        outcomes = [future.result() for future in futures]
        errors = [outcome for outcome in outcomes if outcome is not None]
        assert len(errors) == 0, f"Errors: {errors}"
        assert len(outcomes) == num_files
--- a/backend/tests/integration/test_error_handling.py
+++ b/backend/tests/integration/test_error_handling.py
@@ -0,0 +1,322 @@
 """
 Integration tests for error handling in upload and download operations.
 Tests cover:
 - Timeout handling
 - Invalid request handling
 - Resource cleanup on failures
 - Graceful error responses
 """
 import pytest
 import io
 import time
 from tests.factories import (
    compute_sha256,
    upload_test_file,
    generate_content_with_hash,
 )
 class TestUploadErrorHandling:
    """Error-path coverage for the upload endpoint.
    Each test posts to an ``/upload`` URL and checks the HTTP status code
    (plus the returned ``artifact_id`` in the single success case).
    """
    @staticmethod
    def _post_file(client, url, payload, filename="test.bin", **request_kwargs):
        """POST ``payload`` as the multipart ``file`` field and return the response.
        Extra keyword arguments (e.g. ``headers``) are forwarded to ``client.post``.
        """
        multipart = {
            "file": (filename, io.BytesIO(payload), "application/octet-stream")
        }
        return client.post(url, files=multipart, **request_kwargs)
    @pytest.mark.integration
    def test_upload_to_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Uploading into a project name that was never created answers 404."""
        target = f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload"
        reply = self._post_file(
            integration_client, target, b"test content for nonexistent project"
        )
        assert reply.status_code == 404
    @pytest.mark.integration
    def test_upload_to_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Uploading into an unknown package of an existing project answers 404."""
        target = f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload"
        reply = self._post_file(
            integration_client, target, b"test content for nonexistent package"
        )
        assert reply.status_code == 404
    @pytest.mark.integration
    def test_upload_empty_file_rejected(self, integration_client, test_package):
        """A zero-byte file is refused with either 400 or 422."""
        project, package = test_package
        target = f"/api/v1/project/{project}/{package}/upload"
        reply = self._post_file(integration_client, target, b"", filename="empty.bin")
        assert reply.status_code in (400, 422)
    @pytest.mark.integration
    def test_upload_missing_file_returns_422(self, integration_client, test_package):
        """A form post that carries no ``file`` field answers 422."""
        project, package = test_package
        form_fields = {"tag": "no-file-provided"}
        reply = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data=form_fields,
        )
        assert reply.status_code == 422
    @pytest.mark.integration
    def test_upload_invalid_checksum_format_returns_400(
        self, integration_client, test_package
    ):
        """A checksum header that is not a well-formed hash answers 400."""
        project, package = test_package
        target = f"/api/v1/project/{project}/{package}/upload"
        reply = self._post_file(
            integration_client,
            target,
            b"checksum format test",
            headers={"X-Checksum-SHA256": "invalid-hash-format"},
        )
        assert reply.status_code == 400
    @pytest.mark.integration
    def test_upload_checksum_mismatch_returns_422(
        self, integration_client, test_package
    ):
        """A well-formed checksum that does not match the body answers 422."""
        project, package = test_package
        target = f"/api/v1/project/{project}/{package}/upload"
        # 64 characters, so the format is acceptable, but it is not the body's hash.
        bogus_digest = "0" * 64
        reply = self._post_file(
            integration_client,
            target,
            b"checksum mismatch test",
            headers={"X-Checksum-SHA256": bogus_digest},
        )
        assert reply.status_code == 422
    @pytest.mark.integration
    def test_upload_with_correct_checksum_succeeds(
        self, integration_client, test_package
    ):
        """A matching checksum header is accepted and echoed back as ``artifact_id``."""
        project, package = test_package
        payload = b"correct checksum test"
        digest = compute_sha256(payload)
        reply = self._post_file(
            integration_client,
            f"/api/v1/project/{project}/{package}/upload",
            payload,
            headers={"X-Checksum-SHA256": digest},
        )
        assert reply.status_code == 200
        assert reply.json()["artifact_id"] == digest
 class TestDownloadErrorHandling:
    """Error-path coverage for the download endpoint; every case expects 404."""
    @pytest.mark.integration
    def test_download_nonexistent_tag_returns_404(
        self, integration_client, test_package
    ):
        """An unknown tag inside an existing package answers 404."""
        project, package = test_package
        url = f"/api/v1/project/{project}/{package}/+/nonexistent-tag-xyz"
        reply = integration_client.get(url)
        assert reply.status_code == 404
    @pytest.mark.integration
    def test_download_nonexistent_artifact_returns_404(
        self, integration_client, test_package
    ):
        """A 64-character artifact ID that was never uploaded answers 404."""
        project, package = test_package
        unknown_id = "a" * 64
        url = f"/api/v1/project/{project}/{package}/+/artifact:{unknown_id}"
        reply = integration_client.get(url)
        assert reply.status_code == 404
    @pytest.mark.integration
    def test_download_invalid_artifact_id_format(
        self, integration_client, test_package
    ):
        """An artifact ID shorter than a full hash answers 404."""
        project, package = test_package
        # "abc123" is far shorter than the 64 characters of a real artifact ID.
        url = f"/api/v1/project/{project}/{package}/+/artifact:abc123"
        reply = integration_client.get(url)
        assert reply.status_code == 404
    @pytest.mark.integration
    def test_download_from_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Downloading from a project that was never created answers 404."""
        url = f"/api/v1/project/nonexistent-{unique_test_id}/pkg/+/tag"
        reply = integration_client.get(url)
        assert reply.status_code == 404
    @pytest.mark.integration
    def test_download_from_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Downloading from an unknown package of an existing project answers 404."""
        url = f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/+/tag"
        reply = integration_client.get(url)
        assert reply.status_code == 404
 class TestTimeoutBehavior:
    """Tests for timeout behavior (integration level).
    Both tests move a 10MB payload and fail if the transfer takes 60 seconds
    or longer. Durations are measured with ``time.perf_counter()`` rather than
    ``time.time()``: the wall clock can jump (NTP sync, manual adjustment),
    which would make the elapsed value meaningless or even negative.
    """
    @pytest.mark.integration
    @pytest.mark.slow
    def test_large_upload_completes_within_reasonable_time(
        self, integration_client, test_package, sized_content
    ):
        """Test that a 10MB upload completes within reasonable time."""
        project, package = test_package
        content, expected_hash = sized_content(10 * 1024 * 1024, seed=999)  # 10MB
        start_time = time.perf_counter()
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-test"
        )
        elapsed = time.perf_counter() - start_time
        assert result["artifact_id"] == expected_hash
        # Should complete within 60 seconds for 10MB on local docker
        assert elapsed < 60, f"Upload took too long: {elapsed:.2f}s"
    @pytest.mark.integration
    @pytest.mark.slow
    def test_large_download_completes_within_reasonable_time(
        self, integration_client, test_package, sized_content
    ):
        """Test that a 10MB download completes within reasonable time."""
        project, package = test_package
        # Only the payload is needed here; the hash returned alongside it is unused.
        content, _ = sized_content(10 * 1024 * 1024, seed=998)  # 10MB
        # First upload (not part of the timed section)
        upload_test_file(
            integration_client, project, package, content, tag="download-timeout-test"
        )
        # Then download and time it
        start_time = time.perf_counter()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/download-timeout-test",
            params={"mode": "proxy"},
        )
        elapsed = time.perf_counter() - start_time
        assert response.status_code == 200
        assert len(response.content) == len(content)
        # Should complete within 60 seconds for 10MB on local docker
        assert elapsed < 60, f"Download took too long: {elapsed:.2f}s"
 class TestResourceCleanup:
    """Checks that a rejected upload leaves nothing behind.
    Note: More comprehensive cleanup tests are in test_upload_download_api.py
    (TestUploadFailureCleanup class) including S3 object cleanup verification.
    """
    @pytest.mark.integration
    def test_checksum_mismatch_no_orphaned_artifact(
        self, integration_client, test_package, unique_test_id
    ):
        """An upload rejected for a checksum mismatch must not register an artifact."""
        project, package = test_package
        # The payload embeds the per-test ID so its hash cannot already exist
        # because of an earlier test.
        payload = f"checksum mismatch orphan test {unique_test_id}".encode()
        real_digest = compute_sha256(payload)
        bogus_digest = "0" * 64
        # Precondition: the artifact is unknown before the upload attempt.
        before = integration_client.get(f"/api/v1/artifact/{real_digest}")
        assert before.status_code == 404, "Artifact should not exist before test"
        multipart = {
            "file": ("test.bin", io.BytesIO(payload), "application/octet-stream")
        }
        upload_url = f"/api/v1/project/{project}/{package}/upload"
        rejected = integration_client.post(
            upload_url,
            files=multipart,
            headers={"X-Checksum-SHA256": bogus_digest},
        )
        assert rejected.status_code == 422
        # Neither the claimed hash nor the real one may resolve afterwards.
        lookup_statuses = [
            integration_client.get(f"/api/v1/artifact/{digest}").status_code
            for digest in (bogus_digest, real_digest)
        ]
        assert lookup_statuses[0] == 404
        assert lookup_statuses[1] == 404
 class TestGracefulErrorResponses:
    """Tests for graceful and informative error responses.
    Error bodies are expected to be JSON objects carrying a non-empty
    ``detail`` entry.
    """
    @pytest.mark.integration
    def test_404_response_has_detail_message(
        self, integration_client, test_package
    ):
        """Test 404 responses include a detail message."""
        project, package = test_package
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
        )
        assert response.status_code == 404
        data = response.json()
        assert "detail" in data
        assert len(data["detail"]) > 0
    @pytest.mark.integration
    def test_422_response_has_detail_message(self, integration_client, test_package):
        """Test 422 responses include a non-empty detail message."""
        project, package = test_package
        # Upload with mismatched checksum
        content = b"detail message test"
        wrong_hash = "0" * 64
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )
        assert response.status_code == 422
        data = response.json()
        assert "detail" in data
        # Mirror the 404 test: key presence alone would let an empty detail
        # pass. len() works whether detail is a string or a list.
        assert len(data["detail"]) > 0
    @pytest.mark.integration
    def test_error_response_is_json(self, integration_client, unique_test_id):
        """Test error responses are valid JSON objects."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/+/tag"
        )
        assert response.status_code == 404
        # Should not raise exception - valid JSON
        data = response.json()
        assert isinstance(data, dict)
--- a/backend/tests/integration/test_integrity_verification.py
+++ b/backend/tests/integration/test_integrity_verification.py
@@ -0,0 +1,768 @@
 """
 Integration tests for artifact integrity verification.
 Tests cover:
 - Round-trip verification (upload -> download -> verify hash)
 - Consistency check endpoint
 - Header-based verification
 - Integrity verification across file sizes
 - Client-side verification workflow
 """
 import pytest
 import io
 import hashlib
 from tests.factories import (
    compute_sha256,
    upload_test_file,
    generate_content_with_hash,
    s3_object_exists,
    get_s3_client,
    get_s3_bucket,
 )
 from tests.conftest import (
    SIZE_1KB,
    SIZE_10KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_10MB,
 )
 class TestRoundTripVerification:
    """Upload/download round trips checked against a locally computed SHA256."""
    @pytest.mark.integration
    def test_upload_download_hash_matches(self, integration_client, test_package):
        """Local hash, hash reported by upload and hash of the download all agree."""
        project, package = test_package
        payload = b"Round trip integrity test content"
        local_digest = compute_sha256(payload)
        # Step 1: upload and keep the ID the server reports.
        upload_info = upload_test_file(
            integration_client, project, package, payload, tag="roundtrip"
        )
        reported_digest = upload_info["artifact_id"]
        assert reported_digest == local_digest
        # Step 2: fetch the same tag back through the proxy mode.
        download_url = f"/api/v1/project/{project}/{package}/+/roundtrip"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        # Step 3: hash what came back and compare it against both references.
        fetched_digest = compute_sha256(reply.content)
        assert fetched_digest == local_digest
        assert fetched_digest == reported_digest
    @pytest.mark.integration
    def test_upload_response_contains_hash(self, integration_client, test_package):
        """The upload response carries ``artifact_id`` as 64 lowercase hex characters."""
        project, package = test_package
        payload = b"Upload response hash test"
        local_digest = compute_sha256(payload)
        upload_info = upload_test_file(integration_client, project, package, payload)
        assert "artifact_id" in upload_info
        artifact_id = upload_info["artifact_id"]
        assert artifact_id == local_digest
        assert len(artifact_id) == 64
        hex_alphabet = "0123456789abcdef"
        assert all(ch in hex_alphabet for ch in artifact_id)
    @pytest.mark.integration
    def test_download_header_matches_artifact_id(self, integration_client, test_package):
        """The ``X-Checksum-SHA256`` download header equals the content hash."""
        project, package = test_package
        payload = b"Header verification test"
        local_digest = compute_sha256(payload)
        upload_test_file(
            integration_client, project, package, payload, tag="header-check"
        )
        download_url = f"/api/v1/project/{project}/{package}/+/header-check"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        assert reply.headers.get("X-Checksum-SHA256") == local_digest
    @pytest.mark.integration
    def test_etag_matches_artifact_id(self, integration_client, test_package):
        """The ``ETag`` download header, with quotes stripped, equals the content hash."""
        project, package = test_package
        payload = b"ETag verification test"
        local_digest = compute_sha256(payload)
        upload_test_file(
            integration_client, project, package, payload, tag="etag-check"
        )
        download_url = f"/api/v1/project/{project}/{package}/+/etag-check"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        # A missing header falls back to "" and therefore fails the comparison.
        raw_etag = reply.headers.get("ETag", "")
        assert raw_etag.strip('"') == local_digest
    @pytest.mark.integration
    def test_artifact_endpoint_returns_correct_hash(self, integration_client, test_package):
        """The artifact endpoint reports the hash as both ``id`` and ``sha256``."""
        project, package = test_package
        payload = b"Artifact endpoint hash test"
        local_digest = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload)
        # Look the artifact up directly by its hash.
        reply = integration_client.get(f"/api/v1/artifact/{local_digest}")
        assert reply.status_code == 200
        artifact = reply.json()
        assert artifact["id"] == local_digest
        assert artifact.get("sha256") == local_digest
 class TestClientSideVerificationWorkflow:
    """Walks through the checks a client can perform around upload and download."""
    @pytest.mark.integration
    def test_client_can_verify_before_upload(self, integration_client, test_package):
        """A hash computed before upload equals the ``artifact_id`` in the response."""
        project, package = test_package
        payload = b"Client pre-upload verification test"
        # Hash first, exactly as a client would before sending anything.
        local_digest = compute_sha256(payload)
        upload_info = upload_test_file(integration_client, project, package, payload)
        # The server must report the very same hash.
        assert upload_info["artifact_id"] == local_digest
    @pytest.mark.integration
    def test_client_can_provide_checksum_header(self, integration_client, test_package):
        """An upload carrying a matching ``X-Checksum-SHA256`` header is accepted."""
        project, package = test_package
        payload = b"Client checksum header test"
        local_digest = compute_sha256(payload)
        multipart = {
            "file": ("test.bin", io.BytesIO(payload), "application/octet-stream")
        }
        upload_url = f"/api/v1/project/{project}/{package}/upload"
        reply = integration_client.post(
            upload_url,
            files=multipart,
            headers={"X-Checksum-SHA256": local_digest},
        )
        assert reply.status_code == 200
        assert reply.json()["artifact_id"] == local_digest
    @pytest.mark.integration
    def test_checksum_mismatch_rejected(self, integration_client, test_package):
        """An upload whose checksum header does not match the body answers 422."""
        project, package = test_package
        payload = b"Checksum mismatch test"
        bogus_digest = "0" * 64
        multipart = {
            "file": ("test.bin", io.BytesIO(payload), "application/octet-stream")
        }
        upload_url = f"/api/v1/project/{project}/{package}/upload"
        reply = integration_client.post(
            upload_url,
            files=multipart,
            headers={"X-Checksum-SHA256": bogus_digest},
        )
        assert reply.status_code == 422
    @pytest.mark.integration
    def test_client_can_verify_after_download(self, integration_client, test_package):
        """The hash of the downloaded body equals the ``X-Checksum-SHA256`` header."""
        project, package = test_package
        payload = b"Client post-download verification"
        upload_test_file(
            integration_client, project, package, payload, tag="verify-after"
        )
        download_url = f"/api/v1/project/{project}/{package}/+/verify-after"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        # Hash advertised by the server versus hash of the bytes received.
        advertised_digest = reply.headers.get("X-Checksum-SHA256")
        received_digest = compute_sha256(reply.content)
        assert received_digest == advertised_digest
 class TestIntegritySizeVariants:
    """Runs the same upload/download integrity check at several payload sizes."""
    @staticmethod
    def _check_round_trip(client, test_package, sized_content, size, seed, tag):
        """Upload ``size`` bytes under ``tag``, download them and verify every hash.
        Asserts that the upload reports the expected hash, that the proxy
        download answers 200, and that both the body hash and the
        ``X-Checksum-SHA256`` header equal the expected hash.
        """
        project, package = test_package
        payload, digest = sized_content(size, seed=seed)
        upload_info = upload_test_file(client, project, package, payload, tag=tag)
        assert upload_info["artifact_id"] == digest
        reply = client.get(
            f"/api/v1/project/{project}/{package}/+/{tag}",
            params={"mode": "proxy"},
        )
        assert reply.status_code == 200
        assert compute_sha256(reply.content) == digest
        assert reply.headers.get("X-Checksum-SHA256") == digest
    @pytest.mark.integration
    def test_integrity_1kb(self, integration_client, test_package, sized_content):
        """Integrity round trip for a 1KB payload."""
        self._check_round_trip(
            integration_client, test_package, sized_content, SIZE_1KB, 100, "int-1kb"
        )
    @pytest.mark.integration
    def test_integrity_100kb(self, integration_client, test_package, sized_content):
        """Integrity round trip for a 100KB payload."""
        self._check_round_trip(
            integration_client, test_package, sized_content, SIZE_100KB, 101, "int-100kb"
        )
    @pytest.mark.integration
    def test_integrity_1mb(self, integration_client, test_package, sized_content):
        """Integrity round trip for a 1MB payload."""
        self._check_round_trip(
            integration_client, test_package, sized_content, SIZE_1MB, 102, "int-1mb"
        )
    @pytest.mark.integration
    @pytest.mark.slow
    def test_integrity_10mb(self, integration_client, test_package, sized_content):
        """Integrity round trip for a 10MB payload (marked slow)."""
        self._check_round_trip(
            integration_client, test_package, sized_content, SIZE_10MB, 103, "int-10mb"
        )
 class TestConsistencyCheck:
    """Exercises ``GET /api/v1/admin/consistency-check``."""
    # Field name -> type the report is expected to use for it, in the order
    # the assertions are evaluated.
    _REPORT_FIELDS = (
        ("total_artifacts_checked", int),
        ("orphaned_s3_objects", int),
        ("missing_s3_objects", int),
        ("size_mismatches", int),
        ("healthy", bool),
        ("orphaned_s3_keys", list),
        ("missing_s3_keys", list),
        ("size_mismatch_artifacts", list),
    )
    @pytest.mark.integration
    def test_consistency_check_returns_200(self, integration_client):
        """The endpoint answers 200 when called without parameters."""
        reply = integration_client.get("/api/v1/admin/consistency-check")
        assert reply.status_code == 200
    @pytest.mark.integration
    def test_consistency_check_response_format(self, integration_client):
        """The report contains every expected field with the expected type."""
        reply = integration_client.get("/api/v1/admin/consistency-check")
        assert reply.status_code == 200
        report = reply.json()
        # First pass: every field is present.
        for field, _ in self._REPORT_FIELDS:
            assert field in report
        # Second pass: every field has the expected type.
        for field, expected_type in self._REPORT_FIELDS:
            assert isinstance(report[field], expected_type)
    @pytest.mark.integration
    def test_consistency_check_after_upload(self, integration_client, test_package):
        """After a regular upload the report covers >= 1 artifact and is healthy."""
        project, package = test_package
        upload_test_file(
            integration_client, project, package, b"Consistency check test content"
        )
        reply = integration_client.get("/api/v1/admin/consistency-check")
        assert reply.status_code == 200
        report = reply.json()
        assert report["total_artifacts_checked"] >= 1
        assert report["healthy"] is True
    @pytest.mark.integration
    def test_consistency_check_limit_parameter(self, integration_client):
        """With ``limit=10`` none of the detail lists holds more than 10 entries."""
        reply = integration_client.get(
            "/api/v1/admin/consistency-check", params={"limit": 10}
        )
        assert reply.status_code == 200
        report = reply.json()
        for list_field in (
            "orphaned_s3_keys",
            "missing_s3_keys",
            "size_mismatch_artifacts",
        ):
            assert len(report[list_field]) <= 10
 class TestDigestHeader:
    """Tests for RFC 3230 Digest header.
    The header is expected in the form ``sha-256=<base64>``, where the base64
    part decodes to the 32-byte SHA256 digest of the downloaded content.
    """
    @pytest.mark.integration
    def test_download_includes_digest_header(self, integration_client, test_package):
        """Test download includes Digest header in RFC 3230 format."""
        project, package = test_package
        content = b"Digest header test"
        upload_test_file(
            integration_client, project, package, content, tag="digest-test"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/digest-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "Digest" in response.headers
        # Verify Digest format (sha-256=base64hash)
        digest = response.headers["Digest"]
        assert digest.startswith("sha-256=")
    @pytest.mark.integration
    def test_digest_header_base64_valid(self, integration_client, test_package):
        """Test Digest header carries the base64-encoded SHA256 of the content."""
        import base64
        project, package = test_package
        content = b"Digest base64 test"
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project, package, content, tag="digest-b64"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/digest-b64",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        digest = response.headers["Digest"]
        _, separator, base64_part = digest.partition("=")
        assert separator, f"Digest header has no '=' separator: {digest!r}"
        # Only the decode sits inside the try block, so a failing length or
        # value assertion below is not misreported as a base64 error.
        # validate=True rejects non-alphabet characters rather than silently
        # discarding them. binascii.Error is a ValueError subclass.
        try:
            decoded = base64.b64decode(base64_part, validate=True)
        except ValueError as e:
            pytest.fail(f"Invalid base64 in Digest header: {e}")
        assert len(decoded) == 32  # SHA256 is 32 bytes
        # The decoded bytes must be the digest of the uploaded content
        assert decoded == bytes.fromhex(expected_hash)
 class TestVerificationModes:
    """Downloads with ``verify=true`` in each of the two ``verify_mode`` settings."""
    @staticmethod
    def _upload_then_fetch(client, test_package, payload, tag, verify_mode):
        """Upload ``payload`` under ``tag`` and download it with verification enabled."""
        project, package = test_package
        upload_test_file(client, project, package, payload, tag=tag)
        query = {"mode": "proxy", "verify": "true", "verify_mode": verify_mode}
        return client.get(
            f"/api/v1/project/{project}/{package}/+/{tag}",
            params=query,
        )
    @pytest.mark.integration
    def test_pre_verification_mode(self, integration_client, test_package):
        """``verify_mode=pre`` returns the content and sets ``X-Verified: true``."""
        payload = b"Pre-verification mode test"
        reply = self._upload_then_fetch(
            integration_client, test_package, payload, "pre-verify", "pre"
        )
        assert reply.status_code == 200
        assert reply.content == payload
        # The response must flag the content as verified.
        assert reply.headers.get("X-Verified") == "true"
    @pytest.mark.integration
    def test_stream_verification_mode(self, integration_client, test_package):
        """``verify_mode=stream`` returns the uploaded content unchanged."""
        payload = b"Stream verification mode test"
        reply = self._upload_then_fetch(
            integration_client, test_package, payload, "stream-verify", "stream"
        )
        assert reply.status_code == 200
        assert reply.content == payload
 class TestArtifactIntegrityEndpoint:
    """Size reporting: artifact metadata and the download's ``Content-Length``."""
    @pytest.mark.integration
    def test_artifact_size_matches(self, integration_client, test_package):
        """The artifact endpoint reports ``size`` equal to the uploaded byte count."""
        project, package = test_package
        payload = b"Artifact size test content"
        upload_info = upload_test_file(integration_client, project, package, payload)
        lookup_url = f"/api/v1/artifact/{upload_info['artifact_id']}"
        reply = integration_client.get(lookup_url)
        assert reply.status_code == 200
        artifact = reply.json()
        assert artifact["size"] == len(payload)
    @pytest.mark.integration
    def test_content_length_header_matches_size(self, integration_client, test_package):
        """``Content-Length`` and the body length both equal the uploaded byte count."""
        project, package = test_package
        payload = b"Content-Length header test"
        byte_count = len(payload)
        upload_test_file(
            integration_client, project, package, payload, tag="content-len"
        )
        download_url = f"/api/v1/project/{project}/{package}/+/content-len"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        # A missing header falls back to 0 and therefore fails the comparison.
        declared_length = int(reply.headers.get("Content-Length", 0))
        assert declared_length == byte_count
        assert len(reply.content) == byte_count
@pytest.mark.requires_direct_s3
 class TestCorruptionDetection:
    """Tests for detecting corrupted S3 objects.
    These tests directly manipulate S3 objects to simulate corruption
    and verify that the system can detect hash mismatches.
    Note: These tests require direct S3/MinIO access and are skipped in CI
    where S3 is not directly accessible from the test runner.
    """
    @pytest.mark.integration
    def test_detection_of_corrupted_content(self, integration_client, test_package):
        """Test that corrupted S3 content is detected via hash mismatch.
        Uploads content, then directly modifies the S3 object, then
        verifies that the downloaded content hash doesn't match.
        """
        project, package = test_package
        content = b"Original content for corruption test"
        expected_hash = compute_sha256(content)
        # Upload original content
        result = upload_test_file(
            integration_client, project, package, content, tag="corrupt-test"
        )
        assert result["artifact_id"] == expected_hash
        # Get the S3 object and corrupt it
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        # Replace with corrupted content
        corrupted_content = b"Corrupted content - different from original!"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)
        # Download via proxy (bypasses hash verification)
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/corrupt-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # Verify the downloaded content doesn't match original hash
        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Corruption was not detected - hashes match"
        assert response.content == corrupted_content
        # The X-Checksum-SHA256 header should still show the original hash (from DB)
        # but the actual content hash is different
        header_hash = response.headers.get("X-Checksum-SHA256")
        assert header_hash == expected_hash  # Header shows expected hash
        assert downloaded_hash != header_hash  # But content is corrupted
        # Restore original content for cleanup
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
    @pytest.mark.integration
    def test_detection_of_single_bit_flip(self, integration_client, test_package):
        """Test detection of a single bit flip in S3 object content."""
        project, package = test_package
        content = b"Content for single bit flip detection test"
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content, tag="bitflip-test"
        )
        assert result["artifact_id"] == expected_hash
        # Get S3 object and flip a single bit
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        # Flip the first bit of the first byte
        corrupted_content = bytearray(content)
        corrupted_content[0] ^= 0x01
        corrupted_content = bytes(corrupted_content)
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)
        # Download and verify hash mismatch
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/bitflip-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Single bit flip not detected"
        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
    @pytest.mark.integration
    def test_detection_of_truncated_content(self, integration_client, test_package):
        """Test detection of truncated S3 object."""
        project, package = test_package
        content = b"This is content that will be truncated for testing purposes"
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content, tag="truncate-test"
        )
        assert result["artifact_id"] == expected_hash
        # Get S3 object and truncate it
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        # Truncate to half the original size
        truncated_content = content[: len(content) // 2]
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=truncated_content)
        # Download and verify hash mismatch
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/truncate-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Truncation not detected"
        assert len(response.content) < len(content), "Content was not truncated"
        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
    @pytest.mark.integration
    def test_detection_of_appended_content(self, integration_client, test_package):
        """Test detection of content with extra bytes appended."""
        project, package = test_package
        content = b"Original content"
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content, tag="append-test"
        )
        assert result["artifact_id"] == expected_hash
        # Get S3 object and append extra bytes
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        appended_content = content + b" - extra bytes appended"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=appended_content)
        # Download and verify hash mismatch
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/append-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Appended content not detected"
        assert len(response.content) > len(content), "Content was not extended"
        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
    @pytest.mark.integration
    def test_client_detects_hash_mismatch_post_download(
        self, integration_client, test_package
    ):
        """Test that a client can detect hash mismatch after downloading corrupted content.
        This simulates the full client verification workflow:
        1. Download content
        2. Get expected hash from header
        3. Compute actual hash of content
        4. Verify they match (or detect corruption)
        """
        project, package = test_package
        content = b"Content for client-side corruption detection"
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content, tag="client-detect"
        )
        # Corrupt the S3 object
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        corrupted = b"This is completely different content"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted)
        # Simulate client download and verification
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/client-detect",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # Client gets expected hash from header
        header_hash = response.headers.get("X-Checksum-SHA256")
        # Client computes hash of downloaded content
        actual_hash = compute_sha256(response.content)
        # Client detects the mismatch
        corruption_detected = actual_hash != header_hash
        assert corruption_detected, "Client should detect hash mismatch"
        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
    @pytest.mark.integration
    def test_consistency_check_detects_size_mismatch(
        self, integration_client, test_package, unique_test_id
    ):
        """Test that consistency check detects size mismatches.
        Uploads content, modifies S3 object size, then runs consistency check.
        """
        project, package = test_package
        content = b"Content for size mismatch consistency check test " + unique_test_id.encode()
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content, tag="size-mismatch"
        )
        # Modify S3 object to have different size
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        different_size_content = content + b"extra extra extra"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=different_size_content)
        # Run consistency check
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200
        data = response.json()
        # Should detect the size mismatch
        assert data["size_mismatches"] >= 1 or len(data["size_mismatch_artifacts"]) >= 1
        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
    @pytest.mark.integration
    def test_consistency_check_detects_missing_s3_object(
        self, integration_client, test_package, unique_test_id
    ):
        """Test that consistency check detects missing S3 objects.
        Uploads content, deletes S3 object, then runs consistency check.
        """
        project, package = test_package
        content = b"Content for missing S3 object test " + unique_test_id.encode()
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content, tag="missing-s3"
        )
        # Delete the S3 object
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        s3_client.delete_object(Bucket=bucket, Key=s3_key)
        # Run consistency check
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200
        data = response.json()
        # Should detect the missing S3 object
        assert data["missing_s3_objects"] >= 1 or len(data["missing_s3_keys"]) >= 1
        # Restore the object for cleanup
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
--- a/backend/tests/integration/test_large_uploads.py
+++ b/backend/tests/integration/test_large_uploads.py
@@ -0,0 +1,552 @@
 """
 Integration tests for large file upload functionality.
 Tests cover:
 - Large file uploads (100MB, 1GB)
 - Multipart upload behavior
 - Upload metrics (duration, throughput)
 - Memory efficiency during uploads
 - Upload progress tracking
 Note: Large tests are marked with @pytest.mark.slow and will be skipped
 by default. Run with `pytest --run-slow` to include them.
 """
 import os
 import pytest
 import io
 import time
 from tests.factories import (
    compute_sha256,
    upload_test_file,
    s3_object_exists,
 )
 from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_10MB,
    SIZE_100MB,
    SIZE_1GB,
 )
 class TestUploadMetrics:
    """Tests for upload duration and throughput metrics."""

    @pytest.mark.integration
    def test_upload_response_includes_duration_ms(self, integration_client, test_package):
        """Test upload response includes a non-negative duration_ms field."""
        project, package = test_package
        content = b"duration test content"
        result = upload_test_file(
            integration_client, project, package, content, tag="duration-test"
        )
        assert "duration_ms" in result
        assert result["duration_ms"] is not None
        assert result["duration_ms"] >= 0

    @pytest.mark.integration
    def test_upload_response_includes_throughput(self, integration_client, test_package):
        """Test upload response includes throughput_mbps field."""
        project, package = test_package
        content = b"throughput test content"
        result = upload_test_file(
            integration_client, project, package, content, tag="throughput-test"
        )
        # For small files throughput may be very high or None, so only the
        # presence of the field is checked, not its value.
        assert "throughput_mbps" in result

    @pytest.mark.integration
    def test_upload_duration_reasonable(
        self, integration_client, test_package, sized_content
    ):
        """Test reported duration does not exceed the client-observed duration.

        The server-reported ``duration_ms`` must be at most the round-trip
        time measured here plus a 1 second allowance.
        """
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=100)
        # perf_counter is monotonic; time.time() can jump if the wall clock is
        # adjusted, which would make this comparison flaky.
        started = time.perf_counter()
        result = upload_test_file(
            integration_client, project, package, content, tag="duration-check"
        )
        actual_duration = (time.perf_counter() - started) * 1000  # ms
        assert result["duration_ms"] is not None
        # Allow some variance: within 1s of the client-side measurement
        assert result["duration_ms"] <= actual_duration + 1000
 class TestLargeFileUploads:
    """Tests for large file uploads (10MB up to 1GB)."""

    @pytest.mark.integration
    def test_upload_10mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 10MB file and receiving populated metrics."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=200)
        result = upload_test_file(
            integration_client, project, package, content, tag="large-10mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB
        assert result["duration_ms"] is not None
        assert result["throughput_mbps"] is not None

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.requires_direct_s3
    def test_upload_100mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 100MB file (triggers multipart upload)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=300)
        result = upload_test_file(
            integration_client, project, package, content, tag="large-100mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100MB
        # Verify S3 object exists (this is why direct S3 access is required)
        assert s3_object_exists(expected_hash)

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_1gb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 1GB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=400)
        result = upload_test_file(
            integration_client, project, package, content, tag="large-1gb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1GB
        # Should have measurable throughput
        assert result["throughput_mbps"] is not None
        assert result["throughput_mbps"] > 0

    @pytest.mark.integration
    def test_large_file_deduplication(
        self, integration_client, test_package, sized_content, unique_test_id
    ):
        """Test deduplication works for large files.

        The same 10MB payload is uploaded twice under different tags; the
        second upload must be reported as deduplicated.
        """
        project, package = test_package
        # Derive the content seed from unique_test_id so runs use varying content
        seed = hash(unique_test_id) % 10000
        content, expected_hash = sized_content(SIZE_10MB, seed=seed)
        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-1"
        )
        assert result1["artifact_id"] == expected_hash
        # The first upload's flag is not asserted: it may already be True if
        # an earlier test uploaded the same content. Only require the field.
        assert "deduplicated" in result1
        # Second upload of same content
        result2 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-2"
        )
        assert result2["artifact_id"] == expected_hash
        # Second upload MUST be deduplicated
        assert result2["deduplicated"] is True
 class TestUploadProgress:
    """Progress endpoint behavior for unknown upload IDs and invalid paths."""

    @pytest.mark.integration
    def test_progress_endpoint_returns_not_found_for_invalid_id(
        self, integration_client, test_package
    ):
        """An unknown upload ID yields HTTP 200 with a ``not_found`` status body."""
        project, package = test_package
        progress_url = (
            f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
        )
        reply = integration_client.get(progress_url)
        assert reply.status_code == 200
        body = reply.json()
        assert body["status"] == "not_found"
        assert body["upload_id"] == "invalid-upload-id"

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_project(
        self, integration_client, unique_test_id
    ):
        """Querying progress under a project that does not exist returns 404."""
        progress_url = (
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
        )
        assert integration_client.get(progress_url).status_code == 404

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_package(
        self, integration_client, test_project, unique_test_id
    ):
        """Querying progress under a package that does not exist returns 404."""
        progress_url = (
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
        )
        assert integration_client.get(progress_url).status_code == 404
 class TestResumableUploadProgress:
    """Tests for progress tracking during resumable uploads."""

    @pytest.mark.integration
    def test_resumable_upload_init_and_progress(
        self, integration_client, test_package, sized_content
    ):
        """Test initializing resumable upload and checking progress.

        After a successful init, the progress endpoint must report an
        ``in_progress`` upload with zero bytes received. The upload session
        is aborted in a ``finally`` block so a failing assertion does not
        leave it open.
        """
        project, package = test_package
        _, expected_hash = sized_content(SIZE_100KB, seed=600)
        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "progress-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]
        auth_headers = {"Authorization": f"Bearer {api_key}"}
        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "progress-test.bin",
                "size": SIZE_100KB,
            },
            headers=auth_headers,
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")
        if not upload_id:
            # No session was opened, so there is nothing to track or abort.
            # NOTE(review): presumably the server already holds this content;
            # confirm against the init endpoint's contract.
            return
        try:
            # Check initial progress
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers=auth_headers,
            )
            assert progress_response.status_code == 200
            progress = progress_response.json()
            assert progress["status"] == "in_progress"
            assert progress["bytes_uploaded"] == 0
            assert progress["bytes_total"] == SIZE_100KB
        finally:
            # Abort to clean up, even when an assertion above failed
            integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers=auth_headers,
            )
 class TestUploadSizeLimits:
    """Lower-bound size checks for the upload endpoint."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """A zero-byte upload is refused with 400 or 422."""
        project, package = test_package
        empty_part = ("empty.txt", io.BytesIO(b""), "application/octet-stream")
        reply = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": empty_part},
        )
        assert reply.status_code in (400, 422)

    @pytest.mark.integration
    def test_minimum_size_accepted(self, integration_client, test_package):
        """A single-byte upload succeeds and reports size 1."""
        project, package = test_package
        upload = upload_test_file(
            integration_client, project, package, b"X", tag="min-size"
        )
        assert upload["size"] == 1

    @pytest.mark.integration
    def test_content_length_header_used_in_response(self, integration_client, test_package):
        """The size reported by the upload response equals the payload length."""
        project, package = test_package
        payload = b"content length verification test"
        upload = upload_test_file(
            integration_client, project, package, payload, tag="content-length-test"
        )
        assert upload["size"] == len(payload)
 class TestUploadErrorHandling:
    """Error responses of the upload endpoint for bad targets and bad input."""

    @pytest.mark.integration
    def test_upload_to_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Uploading into a project that does not exist yields 404."""
        file_part = ("test.bin", io.BytesIO(b"test content"), "application/octet-stream")
        reply = integration_client.post(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
            files={"file": file_part},
        )
        assert reply.status_code == 404

    @pytest.mark.integration
    def test_upload_to_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Uploading into a package that does not exist yields 404."""
        file_part = ("test.bin", io.BytesIO(b"test content"), "application/octet-stream")
        reply = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
            files={"file": file_part},
        )
        assert reply.status_code == 404

    @pytest.mark.integration
    def test_upload_without_file_returns_422(self, integration_client, test_package):
        """A request carrying only form data and no file part yields 422."""
        project, package = test_package
        reply = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data={"tag": "no-file"},
        )
        assert reply.status_code == 422

    @pytest.mark.integration
    def test_upload_with_invalid_checksum_rejected(
        self, integration_client, test_package
    ):
        """A malformed X-Checksum-SHA256 header value yields 400."""
        project, package = test_package
        file_part = ("test.bin", io.BytesIO(b"checksum test"), "application/octet-stream")
        reply = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": file_part},
            headers={"X-Checksum-SHA256": "invalid-checksum"},
        )
        assert reply.status_code == 400

    @pytest.mark.integration
    def test_upload_with_mismatched_checksum_rejected(
        self, integration_client, test_package
    ):
        """A well-formed but wrong checksum yields 422 with a verification error."""
        project, package = test_package
        # 64 zero characters: a SHA-256-shaped value that is not this payload's hash.
        bogus_checksum = "0" * 64
        file_part = ("test.bin", io.BytesIO(b"mismatch test"), "application/octet-stream")
        reply = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": file_part},
            headers={"X-Checksum-SHA256": bogus_checksum},
        )
        assert reply.status_code == 422
        detail_text = reply.json().get("detail", "").lower()
        assert "verification failed" in detail_text
 class TestResumableUploadCancellation:
    """Aborting resumable uploads, both existing and unknown ones."""

    @pytest.mark.integration
    def test_abort_resumable_upload(self, integration_client, test_package, sized_content):
        """After aborting a freshly initialized upload, progress reports ``not_found``."""
        project, package = test_package
        _, payload_hash = sized_content(SIZE_100KB, seed=700)
        # Create an API key and build the auth header from it.
        key_reply = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-test-key"},
        )
        assert key_reply.status_code == 200
        bearer = {"Authorization": f"Bearer {key_reply.json()['key']}"}
        # Open the resumable upload session.
        init_reply = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": payload_hash,
                "filename": "abort-test.bin",
                "size": SIZE_100KB,
            },
            headers=bearer,
        )
        assert init_reply.status_code == 200
        session_id = init_reply.json().get("upload_id")
        if not session_id:
            # No session ID came back, so there is nothing to abort.
            return
        # Abort immediately, before any part has been sent.
        abort_reply = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/{session_id}",
            headers=bearer,
        )
        assert abort_reply.status_code in (200, 204)
        # The session must no longer be known to the progress endpoint.
        progress_reply = integration_client.get(
            f"/api/v1/project/{project}/{package}/upload/{session_id}/progress",
            headers=bearer,
        )
        assert progress_reply.status_code == 200
        assert progress_reply.json()["status"] == "not_found"

    @pytest.mark.integration
    def test_abort_nonexistent_upload(self, integration_client, test_package):
        """Aborting an unknown upload ID returns 200, 204 or 404."""
        project, package = test_package
        # Create an API key and build the auth header from it.
        key_reply = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-nonexistent-key"},
        )
        assert key_reply.status_code == 200
        bearer = {"Authorization": f"Bearer {key_reply.json()['key']}"}
        abort_reply = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
            headers=bearer,
        )
        # Either an explicit 404 or a success code (idempotent delete) is accepted.
        assert abort_reply.status_code in (200, 204, 404)
 class TestUploadTimeout:
    """Tests for upload timeout handling."""

    @pytest.mark.integration
    def test_upload_with_short_timeout_succeeds_for_small_file(
        self, integration_client, test_package
    ):
        """Test small file upload succeeds with the client's configured timeout."""
        project, package = test_package
        content = b"small timeout test"
        # No timeout is set here; the upload uses integration_client as configured
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-small"
        )
        assert result["artifact_id"] is not None

    @pytest.mark.integration
    def test_upload_response_duration_under_timeout(
        self, integration_client, test_package, sized_content
    ):
        """Test a 1MB upload completes in under 60 seconds."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=800)
        # perf_counter is monotonic; time.time() can jump if the wall clock is
        # adjusted mid-test and distort the measured interval.
        started = time.perf_counter()
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-check"
        )
        duration = time.perf_counter() - started
        # 1MB should upload in well under 60 seconds on local
        assert duration < 60
        assert result["artifact_id"] is not None
 class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_different_files(
        self, integration_client, test_package, sized_content
    ):
        """Test concurrent uploads of different files succeed.

        Three distinct 100KB payloads are uploaded in parallel, each through
        its own HTTP client. Every upload must succeed and return the
        artifact ID matching its payload hash.
        """
        from concurrent.futures import ThreadPoolExecutor, as_completed
        from httpx import Client

        project, package = test_package
        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "concurrent-diff-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]
        num_uploads = 3
        # Same for every worker, so resolve it once
        base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

        def upload_unique_file(idx):
            """Upload payload ``idx``; return (response JSON, expected hash) or raise."""
            content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)
            # Each worker opens its own client rather than sharing the fixture
            with Client(base_url=base_url, timeout=30.0) as client:
                files = {
                    "file": (
                        f"concurrent-{idx}.bin",
                        io.BytesIO(content),
                        "application/octet-stream",
                    )
                }
                response = client.post(
                    f"/api/v1/project/{project}/{package}/upload",
                    files=files,
                    data={"tag": f"concurrent-diff-{idx}"},
                    headers={"Authorization": f"Bearer {api_key}"},
                )
                if response.status_code != 200:
                    raise RuntimeError(f"{response.status_code} - {response.text}")
                return response.json(), expected_hash

        # Outcomes are gathered in this thread from the futures, so workers
        # never touch shared state
        results = []
        errors = []
        with ThreadPoolExecutor(max_workers=num_uploads) as executor:
            futures = {
                executor.submit(upload_unique_file, i): i for i in range(num_uploads)
            }
            for future in as_completed(futures):
                idx = futures[future]
                try:
                    body, expected_hash = future.result()
                except Exception as e:
                    errors.append(f"Upload {idx}: {str(e)}")
                else:
                    results.append((idx, body, expected_hash))
        assert len(errors) == 0, f"Concurrent upload errors: {errors}"
        assert len(results) == num_uploads
        # Each upload should have unique artifact ID
        artifact_ids = set(r[1]["artifact_id"] for r in results)
        assert len(artifact_ids) == num_uploads
        # Each should match expected hash
        for idx, result, expected_hash in results:
            assert result["artifact_id"] == expected_hash
--- a/backend/tests/integration/test_size_boundary.py
+++ b/backend/tests/integration/test_size_boundary.py
@@ -0,0 +1,583 @@
 """
 Integration tests for upload/download with various file sizes.
 Tests cover:
 - Small files (0B - 100KB)
 - Medium files (1MB - 50MB)
 - Large files (100MB - 1GB) - marked as slow/large
 - Exact chunk boundaries
 - Data integrity verification across all sizes
 """
 import pytest
 import io
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tests.factories import (
    compute_sha256,
    upload_test_file,
    generate_content,
    generate_content_with_hash,
 )
 from tests.conftest import (
    SIZE_1B,
    SIZE_1KB,
    SIZE_10KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_5MB,
    SIZE_10MB,
    SIZE_50MB,
    SIZE_100MB,
    SIZE_250MB,
    SIZE_500MB,
    SIZE_1GB,
    CHUNK_SIZE,
    MULTIPART_THRESHOLD,
 )
 class TestSmallFileSizes:
    """Round-trip checks for small payloads (1 byte up to 100KB).

    Every test uploads generated content under a tag, checks the reported
    artifact id and size, then downloads the tag in proxy mode and compares
    the returned bytes with what was sent.
    """
    @pytest.mark.integration
    def test_upload_download_1_byte(self, integration_client, test_package, sized_content):
        """Round-trip a single-byte file."""
        project, package = test_package
        payload, digest = sized_content(SIZE_1B, seed=1)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="1byte.bin",
            tag="1byte",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == SIZE_1B
        # Fetch the tagged artifact and compare it with what was sent
        download_url = f"/api/v1/project/{project}/{package}/+/1byte"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
        assert len(fetched.content) == SIZE_1B
    @pytest.mark.integration
    def test_upload_download_1kb(self, integration_client, test_package, sized_content):
        """Round-trip a 1KB file."""
        project, package = test_package
        payload, digest = sized_content(SIZE_1KB, seed=2)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="1kb.bin",
            tag="1kb",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == SIZE_1KB
        download_url = f"/api/v1/project/{project}/{package}/+/1kb"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
    @pytest.mark.integration
    def test_upload_download_10kb(self, integration_client, test_package, sized_content):
        """Round-trip a 10KB file."""
        project, package = test_package
        payload, digest = sized_content(SIZE_10KB, seed=3)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="10kb.bin",
            tag="10kb",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == SIZE_10KB
        download_url = f"/api/v1/project/{project}/{package}/+/10kb"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
    @pytest.mark.integration
    def test_upload_download_100kb(self, integration_client, test_package, sized_content):
        """Round-trip a 100KB file."""
        project, package = test_package
        payload, digest = sized_content(SIZE_100KB, seed=4)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="100kb.bin",
            tag="100kb",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == SIZE_100KB
        download_url = f"/api/v1/project/{project}/{package}/+/100kb"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
 class TestMediumFileSizes:
    """Tests for medium file uploads/downloads (1MB - 50MB).

    Downloads are verified by length and SHA-256 digest instead of by
    comparing the full byte strings. The 10MB and 50MB cases carry the
    ``slow`` marker.
    """
    @pytest.mark.integration
    def test_upload_download_1mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 1MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1MB, seed=10)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="1mb.bin", tag="1mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1MB
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/1mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_1MB
        assert compute_sha256(response.content) == expected_hash
    @pytest.mark.integration
    def test_upload_download_5mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 5MB file (multipart threshold boundary area)."""
        # NOTE(review): MULTIPART_THRESHOLD is not referenced here, so the
        # relation between 5MB and the threshold is unverified - confirm.
        project, package = test_package
        content, expected_hash = sized_content(SIZE_5MB, seed=11)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="5mb.bin", tag="5mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_5MB
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/5mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_5MB
        assert compute_sha256(response.content) == expected_hash
    @pytest.mark.integration
    @pytest.mark.slow
    def test_upload_download_10mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=12)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="10mb.bin", tag="10mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/10mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_10MB
        assert compute_sha256(response.content) == expected_hash
    @pytest.mark.integration
    @pytest.mark.slow
    def test_upload_download_50mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 50MB file and print how long each leg took."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_50MB, seed=13)
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = upload_test_file(
            integration_client, project, package, content,
            filename="50mb.bin", tag="50mb"
        )
        upload_time = time.perf_counter() - start_time
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_50MB
        start_time = time.perf_counter()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/50mb",
            params={"mode": "proxy"},
        )
        download_time = time.perf_counter() - start_time
        assert response.status_code == 200
        assert len(response.content) == SIZE_50MB
        assert compute_sha256(response.content) == expected_hash
        # Log timing for performance tracking
        print(f"\n50MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")
 class TestLargeFileSizes:
    """Tests for large file uploads/downloads (100MB - 1GB).

    These tests carry the ``slow`` and ``large`` markers so they can be
    excluded from regular runs.
    Run with: pytest -m "large" to include these tests.
    """
    @staticmethod
    def _timed_round_trip(client, project, package, content, expected_hash, size, name):
        """Upload ``content``, download it again, verify both, and print timings.

        Args:
            client: HTTP client used for both the upload and the download.
            project: Project name used in the request paths.
            package: Package name used in the request paths.
            content: Bytes to upload.
            expected_hash: SHA-256 hex digest expected as the artifact id.
            size: Expected length of ``content`` in bytes.
            name: Lower-case size label (e.g. ``"100mb"``); used as the tag,
                as the filename stem, and upper-cased in the printed line.

        The leading underscore keeps pytest from collecting this helper.
        """
        # perf_counter() is monotonic, so multi-minute transfers are timed
        # correctly even if the wall clock is adjusted while they run.
        started = time.perf_counter()
        result = upload_test_file(
            client, project, package, content,
            filename=f"{name}.bin", tag=name
        )
        upload_time = time.perf_counter() - started
        assert result["artifact_id"] == expected_hash
        assert result["size"] == size
        started = time.perf_counter()
        response = client.get(
            f"/api/v1/project/{project}/{package}/+/{name}",
            params={"mode": "proxy"},
        )
        download_time = time.perf_counter() - started
        assert response.status_code == 200
        assert len(response.content) == size
        assert compute_sha256(response.content) == expected_hash
        print(f"\n{name.upper()} upload: {upload_time:.2f}s, download: {download_time:.2f}s")
    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_100mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 100MB file (multipart threshold)."""
        # NOTE(review): MULTIPART_THRESHOLD is not referenced here, so the
        # claim that 100MB is the threshold is unverified - confirm.
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=100)
        self._timed_round_trip(
            integration_client, project, package, content, expected_hash,
            SIZE_100MB, "100mb",
        )
    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_250mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 250MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_250MB, seed=250)
        self._timed_round_trip(
            integration_client, project, package, content, expected_hash,
            SIZE_250MB, "250mb",
        )
    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_500mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 500MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_500MB, seed=500)
        self._timed_round_trip(
            integration_client, project, package, content, expected_hash,
            SIZE_500MB, "500mb",
        )
    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_1gb(self, integration_client, test_package, sized_content):
        """Test upload/download of 1GB file.
        This test may take several minutes depending on network/disk speed.
        """
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=1024)
        self._timed_round_trip(
            integration_client, project, package, content, expected_hash,
            SIZE_1GB, "1gb",
        )
 class TestChunkBoundaries:
    """Round-trip checks for payload sizes on and around CHUNK_SIZE."""
    @pytest.mark.integration
    def test_upload_download_at_chunk_size(self, integration_client, test_package, sized_content):
        """Round-trip a payload of exactly CHUNK_SIZE bytes."""
        project, package = test_package
        payload, digest = sized_content(CHUNK_SIZE, seed=64)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="chunk.bin",
            tag="chunk-exact",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == CHUNK_SIZE
        download_url = f"/api/v1/project/{project}/{package}/+/chunk-exact"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
    @pytest.mark.integration
    def test_upload_download_chunk_size_plus_1(self, integration_client, test_package, sized_content):
        """Round-trip a payload one byte longer than CHUNK_SIZE."""
        project, package = test_package
        nbytes = CHUNK_SIZE + 1
        payload, digest = sized_content(nbytes, seed=65)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="chunk_plus.bin",
            tag="chunk-plus",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == nbytes
        download_url = f"/api/v1/project/{project}/{package}/+/chunk-plus"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
    @pytest.mark.integration
    def test_upload_download_chunk_size_minus_1(self, integration_client, test_package, sized_content):
        """Round-trip a payload one byte shorter than CHUNK_SIZE."""
        project, package = test_package
        nbytes = CHUNK_SIZE - 1
        payload, digest = sized_content(nbytes, seed=63)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="chunk_minus.bin",
            tag="chunk-minus",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == nbytes
        download_url = f"/api/v1/project/{project}/{package}/+/chunk-minus"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
    @pytest.mark.integration
    def test_upload_download_multiple_chunks(self, integration_client, test_package, sized_content):
        """Round-trip a payload of three whole chunks plus a 1000-byte tail."""
        project, package = test_package
        nbytes = CHUNK_SIZE * 3 + 1000
        payload, digest = sized_content(nbytes, seed=300)
        uploaded = upload_test_file(
            integration_client,
            project,
            package,
            payload,
            filename="multi_chunk.bin",
            tag="multi-chunk",
        )
        assert uploaded["artifact_id"] == digest
        assert uploaded["size"] == nbytes
        download_url = f"/api/v1/project/{project}/{package}/+/multi-chunk"
        fetched = integration_client.get(download_url, params={"mode": "proxy"})
        assert fetched.status_code == 200
        assert fetched.content == payload
 class TestDataIntegrity:
    """Tests for data integrity with various content types.

    Each test uploads a payload, checks that the returned artifact id equals
    the SHA-256 of the payload, and downloads it again in proxy mode to
    confirm the bytes are unchanged.
    """
    @pytest.mark.integration
    def test_binary_content_integrity(self, integration_client, test_package):
        """Test binary content (all byte values 0-255) integrity."""
        project, package = test_package
        # Content with all 256 possible byte values, repeated: 25,600 bytes
        content = bytes(range(256)) * 100
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="binary.bin", tag="binary"
        )
        assert result["artifact_id"] == expected_hash
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/binary",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
    @pytest.mark.integration
    def test_text_content_integrity(self, integration_client, test_package):
        """Test UTF-8 text content integrity."""
        project, package = test_package
        # Single source of truth for the text so the upload and the final
        # decode check cannot drift apart
        text = "Hello, World! 你好世界 🌍 مرحبا العالم"
        content = text.encode("utf-8")
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="text.txt", tag="text"
        )
        assert result["artifact_id"] == expected_hash
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/text",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
        assert response.content.decode("utf-8") == text
    @pytest.mark.integration
    def test_null_bytes_content_integrity(self, integration_client, test_package):
        """Test content with null bytes."""
        project, package = test_package
        content = b"before\x00null\x00bytes\x00after"
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="nulls.bin", tag="nulls"
        )
        assert result["artifact_id"] == expected_hash
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/nulls",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
        assert b"\x00" in response.content
    @pytest.mark.integration
    def test_unicode_filename_integrity(self, integration_client, test_package):
        """Test file with unicode filename."""
        project, package = test_package
        content = b"unicode filename test"
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="文件名.txt", tag="unicode-name"
        )
        assert result["artifact_id"] == expected_hash
        # The upload response must echo the non-ASCII filename unchanged
        assert result["original_name"] == "文件名.txt"
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/unicode-name",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
    @pytest.mark.integration
    def test_compressed_content_integrity(self, integration_client, test_package):
        """Test gzip-compressed content integrity."""
        import gzip
        project, package = test_package
        original = b"This is some text that will be compressed " * 100
        content = gzip.compress(original)
        expected_hash = compute_sha256(content)
        result = upload_test_file(
            integration_client, project, package, content,
            filename="data.gz", tag="compressed"
        )
        assert result["artifact_id"] == expected_hash
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/compressed",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # The stored bytes must be the compressed stream itself
        assert response.content == content
        # Verify we can decompress
        assert gzip.decompress(response.content) == original
    @pytest.mark.integration
    def test_hash_verification_matches(self, integration_client, test_package, sized_content):
        """Test that computed hash matches artifact_id for various sizes."""
        project, package = test_package
        sizes = [SIZE_1B, SIZE_1KB, SIZE_10KB, SIZE_100KB, SIZE_1MB]
        for i, size in enumerate(sizes):
            content, expected_hash = sized_content(size, seed=1000 + i)
            result = upload_test_file(
                integration_client, project, package, content,
                filename=f"hash_test_{size}.bin", tag=f"hash-{size}"
            )
            # Verify artifact_id matches expected hash
            assert result["artifact_id"] == expected_hash
            # Download and verify hash of downloaded content
            response = integration_client.get(
                f"/api/v1/project/{project}/{package}/+/hash-{size}",
                params={"mode": "proxy"},
            )
            # Check the status first: without this a failed download would
            # surface as a misleading hash mismatch of the error body
            assert response.status_code == 200, (
                f"download of hash-{size} failed: {response.status_code}"
            )
            downloaded_hash = compute_sha256(response.content)
            assert downloaded_hash == expected_hash
--- a/backend/tests/integration/test_streaming_download.py
+++ b/backend/tests/integration/test_streaming_download.py
@@ -0,0 +1,535 @@
 """
 Integration tests for streaming download functionality.
 Tests cover:
 - HTTP Range requests (partial downloads, resume)
 - Conditional requests (If-None-Match, If-Modified-Since)
 - Caching headers (Cache-Control, Last-Modified, Accept-Ranges)
 - Large file streaming
 - Download modes (proxy, redirect, presigned)
 """
 import pytest
 import io
 import time
 from email.utils import formatdate
 from tests.factories import (
    compute_sha256,
    upload_test_file,
 )
 from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
 )
 class TestRangeRequests:
    """Partial-download checks using the HTTP Range request header."""
    @pytest.mark.integration
    def test_range_request_first_bytes(self, integration_client, test_package):
        """A prefix range returns 206 with the first ten bytes and Content-Range."""
        project, package = test_package
        payload = b"0123456789" * 100
        upload_test_file(integration_client, project, package, payload, tag="range-test")
        # Ask only for bytes 0 through 9
        download_url = f"/api/v1/project/{project}/{package}/+/range-test"
        partial = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-9"},
        )
        assert partial.status_code == 206
        assert partial.content == b"0123456789"
        assert "Content-Range" in partial.headers
        assert partial.headers["Content-Range"].startswith("bytes 0-9/")
    @pytest.mark.integration
    def test_range_request_middle_bytes(self, integration_client, test_package):
        """An interior range returns exactly the requested slice."""
        project, package = test_package
        payload = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        upload_test_file(integration_client, project, package, payload, tag="range-mid")
        # Offsets 10..19 of the alphabet are the letters K through T
        download_url = f"/api/v1/project/{project}/{package}/+/range-mid"
        partial = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"Range": "bytes=10-19"},
        )
        assert partial.status_code == 206
        assert partial.content == b"KLMNOPQRST"
    @pytest.mark.integration
    def test_range_request_suffix_bytes(self, integration_client, test_package):
        """A suffix range returns the trailing bytes of the artifact."""
        project, package = test_package
        payload = b"0123456789ABCDEF"
        upload_test_file(integration_client, project, package, payload, tag="range-suffix")
        # "-4" asks for the final four bytes of the 16-byte payload
        download_url = f"/api/v1/project/{project}/{package}/+/range-suffix"
        partial = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"Range": "bytes=-4"},
        )
        assert partial.status_code == 206
        assert partial.content == b"CDEF"
    @pytest.mark.integration
    def test_range_request_open_ended(self, integration_client, test_package):
        """An open-ended range returns everything from the offset onward."""
        project, package = test_package
        payload = b"0123456789"
        upload_test_file(integration_client, project, package, payload, tag="range-open")
        # "5-" means offset 5 through the last byte
        download_url = f"/api/v1/project/{project}/{package}/+/range-open"
        partial = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"Range": "bytes=5-"},
        )
        assert partial.status_code == 206
        assert partial.content == b"56789"
    @pytest.mark.integration
    def test_range_request_includes_accept_ranges_header(self, integration_client, test_package):
        """A 206 response carries ``Accept-Ranges: bytes``."""
        project, package = test_package
        payload = b"test content"
        upload_test_file(integration_client, project, package, payload, tag="accept-ranges")
        download_url = f"/api/v1/project/{project}/{package}/+/accept-ranges"
        partial = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-4"},
        )
        assert partial.status_code == 206
        assert partial.headers.get("Accept-Ranges") == "bytes"
    @pytest.mark.integration
    def test_full_download_advertises_accept_ranges(self, integration_client, test_package):
        """A plain 200 download also carries ``Accept-Ranges: bytes``."""
        project, package = test_package
        payload = b"test content"
        upload_test_file(integration_client, project, package, payload, tag="full-accept")
        download_url = f"/api/v1/project/{project}/{package}/+/full-accept"
        whole = integration_client.get(download_url, params={"mode": "proxy"})
        assert whole.status_code == 200
        assert whole.headers.get("Accept-Ranges") == "bytes"
 class TestConditionalRequests:
    """Checks for If-None-Match / If-Modified-Since handling (304 vs 200)."""
    @pytest.mark.integration
    def test_if_none_match_returns_304(self, integration_client, test_package):
        """A quoted ETag equal to the content hash yields an empty 304."""
        project, package = test_package
        payload = b"conditional request test content"
        digest = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload, tag="cond-etag")
        # Send the content hash, wrapped in double quotes, as the validator
        download_url = f"/api/v1/project/{project}/{package}/+/cond-etag"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{digest}"'},
        )
        assert reply.status_code == 304
        assert reply.content == b""
    @pytest.mark.integration
    def test_if_none_match_without_quotes(self, integration_client, test_package):
        """An unquoted ETag equal to the content hash also yields 304."""
        project, package = test_package
        payload = b"etag no quotes test"
        digest = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload, tag="cond-noquote")
        # Same validator as the quoted case, but sent bare
        download_url = f"/api/v1/project/{project}/{package}/+/cond-noquote"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-None-Match": digest},
        )
        assert reply.status_code == 304
    @pytest.mark.integration
    def test_if_none_match_mismatch_returns_200(self, integration_client, test_package):
        """A non-matching ETag yields a normal 200 with the full body."""
        project, package = test_package
        payload = b"etag mismatch test"
        upload_test_file(integration_client, project, package, payload, tag="cond-mismatch")
        # This validator cannot equal any real content hash
        download_url = f"/api/v1/project/{project}/{package}/+/cond-mismatch"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-None-Match": '"different-etag-value"'},
        )
        assert reply.status_code == 200
        assert reply.content == payload
    @pytest.mark.integration
    def test_if_modified_since_returns_304(self, integration_client, test_package):
        """An If-Modified-Since date one day ahead yields 304."""
        project, package = test_package
        payload = b"modified since test"
        upload_test_file(integration_client, project, package, payload, tag="cond-modified")
        # 86400 seconds from now: later than the upload that just happened
        tomorrow = formatdate(time.time() + 86400, usegmt=True)
        download_url = f"/api/v1/project/{project}/{package}/+/cond-modified"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-Modified-Since": tomorrow},
        )
        assert reply.status_code == 304
    @pytest.mark.integration
    def test_if_modified_since_old_date_returns_200(self, integration_client, test_package):
        """An If-Modified-Since date in 2020 yields a normal 200 with the body."""
        project, package = test_package
        payload = b"old date test"
        upload_test_file(integration_client, project, package, payload, tag="cond-old")
        # Fixed timestamp well before the upload: 2020-01-01
        stale = "Wed, 01 Jan 2020 00:00:00 GMT"
        download_url = f"/api/v1/project/{project}/{package}/+/cond-old"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-Modified-Since": stale},
        )
        assert reply.status_code == 200
        assert reply.content == payload
    @pytest.mark.integration
    def test_304_includes_etag(self, integration_client, test_package):
        """A 304 reply repeats the quoted ETag."""
        project, package = test_package
        payload = b"304 etag test"
        digest = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload, tag="304-etag")
        quoted_etag = f'"{digest}"'
        download_url = f"/api/v1/project/{project}/{package}/+/304-etag"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-None-Match": quoted_etag},
        )
        assert reply.status_code == 304
        assert reply.headers.get("ETag") == quoted_etag
    @pytest.mark.integration
    def test_304_includes_cache_control(self, integration_client, test_package):
        """A 304 reply carries a Cache-Control header containing ``immutable``."""
        project, package = test_package
        payload = b"304 cache test"
        digest = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload, tag="304-cache")
        download_url = f"/api/v1/project/{project}/{package}/+/304-cache"
        reply = integration_client.get(
            download_url,
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{digest}"'},
        )
        assert reply.status_code == 304
        assert "immutable" in reply.headers.get("Cache-Control", "")
 class TestCachingHeaders:
    """Checks that proxy-mode downloads carry the expected caching headers."""
    @pytest.mark.integration
    def test_download_includes_cache_control(self, integration_client, test_package):
        """Cache-Control contains ``public``, ``immutable`` and ``max-age``."""
        project, package = test_package
        payload = b"cache control test"
        upload_test_file(integration_client, project, package, payload, tag="cache-ctl")
        download_url = f"/api/v1/project/{project}/{package}/+/cache-ctl"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        # A missing header becomes "" so each check fails as a plain assertion
        directives = reply.headers.get("Cache-Control", "")
        for expected in ("public", "immutable", "max-age"):
            assert expected in directives
    @pytest.mark.integration
    def test_download_includes_last_modified(self, integration_client, test_package):
        """Last-Modified is present and mentions GMT."""
        project, package = test_package
        payload = b"last modified test"
        upload_test_file(integration_client, project, package, payload, tag="last-mod")
        download_url = f"/api/v1/project/{project}/{package}/+/last-mod"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        assert "Last-Modified" in reply.headers
        # Only the zone marker of the RFC 7231 date format is checked here
        assert "GMT" in reply.headers["Last-Modified"]
    @pytest.mark.integration
    def test_download_includes_etag(self, integration_client, test_package):
        """ETag equals the content SHA-256 wrapped in double quotes."""
        project, package = test_package
        payload = b"etag header test"
        digest = compute_sha256(payload)
        upload_test_file(integration_client, project, package, payload, tag="etag-hdr")
        download_url = f"/api/v1/project/{project}/{package}/+/etag-hdr"
        reply = integration_client.get(download_url, params={"mode": "proxy"})
        assert reply.status_code == 200
        assert reply.headers.get("ETag") == f'"{digest}"'
 class TestDownloadResume:
    """Exercises resuming a download by stitching together HTTP range requests."""
    @pytest.mark.integration
    def test_resume_download_after_partial(self, integration_client, test_package):
        """Ranges 0-499 and 500- together reassemble the original 1000 bytes."""
        project, package = test_package
        content = b"ABCDEFGHIJ" * 100  # 1000 bytes
        upload_test_file(integration_client, project, package, content, tag="resume-test")
        url = f"/api/v1/project/{project}/{package}/+/resume-test"
        # Interrupted transfer: only the leading 500 bytes are requested.
        head = integration_client.get(
            url, params={"mode": "proxy"}, headers={"Range": "bytes=0-499"}
        )
        assert head.status_code == 206
        head_bytes = head.content
        assert len(head_bytes) == 500
        # Pick up again at offset 500 with an open-ended range.
        tail = integration_client.get(
            url, params={"mode": "proxy"}, headers={"Range": "bytes=500-"}
        )
        assert tail.status_code == 206
        tail_bytes = tail.content
        assert len(tail_bytes) == 500
        # The two pieces, concatenated in order, must equal the upload.
        assert head_bytes + tail_bytes == content
    @pytest.mark.integration
    def test_resume_with_etag_verification(self, integration_client, test_package):
        """Both range responses of a resumed download report the same content-hash ETag."""
        project, package = test_package
        content = b"resume etag verification test content"
        quoted_etag = f'"{compute_sha256(content)}"'
        upload_test_file(integration_client, project, package, content, tag="resume-etag")
        url = f"/api/v1/project/{project}/{package}/+/resume-etag"
        # First chunk: capture the ETag the server reports for this artifact.
        first = integration_client.get(
            url, params={"mode": "proxy"}, headers={"Range": "bytes=0-9"}
        )
        assert first.status_code == 206
        etag = first.headers.get("ETag")
        assert etag == quoted_etag
        # Second chunk: only a Range header is sent (no If-Match precondition);
        # unchanged content is detected by comparing the ETag of both responses.
        second = integration_client.get(
            url, params={"mode": "proxy"}, headers={"Range": "bytes=10-"}
        )
        assert second.status_code == 206
        assert second.headers.get("ETag") == etag
 class TestLargeFileStreaming:
    """Streams larger generated payloads through proxy mode and checks the result."""
    @pytest.mark.integration
    def test_stream_1mb_file(self, integration_client, test_package, sized_content):
        """A SIZE_1MB payload downloads with the same length and SHA-256 it was uploaded with."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1MB, seed=500)
        upload_test_file(integration_client, project, package, content, tag="stream-1mb")
        url = f"/api/v1/project/{project}/{package}/+/stream-1mb"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        body = response.content
        assert len(body) == SIZE_1MB
        assert compute_sha256(body) == expected_hash
    @pytest.mark.integration
    def test_stream_large_file_has_correct_headers(
        self, integration_client, test_package, sized_content
    ):
        """A SIZE_100KB download reports length, SHA-256 checksum and byte-range support."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=501)
        upload_test_file(integration_client, project, package, content, tag="stream-hdr")
        url = f"/api/v1/project/{project}/{package}/+/stream-hdr"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        headers = response.headers
        # A missing Content-Length is read as 0 and therefore fails the size check.
        assert int(headers.get("Content-Length", 0)) == SIZE_100KB
        assert headers.get("X-Checksum-SHA256") == expected_hash
        assert headers.get("Accept-Ranges") == "bytes"
    @pytest.mark.integration
    def test_range_request_on_large_file(
        self, integration_client, test_package, sized_content
    ):
        """A 1000-byte range from the middle of a SIZE_100KB payload returns that exact slice."""
        project, package = test_package
        content, _ = sized_content(SIZE_100KB, seed=502)
        upload_test_file(integration_client, project, package, content, tag="range-large")
        # Range bounds are inclusive, so 50000-50999 covers 1000 bytes.
        start, end = 50000, 50999
        expected_slice = content[start : end + 1]
        url = f"/api/v1/project/{project}/{package}/+/range-large"
        response = integration_client.get(
            url,
            params={"mode": "proxy"},
            headers={"Range": f"bytes={start}-{end}"},
        )
        assert response.status_code == 206
        assert len(response.content) == 1000
        assert response.content == expected_slice
 class TestDownloadModes:
    """Covers the proxy, presigned and redirect values of the download `mode` parameter."""
    @pytest.mark.integration
    def test_proxy_mode_streams_content(self, integration_client, test_package):
        """mode=proxy answers 200 with the uploaded bytes in the response body."""
        project, package = test_package
        content = b"proxy mode test content"
        upload_test_file(integration_client, project, package, content, tag="mode-proxy")
        url = f"/api/v1/project/{project}/{package}/+/mode-proxy"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        assert response.content == content
    @pytest.mark.integration
    def test_presigned_mode_returns_url(self, integration_client, test_package):
        """mode=presigned answers 200 with JSON holding an http(s) `url` and `expires_at`."""
        project, package = test_package
        content = b"presigned mode test"
        upload_test_file(integration_client, project, package, content, tag="mode-presign")
        url = f"/api/v1/project/{project}/{package}/+/mode-presign"
        response = integration_client.get(url, params={"mode": "presigned"})
        assert response.status_code == 200
        payload = response.json()
        for key in ("url", "expires_at"):
            assert key in payload
        assert payload["url"].startswith("http")
    @pytest.mark.integration
    def test_redirect_mode_returns_302(self, integration_client, test_package):
        """mode=redirect answers 302 with a Location header when redirects are not followed."""
        project, package = test_package
        content = b"redirect mode test"
        upload_test_file(integration_client, project, package, content, tag="mode-redir")
        url = f"/api/v1/project/{project}/{package}/+/mode-redir"
        # Redirect following is disabled so the 302 itself can be inspected.
        response = integration_client.get(
            url,
            params={"mode": "redirect"},
            follow_redirects=False,
        )
        assert response.status_code == 302
        assert "Location" in response.headers
 class TestIntegrityDuringStreaming:
    """Cross-checks integrity headers against the bytes actually streamed back."""
    @pytest.mark.integration
    def test_checksum_header_matches_content(self, integration_client, test_package):
        """X-Checksum-SHA256, the body's hash and the upload's hash all agree."""
        project, package = test_package
        content = b"integrity check content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="integrity")
        url = f"/api/v1/project/{project}/{package}/+/integrity"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        advertised = response.headers.get("X-Checksum-SHA256")
        received = compute_sha256(response.content)
        # Compare each value with the expected hash, then with each other.
        assert advertised == expected_hash
        assert received == expected_hash
        assert advertised == received
    @pytest.mark.integration
    def test_etag_matches_content_hash(self, integration_client, test_package):
        """The unquoted ETag and the hash of the body both equal the upload's SHA-256."""
        project, package = test_package
        content = b"etag integrity test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="etag-int")
        url = f"/api/v1/project/{project}/{package}/+/etag-int"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        # Strip the surrounding double quotes before comparing with the bare hash.
        raw_etag = response.headers.get("ETag", "")
        unquoted = raw_etag.strip('"')
        received = compute_sha256(response.content)
        assert unquoted == expected_hash
        assert received == expected_hash
    @pytest.mark.integration
    def test_digest_header_present(self, integration_client, test_package):
        """A download carries a Digest header whose value starts with `sha-256=`."""
        project, package = test_package
        content = b"digest header test"
        upload_test_file(integration_client, project, package, content, tag="digest")
        url = f"/api/v1/project/{project}/{package}/+/digest"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        headers = response.headers
        assert "Digest" in headers
        assert headers["Digest"].startswith("sha-256=")
--- a/backend/tests/integration/test_upload_download_api.py
+++ b/backend/tests/integration/test_upload_download_api.py
@@ -10,6 +10,7 @@ Tests cover:
 - S3 storage verification
 """
 import os
 import pytest
 import io
 import threading
@@ -25,6 +26,19 @@ from tests.factories import (
 class TestUploadBasics:
    """Tests for basic upload functionality."""
    @pytest.mark.integration
    def test_upload_returns_200(self, integration_client, test_package):
        """Posting a multipart file to the upload endpoint yields HTTP 200."""
        project, package = test_package
        payload = io.BytesIO(b"valid file upload test")
        upload_url = f"/api/v1/project/{project}/{package}/upload"
        response = integration_client.post(
            upload_url,
            files={"file": ("test.bin", payload, "application/octet-stream")},
        )
        assert response.status_code == 200
    @pytest.mark.integration
    def test_upload_returns_artifact_id(self, integration_client, test_package):
        """Test upload returns the artifact ID (SHA256 hash)."""
@@ -101,6 +115,83 @@ class TestUploadBasics:
        assert "created_at" in result
        assert result["created_at"] is not None
    @pytest.mark.integration
    def test_upload_without_tag_succeeds(self, integration_client, test_package):
        """Upload without a tag succeeds and leaves no tag pointing at the artifact.

        The file is posted with no ``tag`` form field; the response must be 200
        and report the content's SHA-256 as ``artifact_id``. The package's tag
        listing is then fetched and must contain no entry for that artifact.
        """
        project, package = test_package
        content = b"upload without tag test"
        expected_hash = compute_sha256(content)
        files = {"file": ("no_tag.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            # No tag parameter
        )
        assert response.status_code == 200
        result = response.json()
        assert result["artifact_id"] == expected_hash
        # Verify no tag was created - list tags and check
        tags_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags"
        )
        assert tags_response.status_code == 200
        tags = tags_response.json()
        # Accept either an {"items": [...]} envelope or a bare list. Calling
        # .get() on a list raises AttributeError, and falling back to the dict
        # itself would iterate its string keys instead of tag records.
        tag_items = tags["items"] if isinstance(tags, dict) else tags
        # Filter for tags pointing to this artifact
        artifact_tags = [t for t in tag_items if t.get("artifact_id") == expected_hash]
        assert len(artifact_tags) == 0, "Tag should not be created when not specified"
    @pytest.mark.integration
    def test_upload_creates_artifact_in_database(self, integration_client, test_package):
        """After an upload, the artifact endpoint returns a record with matching id and size."""
        project, package = test_package
        content = b"database artifact test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content)
        # The record is looked up through the API, keyed by the content hash.
        lookup = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert lookup.status_code == 200
        record = lookup.json()
        assert record["id"] == expected_hash
        assert record["size"] == len(content)
    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_upload_creates_object_in_s3(self, integration_client, test_package):
        """After an upload, `s3_object_exists` reports an object for the content hash."""
        project, package = test_package
        content = b"s3 object creation test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content)
        # Marked requires_direct_s3: the check goes through the s3_object_exists helper.
        stored = s3_object_exists(expected_hash)
        assert stored, "S3 object should exist after upload"
    @pytest.mark.integration
    def test_upload_with_tag_creates_tag_record(self, integration_client, test_package):
        """Upload with a tag makes that tag name appear in the package's tag listing."""
        project, package = test_package
        content = b"tag creation test"
        tag_name = "my-tag-v1"
        upload_test_file(
            integration_client, project, package, content, tag=tag_name
        )
        # Verify tag exists
        tags_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags"
        )
        assert tags_response.status_code == 200
        tags = tags_response.json()
        # Accept either an {"items": [...]} envelope or a bare list. Calling
        # .get() on a list raises AttributeError, and falling back to the dict
        # itself would iterate its string keys instead of tag records.
        tag_items = tags["items"] if isinstance(tags, dict) else tags
        tag_names = [t["name"] for t in tag_items]
        assert tag_name in tag_names
 class TestDuplicateUploads:
    """Tests for duplicate upload deduplication behavior."""
@@ -248,6 +339,23 @@ class TestDownload:
        assert response.status_code == 200
        assert response.content == original_content
    @pytest.mark.integration
    def test_download_by_tag_prefix(self, integration_client, test_package):
        """A reference written as `tag:<name>` downloads the bytes uploaded under that tag."""
        project, package = test_package
        original_content = b"download by tag prefix test"
        upload_test_file(
            integration_client, project, package, original_content, tag="prefix-tag"
        )
        # Same tag as above, addressed with the explicit "tag:" prefix.
        url = f"/api/v1/project/{project}/{package}/+/tag:prefix-tag"
        response = integration_client.get(url, params={"mode": "proxy"})
        assert response.status_code == 200
        assert response.content == original_content
    @pytest.mark.integration
    def test_download_nonexistent_tag(self, integration_client, test_package):
        """Test downloading nonexistent tag returns 404."""
@@ -258,6 +366,33 @@ class TestDownload:
        )
        assert response.status_code == 404
    @pytest.mark.integration
    def test_download_nonexistent_artifact(self, integration_client, test_package):
        """An `artifact:` reference to a hash that was never uploaded yields 404."""
        project, package = test_package
        # 64 zeros: shaped like a SHA-256 hex digest but not an uploaded artifact.
        fake_hash = "0" * 64
        url = f"/api/v1/project/{project}/{package}/+/artifact:{fake_hash}"
        response = integration_client.get(url)
        assert response.status_code == 404
    @pytest.mark.integration
    def test_download_from_nonexistent_project(self, integration_client, unique_test_id):
        """A download addressed to a project name that does not exist yields 404."""
        # unique_test_id is embedded in the project name used for the request.
        url = f"/api/v1/project/nonexistent-project-{unique_test_id}/somepackage/+/sometag"
        response = integration_client.get(url)
        assert response.status_code == 404
    @pytest.mark.integration
    def test_download_from_nonexistent_package(self, integration_client, test_project, unique_test_id):
        """A download addressed to a missing package inside the test project yields 404."""
        # The project comes from the fixture; only the package name is made up.
        url = f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/+/sometag"
        response = integration_client.get(url)
        assert response.status_code == 404
    @pytest.mark.integration
    def test_content_matches_original(self, integration_client, test_package):
        """Test downloaded content matches original exactly."""
@@ -275,6 +410,111 @@ class TestDownload:
        assert response.content == original_content
 class TestDownloadHeaders:
    """Tests for the response headers returned by proxy-mode downloads."""
    @pytest.mark.integration
    def test_download_content_type_header(self, integration_client, test_package):
        """A download of a file uploaded as test.txt carries a Content-Type header."""
        project, package = test_package
        content = b"content type header test"
        upload_test_file(
            integration_client, project, package, content,
            filename="test.txt", tag="content-type-test"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/content-type-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # Only presence is checked; the value (text/plain or
        # application/octet-stream) is deliberately left unconstrained.
        assert "content-type" in response.headers
    @pytest.mark.integration
    def test_download_content_length_header(self, integration_client, test_package):
        """Content-Length of a download equals the byte length of the uploaded payload."""
        project, package = test_package
        # The literal previously claimed 41 bytes but is 46 bytes long; the
        # assertion uses len(content), so only the self-description was wrong.
        content = b"content length header test - exactly 46 bytes!"
        expected_length = len(content)
        upload_test_file(
            integration_client, project, package, content, tag="content-length-test"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/content-length-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "content-length" in response.headers
        assert int(response.headers["content-length"]) == expected_length
    @pytest.mark.integration
    def test_download_content_disposition_header(self, integration_client, test_package):
        """Content-Disposition marks the download as an attachment and names the uploaded file."""
        project, package = test_package
        content = b"content disposition test"
        filename = "my-test-file.bin"
        upload_test_file(
            integration_client, project, package, content,
            filename=filename, tag="disposition-test"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/disposition-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "content-disposition" in response.headers
        disposition = response.headers["content-disposition"]
        # Substring checks only: the exact header syntax is not asserted.
        assert "attachment" in disposition
        assert filename in disposition
    @pytest.mark.integration
    def test_download_checksum_headers(self, integration_client, test_package):
        """X-Checksum-SHA256 on a download equals the SHA-256 of the uploaded bytes."""
        project, package = test_package
        content = b"checksum header test content"
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project, package, content, tag="checksum-headers"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/checksum-headers",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # Check for checksum headers
        assert "x-checksum-sha256" in response.headers
        assert response.headers["x-checksum-sha256"] == expected_hash
    @pytest.mark.integration
    def test_download_etag_header(self, integration_client, test_package):
        """The ETag of a download, with quotes stripped, equals the artifact's SHA-256."""
        project, package = test_package
        content = b"etag header test"
        expected_hash = compute_sha256(content)
        upload_test_file(
            integration_client, project, package, content, tag="etag-test"
        )
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/etag-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "etag" in response.headers
        # ETag should contain the artifact ID (hash); strip the surrounding quotes
        etag = response.headers["etag"].strip('"')
        assert etag == expected_hash
 class TestConcurrentUploads:
    """Tests for concurrent upload handling."""
@@ -301,7 +541,7 @@ class TestConcurrentUploads:
            try:
                from httpx import Client
-                base_url = "http://localhost:8080"
+                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
                with Client(base_url=base_url, timeout=30.0) as client:
                    files = {
                        "file": (
@@ -397,6 +637,7 @@ class TestUploadFailureCleanup:
    """Tests for cleanup when uploads fail."""
    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_upload_failure_invalid_project_no_orphaned_s3(
        self, integration_client, unique_test_id
    ):
@@ -419,6 +660,7 @@ class TestUploadFailureCleanup:
        )
    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_upload_failure_invalid_package_no_orphaned_s3(
        self, integration_client, test_project, unique_test_id
    ):
@@ -466,6 +708,7 @@ class TestS3StorageVerification:
    """Tests to verify S3 storage behavior."""
    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_s3_single_object_after_duplicates(
        self, integration_client, test_package, unique_test_id
    ):
@@ -521,6 +764,7 @@ class TestSecurityPathTraversal:
    """
    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_path_traversal_in_filename_stored_safely(
        self, integration_client, test_package
    ):
--- a/backend/tests/integration/test_version_api.py
+++ b/backend/tests/integration/test_version_api.py
@@ -0,0 +1,347 @@
 """
 Integration tests for package version API endpoints.
 Tests cover:
 - Version creation via upload
 - Version auto-detection from filename
 - Version listing and retrieval
 - Download by version prefix
 - Version deletion
 """
 import pytest
 import io
 from tests.factories import (
    compute_sha256,
    upload_test_file,
 )
 class TestVersionCreation:
    """Tests for creating versions via the upload endpoint's `version` form field."""
    @pytest.mark.integration
    def test_upload_with_explicit_version(self, integration_client, test_package):
        """An explicit `version` field is echoed back with version_source "explicit"."""
        project, package = test_package
        content = b"version creation test"
        expected_hash = compute_sha256(content)
        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "1.0.0"},
        )
        assert response.status_code == 200
        result = response.json()
        assert result["artifact_id"] == expected_hash
        assert result.get("version") == "1.0.0"
        assert result.get("version_source") == "explicit"
    @pytest.mark.integration
    def test_upload_with_version_and_tag(self, integration_client, test_package):
        """Sending both `version` and `tag` records the version and lists the tag."""
        project, package = test_package
        content = b"version and tag test"
        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "2.0.0", "tag": "latest"},
        )
        assert response.status_code == 200
        result = response.json()
        assert result.get("version") == "2.0.0"
        # Verify tag was also created
        tags_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags"
        )
        assert tags_response.status_code == 200
        tags = tags_response.json()
        # Accept either an {"items": [...]} envelope or a bare list. Calling
        # .get() on a list raises AttributeError, and falling back to the dict
        # itself would iterate its string keys instead of tag records.
        tag_items = tags["items"] if isinstance(tags, dict) else tags
        tag_names = [t["name"] for t in tag_items]
        assert "latest" in tag_names
    @pytest.mark.integration
    def test_duplicate_version_same_content_succeeds(self, integration_client, test_package):
        """Re-uploading identical bytes under the same version is accepted (deduplication)."""
        project, package = test_package
        content = b"version dedup test"
        # First upload with version
        files1 = {"file": ("app1.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response1 = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files1,
            data={"version": "3.0.0"},
        )
        assert response1.status_code == 200
        # Second upload: same version and same bytes, only the filename differs
        files2 = {"file": ("app2.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response2 = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files2,
            data={"version": "3.0.0"},
        )
        # This succeeds because it's the same artifact (deduplication)
        assert response2.status_code == 200
 class TestVersionAutoDetection:
    """Checks how the upload endpoint derives a version when none (or one) is supplied."""
    @pytest.mark.integration
    def test_version_detected_from_filename_tarball(self, integration_client, test_package):
        """Uploading myapp-1.2.3.tar.gz without a version field reports version 1.2.3."""
        project, package = test_package
        payload = io.BytesIO(b"auto detect version tarball")
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": ("myapp-1.2.3.tar.gz", payload, "application/octet-stream")},
        )
        assert response.status_code == 200
        body = response.json()
        assert body.get("version") == "1.2.3"
        # Either source is accepted: which one is reported depends on detection order.
        assert body.get("version_source") in ["filename", "metadata"]
    @pytest.mark.integration
    def test_version_detected_from_filename_zip(self, integration_client, test_package):
        """Uploading package-2.0.0.zip reports version 2.0.0 with source "filename"."""
        project, package = test_package
        payload = io.BytesIO(b"auto detect version zip")
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": ("package-2.0.0.zip", payload, "application/octet-stream")},
        )
        assert response.status_code == 200
        body = response.json()
        assert body.get("version") == "2.0.0"
        assert body.get("version_source") == "filename"
    @pytest.mark.integration
    def test_explicit_version_overrides_filename(self, integration_client, test_package):
        """An explicit version field wins over the version embedded in the filename."""
        project, package = test_package
        payload = io.BytesIO(b"explicit override test")
        # The filename says 1.0.0 while the form field says 9.9.9.
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": ("myapp-1.0.0.tar.gz", payload, "application/octet-stream")},
            data={"version": "9.9.9"},
        )
        assert response.status_code == 200
        body = response.json()
        assert body.get("version") == "9.9.9"
        assert body.get("version_source") == "explicit"
    @pytest.mark.integration
    def test_no_version_detected_from_plain_filename(self, integration_client, test_package):
        """A filename with no version pattern leaves `version` null or absent."""
        project, package = test_package
        payload = io.BytesIO(b"no version in filename")
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": ("plain-file.bin", payload, "application/octet-stream")},
        )
        assert response.status_code == 200
        body = response.json()
        # .get() makes a missing key and an explicit null equivalent here.
        assert body.get("version") is None
 class TestVersionListing:
    """Tests for listing a package's versions and retrieving a single version."""
    @pytest.mark.integration
    def test_list_versions(self, integration_client, test_package):
        """Every version uploaded to a package appears in its versions listing."""
        project, package = test_package
        # Create multiple versions, each with distinct content
        for ver in ["1.0.0", "1.1.0", "2.0.0"]:
            content = f"version {ver} content".encode()
            files = {"file": (f"app-{ver}.tar.gz", io.BytesIO(content), "application/octet-stream")}
            response = integration_client.post(
                f"/api/v1/project/{project}/{package}/upload",
                files=files,
                data={"version": ver},
            )
            assert response.status_code == 200
        # List versions
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions"
        )
        assert response.status_code == 200
        data = response.json()
        # Accept either an {"items": [...]} envelope or a bare list. Calling
        # .get() on a list raises AttributeError, and falling back to the dict
        # itself would iterate its string keys instead of version records.
        version_items = data["items"] if isinstance(data, dict) else data
        versions = [v["version"] for v in version_items]
        assert "1.0.0" in versions
        assert "1.1.0" in versions
        assert "2.0.0" in versions
    @pytest.mark.integration
    def test_get_specific_version(self, integration_client, test_package):
        """Fetching one version returns its version string and artifact hash."""
        project, package = test_package
        content = b"specific version test"
        expected_hash = compute_sha256(content)
        # Create version
        files = {"file": ("app-4.0.0.tar.gz", io.BytesIO(content), "application/octet-stream")}
        upload_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "4.0.0"},
        )
        # Fail here if setup broke, not later as a misleading 404 on the lookup.
        assert upload_response.status_code == 200
        # Get version details
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions/4.0.0"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["version"] == "4.0.0"
        assert data["artifact_id"] == expected_hash
    @pytest.mark.integration
    def test_get_nonexistent_version_returns_404(self, integration_client, test_package):
        """Fetching a version that was never uploaded returns 404."""
        project, package = test_package
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions/99.99.99"
        )
        assert response.status_code == 404
 class TestDownloadByVersion:
    """Tests for downloading artifacts by version."""
    @pytest.mark.integration
    def test_download_by_version_prefix(self, integration_client, test_package):
        """Test downloading artifact using version: prefix.

        Uploads content as version 5.0.0 and downloads it in proxy mode via
        the ``version:5.0.0`` reference, expecting the same bytes back.
        """
        project, package = test_package
        content = b"download by version test"
        # Upload with version
        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        upload_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "5.0.0"},
        )
        # Surface a rejected upload here instead of as a download failure below
        assert upload_response.status_code == 200
        # Download by version prefix
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/version:5.0.0",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
    @pytest.mark.integration
    def test_download_nonexistent_version_returns_404(self, integration_client, test_package):
        """Test downloading nonexistent version returns 404."""
        project, package = test_package
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/version:99.0.0"
        )
        assert response.status_code == 404
    @pytest.mark.integration
    def test_version_resolution_priority(self, integration_client, test_package):
        """Test that version: prefix explicitly resolves to version, not tag.

        Creates a version "6.0.0" and a tag "6.0.0" that point at different
        content, then checks that the ``version:`` and ``tag:`` prefixes each
        return their own content.
        """
        project, package = test_package
        version_content = b"this is the version content"
        tag_content = b"this is the tag content"
        # Create a version 6.0.0
        files1 = {"file": ("app-v.tar.gz", io.BytesIO(version_content), "application/octet-stream")}
        version_upload = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files1,
            data={"version": "6.0.0"},
        )
        assert version_upload.status_code == 200
        # Create a tag named "6.0.0" pointing to different content
        files2 = {"file": ("app-t.tar.gz", io.BytesIO(tag_content), "application/octet-stream")}
        tag_upload = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files2,
            data={"tag": "6.0.0"},
        )
        # Both uploads must succeed, otherwise the comparison below is meaningless
        assert tag_upload.status_code == 200
        # Download with version: prefix should get version content
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/version:6.0.0",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == version_content
        # Download with tag: prefix should get tag content
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/tag:6.0.0",
            params={"mode": "proxy"},
        )
        assert response2.status_code == 200
        assert response2.content == tag_content
 class TestVersionDeletion:
    """Integration tests covering removal of package versions."""
    @pytest.mark.integration
    def test_delete_version(self, integration_client, test_package):
        """Upload version 7.0.0, delete it, and confirm it can no longer be fetched."""
        project, package = test_package
        version_url = f"/api/v1/project/{project}/{package}/versions/7.0.0"
        # Upload the artifact that backs version 7.0.0
        payload = io.BytesIO(b"delete version test")
        integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files={"file": ("app.tar.gz", payload, "application/octet-stream")},
            data={"version": "7.0.0"},
        )
        # The version must be visible before it is deleted
        before = integration_client.get(version_url)
        assert before.status_code == 200
        # A successful delete answers 204 No Content
        removal = integration_client.delete(version_url)
        assert removal.status_code == 204
        # Afterwards the same URL must no longer resolve
        after = integration_client.get(version_url)
        assert after.status_code == 404
    @pytest.mark.integration
    def test_delete_nonexistent_version_returns_404(self, integration_client, test_package):
        """Deleting a version that does not exist must answer 404."""
        project, package = test_package
        missing_url = f"/api/v1/project/{project}/{package}/versions/99.0.0"
        outcome = integration_client.delete(missing_url)
        assert outcome.status_code == 404
--- a/docs/integrity-verification.md
+++ b/docs/integrity-verification.md
@@ -0,0 +1,294 @@
 # Integrity Verification
 Orchard uses content-addressable storage with SHA256 hashing to ensure artifact integrity. This document describes how integrity verification works and how to use it.
 ## How It Works
 ### Content-Addressable Storage
 Orchard stores artifacts using their SHA256 hash as the unique identifier. This provides several benefits:
 1. **Automatic deduplication**: Identical content is stored only once
 2. **Built-in integrity**: The artifact ID *is* the content hash
 3. **Tamper detection**: Any modification changes the hash, making corruption detectable
 When you upload a file:
 1. Orchard computes the SHA256 hash of the content
 2. The hash becomes the artifact ID (64-character hex string)
 3. The file is stored in S3 at `fruits/{hash[0:2]}/{hash[2:4]}/{hash}`
 4. The hash and metadata are recorded in the database
 ### Hash Format
 - Algorithm: SHA256
 - Format: 64-character lowercase hexadecimal string
 - Example: `dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f`
 ## Client-Side Verification
 ### Before Upload
 Compute the hash locally before uploading to verify the server received your content correctly:
 ```python
 import hashlib
 def compute_sha256(content: bytes) -> str:
    """Return the lowercase hex SHA256 digest of ``content``."""
    return hashlib.sha256(content).hexdigest()
 # Compute hash before upload (the with-block closes the file handle promptly)
 with open("myfile.tar.gz", "rb") as f:
    content = f.read()
 local_hash = compute_sha256(content)
 # Upload the file
 response = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
 )
 result = response.json()
 # Verify server computed the same hash
 assert result["artifact_id"] == local_hash, "Hash mismatch!"
 ```
 ### Providing Expected Hash on Upload
 You can provide the expected hash in the upload request. The server will reject the upload if the computed hash doesn't match:
 ```python
 response = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
    headers={"X-Checksum-SHA256": local_hash},
 )
 # Returns 422 if hash doesn't match
 if response.status_code == 422:
    print("Checksum mismatch - upload rejected")
 ```
 ### After Download
 Verify downloaded content matches the expected hash using response headers:
 ```python
 response = requests.get(
    f"{base_url}/api/v1/project/{project}/{package}/+/{tag}",
    params={"mode": "proxy"},
 )
 # Get expected hash from header
 expected_hash = response.headers.get("X-Checksum-SHA256")
 # Compute hash of downloaded content
 actual_hash = compute_sha256(response.content)
 # Verify
 if actual_hash != expected_hash:
    raise Exception(f"Integrity check failed! Expected {expected_hash}, got {actual_hash}")
 ```
 ### Response Headers for Verification
 Download responses include multiple headers for verification:
 | Header | Format | Description |
 |--------|--------|-------------|
 | `X-Checksum-SHA256` | Hex string | SHA256 hash (64 chars) |
 | `ETag` | `"<hash>"` | SHA256 hash in quotes |
 | `Digest` | `sha-256=<base64>` | RFC 3230 format (base64-encoded) |
 | `Content-Length` | Integer | File size in bytes |
 ### Server-Side Verification on Download
 Request server-side verification during download:
 ```bash
 # Pre-verification: Server verifies before streaming (returns 500 if corrupt)
 curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=pre"
 # Stream verification: Server verifies while streaming (logs error if corrupt)
 curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=stream"
 ```
 The `X-Verified` header indicates whether server-side verification was performed:
 - `X-Verified: true` - Content was verified by the server
 ## Server-Side Consistency Check
 ### Consistency Check Endpoint
 Administrators can run a consistency check to verify all stored artifacts:
 ```bash
 curl "${base_url}/api/v1/admin/consistency-check"
 ```
 Response:
 ```json
 {
  "total_artifacts_checked": 1234,
  "healthy": true,
  "orphaned_s3_objects": 0,
  "missing_s3_objects": 0,
  "size_mismatches": 0,
  "orphaned_s3_keys": [],
  "missing_s3_keys": [],
  "size_mismatch_artifacts": []
 }
 ```
 ### What the Check Verifies
 1. **Missing S3 objects**: Database records with no corresponding S3 object
 2. **Orphaned S3 objects**: S3 objects with no database record
 3. **Size mismatches**: S3 object size doesn't match database record
 ### Running Consistency Checks
 **Manual check:**
 ```bash
 # Check all artifacts
 curl "${base_url}/api/v1/admin/consistency-check"
 # Limit results (for large deployments)
 curl "${base_url}/api/v1/admin/consistency-check?limit=100"
 ```
 **Scheduled checks (recommended):**
 Set up a cron job or Kubernetes CronJob to run periodic checks:
 ```yaml
 # Kubernetes CronJob example
 apiVersion: batch/v1
 kind: CronJob
 metadata:
  name: orchard-consistency-check
 spec:
  schedule: "0 2 * * *"  # Daily at 2 AM
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: check
            # curlimages/curl ships curl but not jq, so the script parses the JSON with grep
            image: curlimages/curl
            env:
            - name: ORCHARD_URL
              value: "https://orchard.example.com"  # Replace with your Orchard base URL
            command:
            - /bin/sh
            - -c
            - |
              response=$(curl -s "${ORCHARD_URL}/api/v1/admin/consistency-check")
              # Anything other than "healthy": true (including an empty or error response) is a failure
              if ! echo "$response" | grep -Eq '"healthy":[[:space:]]*true'; then
                echo "ALERT: Consistency check failed!"
                echo "$response"
                exit 1
              fi
              echo "Consistency check passed"
          restartPolicy: OnFailure
 ```
 ## Recovery Procedures
 ### Corrupted Artifact (Size Mismatch)
 If the consistency check reports size mismatches:
 1. **Identify affected artifacts:**
   ```bash
   curl "${base_url}/api/v1/admin/consistency-check" | jq '.size_mismatch_artifacts'
   ```
 2. **Check if artifact can be re-uploaded:**
   - If the original content is available, delete the corrupted artifact and re-upload
   - The same content will produce the same artifact ID
 3. **If original content is lost:**
   - The artifact data is corrupted and cannot be recovered
   - Delete the artifact record and notify affected users
   - Consider restoring from backup if available
 ### Missing S3 Object
 If database records exist but S3 objects are missing:
 1. **Identify affected artifacts:**
   ```bash
   curl "${base_url}/api/v1/admin/consistency-check" | jq '.missing_s3_keys'
   ```
 2. **Check S3 bucket:**
   - Verify the S3 bucket exists and is accessible
   - Check S3 access logs for deletion events
   - Check if objects were moved or lifecycle-deleted
 3. **Recovery options:**
   - Restore from S3 versioning (if enabled)
   - Restore from backup
   - Re-upload original content (if available)
   - Delete the dangling database records (those whose S3 object cannot be restored)
 ### Orphaned S3 Objects
 If S3 objects exist without database records:
 1. **Identify orphaned objects:**
   ```bash
   curl "${base_url}/api/v1/admin/consistency-check" | jq '.orphaned_s3_keys'
   ```
 2. **Investigate cause:**
   - Upload interrupted before database commit?
   - Database record deleted but S3 cleanup failed?
 3. **Resolution:**
   - If content is needed, create database record manually
   - If content is not needed, delete the S3 object to reclaim storage
 ### Preventive Measures
 1. **Enable S3 versioning** to recover from accidental deletions
 2. **Regular backups** of both database and S3 bucket
 3. **Scheduled consistency checks** to detect issues early
 4. **Monitoring and alerting** on consistency check failures
 5. **Audit logging** to track all artifact operations
 ## Verification in CI/CD
 ### Verifying Artifacts in Pipelines
 ```bash
 #!/bin/bash
 # Download and verify artifact in CI pipeline
 ARTIFACT_URL="${ORCHARD_URL}/api/v1/project/${PROJECT}/${PACKAGE}/+/${TAG}"
 # Download with verification headers
 response=$(curl -s -D - "${ARTIFACT_URL}?mode=proxy" -o artifact.tar.gz)
 expected_hash=$(echo "$response" | grep -i "X-Checksum-SHA256" | cut -d: -f2 | tr -d ' \r')
 # Compute actual hash
 actual_hash=$(sha256sum artifact.tar.gz | cut -d' ' -f1)
 # Verify
 if [ "$actual_hash" != "$expected_hash" ]; then
    echo "ERROR: Integrity check failed!"
    echo "Expected: $expected_hash"
    echo "Actual:   $actual_hash"
    exit 1
 fi
 echo "Integrity verified: $actual_hash"
 ```
 ### Using Server-Side Verification
 For critical deployments, use server-side pre-verification:
 ```bash
 # Server verifies before streaming - returns 500 if corrupt
 curl -f "${ARTIFACT_URL}?mode=proxy&verify=true&verify_mode=pre" -o artifact.tar.gz
 ```
 This ensures the artifact is verified before any bytes are streamed to your pipeline.
--- a/helm/orchard/templates/deployment.yaml
+++ b/helm/orchard/templates/deployment.yaml
@@ -110,6 +110,12 @@ spec:
              value: {{ .Values.orchard.download.mode | quote }}
            - name: ORCHARD_PRESIGNED_URL_EXPIRY
              value: {{ .Values.orchard.download.presignedUrlExpiry | quote }}
            {{- /* Emit the login rate-limit variable only when orchard.rateLimit.login is set */}}
            {{- with .Values.orchard.rateLimit }}
            {{- with .login }}
            - name: ORCHARD_LOGIN_RATE_LIMIT
              value: {{ . | quote }}
            {{- end }}
            {{- end }}
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          readinessProbe:
--- a/helm/orchard/values-dev.yaml
+++ b/helm/orchard/values-dev.yaml
@@ -42,6 +42,7 @@ ingress:
  className: "nginx"
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt"
    nginx.ingress.kubernetes.io/proxy-body-size: "0"  # Disable body size limit for uploads
  hosts:
    - host: orchard-dev.common.global.bsf.tools  # Overridden by CI
      paths:
@@ -113,6 +114,10 @@ orchard:
    mode: "presigned"
    presignedUrlExpiry: 3600
  # Relaxed rate limits for dev/feature environments (allows integration tests to run)
  rateLimit:
    login: "1000/minute"  # Default is 5/minute, relaxed for CI integration tests
 # PostgreSQL - ephemeral, no persistence
 postgresql:
  enabled: true
--- a/helm/orchard/values-stage.yaml
+++ b/helm/orchard/values-stage.yaml
@@ -41,6 +41,7 @@ ingress:
  className: "nginx"
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt"
    nginx.ingress.kubernetes.io/proxy-body-size: "0"  # Disable body size limit for uploads
  hosts:
    - host: orchard-stage.common.global.bsf.tools
      paths:
@@ -120,6 +121,10 @@ orchard:
    mode: "presigned"  # presigned, redirect, or proxy
    presignedUrlExpiry: 3600  # Presigned URL expiry in seconds
  # Relaxed rate limits for stage (allows CI integration tests to run)
  rateLimit:
    login: "1000/minute"  # Default is 5/minute, relaxed for CI integration tests
 # PostgreSQL subchart configuration
 postgresql:
  enabled: true
Author	SHA1	Message	Date
Mondo Diaz	199821b34d	Merge branch 'feature/upload-download-tests' into 'main' Add comprehensive upload/download tests and streaming enhancements (#38, #40, #42, #43) Closes #38, #40, #42, and #43 See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!34	2026-01-21 09:35:12 -06:00
Mondo Diaz	584acd1e90	Add comprehensive upload/download tests and streaming enhancements (#38 , #40 , #42 , #43 )	2026-01-21 09:35:12 -06:00