Add comprehensive upload/download tests and streaming enhancements (#38, #40, #42, #43)

Mondo Diaz
2026-01-21 09:35:12 -06:00
parent f7ffc1c877
commit 584acd1e90
23 changed files with 5385 additions and 405 deletions

View File

@@ -8,6 +8,11 @@ variables:
PROSPER_VERSION: v0.64.1
# Use internal PyPI proxy instead of public internet
PIP_INDEX_URL: https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple
# Environment URLs (used by deploy and test jobs)
STAGE_URL: https://orchard-stage.common.global.bsf.tools
PROD_URL: https://orchard.common.global.bsf.tools
# Shared pip cache directory
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
# Prevent duplicate pipelines for MRs
workflow:
@@ -29,11 +34,47 @@ kics:
variables:
KICS_CONFIG: kics.config
# Post-deployment integration tests template
# Full integration test suite template (for feature/stage deployments)
# Runs the complete pytest integration test suite against the deployed environment
.integration_test_template: &integration_test_template
stage: deploy # Runs in deploy stage, but after deployment due to 'needs'
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 10m
timeout: 20m # Full suite takes longer than smoke tests
interruptible: true # Cancel if new pipeline starts
retry: 1 # Retry once on failure (network flakiness)
cache:
key: pip-$CI_COMMIT_REF_SLUG
paths:
- .pip-cache/
policy: pull-push
before_script:
- pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt
- pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio httpx
script:
- cd backend
# Run full integration test suite, excluding:
# - large/slow tests
# - requires_direct_s3 tests (can't access MinIO from outside K8s cluster)
# ORCHARD_TEST_URL tells the tests which server to connect to
# Note: Auth tests work because dev/stage deployments have relaxed rate limits
- |
python -m pytest tests/integration/ -v \
--junitxml=integration-report.xml \
-m "not large and not slow and not requires_direct_s3" \
--tb=short
artifacts:
when: always
expire_in: 1 week
paths:
- backend/integration-report.xml
reports:
junit: backend/integration-report.xml
# Lightweight smoke test template (for production - no test data creation)
.smoke_test_template: &smoke_test_template
stage: deploy
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 5m
before_script:
- pip install --index-url "$PIP_INDEX_URL" httpx
script:
@@ -43,12 +84,12 @@ kics:
import os
import sys
BASE_URL = os.environ.get("BASE_URL")
BASE_URL = os.environ.get("ORCHARD_TEST_URL")
if not BASE_URL:
print("ERROR: BASE_URL not set")
print("ERROR: ORCHARD_TEST_URL not set")
sys.exit(1)
print(f"Running integration tests against {BASE_URL}")
print(f"Running smoke tests against {BASE_URL}")
client = httpx.Client(base_url=BASE_URL, timeout=30.0)
errors = []
@@ -86,38 +127,37 @@ kics:
print(f" FAIL: {e}")
sys.exit(1)
else:
print("SUCCESS: All integration tests passed!")
print("SUCCESS: All smoke tests passed!")
sys.exit(0)
PYTEST_SCRIPT
# Integration tests for stage deployment
# Integration tests for stage deployment (full suite)
integration_test_stage:
<<: *integration_test_template
needs: [deploy_stage]
variables:
BASE_URL: https://orchard-stage.common.global.bsf.tools
ORCHARD_TEST_URL: $STAGE_URL
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Integration tests for feature deployment
# Integration tests for feature deployment (full suite)
integration_test_feature:
<<: *integration_test_template
needs: [deploy_feature]
variables:
BASE_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
ORCHARD_TEST_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
rules:
- if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
when: on_success
# Run Python backend tests
python_tests:
# Run Python backend unit tests
python_unit_tests:
stage: test
needs: [] # Run in parallel with build
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 15m
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
interruptible: true # Cancel if new pipeline starts
cache:
key: pip-$CI_COMMIT_REF_SLUG
paths:
@@ -128,7 +168,7 @@ python_tests:
- pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio pytest-cov httpx
script:
- cd backend
# Only run unit tests - integration tests require Docker Compose services
# Run unit tests (integration tests run post-deployment against live environment)
- python -m pytest tests/unit/ -v --cov=app --cov-report=term --cov-report=xml:coverage.xml --cov-report=html:coverage_html --junitxml=pytest-report.xml
artifacts:
when: always
@@ -150,6 +190,7 @@ frontend_tests:
needs: [] # Run in parallel with build
image: deps.global.bsf.tools/docker/node:20-alpine
timeout: 15m
interruptible: true # Cancel if new pipeline starts
cache:
key: npm-$CI_COMMIT_REF_SLUG
paths:
@@ -175,7 +216,7 @@ frontend_tests:
# Shared deploy configuration
.deploy_template: &deploy_template
stage: deploy
needs: [build_image, test_image, kics, hadolint, python_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
needs: [build_image, test_image, kics, hadolint, python_unit_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
.helm_setup: &helm_setup
@@ -184,47 +225,21 @@ frontend_tests:
# OCI-based charts from internal registry - no repo add needed
- helm dependency update
# Simplified deployment verification - just health check
# Full API/frontend checks are done by integration tests post-deployment
.verify_deployment: &verify_deployment |
echo "=== Waiting for health endpoint (certs may take a few minutes) ==="
for i in $(seq 1 30); do
if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then
echo "Health check passed!"
break
echo "Deployment URL: $BASE_URL"
exit 0
fi
echo "Attempt $i/30 - waiting 10s..."
sleep 10
done
# Verify health endpoint
echo ""
echo "=== Health Check ==="
curl -sf "$BASE_URL/health" || { echo "Health check failed"; exit 1; }
echo ""
# Verify API is responding
echo ""
echo "=== API Check (GET /api/v1/projects) ==="
HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" "$BASE_URL/api/v1/projects")
if [ "$HTTP_CODE" = "200" ]; then
echo "API responding: HTTP $HTTP_CODE"
else
echo "API check failed: HTTP $HTTP_CODE"
exit 1
fi
# Verify frontend is served
echo ""
echo "=== Frontend Check ==="
if curl -sf "$BASE_URL/" | grep -q "</html>"; then
echo "Frontend is being served"
else
echo "Frontend check failed"
exit 1
fi
echo ""
echo "=== All checks passed! ==="
echo "Deployment URL: $BASE_URL"
echo "Health check failed after 30 attempts"
exit 1
# Deploy to stage (main branch)
deploy_stage:
@@ -232,7 +247,7 @@ deploy_stage:
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-stage.yaml
BASE_URL: https://orchard-stage.common.global.bsf.tools
BASE_URL: $STAGE_URL
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
@@ -251,7 +266,7 @@ deploy_stage:
- *verify_deployment
environment:
name: stage
url: https://orchard-stage.common.global.bsf.tools
url: $STAGE_URL
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
@@ -297,10 +312,12 @@ deploy_feature:
- if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
when: on_success
# Cleanup feature branch deployment
# Cleanup feature branch deployment (standalone - doesn't need deploy dependencies)
cleanup_feature:
<<: *deploy_template
stage: deploy
needs: []
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
timeout: 5m
variables:
NAMESPACE: orch-dev-namespace
GIT_STRATEGY: none # No source needed, branch may be deleted
@@ -329,7 +346,7 @@ deploy_prod:
variables:
NAMESPACE: orch-prod-namespace
VALUES_FILE: helm/orchard/values-prod.yaml
BASE_URL: https://orchard.common.global.bsf.tools
BASE_URL: $PROD_URL
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
- *helm_setup
@@ -348,7 +365,7 @@ deploy_prod:
- *verify_deployment
environment:
name: production
url: https://orchard.common.global.bsf.tools
url: $PROD_URL
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
rules:
@@ -357,12 +374,12 @@ deploy_prod:
when: manual # Require manual approval for prod
allow_failure: false
# Integration tests for production deployment
integration_test_prod:
<<: *integration_test_template
# Smoke tests for production deployment (read-only, no test data creation)
smoke_test_prod:
<<: *smoke_test_template
needs: [deploy_prod]
variables:
BASE_URL: https://orchard.common.global.bsf.tools
ORCHARD_TEST_URL: $PROD_URL
rules:
- if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
when: on_success
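# For reference, a minimal standalone version of the smoke-test pattern used
# by .smoke_test_template above. It assumes only the /health endpoint; any
# route beyond that is not taken from this commit.
import os
import sys

import httpx

base_url = os.environ.get("ORCHARD_TEST_URL")
if not base_url:
    print("ERROR: ORCHARD_TEST_URL not set")
    sys.exit(1)

with httpx.Client(base_url=base_url, timeout=30.0) as client:
    resp = client.get("/health")
    if resp.status_code == 200:
        print(f"SUCCESS: health check passed against {base_url}")
        sys.exit(0)
    print(f"FAIL: /health returned HTTP {resp.status_code}")
    sys.exit(1)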

View File

@@ -11,3 +11,7 @@ bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:gene
90bb2a3a393d2361dc3136ee8d761debb0726d8a:backend/tests/unit/test_storage.py:generic-api-key:381
37666e41a72d2a4f34447c0d1a8728e1d7271d24:backend/tests/unit/test_storage.py:generic-api-key:381
0cc4f253621a9601c5193f6ae1e7ae33f0e7fc9b:backend/tests/unit/test_storage.py:generic-api-key:381
35fda65d381acc5ab59bc592ee3013f75906c197:backend/tests/unit/test_storage.py:generic-api-key:381
08dce6cbb836b687002751fed4159bfc2da61f8b:backend/tests/unit/test_storage.py:generic-api-key:381
617bcbe89cff9a009d77e4f1f1864efed1820e63:backend/tests/unit/test_storage.py:generic-api-key:381
1cbd33544388e0fe6db752fa8886fab33cf9ce7c:backend/tests/unit/test_storage.py:generic-api-key:381

View File

@@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
- Added data integrity tests (binary, text, unicode, compressed content) (#38)
- Added chunk boundary tests for edge cases (#38)
- Added `@pytest.mark.large` and `@pytest.mark.concurrent` test markers (#38)
- Added `generate_content()` and `generate_content_with_hash()` test helpers (#38)
- Added `sized_content` fixture for generating test content of specific sizes (#38)
- Added upload API tests: upload without tag, artifact creation verification, S3 object creation (#38)
- Added download API tests: tag: prefix resolution, 404 for nonexistent project/package/artifact (#38)
- Added download header tests: Content-Type, Content-Length, Content-Disposition, ETag, X-Checksum-SHA256 (#38)
- Added error handling tests: timeout behavior, checksum validation, resource cleanup, graceful error responses (#38)
- Added version API tests: version creation, auto-detection, listing, download by version prefix (#38)
- Added integrity verification tests: round-trip hash verification, client-side verification workflow, size variants (1KB-10MB) (#40)
- Added consistency check endpoint tests with response format validation (#40)
- Added corruption detection tests: bit flip, truncation, appended content, size mismatch, missing S3 objects (#40)
- Added Digest header tests (RFC 3230) and verification mode tests (#40)
- Added integrity verification documentation (`docs/integrity-verification.md`) (#40)
- Added conditional request support for downloads (If-None-Match, If-Modified-Since) returning 304 Not Modified (#42)
- Added caching headers to downloads: Cache-Control (immutable), Last-Modified (#42)
- Added 416 Range Not Satisfiable response for invalid range requests (#42)
- Added download completion logging with bytes transferred and throughput (#42)
- Added client disconnect handling during streaming downloads (#42)
- Added streaming download tests: range requests, conditional requests, caching headers, download resume (#42)
- Added upload duration and throughput metrics (`duration_ms`, `throughput_mbps`) to upload response (#43)
- Added upload progress logging for large files (hash computation and multipart upload phases) (#43)
- Added client disconnect handling during uploads with proper cleanup (#43)
- Added upload progress tracking endpoint `GET /upload/{upload_id}/progress` for resumable uploads (#43)
- Added large file upload tests (10MB, 100MB, 1GB) with multipart upload verification (#43)
- Added upload cancellation and timeout handling tests (#43)
- Added comprehensive API documentation for upload endpoints with curl, Python, and JavaScript examples (#43)
- Added `package_versions` table for immutable version tracking separate from mutable tags (#56)
- Versions are set at upload time via explicit `version` parameter or auto-detected from filename/metadata
- Version detection priority: explicit parameter > package metadata > filename pattern
@@ -31,6 +61,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added internal proxy configuration for npm, pip, helm, and apt (#51)
### Changed
- CI integration tests now run the full pytest suite (~350 tests) against the deployed environment instead of 3 smoke tests
- CI production deployment uses lightweight smoke tests only (no test data creation in prod)
- CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests
- Simplified deploy verification to health check only (full checks done by integration tests)
- Extracted environment URLs to global variables for maintainability
- Made `cleanup_feature` job standalone (no longer inherits deploy template dependencies)
- Renamed `integration_test_prod` to `smoke_test_prod` for clarity
- Updated download ref resolution to check versions before tags (version → tag → artifact ID) (#56)
- Deploy jobs now require all security scans to pass before deployment (added test_image, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis to dependencies) (#63)
- Increased deploy job timeout from 5m to 10m (#63)
@@ -44,6 +81,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improved pod naming: Orchard pods now named `orchard-{env}-server-*` for clarity (#51)
### Fixed
- Fixed CI integration test rate limiting: added configurable `ORCHARD_LOGIN_RATE_LIMIT` env var, relaxed to 1000/minute for dev/stage
- Fixed duplicate `TestSecurityEdgeCases` class definition in test_auth_api.py
- Fixed integration tests auth: session-scoped client, configurable credentials via env vars, fail-fast on auth errors
- Fixed 413 Request Entity Too Large errors on uploads by adding `proxy-body-size: "0"` nginx annotation to Orchard ingress
- Fixed CI tests that require direct S3 access: added `@pytest.mark.requires_direct_s3` marker and excluded from CI
- Fixed ref_count triggers not being created: added auto-migration for tags ref_count trigger functions
- Fixed Content-Disposition header encoding for non-ASCII filenames using RFC 5987 (#38)
- Fixed deploy jobs running even when tests or security scans fail (changed rules from `when: always` to `when: on_success`) (#63)
- Fixed python_tests job not using internal PyPI proxy (#63)
- Fixed `cleanup_feature` job failing when branch is deleted (`GIT_STRATEGY: none`) (#51)
@@ -53,6 +97,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed deploy jobs running when secrets scan fails (added `secrets` to deploy dependencies)
- Fixed dev environment memory requests to equal limits per cluster Kyverno policy
- Fixed init containers missing resource limits (Kyverno policy compliance)
- Fixed Python SyntaxWarning for invalid escape sequence in database migration regex pattern
### Removed
- Removed unused `store_streaming()` method from storage.py (#51)

View File

@@ -170,6 +170,62 @@ def _run_migrations():
END IF;
END $$;
""",
# Create ref_count trigger functions for tags (ensures triggers exist even if initial migration wasn't run)
"""
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
# Create triggers for tags ref_count management
"""
DO $$
BEGIN
-- Drop and recreate triggers to ensure they're current
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
EXECUTE FUNCTION update_artifact_ref_count();
END $$;
""",
# Create ref_count trigger functions for package_versions
"""
CREATE OR REPLACE FUNCTION increment_version_ref_count()
@@ -210,7 +266,7 @@ def _run_migrations():
END $$;
""",
# Migrate existing semver tags to package_versions
"""
r"""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
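# Hypothetical sanity check for the tags ref_count triggers above, using
# psycopg2. The DSN and the tags columns other than artifact_id
# (package_id, name) are illustrative assumptions, not from this commit.
import psycopg2

artifact_id = "<sha256-of-an-existing-artifact>"  # placeholder
package_id = "<existing-package-id>"              # placeholder

conn = psycopg2.connect("dbname=orchard")  # assumed DSN
cur = conn.cursor()

cur.execute("SELECT ref_count FROM artifacts WHERE id = %s", (artifact_id,))
before = cur.fetchone()[0]

# An insert into tags should fire tags_ref_count_insert_trigger
cur.execute(
    "INSERT INTO tags (package_id, name, artifact_id) VALUES (%s, %s, %s)",
    (package_id, "ref-count-demo", artifact_id),
)
cur.execute("SELECT ref_count FROM artifacts WHERE id = %s", (artifact_id,))
assert cur.fetchone()[0] == before + 1

conn.rollback()  # demo only; this also rolls back the trigger's update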

View File

@@ -82,6 +82,7 @@ from .schemas import (
ResumableUploadCompleteRequest,
ResumableUploadCompleteResponse,
ResumableUploadStatusResponse,
UploadProgressResponse,
GlobalSearchResponse,
SearchResultProject,
SearchResultPackage,
@@ -143,6 +144,31 @@ def sanitize_filename(filename: str) -> str:
return re.sub(r'[\r\n"]', "", filename)
def build_content_disposition(filename: str) -> str:
"""Build a Content-Disposition header value with proper encoding.
For pure-ASCII filenames, uses the simple form: attachment; filename="name"
For non-ASCII filenames, uses RFC 5987 encoding with a UTF-8 filename* parameter.
"""
from urllib.parse import quote
sanitized = sanitize_filename(filename)
# Check if filename is pure ASCII
try:
sanitized.encode('ascii')
# Pure ASCII - simple format
return f'attachment; filename="{sanitized}"'
except UnicodeEncodeError:
# Non-ASCII - use RFC 5987 encoding
# Provide both filename (ASCII fallback) and filename* (UTF-8 encoded)
ascii_fallback = sanitized.encode('ascii', errors='replace').decode('ascii')
# RFC 5987: filename*=charset'language'encoded_value
# We use UTF-8 encoding and percent-encode non-ASCII chars
encoded = quote(sanitized, safe='')
return f'attachment; filename="{ascii_fallback}"; filename*=UTF-8\'\'{encoded}'
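# Illustrative outputs of build_content_disposition, computed by hand
# ("é" is U+00E9, UTF-8 bytes 0xC3 0xA9); not taken from this commit:
#
#   build_content_disposition("report.pdf")
#   -> 'attachment; filename="report.pdf"'
#
#   build_content_disposition("résumé.pdf")
#   -> 'attachment; filename="r?sum?.pdf"; filename*=UTF-8\'\'r%C3%A9sum%C3%A9.pdf'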
def get_user_id_from_request(
request: Request,
db: Session,
@@ -2258,10 +2284,56 @@ def upload_artifact(
"""
Upload an artifact to a package.
Headers:
- X-Checksum-SHA256: Optional client-provided SHA256 for verification
- User-Agent: Captured for audit purposes
- Authorization: Bearer <api-key> for authentication
**Size Limits:**
- Minimum: 1 byte (empty files rejected)
- Maximum: 10GB (configurable via ORCHARD_MAX_FILE_SIZE)
- Files > 100MB automatically use S3 multipart upload
**Headers:**
- `X-Checksum-SHA256`: Optional SHA256 hash for server-side verification
- `Content-Length`: File size (required for early rejection of oversized files)
- `Authorization`: Bearer <api-key> for authentication
**Deduplication:**
Content-addressable storage automatically deduplicates identical files.
If the same content is uploaded multiple times, only one copy is stored.
**Response Metrics:**
- `duration_ms`: Upload duration in milliseconds
- `throughput_mbps`: Upload throughput in MB/s
- `deduplicated`: True if content already existed
**Example (curl):**
```bash
curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload" \\
-H "Authorization: Bearer <api-key>" \\
-F "file=@myfile.tar.gz" \\
-F "tag=v1.0.0"
```
**Example (Python requests):**
```python
import requests
with open('myfile.tar.gz', 'rb') as f:
response = requests.post(
'http://localhost:8080/api/v1/project/myproject/mypackage/upload',
files={'file': f},
data={'tag': 'v1.0.0'},
headers={'Authorization': 'Bearer <api-key>'}
)
```
**Example (JavaScript fetch):**
```javascript
const formData = new FormData();
formData.append('file', fileInput.files[0]);
formData.append('tag', 'v1.0.0');
const response = await fetch('/api/v1/project/myproject/mypackage/upload', {
method: 'POST',
headers: { 'Authorization': 'Bearer <api-key>' },
body: formData
});
```
"""
start_time = time.time()
settings = get_settings()
@@ -2363,6 +2435,30 @@ def upload_artifact(
except StorageError as e:
logger.error(f"Storage error during upload: {e}")
raise HTTPException(status_code=500, detail="Internal storage error")
except (ConnectionResetError, BrokenPipeError) as e:
# Client disconnected during upload
logger.warning(
f"Client disconnected during upload: project={project_name} "
f"package={package_name} filename={file.filename} error={e}"
)
raise HTTPException(
status_code=499, # Client Closed Request (nginx convention)
detail="Client disconnected during upload",
)
except Exception as e:
# Catch-all for unexpected errors including client disconnects
error_str = str(e).lower()
if "connection" in error_str or "broken pipe" in error_str or "reset" in error_str:
logger.warning(
f"Client connection error during upload: project={project_name} "
f"package={package_name} filename={file.filename} error={e}"
)
raise HTTPException(
status_code=499,
detail="Client connection error during upload",
)
logger.error(f"Unexpected error during upload: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error during upload")
# Verify client-provided checksum if present
checksum_verified = True
@@ -2555,6 +2651,12 @@ def upload_artifact(
detail="Failed to save upload record. Please retry.",
)
# Calculate throughput
throughput_mbps = None
if duration_ms > 0:
duration_seconds = duration_ms / 1000.0
throughput_mbps = round((storage_result.size / (1024 * 1024)) / duration_seconds, 2)
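# Worked example: a 50 MiB upload (52,428,800 bytes) completing in 4000 ms
# reports throughput_mbps = round(50 / 4.0, 2) = 12.5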
return UploadResponse(
artifact_id=storage_result.sha256,
sha256=storage_result.sha256,
@@ -2574,6 +2676,8 @@ def upload_artifact(
content_type=artifact.content_type,
original_name=artifact.original_name,
created_at=artifact.created_at,
duration_ms=duration_ms,
throughput_mbps=throughput_mbps,
)
@@ -2591,8 +2695,46 @@ def init_resumable_upload(
storage: S3Storage = Depends(get_storage),
):
"""
Initialize a resumable upload session.
Client must provide the SHA256 hash of the file in advance.
Initialize a resumable upload session for large files.
Resumable uploads allow uploading large files in chunks, with the ability
to resume after interruption. The client must compute the SHA256 hash
of the entire file before starting.
**Workflow:**
1. POST /upload/init - Initialize upload session (this endpoint)
2. PUT /upload/{upload_id}/part/{part_number} - Upload each part
3. GET /upload/{upload_id}/progress - Check upload progress (optional)
4. POST /upload/{upload_id}/complete - Finalize upload
5. DELETE /upload/{upload_id} - Abort upload (if needed)
**Chunk Size:**
Use the `chunk_size` returned in the response (10MB default).
Each part except the last must be exactly this size.
**Deduplication:**
If the expected_hash already exists in storage, the response will include
`already_exists: true` and no upload session is created.
**Example (curl):**
```bash
# Step 1: Initialize
curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/init" \\
-H "Authorization: Bearer <api-key>" \\
-H "Content-Type: application/json" \\
-d '{"expected_hash": "<sha256>", "filename": "large.tar.gz", "size": 104857600}'
# Step 2: Upload parts
curl -X PUT "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/part/1" \\
-H "Authorization: Bearer <api-key>" \\
--data-binary @part1.bin
# Step 3: Complete
curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/complete" \\
-H "Authorization: Bearer <api-key>" \\
-H "Content-Type: application/json" \\
-d '{"tag": "v1.0.0"}'
```
"""
user_id = get_user_id(request)
@@ -2686,6 +2828,10 @@ def init_resumable_upload(
# Initialize resumable upload
session = storage.initiate_resumable_upload(init_request.expected_hash)
# Set expected size for progress tracking
if session["upload_id"] and init_request.size:
storage.set_upload_expected_size(session["upload_id"], init_request.size)
return ResumableUploadInitResponse(
upload_id=session["upload_id"],
already_exists=False,
@@ -2752,6 +2898,64 @@ def upload_part(
raise HTTPException(status_code=404, detail=str(e))
@router.get(
"/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/progress",
response_model=UploadProgressResponse,
)
def get_upload_progress(
project_name: str,
package_name: str,
upload_id: str,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Get progress information for an in-flight resumable upload.
Returns progress metrics including bytes uploaded, percent complete,
elapsed time, and throughput.
"""
# Validate project and package exist
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = (
db.query(Package)
.filter(Package.project_id == project.id, Package.name == package_name)
.first()
)
if not package:
raise HTTPException(status_code=404, detail="Package not found")
progress = storage.get_upload_progress(upload_id)
if not progress:
# Return not_found status instead of 404 to allow polling
return UploadProgressResponse(
upload_id=upload_id,
status="not_found",
bytes_uploaded=0,
)
from datetime import datetime, timezone
started_at_dt = None
if progress.get("started_at"):
started_at_dt = datetime.fromtimestamp(progress["started_at"], tz=timezone.utc)
return UploadProgressResponse(
upload_id=upload_id,
status=progress.get("status", "in_progress"),
bytes_uploaded=progress.get("bytes_uploaded", 0),
bytes_total=progress.get("bytes_total"),
percent_complete=progress.get("percent_complete"),
parts_uploaded=progress.get("parts_uploaded", 0),
parts_total=progress.get("parts_total"),
started_at=started_at_dt,
elapsed_seconds=progress.get("elapsed_seconds"),
throughput_mbps=progress.get("throughput_mbps"),
)
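# Sketch: polling the progress endpoint above during a resumable upload.
# The base URL, project/package names, and upload_id are placeholders.
import time

import httpx

upload_id = "<upload-id-from-init>"
url = (
    "http://localhost:8080/api/v1/project/myproject/mypackage"
    f"/upload/{upload_id}/progress"
)

with httpx.Client(timeout=30.0) as client:
    while True:
        progress = client.get(url).json()
        if progress["status"] != "in_progress":  # completed/failed/not_found
            break
        print(
            f"{progress['bytes_uploaded']}/{progress.get('bytes_total')} bytes "
            f"({progress.get('percent_complete')}%) "
            f"at {progress.get('throughput_mbps')} MB/s"
        )
        time.sleep(2)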
@router.post(
"/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete"
)
@@ -2947,6 +3151,8 @@ def download_artifact(
storage: S3Storage = Depends(get_storage),
current_user: Optional[User] = Depends(get_current_user_optional),
range: Optional[str] = Header(None),
if_none_match: Optional[str] = Header(None, alias="If-None-Match"),
if_modified_since: Optional[str] = Header(None, alias="If-Modified-Since"),
mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
default=None,
description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)",
@@ -2963,6 +3169,15 @@ def download_artifact(
"""
Download an artifact by reference (tag name, artifact:hash, tag:name).
Supports conditional requests:
- If-None-Match: Returns 304 Not Modified if ETag matches
- If-Modified-Since: Returns 304 Not Modified if not modified since date
Supports range requests for partial downloads and resume:
- Range: bytes=0-1023 (first 1KB)
- Range: bytes=-1024 (last 1KB)
- Returns 206 Partial Content with Content-Range header
Verification modes:
- verify=false (default): No verification, maximum performance
- verify=true&verify_mode=stream: Compute hash while streaming, verify after completion.
@@ -2975,6 +3190,9 @@ def download_artifact(
- X-Content-Length: File size in bytes
- ETag: Artifact ID (SHA256)
- Digest: RFC 3230 format sha-256 hash
- Last-Modified: Artifact creation timestamp
- Cache-Control: Immutable caching for content-addressable storage
- Accept-Ranges: bytes (advertises range request support)
When verify=true:
- X-Verified: 'true' if verified, 'false' if verification failed
@@ -2999,6 +3217,52 @@ def download_artifact(
filename = sanitize_filename(artifact.original_name or f"{artifact.id}")
# Format Last-Modified header (RFC 7231 format)
last_modified = None
last_modified_str = None
if artifact.created_at:
last_modified = artifact.created_at
if last_modified.tzinfo is None:
last_modified = last_modified.replace(tzinfo=timezone.utc)
last_modified_str = last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")
# Handle conditional requests (If-None-Match, If-Modified-Since)
# Return 304 Not Modified if content hasn't changed
artifact_etag = f'"{artifact.id}"'
if if_none_match:
# Strip quotes and compare with artifact ETag
client_etag = if_none_match.strip().strip('"')
if client_etag == artifact.id or if_none_match == artifact_etag:
return Response(
status_code=304,
headers={
"ETag": artifact_etag,
"Cache-Control": "public, max-age=31536000, immutable",
**({"Last-Modified": last_modified_str} if last_modified_str else {}),
},
)
if if_modified_since and last_modified:
try:
# Parse If-Modified-Since header
from email.utils import parsedate_to_datetime
client_date = parsedate_to_datetime(if_modified_since)
if client_date.tzinfo is None:
client_date = client_date.replace(tzinfo=timezone.utc)
# If artifact hasn't been modified since client's date, return 304
if last_modified <= client_date:
return Response(
status_code=304,
headers={
"ETag": artifact_etag,
"Cache-Control": "public, max-age=31536000, immutable",
**({"Last-Modified": last_modified_str} if last_modified_str else {}),
},
)
except (ValueError, TypeError):
pass # Invalid date format, ignore and continue with download
# Audit log download
user_id = get_user_id(request)
_log_audit(
@@ -3016,22 +3280,28 @@ def download_artifact(
)
db.commit()
# Build common checksum headers (always included)
checksum_headers = {
# Build common headers (always included)
common_headers = {
"X-Checksum-SHA256": artifact.id,
"X-Content-Length": str(artifact.size),
"ETag": f'"{artifact.id}"',
"ETag": artifact_etag,
# Cache-Control: content-addressable storage is immutable
"Cache-Control": "public, max-age=31536000, immutable",
}
# Add Last-Modified header
if last_modified_str:
common_headers["Last-Modified"] = last_modified_str
# Add RFC 3230 Digest header
try:
digest_base64 = sha256_to_base64(artifact.id)
checksum_headers["Digest"] = f"sha-256={digest_base64}"
common_headers["Digest"] = f"sha-256={digest_base64}"
except Exception:
pass # Skip if conversion fails
# Add MD5 checksum if available
if artifact.checksum_md5:
checksum_headers["X-Checksum-MD5"] = artifact.checksum_md5
common_headers["X-Checksum-MD5"] = artifact.checksum_md5
# Determine download mode (query param overrides server default)
download_mode = mode or settings.download_mode
@@ -3071,15 +3341,29 @@ def download_artifact(
# Proxy mode (default fallback) - stream through backend
# Handle range requests (verification not supported for partial downloads)
if range:
stream, content_length, content_range = storage.get_stream(
artifact.s3_key, range
)
try:
stream, content_length, content_range = storage.get_stream(
artifact.s3_key, range
)
except Exception as e:
# S3 returns InvalidRange error for unsatisfiable ranges
error_str = str(e).lower()
if "invalidrange" in error_str or "range" in error_str:
raise HTTPException(
status_code=416,
detail="Range Not Satisfiable",
headers={
"Content-Range": f"bytes */{artifact.size}",
"Accept-Ranges": "bytes",
},
)
raise
headers = {
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Disposition": build_content_disposition(filename),
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
**checksum_headers,
**common_headers,
}
if content_range:
headers["Content-Range"] = content_range
@@ -3094,9 +3378,9 @@ def download_artifact(
# Full download with optional verification
base_headers = {
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Disposition": build_content_disposition(filename),
"Accept-Ranges": "bytes",
**checksum_headers,
**common_headers,
}
# Pre-verification mode: verify before streaming
@@ -3164,11 +3448,42 @@ def download_artifact(
},
)
# No verification - direct streaming
# No verification - direct streaming with completion logging
stream, content_length, _ = storage.get_stream(artifact.s3_key)
def logged_stream():
"""Generator that yields chunks and logs completion/disconnection."""
import time
start_time = time.time()
bytes_sent = 0
try:
for chunk in stream:
bytes_sent += len(chunk)
yield chunk
# Download completed successfully
duration = time.time() - start_time
throughput_mbps = (bytes_sent / (1024 * 1024)) / duration if duration > 0 else 0
logger.info(
f"Download completed: artifact={artifact.id[:16]}... "
f"bytes={bytes_sent} duration={duration:.2f}s throughput={throughput_mbps:.2f}MB/s"
)
except GeneratorExit:
# Client disconnected before download completed
duration = time.time() - start_time
logger.warning(
f"Download interrupted: artifact={artifact.id[:16]}... "
f"bytes_sent={bytes_sent}/{content_length} duration={duration:.2f}s"
)
except Exception as e:
duration = time.time() - start_time
logger.error(
f"Download error: artifact={artifact.id[:16]}... "
f"bytes_sent={bytes_sent} duration={duration:.2f}s error={e}"
)
raise
return StreamingResponse(
stream,
logged_stream(),
media_type=artifact.content_type or "application/octet-stream",
headers={
**base_headers,
@@ -3276,7 +3591,7 @@ def head_artifact(
# Build headers with checksum information
headers = {
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Disposition": build_content_disposition(filename),
"Accept-Ranges": "bytes",
"Content-Length": str(artifact.size),
"X-Artifact-Id": artifact.id,

View File

@@ -412,6 +412,9 @@ class UploadResponse(BaseModel):
content_type: Optional[str] = None
original_name: Optional[str] = None
created_at: Optional[datetime] = None
# Upload metrics (Issue #43)
duration_ms: Optional[int] = None # Upload duration in milliseconds
throughput_mbps: Optional[float] = None # Upload throughput in MB/s
# Resumable upload schemas
@@ -478,6 +481,21 @@ class ResumableUploadStatusResponse(BaseModel):
total_uploaded_bytes: int
class UploadProgressResponse(BaseModel):
"""Progress information for an in-flight upload"""
upload_id: str
status: str # 'in_progress', 'completed', 'failed', 'not_found'
bytes_uploaded: int = 0
bytes_total: Optional[int] = None
percent_complete: Optional[float] = None
parts_uploaded: int = 0
parts_total: Optional[int] = None
started_at: Optional[datetime] = None
elapsed_seconds: Optional[float] = None
throughput_mbps: Optional[float] = None
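# Hypothetical mid-flight UploadProgressResponse payload (values are
# illustrative and mutually consistent: 50 of 100 MiB uploaded in 12.5 s
# at the documented 10MB default chunk size):
#
#   {
#     "upload_id": "<upload-id>", "status": "in_progress",
#     "bytes_uploaded": 52428800, "bytes_total": 104857600,
#     "percent_complete": 50.0, "parts_uploaded": 5, "parts_total": 10,
#     "started_at": "2026-01-21T15:00:00Z",
#     "elapsed_seconds": 12.5, "throughput_mbps": 4.0
#   }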
# Consumer schemas
class ConsumerResponse(BaseModel):
id: UUID

View File

@@ -378,10 +378,16 @@ class S3Storage:
"""
# First pass: compute all hashes by streaming through file
try:
import time
sha256_hasher = hashlib.sha256()
md5_hasher = hashlib.md5()
sha1_hasher = hashlib.sha1()
size = 0
hash_start_time = time.time()
last_log_time = hash_start_time
log_interval_seconds = 5 # Log progress every 5 seconds
logger.info(f"Computing hashes for large file: expected_size={content_length}")
# Read file in chunks to compute hashes
while True:
@@ -393,6 +399,18 @@ class S3Storage:
sha1_hasher.update(chunk)
size += len(chunk)
# Log hash computation progress periodically
current_time = time.time()
if current_time - last_log_time >= log_interval_seconds:
elapsed = current_time - hash_start_time
percent = (size / content_length) * 100 if content_length > 0 else 0
throughput = (size / (1024 * 1024)) / elapsed if elapsed > 0 else 0
logger.info(
f"Hash computation progress: bytes={size}/{content_length} ({percent:.1f}%) "
f"throughput={throughput:.2f}MB/s"
)
last_log_time = current_time
# Enforce file size limit during streaming (protection against spoofing)
if size > settings.max_file_size:
raise FileSizeExceededError(
@@ -405,6 +423,14 @@ class S3Storage:
sha256_hash = sha256_hasher.hexdigest()
md5_hash = md5_hasher.hexdigest()
sha1_hash = sha1_hasher.hexdigest()
# Log hash computation completion
hash_elapsed = time.time() - hash_start_time
hash_throughput = (size / (1024 * 1024)) / hash_elapsed if hash_elapsed > 0 else 0
logger.info(
f"Hash computation completed: hash={sha256_hash[:16]}... "
f"size={size} duration={hash_elapsed:.2f}s throughput={hash_throughput:.2f}MB/s"
)
except (HashComputationError, FileSizeExceededError):
raise
except Exception as e:
@@ -458,8 +484,19 @@ class S3Storage:
upload_id = mpu["UploadId"]
try:
import time
parts = []
part_number = 1
bytes_uploaded = 0
upload_start_time = time.time()
last_log_time = upload_start_time
log_interval_seconds = 5 # Log progress every 5 seconds
total_parts = (content_length + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE
logger.info(
f"Starting multipart upload: hash={sha256_hash[:16]}... "
f"size={content_length} parts={total_parts}"
)
while True:
chunk = file.read(MULTIPART_CHUNK_SIZE)
@@ -479,8 +516,32 @@ class S3Storage:
"ETag": response["ETag"],
}
)
bytes_uploaded += len(chunk)
# Log progress periodically
current_time = time.time()
if current_time - last_log_time >= log_interval_seconds:
elapsed = current_time - upload_start_time
percent = (bytes_uploaded / content_length) * 100
throughput = (bytes_uploaded / (1024 * 1024)) / elapsed if elapsed > 0 else 0
logger.info(
f"Upload progress: hash={sha256_hash[:16]}... "
f"part={part_number}/{total_parts} "
f"bytes={bytes_uploaded}/{content_length} ({percent:.1f}%) "
f"throughput={throughput:.2f}MB/s"
)
last_log_time = current_time
part_number += 1
# Log completion
total_elapsed = time.time() - upload_start_time
final_throughput = (content_length / (1024 * 1024)) / total_elapsed if total_elapsed > 0 else 0
logger.info(
f"Multipart upload completed: hash={sha256_hash[:16]}... "
f"size={content_length} duration={total_elapsed:.2f}s throughput={final_throughput:.2f}MB/s"
)
# Complete multipart upload
complete_response = self.client.complete_multipart_upload(
Bucket=self.bucket,
@@ -502,12 +563,28 @@ class S3Storage:
except Exception as e:
# Abort multipart upload on failure
logger.error(f"Multipart upload failed: {e}")
self.client.abort_multipart_upload(
Bucket=self.bucket,
Key=s3_key,
UploadId=upload_id,
error_str = str(e).lower()
is_client_disconnect = (
isinstance(e, (ConnectionResetError, BrokenPipeError)) or
"connection" in error_str or "broken pipe" in error_str or "reset" in error_str
)
if is_client_disconnect:
logger.warning(
f"Multipart upload aborted (client disconnect): hash={sha256_hash[:16]}... "
f"parts_uploaded={len(parts)} bytes_uploaded={bytes_uploaded}"
)
else:
logger.error(f"Multipart upload failed: hash={sha256_hash[:16]}... error={e}")
try:
self.client.abort_multipart_upload(
Bucket=self.bucket,
Key=s3_key,
UploadId=upload_id,
)
logger.info(f"Multipart upload aborted and cleaned up: upload_id={upload_id[:16]}...")
except Exception as abort_error:
logger.error(f"Failed to abort multipart upload: {abort_error}")
raise
def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
@@ -529,12 +606,17 @@ class S3Storage:
mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
upload_id = mpu["UploadId"]
import time
session = {
"upload_id": upload_id,
"s3_key": s3_key,
"already_exists": False,
"parts": [],
"expected_hash": expected_hash,
"started_at": time.time(),
"bytes_uploaded": 0,
"expected_size": None, # Set when init provides size
"status": "in_progress",
}
self._active_uploads[upload_id] = session
return session
@@ -561,10 +643,57 @@ class S3Storage:
part_info = {
"PartNumber": part_number,
"ETag": response["ETag"],
"size": len(data),
}
session["parts"].append(part_info)
session["bytes_uploaded"] = session.get("bytes_uploaded", 0) + len(data)
return part_info
def get_upload_progress(self, upload_id: str) -> Optional[Dict[str, Any]]:
"""
Get progress information for a resumable upload.
Returns None if upload not found.
"""
import time
session = self._active_uploads.get(upload_id)
if not session:
return None
bytes_uploaded = session.get("bytes_uploaded", 0)
expected_size = session.get("expected_size")
started_at = session.get("started_at")
progress = {
"upload_id": upload_id,
"status": session.get("status", "in_progress"),
"bytes_uploaded": bytes_uploaded,
"bytes_total": expected_size,
"parts_uploaded": len(session.get("parts", [])),
"parts_total": None,
"started_at": started_at,
"elapsed_seconds": None,
"percent_complete": None,
"throughput_mbps": None,
}
if expected_size and expected_size > 0:
progress["percent_complete"] = round((bytes_uploaded / expected_size) * 100, 2)
progress["parts_total"] = (expected_size + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE
if started_at:
elapsed = time.time() - started_at
progress["elapsed_seconds"] = round(elapsed, 2)
if elapsed > 0 and bytes_uploaded > 0:
progress["throughput_mbps"] = round((bytes_uploaded / (1024 * 1024)) / elapsed, 2)
return progress
def set_upload_expected_size(self, upload_id: str, size: int):
"""Set the expected size for an upload (for progress tracking)."""
session = self._active_uploads.get(upload_id)
if session:
session["expected_size"] = size
def complete_resumable_upload(self, upload_id: str) -> Tuple[str, str]:
"""
Complete a resumable upload.

View File

@@ -12,6 +12,8 @@ markers =
unit: Unit tests (no external dependencies)
integration: Integration tests (require database/storage)
slow: Slow tests (skip with -m "not slow")
large: Large file tests (100MB+, skip with -m "not large")
concurrent: Concurrent operation tests
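# Sketch: running the suite with the same marker expression the CI job uses,
# via pytest's Python entry point (equivalent to the -m command line):
#
#   import sys
#   import pytest
#   sys.exit(pytest.main([
#       "tests/integration/",
#       "-m", "not large and not slow and not requires_direct_s3",
#       "--tb=short",
#   ]))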
# Coverage configuration
[coverage:run]

View File

@@ -9,6 +9,37 @@ This module provides:
import os
import pytest
# =============================================================================
# Pytest Markers
# =============================================================================
def pytest_configure(config):
"""Register custom pytest markers."""
config.addinivalue_line(
"markers",
"auth_intensive: marks tests that make many login requests (excluded from CI integration tests due to rate limiting)",
)
config.addinivalue_line(
"markers",
"integration: marks tests as integration tests",
)
config.addinivalue_line(
"markers",
"large: marks tests that handle large files (slow)",
)
config.addinivalue_line(
"markers",
"slow: marks tests as slow running",
)
config.addinivalue_line(
"markers",
"requires_direct_s3: marks tests that require direct S3/MinIO access (skipped in CI where S3 is not directly accessible)",
)
import io
from typing import Generator
from unittest.mock import MagicMock
@@ -32,6 +63,8 @@ from tests.factories import (
compute_md5,
compute_sha1,
upload_test_file,
generate_content,
generate_content_with_hash,
TEST_CONTENT_HELLO,
TEST_HASH_HELLO,
TEST_MD5_HELLO,
@@ -179,29 +212,64 @@ def test_app():
# =============================================================================
@pytest.fixture
@pytest.fixture(scope="session")
def integration_client():
"""
Create an authenticated test client for integration tests.
Uses the real database and MinIO from docker-compose.local.yml.
Authenticates as admin for write operations.
Uses the real database and MinIO from docker-compose.local.yml or a deployed environment.
Authenticates as admin for write operations. Session-scoped to reuse login across tests.
Environment variables:
ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
ORCHARD_TEST_USERNAME: Admin username for authentication (default: admin)
ORCHARD_TEST_PASSWORD: Admin password for authentication (default: changeme123)
"""
from httpx import Client
import httpx
# Connect to the running orchard-server container
# Connect to the running orchard-server container or a deployed environment
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
username = os.environ.get("ORCHARD_TEST_USERNAME", "admin")
password = os.environ.get("ORCHARD_TEST_PASSWORD", "changeme123")
with Client(base_url=base_url, timeout=30.0) as client:
with httpx.Client(base_url=base_url, timeout=30.0) as client:
# Login as admin to enable write operations
login_response = client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
json={"username": username, "password": password},
)
# If login fails, tests will fail - that's expected if auth is broken
if login_response.status_code != 200:
# Try to continue without auth for backward compatibility
pass
pytest.fail(
f"Authentication failed against {base_url}: {login_response.status_code} - {login_response.text}. "
f"Set ORCHARD_TEST_USERNAME and ORCHARD_TEST_PASSWORD environment variables if using non-default credentials."
)
# Verify cookie was set
if not client.cookies:
pytest.fail(
f"Login succeeded but no session cookie was set. Response headers: {login_response.headers}"
)
yield client
@pytest.fixture
def auth_client():
"""
Create a function-scoped test client for authentication tests.
Unlike integration_client (session-scoped), this creates a fresh client
for each test. Use this for tests that manipulate authentication state
(login, logout, cookie clearing) to avoid polluting other tests.
Environment variables:
ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
"""
import httpx
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with httpx.Client(base_url=base_url, timeout=30.0) as client:
yield client
@@ -271,3 +339,41 @@ def test_content():
content = f"test-content-{uuid.uuid4().hex}".encode()
sha256 = compute_sha256(content)
return (content, sha256)
@pytest.fixture
def sized_content():
"""
Factory fixture for generating content of specific sizes.
Usage:
def test_example(sized_content):
content, hash = sized_content(1024) # 1KB
content, hash = sized_content(1024 * 1024) # 1MB
"""
def _generate(size: int, seed: int | None = None):
return generate_content_with_hash(size, seed)
return _generate
# =============================================================================
# Size Constants for Tests
# =============================================================================
# Common file sizes for boundary testing
SIZE_1B = 1
SIZE_1KB = 1024
SIZE_10KB = 10 * 1024
SIZE_100KB = 100 * 1024
SIZE_1MB = 1024 * 1024
SIZE_5MB = 5 * 1024 * 1024
SIZE_10MB = 10 * 1024 * 1024
SIZE_50MB = 50 * 1024 * 1024
SIZE_100MB = 100 * 1024 * 1024
SIZE_250MB = 250 * 1024 * 1024
SIZE_500MB = 500 * 1024 * 1024
SIZE_1GB = 1024 * 1024 * 1024
# Chunk size boundaries (based on typical S3 multipart chunk sizes)
CHUNK_SIZE = 64 * 1024 # 64KB typical chunk
MULTIPART_THRESHOLD = 100 * 1024 * 1024 # 100MB multipart threshold
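# Sketch of a test using the sized_content factory and the constants above.
# The upload route follows the curl example in the API docs; the project and
# package names are placeholders assumed to already exist.
import hashlib

def test_upload_1mb_roundtrip(integration_client, sized_content):
    content, sha256 = sized_content(SIZE_1MB, seed=42)
    assert hashlib.sha256(content).hexdigest() == sha256
    response = integration_client.post(
        "/api/v1/project/myproject/mypackage/upload",
        files={"file": ("one-mb.bin", content)},
    )
    assert response.status_code in (200, 201)
    assert response.json()["sha256"] == sha256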

View File

@@ -130,6 +130,41 @@ def upload_test_file(
return response.json()
def generate_content(size: int, seed: Optional[int] = None) -> bytes:
"""
Generate deterministic or random content of a specified size.
Args:
size: Size of content in bytes
seed: Optional seed for reproducible content (None for random)
Returns:
Bytes of the specified size
"""
if size == 0:
return b""
if seed is not None:
import random
rng = random.Random(seed)
return bytes(rng.randint(0, 255) for _ in range(size))
return os.urandom(size)
def generate_content_with_hash(size: int, seed: Optional[int] = None) -> tuple[bytes, str]:
"""
Generate content of specified size and compute its SHA256 hash.
Args:
size: Size of content in bytes
seed: Optional seed for reproducible content
Returns:
Tuple of (content_bytes, sha256_hash)
"""
content = generate_content(size, seed)
return content, compute_sha256(content)
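# Quick illustrative check (not part of this commit's tests): seeded
# generation is reproducible, while unseeded content comes from os.urandom.
#
#   a, hash_a = generate_content_with_hash(1024, seed=7)
#   b, hash_b = generate_content_with_hash(1024, seed=7)
#   assert a == b and hash_a == hash_b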
# =============================================================================
# Project/Package Factories
# =============================================================================

View File

@@ -1,16 +1,25 @@
"""Integration tests for authentication API endpoints."""
"""Integration tests for authentication API endpoints.
Note: These tests are marked as auth_intensive because they make many login
requests. Dev/stage deployments have relaxed rate limits (1000/minute) to
allow these tests to run. Production uses strict rate limits (5/minute).
"""
import pytest
from uuid import uuid4
# Mark all tests in this module as auth_intensive (informational, not excluded from CI)
pytestmark = pytest.mark.auth_intensive
class TestAuthLogin:
"""Tests for login endpoint."""
@pytest.mark.integration
def test_login_success(self, integration_client):
def test_login_success(self, auth_client):
"""Test successful login with default admin credentials."""
response = integration_client.post(
response = auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
@@ -21,9 +30,9 @@ class TestAuthLogin:
assert "orchard_session" in response.cookies
@pytest.mark.integration
def test_login_invalid_password(self, integration_client):
def test_login_invalid_password(self, auth_client):
"""Test login with wrong password."""
response = integration_client.post(
response = auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "wrongpassword"},
)
@@ -31,9 +40,9 @@ class TestAuthLogin:
assert "Invalid username or password" in response.json()["detail"]
@pytest.mark.integration
def test_login_nonexistent_user(self, integration_client):
def test_login_nonexistent_user(self, auth_client):
"""Test login with non-existent user."""
response = integration_client.post(
response = auth_client.post(
"/api/v1/auth/login",
json={"username": "nonexistent", "password": "password"},
)
@@ -44,24 +53,24 @@ class TestAuthLogout:
"""Tests for logout endpoint."""
@pytest.mark.integration
def test_logout_success(self, integration_client):
def test_logout_success(self, auth_client):
"""Test successful logout."""
# First login
login_response = integration_client.post(
login_response = auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
assert login_response.status_code == 200
# Then logout
logout_response = integration_client.post("/api/v1/auth/logout")
logout_response = auth_client.post("/api/v1/auth/logout")
assert logout_response.status_code == 200
assert "Logged out successfully" in logout_response.json()["message"]
@pytest.mark.integration
def test_logout_without_session(self, integration_client):
def test_logout_without_session(self, auth_client):
"""Test logout without being logged in."""
response = integration_client.post("/api/v1/auth/logout")
response = auth_client.post("/api/v1/auth/logout")
# Should succeed even without session
assert response.status_code == 200
@@ -70,15 +79,15 @@ class TestAuthMe:
"""Tests for get current user endpoint."""
@pytest.mark.integration
def test_get_me_authenticated(self, integration_client):
def test_get_me_authenticated(self, auth_client):
"""Test getting current user when authenticated."""
# Login first
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = integration_client.get("/api/v1/auth/me")
response = auth_client.get("/api/v1/auth/me")
assert response.status_code == 200
data = response.json()
assert data["username"] == "admin"
@@ -87,67 +96,88 @@ class TestAuthMe:
assert "created_at" in data
@pytest.mark.integration
def test_get_me_unauthenticated(self, integration_client):
def test_get_me_unauthenticated(self, auth_client):
"""Test getting current user without authentication."""
# Clear any existing cookies
integration_client.cookies.clear()
auth_client.cookies.clear()
response = integration_client.get("/api/v1/auth/me")
response = auth_client.get("/api/v1/auth/me")
assert response.status_code == 401
assert "Not authenticated" in response.json()["detail"]
class TestAuthChangePassword:
"""Tests for change password endpoint."""
"""Tests for change password endpoint.
Note: These tests use dedicated test users instead of admin to avoid
invalidating the integration_client session (which uses admin).
"""
@pytest.mark.integration
def test_change_password_success(self, integration_client):
def test_change_password_success(self, auth_client):
"""Test successful password change."""
# Login first
integration_client.post(
# Login as admin to create a test user
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"pwchange_{uuid4().hex[:8]}"
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "oldpassword123"},
)
# Login as test user
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "oldpassword123"},
)
# Change password
response = integration_client.post(
response = auth_client.post(
"/api/v1/auth/change-password",
json={"current_password": "changeme123", "new_password": "newpassword123"},
json={"current_password": "oldpassword123", "new_password": "newpassword123"},
)
assert response.status_code == 200
# Verify old password no longer works
integration_client.cookies.clear()
response = integration_client.post(
auth_client.cookies.clear()
response = auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
json={"username": test_username, "password": "oldpassword123"},
)
assert response.status_code == 401
# Verify new password works
response = integration_client.post(
response = auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "newpassword123"},
json={"username": test_username, "password": "newpassword123"},
)
assert response.status_code == 200
# Reset password back to original for other tests
reset_response = integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "newpassword123", "new_password": "changeme123"},
)
assert reset_response.status_code == 200, "Failed to reset admin password back to default"
@pytest.mark.integration
def test_change_password_wrong_current(self, integration_client):
def test_change_password_wrong_current(self, auth_client):
"""Test password change with wrong current password."""
# Login first
integration_client.post(
# Login as admin to create a test user
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"pwwrong_{uuid4().hex[:8]}"
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
response = integration_client.post(
# Login as test user
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
response = auth_client.post(
"/api/v1/auth/change-password",
json={"current_password": "wrongpassword", "new_password": "newpassword"},
)
@@ -159,16 +189,16 @@ class TestAPIKeys:
"""Tests for API key management endpoints."""
@pytest.mark.integration
def test_create_and_list_api_key(self, integration_client):
def test_create_and_list_api_key(self, auth_client):
"""Test creating and listing API keys."""
# Login first
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create API key
create_response = integration_client.post(
create_response = auth_client.post(
"/api/v1/auth/keys",
json={"name": "test-key", "description": "Test API key"},
)
@@ -182,23 +212,23 @@ class TestAPIKeys:
api_key = data["key"]
# List API keys
list_response = integration_client.get("/api/v1/auth/keys")
list_response = auth_client.get("/api/v1/auth/keys")
assert list_response.status_code == 200
keys = list_response.json()
assert any(k["id"] == key_id for k in keys)
# Clean up - delete the key
integration_client.delete(f"/api/v1/auth/keys/{key_id}")
auth_client.delete(f"/api/v1/auth/keys/{key_id}")
@pytest.mark.integration
def test_use_api_key_for_auth(self, integration_client):
def test_use_api_key_for_auth(self, auth_client):
"""Test using API key for authentication."""
# Login and create API key
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = integration_client.post(
create_response = auth_client.post(
"/api/v1/auth/keys",
json={"name": "auth-test-key"},
)
@@ -206,8 +236,8 @@ class TestAPIKeys:
key_id = create_response.json()["id"]
# Clear cookies and use API key
integration_client.cookies.clear()
response = integration_client.get(
auth_client.cookies.clear()
response = auth_client.get(
"/api/v1/auth/me",
headers={"Authorization": f"Bearer {api_key}"},
)
@@ -215,21 +245,21 @@ class TestAPIKeys:
assert response.json()["username"] == "admin"
# Clean up
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
integration_client.delete(f"/api/v1/auth/keys/{key_id}")
auth_client.delete(f"/api/v1/auth/keys/{key_id}")
@pytest.mark.integration
def test_delete_api_key(self, integration_client):
def test_delete_api_key(self, auth_client):
"""Test revoking an API key."""
# Login and create API key
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = integration_client.post(
create_response = auth_client.post(
"/api/v1/auth/keys",
json={"name": "delete-test-key"},
)
@@ -237,12 +267,12 @@ class TestAPIKeys:
api_key = create_response.json()["key"]
# Delete the key
delete_response = integration_client.delete(f"/api/v1/auth/keys/{key_id}")
delete_response = auth_client.delete(f"/api/v1/auth/keys/{key_id}")
assert delete_response.status_code == 200
# Verify key no longer works
integration_client.cookies.clear()
response = integration_client.get(
auth_client.cookies.clear()
response = auth_client.get(
"/api/v1/auth/me",
headers={"Authorization": f"Bearer {api_key}"},
)
@@ -253,32 +283,32 @@ class TestAdminUserManagement:
"""Tests for admin user management endpoints."""
@pytest.mark.integration
def test_list_users(self, integration_client):
def test_list_users(self, auth_client):
"""Test listing users as admin."""
# Login as admin
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = integration_client.get("/api/v1/admin/users")
response = auth_client.get("/api/v1/admin/users")
assert response.status_code == 200
users = response.json()
assert len(users) >= 1
assert any(u["username"] == "admin" for u in users)
@pytest.mark.integration
def test_create_user(self, integration_client):
def test_create_user(self, auth_client):
"""Test creating a new user as admin."""
# Login as admin
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create new user
test_username = f"testuser_{uuid4().hex[:8]}"
response = integration_client.post(
response = auth_client.post(
"/api/v1/admin/users",
json={
"username": test_username,
@@ -293,31 +323,31 @@ class TestAdminUserManagement:
assert data["is_admin"] is False
# Verify new user can login
integration_client.cookies.clear()
login_response = integration_client.post(
auth_client.cookies.clear()
login_response = auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "testpassword"},
)
assert login_response.status_code == 200
@pytest.mark.integration
def test_update_user(self, integration_client):
def test_update_user(self, auth_client):
"""Test updating a user as admin."""
# Login as admin
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user
test_username = f"updateuser_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password"},
)
# Update the user
response = integration_client.put(
response = auth_client.put(
f"/api/v1/admin/users/{test_username}",
json={"email": "updated@example.com", "is_admin": True},
)
@@ -327,59 +357,59 @@ class TestAdminUserManagement:
assert data["is_admin"] is True
@pytest.mark.integration
def test_reset_user_password(self, integration_client):
def test_reset_user_password(self, auth_client):
"""Test resetting a user's password as admin."""
# Login as admin
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user
test_username = f"resetuser_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "oldpassword"},
)
# Reset password
response = integration_client.post(
response = auth_client.post(
f"/api/v1/admin/users/{test_username}/reset-password",
json={"new_password": "newpassword"},
)
assert response.status_code == 200
# Verify new password works
integration_client.cookies.clear()
login_response = integration_client.post(
auth_client.cookies.clear()
login_response = auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "newpassword"},
)
assert login_response.status_code == 200
@pytest.mark.integration
def test_non_admin_cannot_access_admin_endpoints(self, integration_client):
def test_non_admin_cannot_access_admin_endpoints(self, auth_client):
"""Test that non-admin users cannot access admin endpoints."""
# Login as admin and create non-admin user
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"nonadmin_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password", "is_admin": False},
)
# Login as non-admin
integration_client.cookies.clear()
integration_client.post(
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password"},
)
# Try to access admin endpoints
response = integration_client.get("/api/v1/admin/users")
response = auth_client.get("/api/v1/admin/users")
assert response.status_code == 403
assert "Admin privileges required" in response.json()["detail"]
@@ -388,28 +418,28 @@ class TestSecurityEdgeCases:
"""Tests for security edge cases and validation."""
@pytest.mark.integration
def test_login_inactive_user(self, integration_client):
def test_login_inactive_user(self, auth_client):
"""Test that inactive users cannot login."""
# Login as admin and create a user
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"inactive_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Deactivate the user
integration_client.put(
auth_client.put(
f"/api/v1/admin/users/{test_username}",
json={"is_active": False},
)
# Try to login as inactive user
integration_client.cookies.clear()
response = integration_client.post(
auth_client.cookies.clear()
response = auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
@@ -417,14 +447,14 @@ class TestSecurityEdgeCases:
assert "Invalid username or password" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_create(self, integration_client):
def test_password_too_short_on_create(self, auth_client):
"""Test that short passwords are rejected when creating users."""
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = integration_client.post(
response = auth_client.post(
"/api/v1/admin/users",
json={"username": f"shortpw_{uuid4().hex[:8]}", "password": "short"},
)
@@ -432,36 +462,49 @@ class TestSecurityEdgeCases:
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_change(self, integration_client):
def test_password_too_short_on_change(self, auth_client):
"""Test that short passwords are rejected when changing password."""
integration_client.post(
# Create test user
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"shortchange_{uuid4().hex[:8]}"
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
response = integration_client.post(
# Login as test user
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
response = auth_client.post(
"/api/v1/auth/change-password",
json={"current_password": "changeme123", "new_password": "short"},
json={"current_password": "password123", "new_password": "short"},
)
assert response.status_code == 400
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_reset(self, integration_client):
def test_password_too_short_on_reset(self, auth_client):
"""Test that short passwords are rejected when resetting password."""
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user first
test_username = f"resetshort_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
response = integration_client.post(
response = auth_client.post(
f"/api/v1/admin/users/{test_username}/reset-password",
json={"new_password": "short"},
)
@@ -469,23 +512,23 @@ class TestSecurityEdgeCases:
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_duplicate_username_rejected(self, integration_client):
def test_duplicate_username_rejected(self, auth_client):
"""Test that duplicate usernames are rejected."""
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"duplicate_{uuid4().hex[:8]}"
# Create user first time
response1 = integration_client.post(
response1 = auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
assert response1.status_code == 200
# Try to create same username again
response2 = integration_client.post(
response2 = auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password456"},
)
@@ -493,14 +536,14 @@ class TestSecurityEdgeCases:
assert "already exists" in response2.json()["detail"]
@pytest.mark.integration
def test_cannot_delete_other_users_api_key(self, integration_client):
def test_cannot_delete_other_users_api_key(self, auth_client):
"""Test that users cannot delete API keys owned by other users."""
# Login as admin and create an API key
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = integration_client.post(
create_response = auth_client.post(
"/api/v1/auth/keys",
json={"name": "admin-key"},
)
@@ -508,253 +551,65 @@ class TestSecurityEdgeCases:
# Create a non-admin user
test_username = f"nonadmin_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as non-admin
integration_client.cookies.clear()
integration_client.post(
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
# Try to delete admin's API key
response = integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
response = auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
assert response.status_code == 403
assert "Cannot delete another user's API key" in response.json()["detail"]
# Cleanup: login as admin and delete the key
integration_client.cookies.clear()
integration_client.post(
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
@pytest.mark.integration
def test_sessions_invalidated_on_password_change(self, integration_client):
def test_sessions_invalidated_on_password_change(self, auth_client):
"""Test that all sessions are invalidated when password is changed."""
# Create a test user
integration_client.post(
auth_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"sessiontest_{uuid4().hex[:8]}"
integration_client.post(
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as test user
integration_client.cookies.clear()
login_response = integration_client.post(
auth_client.cookies.clear()
login_response = auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
assert login_response.status_code == 200
# Verify session works
me_response = integration_client.get("/api/v1/auth/me")
me_response = auth_client.get("/api/v1/auth/me")
assert me_response.status_code == 200
# Change password
integration_client.post(
auth_client.post(
"/api/v1/auth/change-password",
json={"current_password": "password123", "new_password": "newpassword123"},
)
# Old session should be invalidated - try to access /me
# (note: the change-password call itself may have cleared the session cookie)
me_response2 = integration_client.get("/api/v1/auth/me")
me_response2 = auth_client.get("/api/v1/auth/me")
# This should fail because all sessions were invalidated
assert me_response2.status_code == 401

View File

@@ -0,0 +1,737 @@
"""
Integration tests for concurrent upload and download operations.
Tests cover:
- Concurrent uploads of different files
- Concurrent uploads of same file (deduplication race)
- Concurrent downloads of same artifact
- Concurrent downloads of different artifacts
- Mixed concurrent uploads and downloads
- Data corruption prevention under concurrency
"""
import pytest
import io
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.factories import (
compute_sha256,
upload_test_file,
generate_content_with_hash,
)
def get_api_key(integration_client):
"""Create an API key for concurrent test workers."""
import uuid
response = integration_client.post(
"/api/v1/auth/keys",
json={"name": f"concurrent-test-{uuid.uuid4().hex[:8]}"},
)
if response.status_code == 200:
return response.json()["key"]
return None
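# All worker functions in this module follow the same pattern: each thread opens
# its own httpx.Client so connections and cookie state stay isolated, performs a
# single request, and appends to the shared `results`/`errors` lists (safe since
# list.append is atomic in CPython). The `for future in as_completed(futures):
# pass` loops just drain the iterator to wait for every worker to finish.
# A condensed, illustrative sketch of the upload worker (the helper name is
# illustrative, not part of the test suite's API):
def _upload_once_sketch(base_url, api_key, project, package, tag, content):
    """Perform one authenticated upload, mirroring the worker pattern below."""
    from httpx import Client
    with Client(base_url=base_url, timeout=60.0) as client:
        files = {"file": (f"{tag}.bin", io.BytesIO(content), "application/octet-stream")}
        return client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"tag": tag},
            headers={"Authorization": f"Bearer {api_key}"},
        )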
class TestConcurrentUploads:
"""Tests for concurrent upload operations."""
@pytest.mark.integration
@pytest.mark.concurrent
def test_2_concurrent_uploads_different_files(self, integration_client, test_package):
"""Test 2 concurrent uploads of different files."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
files_data = [
generate_content_with_hash(1024, seed=i) for i in range(2)
]
results = []
errors = []
def upload_worker(idx, content, expected_hash):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"concurrent-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
result = response.json()
results.append((idx, result, expected_hash))
else:
errors.append(f"Worker {idx}: Status {response.status_code}: {response.text}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=2) as executor:
futures = [
executor.submit(upload_worker, i, content, hash)
for i, (content, hash) in enumerate(files_data)
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == 2
# Verify each upload returned correct artifact_id
for idx, result, expected_hash in results:
assert result["artifact_id"] == expected_hash
@pytest.mark.integration
@pytest.mark.concurrent
def test_5_concurrent_uploads_different_files(self, integration_client, test_package):
"""Test 5 concurrent uploads of different files."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
num_files = 5
files_data = [
generate_content_with_hash(2048, seed=100 + i) for i in range(num_files)
]
results = []
errors = []
def upload_worker(idx, content, expected_hash):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"concurrent5-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
result = response.json()
results.append((idx, result, expected_hash))
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_files) as executor:
futures = [
executor.submit(upload_worker, i, content, hash)
for i, (content, hash) in enumerate(files_data)
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_files
# Verify all uploads have unique artifact_ids
artifact_ids = set(r[1]["artifact_id"] for r in results)
assert len(artifact_ids) == num_files
@pytest.mark.integration
@pytest.mark.concurrent
def test_10_concurrent_uploads_different_files(self, integration_client, test_package):
"""Test 10 concurrent uploads of different files."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
num_files = 10
files_data = [
generate_content_with_hash(1024, seed=200 + i) for i in range(num_files)
]
results = []
errors = []
def upload_worker(idx, content, expected_hash):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"concurrent10-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
result = response.json()
results.append((idx, result, expected_hash))
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_files) as executor:
futures = [
executor.submit(upload_worker, i, content, hash)
for i, (content, hash) in enumerate(files_data)
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_files
@pytest.mark.integration
@pytest.mark.concurrent
def test_concurrent_uploads_same_file_deduplication(self, integration_client, test_package):
"""Test concurrent uploads of same file handle deduplication correctly."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
content, expected_hash = generate_content_with_hash(4096, seed=999)
num_concurrent = 5
results = []
errors = []
def upload_worker(idx):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": (f"same-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"dedup-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
results.append(response.json())
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_concurrent
# All should have same artifact_id
artifact_ids = set(r["artifact_id"] for r in results)
assert len(artifact_ids) == 1
assert expected_hash in artifact_ids
# Verify final ref_count equals number of uploads
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
assert response.json()["ref_count"] == num_concurrent
@pytest.mark.integration
@pytest.mark.concurrent
def test_concurrent_uploads_to_different_packages(self, integration_client, test_project, unique_test_id):
"""Test concurrent uploads to different packages."""
project = test_project
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
num_packages = 3
package_names = []
# Create multiple packages
for i in range(num_packages):
pkg_name = f"pkg-{unique_test_id}-{i}"
response = integration_client.post(
f"/api/v1/project/{project}/packages",
json={"name": pkg_name, "description": f"Package {i}"},
)
assert response.status_code == 200
package_names.append(pkg_name)
files_data = [
generate_content_with_hash(1024, seed=300 + i) for i in range(num_packages)
]
results = []
errors = []
def upload_worker(idx, package, content, expected_hash):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": "latest"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
result = response.json()
results.append((package, result, expected_hash))
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_packages) as executor:
futures = [
executor.submit(upload_worker, i, package_names[i], content, hash)
for i, (content, hash) in enumerate(files_data)
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_packages
class TestConcurrentDownloads:
"""Tests for concurrent download operations."""
@pytest.mark.integration
@pytest.mark.concurrent
def test_2_concurrent_downloads_same_artifact(self, integration_client, test_package):
"""Test 2 concurrent downloads of same artifact."""
project, package = test_package
content, expected_hash = generate_content_with_hash(2048, seed=400)
# Upload first
upload_test_file(integration_client, project, package, content, tag="download-test")
results = []
errors = []
def download_worker(idx):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
response = client.get(
f"/api/v1/project/{project}/{package}/+/download-test",
params={"mode": "proxy"},
)
if response.status_code == 200:
results.append((idx, response.content))
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=2) as executor:
futures = [executor.submit(download_worker, i) for i in range(2)]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == 2
# All downloads should match original
for idx, downloaded in results:
assert downloaded == content
@pytest.mark.integration
@pytest.mark.concurrent
def test_5_concurrent_downloads_same_artifact(self, integration_client, test_package):
"""Test 5 concurrent downloads of same artifact."""
project, package = test_package
content, expected_hash = generate_content_with_hash(4096, seed=500)
upload_test_file(integration_client, project, package, content, tag="download5-test")
num_downloads = 5
results = []
errors = []
def download_worker(idx):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
response = client.get(
f"/api/v1/project/{project}/{package}/+/download5-test",
params={"mode": "proxy"},
)
if response.status_code == 200:
results.append((idx, response.content))
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_downloads) as executor:
futures = [executor.submit(download_worker, i) for i in range(num_downloads)]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_downloads
for idx, downloaded in results:
assert downloaded == content
@pytest.mark.integration
@pytest.mark.concurrent
def test_10_concurrent_downloads_same_artifact(self, integration_client, test_package):
"""Test 10 concurrent downloads of same artifact."""
project, package = test_package
content, expected_hash = generate_content_with_hash(8192, seed=600)
upload_test_file(integration_client, project, package, content, tag="download10-test")
num_downloads = 10
results = []
errors = []
def download_worker(idx):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
response = client.get(
f"/api/v1/project/{project}/{package}/+/download10-test",
params={"mode": "proxy"},
)
if response.status_code == 200:
results.append((idx, response.content))
else:
errors.append(f"Worker {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_downloads) as executor:
futures = [executor.submit(download_worker, i) for i in range(num_downloads)]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_downloads
for idx, downloaded in results:
assert downloaded == content
@pytest.mark.integration
@pytest.mark.concurrent
def test_concurrent_downloads_different_artifacts(self, integration_client, test_package):
"""Test concurrent downloads of different artifacts."""
project, package = test_package
# Upload multiple files
num_files = 5
uploads = []
for i in range(num_files):
content, expected_hash = generate_content_with_hash(1024, seed=700 + i)
upload_test_file(
integration_client, project, package, content,
tag=f"multi-download-{i}"
)
uploads.append((f"multi-download-{i}", content))
results = []
errors = []
def download_worker(tag, expected_content):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
response = client.get(
f"/api/v1/project/{project}/{package}/+/{tag}",
params={"mode": "proxy"},
)
if response.status_code == 200:
results.append((tag, response.content, expected_content))
else:
errors.append(f"Tag {tag}: Status {response.status_code}")
except Exception as e:
errors.append(f"Tag {tag}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_files) as executor:
futures = [
executor.submit(download_worker, tag, content)
for tag, content in uploads
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_files
for tag, downloaded, expected in results:
assert downloaded == expected, f"Content mismatch for {tag}"
class TestMixedConcurrentOperations:
"""Tests for mixed concurrent upload and download operations."""
@pytest.mark.integration
@pytest.mark.concurrent
def test_upload_while_download_in_progress(self, integration_client, test_package):
"""Test uploading while a download is in progress."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
# Upload initial content
content1, hash1 = generate_content_with_hash(10240, seed=800) # 10KB
upload_test_file(integration_client, project, package, content1, tag="initial")
# New content for upload during download
content2, hash2 = generate_content_with_hash(10240, seed=801)
results = {"downloads": [], "uploads": []}
errors = []
def download_worker():
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
response = client.get(
f"/api/v1/project/{project}/{package}/+/initial",
params={"mode": "proxy"},
)
if response.status_code == 200:
results["downloads"].append(response.content)
else:
errors.append(f"Download: Status {response.status_code}")
except Exception as e:
errors.append(f"Download: {str(e)}")
def upload_worker():
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": ("new.bin", io.BytesIO(content2), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": "during-download"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
results["uploads"].append(response.json())
else:
errors.append(f"Upload: Status {response.status_code}")
except Exception as e:
errors.append(f"Upload: {str(e)}")
with ThreadPoolExecutor(max_workers=2) as executor:
futures = [
executor.submit(download_worker),
executor.submit(upload_worker),
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results["downloads"]) == 1
assert len(results["uploads"]) == 1
# Verify download got correct content
assert results["downloads"][0] == content1
# Verify upload succeeded
assert results["uploads"][0]["artifact_id"] == hash2
@pytest.mark.integration
@pytest.mark.concurrent
def test_multiple_uploads_and_downloads_simultaneously(self, integration_client, test_package):
"""Test multiple uploads and downloads running simultaneously."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
# Pre-upload some files for downloading
existing_files = []
for i in range(3):
content, hash = generate_content_with_hash(2048, seed=900 + i)
upload_test_file(integration_client, project, package, content, tag=f"existing-{i}")
existing_files.append((f"existing-{i}", content))
# New files for uploading
new_files = [
generate_content_with_hash(2048, seed=910 + i) for i in range(3)
]
results = {"downloads": [], "uploads": []}
errors = []
def download_worker(tag, expected):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
response = client.get(
f"/api/v1/project/{project}/{package}/+/{tag}",
params={"mode": "proxy"},
)
if response.status_code == 200:
results["downloads"].append((tag, response.content, expected))
else:
errors.append(f"Download {tag}: Status {response.status_code}")
except Exception as e:
errors.append(f"Download {tag}: {str(e)}")
def upload_worker(idx, content, expected_hash):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
files = {
"file": (f"new-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"new-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
results["uploads"].append((idx, response.json(), expected_hash))
else:
errors.append(f"Upload {idx}: Status {response.status_code}")
except Exception as e:
errors.append(f"Upload {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=6) as executor:
futures = []
# Submit downloads
for tag, content in existing_files:
futures.append(executor.submit(download_worker, tag, content))
# Submit uploads
for i, (content, hash) in enumerate(new_files):
futures.append(executor.submit(upload_worker, i, content, hash))
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results["downloads"]) == 3
assert len(results["uploads"]) == 3
# Verify downloads
for tag, downloaded, expected in results["downloads"]:
assert downloaded == expected, f"Download mismatch for {tag}"
# Verify uploads
for idx, result, expected_hash in results["uploads"]:
assert result["artifact_id"] == expected_hash
@pytest.mark.integration
@pytest.mark.concurrent
def test_no_data_corruption_under_concurrency(self, integration_client, test_package):
"""Test that no data corruption occurs under concurrent operations."""
project, package = test_package
api_key = get_api_key(integration_client)
assert api_key, "Failed to create API key"
# Create content with recognizable patterns
num_files = 5
files_data = []
for i in range(num_files):
# Each file has a unique repeating pattern for easy corruption detection
pattern = bytes([i] * 256)
content = pattern * 40 # 10KB each
hash = compute_sha256(content)
files_data.append((content, hash))
results = []
errors = []
def upload_and_verify(idx, content, expected_hash):
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=60.0) as client:
# Upload
files = {
"file": (f"pattern-{idx}.bin", io.BytesIO(content), "application/octet-stream")
}
upload_resp = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"pattern-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if upload_resp.status_code != 200:
errors.append(f"Upload {idx}: Status {upload_resp.status_code}")
return
upload_result = upload_resp.json()
if upload_result["artifact_id"] != expected_hash:
errors.append(f"Upload {idx}: Hash mismatch")
return
# Immediately download and verify
download_resp = client.get(
f"/api/v1/project/{project}/{package}/+/pattern-{idx}",
params={"mode": "proxy"},
)
if download_resp.status_code != 200:
errors.append(f"Download {idx}: Status {download_resp.status_code}")
return
if download_resp.content != content:
errors.append(f"Worker {idx}: DATA CORRUPTION DETECTED")
return
# Verify the downloaded content hash
downloaded_hash = compute_sha256(download_resp.content)
if downloaded_hash != expected_hash:
errors.append(f"Worker {idx}: Hash verification failed")
return
results.append(idx)
except Exception as e:
errors.append(f"Worker {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_files) as executor:
futures = [
executor.submit(upload_and_verify, i, content, hash)
for i, (content, hash) in enumerate(files_data)
]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Errors: {errors}"
assert len(results) == num_files

View File

@@ -0,0 +1,322 @@
"""
Integration tests for error handling in upload and download operations.
Tests cover:
- Timeout handling
- Invalid request handling
- Resource cleanup on failures
- Graceful error responses
"""
import pytest
import io
import time
from tests.factories import (
compute_sha256,
upload_test_file,
generate_content_with_hash,
)
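# The tests below exercise the optional X-Checksum-SHA256 request header: when a
# client supplies it, the server verifies the uploaded bytes against it before
# creating the artifact. A minimal sketch of a checksum-guarded upload, assuming
# `client` is an httpx client pointed at the deployment under test:
#
#     content = b"payload"
#     files = {"file": ("payload.bin", io.BytesIO(content), "application/octet-stream")}
#     response = client.post(
#         f"/api/v1/project/{project}/{package}/upload",
#         files=files,
#         headers={"X-Checksum-SHA256": compute_sha256(content)},
#     )
#     assert response.status_code == 200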
class TestUploadErrorHandling:
"""Tests for upload error handling."""
@pytest.mark.integration
def test_upload_to_nonexistent_project_returns_404(
self, integration_client, unique_test_id
):
"""Test upload to nonexistent project returns 404."""
content = b"test content for nonexistent project"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload",
files=files,
)
assert response.status_code == 404
@pytest.mark.integration
def test_upload_to_nonexistent_package_returns_404(
self, integration_client, test_project, unique_test_id
):
"""Test upload to nonexistent package returns 404."""
content = b"test content for nonexistent package"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
files=files,
)
assert response.status_code == 404
@pytest.mark.integration
def test_upload_empty_file_rejected(self, integration_client, test_package):
"""Test empty file upload is rejected."""
project, package = test_package
files = {"file": ("empty.bin", io.BytesIO(b""), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code in [400, 422]
@pytest.mark.integration
def test_upload_missing_file_returns_422(self, integration_client, test_package):
"""Test upload without file field returns 422."""
project, package = test_package
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
data={"tag": "no-file-provided"},
)
assert response.status_code == 422
@pytest.mark.integration
def test_upload_invalid_checksum_format_returns_400(
self, integration_client, test_package
):
"""Test upload with invalid checksum format returns 400."""
project, package = test_package
content = b"checksum format test"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": "invalid-hash-format"},
)
assert response.status_code == 400
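    # Note the status-code split exercised here and in the next test: a malformed
    # checksum header is rejected as a 400 (bad request), while a well-formed
    # checksum that does not match the uploaded bytes yields a 422.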
@pytest.mark.integration
def test_upload_checksum_mismatch_returns_422(
self, integration_client, test_package
):
"""Test upload with mismatched checksum returns 422."""
project, package = test_package
content = b"checksum mismatch test"
wrong_hash = "0" * 64 # Valid format but wrong hash
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
@pytest.mark.integration
def test_upload_with_correct_checksum_succeeds(
self, integration_client, test_package
):
"""Test upload with correct checksum succeeds."""
project, package = test_package
content = b"correct checksum test"
correct_hash = compute_sha256(content)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": correct_hash},
)
assert response.status_code == 200
assert response.json()["artifact_id"] == correct_hash
class TestDownloadErrorHandling:
"""Tests for download error handling."""
@pytest.mark.integration
def test_download_nonexistent_tag_returns_404(
self, integration_client, test_package
):
"""Test download of nonexistent tag returns 404."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/nonexistent-tag-xyz"
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_nonexistent_artifact_returns_404(
self, integration_client, test_package
):
"""Test download of nonexistent artifact ID returns 404."""
project, package = test_package
fake_hash = "a" * 64
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/artifact:{fake_hash}"
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_invalid_artifact_id_format(
self, integration_client, test_package
):
"""Test download with invalid artifact ID format."""
project, package = test_package
# Too short
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/artifact:abc123"
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_from_nonexistent_project_returns_404(
self, integration_client, unique_test_id
):
"""Test download from nonexistent project returns 404."""
response = integration_client.get(
f"/api/v1/project/nonexistent-{unique_test_id}/pkg/+/tag"
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_from_nonexistent_package_returns_404(
self, integration_client, test_project, unique_test_id
):
"""Test download from nonexistent package returns 404."""
response = integration_client.get(
f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/+/tag"
)
assert response.status_code == 404
class TestTimeoutBehavior:
"""Tests for timeout behavior (integration level)."""
@pytest.mark.integration
@pytest.mark.slow
def test_large_upload_completes_within_reasonable_time(
self, integration_client, test_package, sized_content
):
"""Test that a 10MB upload completes within reasonable time."""
project, package = test_package
content, expected_hash = sized_content(10 * 1024 * 1024, seed=999) # 10MB
start_time = time.time()
result = upload_test_file(
integration_client, project, package, content, tag="timeout-test"
)
elapsed = time.time() - start_time
assert result["artifact_id"] == expected_hash
# Should complete within 60 seconds for 10MB on local docker
assert elapsed < 60, f"Upload took too long: {elapsed:.2f}s"
@pytest.mark.integration
@pytest.mark.slow
def test_large_download_completes_within_reasonable_time(
self, integration_client, test_package, sized_content
):
"""Test that a 10MB download completes within reasonable time."""
project, package = test_package
content, expected_hash = sized_content(10 * 1024 * 1024, seed=998) # 10MB
# First upload
upload_test_file(
integration_client, project, package, content, tag="download-timeout-test"
)
# Then download and time it
start_time = time.time()
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/download-timeout-test",
params={"mode": "proxy"},
)
elapsed = time.time() - start_time
assert response.status_code == 200
assert len(response.content) == len(content)
# Should complete within 60 seconds for 10MB on local docker
assert elapsed < 60, f"Download took too long: {elapsed:.2f}s"
class TestResourceCleanup:
"""Tests for proper resource cleanup on failures.
Note: More comprehensive cleanup tests are in test_upload_download_api.py
(TestUploadFailureCleanup class) including S3 object cleanup verification.
"""
@pytest.mark.integration
def test_checksum_mismatch_no_orphaned_artifact(
self, integration_client, test_package, unique_test_id
):
"""Test checksum mismatch doesn't leave orphaned artifact."""
project, package = test_package
# Use unique content to ensure artifact doesn't exist from prior tests
content = f"checksum mismatch orphan test {unique_test_id}".encode()
wrong_hash = "0" * 64
actual_hash = compute_sha256(content)
# Verify artifact doesn't exist before test
pre_check = integration_client.get(f"/api/v1/artifact/{actual_hash}")
assert pre_check.status_code == 404, "Artifact should not exist before test"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
# Verify no artifact was created with either hash
response1 = integration_client.get(f"/api/v1/artifact/{wrong_hash}")
response2 = integration_client.get(f"/api/v1/artifact/{actual_hash}")
assert response1.status_code == 404
assert response2.status_code == 404
class TestGracefulErrorResponses:
"""Tests for graceful and informative error responses."""
@pytest.mark.integration
def test_404_response_has_detail_message(
self, integration_client, test_package
):
"""Test 404 responses include a detail message."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
)
assert response.status_code == 404
data = response.json()
assert "detail" in data
assert len(data["detail"]) > 0
@pytest.mark.integration
def test_422_response_has_detail_message(self, integration_client, test_package):
"""Test 422 responses include a detail message."""
project, package = test_package
# Upload with mismatched checksum
content = b"detail message test"
wrong_hash = "0" * 64
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
data = response.json()
assert "detail" in data
@pytest.mark.integration
def test_error_response_is_json(self, integration_client, unique_test_id):
"""Test error responses are valid JSON."""
response = integration_client.get(
f"/api/v1/project/nonexistent-{unique_test_id}/pkg/+/tag"
)
assert response.status_code == 404
# Should not raise exception - valid JSON
data = response.json()
assert isinstance(data, dict)

View File

@@ -0,0 +1,768 @@
"""
Integration tests for artifact integrity verification.
Tests cover:
- Round-trip verification (upload -> download -> verify hash)
- Consistency check endpoint
- Header-based verification
- Integrity verification across file sizes
- Client-side verification workflow
"""
import pytest
import io
import hashlib
from tests.factories import (
compute_sha256,
upload_test_file,
generate_content_with_hash,
s3_object_exists,
get_s3_client,
get_s3_bucket,
)
from tests.conftest import (
SIZE_1KB,
SIZE_10KB,
SIZE_100KB,
SIZE_1MB,
SIZE_10MB,
)
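# A minimal sketch of the client-side verification workflow exercised by the
# tests below; the helper name is illustrative, not part of the API under test.
def _verify_download_sketch(client, project, package, ref):
    """Download `ref` in proxy mode and verify the body against the
    X-Checksum-SHA256 response header by recomputing the hash locally."""
    response = client.get(
        f"/api/v1/project/{project}/{package}/+/{ref}",
        params={"mode": "proxy"},  # proxy mode returns the artifact bytes in the body
    )
    response.raise_for_status()
    local_hash = hashlib.sha256(response.content).hexdigest()
    return local_hash == response.headers.get("X-Checksum-SHA256")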
class TestRoundTripVerification:
"""Tests for complete round-trip integrity verification."""
@pytest.mark.integration
def test_upload_download_hash_matches(self, integration_client, test_package):
"""Test that upload -> download round trip preserves content integrity."""
project, package = test_package
content = b"Round trip integrity test content"
expected_hash = compute_sha256(content)
# Upload and capture returned hash
result = upload_test_file(
integration_client, project, package, content, tag="roundtrip"
)
uploaded_hash = result["artifact_id"]
# Verify upload returned correct hash
assert uploaded_hash == expected_hash
# Download artifact
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/roundtrip",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Compute hash of downloaded content
downloaded_hash = compute_sha256(response.content)
# All three hashes should match
assert downloaded_hash == expected_hash
assert downloaded_hash == uploaded_hash
@pytest.mark.integration
def test_upload_response_contains_hash(self, integration_client, test_package):
"""Test upload response contains artifact_id which is the SHA256 hash."""
project, package = test_package
content = b"Upload response hash test"
expected_hash = compute_sha256(content)
result = upload_test_file(integration_client, project, package, content)
assert "artifact_id" in result
assert result["artifact_id"] == expected_hash
assert len(result["artifact_id"]) == 64
assert all(c in "0123456789abcdef" for c in result["artifact_id"])
@pytest.mark.integration
def test_download_header_matches_artifact_id(self, integration_client, test_package):
"""Test X-Checksum-SHA256 header matches artifact ID."""
project, package = test_package
content = b"Header verification test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="header-check"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/header-check",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
def test_etag_matches_artifact_id(self, integration_client, test_package):
"""Test ETag header matches artifact ID."""
project, package = test_package
content = b"ETag verification test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="etag-check"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-check",
params={"mode": "proxy"},
)
assert response.status_code == 200
etag = response.headers.get("ETag", "").strip('"')
assert etag == expected_hash
@pytest.mark.integration
def test_artifact_endpoint_returns_correct_hash(self, integration_client, test_package):
"""Test artifact endpoint returns correct hash/ID."""
project, package = test_package
content = b"Artifact endpoint hash test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content)
# Query artifact directly
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
data = response.json()
assert data["id"] == expected_hash
assert data.get("sha256") == expected_hash
class TestClientSideVerificationWorkflow:
"""Tests for client-side verification workflow."""
@pytest.mark.integration
def test_client_can_verify_before_upload(self, integration_client, test_package):
"""Test client can compute hash before upload and verify response matches."""
project, package = test_package
content = b"Client pre-upload verification test"
# Client computes hash locally before upload
client_hash = compute_sha256(content)
# Upload
result = upload_test_file(integration_client, project, package, content)
# Client verifies server returned the same hash
assert result["artifact_id"] == client_hash
@pytest.mark.integration
def test_client_can_provide_checksum_header(self, integration_client, test_package):
"""Test client can provide X-Checksum-SHA256 header for verification."""
project, package = test_package
content = b"Client checksum header test"
client_hash = compute_sha256(content)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": client_hash},
)
assert response.status_code == 200
assert response.json()["artifact_id"] == client_hash
@pytest.mark.integration
def test_checksum_mismatch_rejected(self, integration_client, test_package):
"""Test upload with wrong client checksum is rejected."""
project, package = test_package
content = b"Checksum mismatch test"
wrong_hash = "0" * 64
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
@pytest.mark.integration
def test_client_can_verify_after_download(self, integration_client, test_package):
"""Test client can verify downloaded content matches header hash."""
project, package = test_package
content = b"Client post-download verification"
upload_test_file(
integration_client, project, package, content, tag="verify-after"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/verify-after",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Client gets hash from header
header_hash = response.headers.get("X-Checksum-SHA256")
# Client computes hash of downloaded content
downloaded_hash = compute_sha256(response.content)
# Client verifies they match
assert downloaded_hash == header_hash
class TestIntegritySizeVariants:
"""Tests for integrity verification across different file sizes."""
@pytest.mark.integration
def test_integrity_1kb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 1KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1KB, seed=100)
result = upload_test_file(
integration_client, project, package, content, tag="int-1kb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-1kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
def test_integrity_100kb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 100KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=101)
result = upload_test_file(
integration_client, project, package, content, tag="int-100kb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-100kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
def test_integrity_1mb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 1MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1MB, seed=102)
result = upload_test_file(
integration_client, project, package, content, tag="int-1mb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-1mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
@pytest.mark.slow
def test_integrity_10mb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 10MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_10MB, seed=103)
result = upload_test_file(
integration_client, project, package, content, tag="int-10mb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-10mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
class TestConsistencyCheck:
"""Tests for the admin consistency check endpoint."""
@pytest.mark.integration
def test_consistency_check_returns_200(self, integration_client):
"""Test consistency check endpoint returns 200."""
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
@pytest.mark.integration
def test_consistency_check_response_format(self, integration_client):
"""Test consistency check returns expected response format."""
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Check expected fields
assert "total_artifacts_checked" in data
assert "orphaned_s3_objects" in data
assert "missing_s3_objects" in data
assert "size_mismatches" in data
assert "healthy" in data
assert "orphaned_s3_keys" in data
assert "missing_s3_keys" in data
assert "size_mismatch_artifacts" in data
# Verify types
assert isinstance(data["total_artifacts_checked"], int)
assert isinstance(data["orphaned_s3_objects"], int)
assert isinstance(data["missing_s3_objects"], int)
assert isinstance(data["size_mismatches"], int)
assert isinstance(data["healthy"], bool)
assert isinstance(data["orphaned_s3_keys"], list)
assert isinstance(data["missing_s3_keys"], list)
assert isinstance(data["size_mismatch_artifacts"], list)
@pytest.mark.integration
def test_consistency_check_after_upload(self, integration_client, test_package):
"""Test consistency check passes after valid upload."""
project, package = test_package
content = b"Consistency check test content"
# Upload artifact
upload_test_file(integration_client, project, package, content)
# Run consistency check
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Verify check ran and no issues
assert data["total_artifacts_checked"] >= 1
assert data["healthy"] is True
@pytest.mark.integration
def test_consistency_check_limit_parameter(self, integration_client):
"""Test consistency check respects limit parameter."""
response = integration_client.get(
"/api/v1/admin/consistency-check",
params={"limit": 10}
)
assert response.status_code == 200
data = response.json()
# Lists should not exceed limit
assert len(data["orphaned_s3_keys"]) <= 10
assert len(data["missing_s3_keys"]) <= 10
assert len(data["size_mismatch_artifacts"]) <= 10
class TestDigestHeader:
"""Tests for RFC 3230 Digest header."""
@pytest.mark.integration
def test_download_includes_digest_header(self, integration_client, test_package):
"""Test download includes Digest header in RFC 3230 format."""
project, package = test_package
content = b"Digest header test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="digest-test"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/digest-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "Digest" in response.headers
# Verify Digest format (sha-256=base64hash)
digest = response.headers["Digest"]
assert digest.startswith("sha-256=")
@pytest.mark.integration
def test_digest_header_base64_valid(self, integration_client, test_package):
"""Test Digest header contains valid base64 encoding."""
import base64
project, package = test_package
content = b"Digest base64 test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="digest-b64"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/digest-b64",
params={"mode": "proxy"},
)
assert response.status_code == 200
digest = response.headers["Digest"]
base64_part = digest.split("=", 1)[1]
# Should be valid base64
try:
decoded = base64.b64decode(base64_part)
except Exception as e:
pytest.fail(f"Invalid base64 in Digest header: {e}")
# Keep the length assert outside the try so an AssertionError is not
# misreported as a base64 decoding failure
assert len(decoded) == 32  # SHA-256 digests are 32 bytes
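# Stronger check (sketch, assuming the digest covers the uploaded bytes):
# the decoded payload should be the raw form of the hex hash we computed.
assert decoded == bytes.fromhex(expected_hash)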
class TestVerificationModes:
"""Tests for download verification modes."""
@pytest.mark.integration
def test_pre_verification_mode(self, integration_client, test_package):
"""Test pre-verification mode verifies before streaming."""
project, package = test_package
content = b"Pre-verification mode test"
upload_test_file(
integration_client, project, package, content, tag="pre-verify"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/pre-verify",
params={"mode": "proxy", "verify": "true", "verify_mode": "pre"},
)
assert response.status_code == 200
assert response.content == content
# X-Verified header should be true
assert response.headers.get("X-Verified") == "true"
@pytest.mark.integration
def test_stream_verification_mode(self, integration_client, test_package):
"""Test streaming verification mode."""
project, package = test_package
content = b"Stream verification mode test"
upload_test_file(
integration_client, project, package, content, tag="stream-verify"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/stream-verify",
params={"mode": "proxy", "verify": "true", "verify_mode": "stream"},
)
assert response.status_code == 200
assert response.content == content
class TestArtifactIntegrityEndpoint:
"""Tests for artifact-specific integrity operations."""
@pytest.mark.integration
def test_artifact_size_matches(self, integration_client, test_package):
"""Test artifact endpoint returns correct size."""
project, package = test_package
content = b"Artifact size test content"
expected_size = len(content)
result = upload_test_file(integration_client, project, package, content)
artifact_id = result["artifact_id"]
response = integration_client.get(f"/api/v1/artifact/{artifact_id}")
assert response.status_code == 200
data = response.json()
assert data["size"] == expected_size
@pytest.mark.integration
def test_content_length_header_matches_size(self, integration_client, test_package):
"""Test Content-Length header matches artifact size."""
project, package = test_package
content = b"Content-Length header test"
expected_size = len(content)
upload_test_file(
integration_client, project, package, content, tag="content-len"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/content-len",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert int(response.headers.get("Content-Length", 0)) == expected_size
assert len(response.content) == expected_size
@pytest.mark.requires_direct_s3
class TestCorruptionDetection:
"""Tests for detecting corrupted S3 objects.
These tests directly manipulate S3 objects to simulate corruption
and verify that the system can detect hash mismatches.
Note: These tests require direct S3/MinIO access and are skipped in CI
where S3 is not directly accessible from the test runner.
"""
@pytest.mark.integration
def test_detection_of_corrupted_content(self, integration_client, test_package):
"""Test that corrupted S3 content is detected via hash mismatch.
Uploads content, then directly modifies the S3 object, then
verifies that the downloaded content hash doesn't match.
"""
project, package = test_package
content = b"Original content for corruption test"
expected_hash = compute_sha256(content)
# Upload original content
result = upload_test_file(
integration_client, project, package, content, tag="corrupt-test"
)
assert result["artifact_id"] == expected_hash
# Get the S3 object and corrupt it
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
# Replace with corrupted content
corrupted_content = b"Corrupted content - different from original!"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)
# Download via proxy (bypasses hash verification)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/corrupt-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Verify the downloaded content doesn't match original hash
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Corruption was not detected - hashes match"
assert response.content == corrupted_content
# The X-Checksum-SHA256 header should still show the original hash (from DB)
# but the actual content hash is different
header_hash = response.headers.get("X-Checksum-SHA256")
assert header_hash == expected_hash # Header shows expected hash
assert downloaded_hash != header_hash # But content is corrupted
# Restore original content for cleanup
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_detection_of_single_bit_flip(self, integration_client, test_package):
"""Test detection of a single bit flip in S3 object content."""
project, package = test_package
content = b"Content for single bit flip detection test"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="bitflip-test"
)
assert result["artifact_id"] == expected_hash
# Get S3 object and flip a single bit
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
# Flip the first bit of the first byte
corrupted_content = bytearray(content)
corrupted_content[0] ^= 0x01
corrupted_content = bytes(corrupted_content)
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)
# Download and verify hash mismatch
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/bitflip-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Single bit flip not detected"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_detection_of_truncated_content(self, integration_client, test_package):
"""Test detection of truncated S3 object."""
project, package = test_package
content = b"This is content that will be truncated for testing purposes"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="truncate-test"
)
assert result["artifact_id"] == expected_hash
# Get S3 object and truncate it
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
# Truncate to half the original size
truncated_content = content[: len(content) // 2]
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=truncated_content)
# Download and verify hash mismatch
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/truncate-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Truncation not detected"
assert len(response.content) < len(content), "Content was not truncated"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_detection_of_appended_content(self, integration_client, test_package):
"""Test detection of content with extra bytes appended."""
project, package = test_package
content = b"Original content"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="append-test"
)
assert result["artifact_id"] == expected_hash
# Get S3 object and append extra bytes
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
appended_content = content + b" - extra bytes appended"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=appended_content)
# Download and verify hash mismatch
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/append-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Appended content not detected"
assert len(response.content) > len(content), "Content was not extended"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_client_detects_hash_mismatch_post_download(
self, integration_client, test_package
):
"""Test that a client can detect hash mismatch after downloading corrupted content.
This simulates the full client verification workflow:
1. Download content
2. Get expected hash from header
3. Compute actual hash of content
4. Verify they match (or detect corruption)
"""
project, package = test_package
content = b"Content for client-side corruption detection"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="client-detect"
)
# Corrupt the S3 object
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
corrupted = b"This is completely different content"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted)
# Simulate client download and verification
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/client-detect",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Client gets expected hash from header
header_hash = response.headers.get("X-Checksum-SHA256")
# Client computes hash of downloaded content
actual_hash = compute_sha256(response.content)
# Client detects the mismatch
corruption_detected = actual_hash != header_hash
assert corruption_detected, "Client should detect hash mismatch"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_consistency_check_detects_size_mismatch(
self, integration_client, test_package, unique_test_id
):
"""Test that consistency check detects size mismatches.
Uploads content, modifies S3 object size, then runs consistency check.
"""
project, package = test_package
content = b"Content for size mismatch consistency check test " + unique_test_id.encode()
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="size-mismatch"
)
# Modify S3 object to have different size
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
different_size_content = content + b"extra extra extra"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=different_size_content)
# Run consistency check
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Should detect the size mismatch
assert data["size_mismatches"] >= 1 or len(data["size_mismatch_artifacts"]) >= 1
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_consistency_check_detects_missing_s3_object(
self, integration_client, test_package, unique_test_id
):
"""Test that consistency check detects missing S3 objects.
Uploads content, deletes S3 object, then runs consistency check.
"""
project, package = test_package
content = b"Content for missing S3 object test " + unique_test_id.encode()
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="missing-s3"
)
# Delete the S3 object
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
s3_client.delete_object(Bucket=bucket, Key=s3_key)
# Run consistency check
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Should detect the missing S3 object
assert data["missing_s3_objects"] >= 1 or len(data["missing_s3_keys"]) >= 1
# Restore the object for cleanup
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

View File

@@ -0,0 +1,552 @@
"""
Integration tests for large file upload functionality.
Tests cover:
- Large file uploads (100MB, 1GB)
- Multipart upload behavior
- Upload metrics (duration, throughput)
- Memory efficiency during uploads
- Upload progress tracking
Note: Large tests are marked with @pytest.mark.slow and will be skipped
by default. Run with `pytest --run-slow` to include them.
"""
import os
import pytest
import io
import time
from tests.factories import (
compute_sha256,
upload_test_file,
s3_object_exists,
)
from tests.conftest import (
SIZE_1KB,
SIZE_100KB,
SIZE_1MB,
SIZE_10MB,
SIZE_100MB,
SIZE_1GB,
)
class TestUploadMetrics:
"""Tests for upload duration and throughput metrics."""
@pytest.mark.integration
def test_upload_response_includes_duration_ms(self, integration_client, test_package):
"""Test upload response includes duration_ms field."""
project, package = test_package
content = b"duration test content"
result = upload_test_file(
integration_client, project, package, content, tag="duration-test"
)
assert "duration_ms" in result
assert result["duration_ms"] is not None
assert result["duration_ms"] >= 0
@pytest.mark.integration
def test_upload_response_includes_throughput(self, integration_client, test_package):
"""Test upload response includes throughput_mbps field."""
project, package = test_package
content = b"throughput test content"
result = upload_test_file(
integration_client, project, package, content, tag="throughput-test"
)
assert "throughput_mbps" in result
# For small files throughput may be very high or None
# Just verify the field exists
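# (The derivation is server-side; presumably file size over elapsed time,
#  so we deliberately don't assert a specific value.)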
@pytest.mark.integration
def test_upload_duration_reasonable(
self, integration_client, test_package, sized_content
):
"""Test upload duration is reasonable for file size."""
project, package = test_package
content, _ = sized_content(SIZE_1MB, seed=100)
start = time.time()
result = upload_test_file(
integration_client, project, package, content, tag="duration-check"
)
actual_duration = (time.time() - start) * 1000 # ms
# Reported duration should be close to actual
assert result["duration_ms"] is not None
# Allow some variance (network overhead)
assert result["duration_ms"] <= actual_duration + 1000 # Within 1s
class TestLargeFileUploads:
"""Tests for large file uploads using multipart."""
@pytest.mark.integration
def test_upload_10mb_file(self, integration_client, test_package, sized_content):
"""Test uploading a 10MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_10MB, seed=200)
result = upload_test_file(
integration_client, project, package, content, tag="large-10mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_10MB
assert result["duration_ms"] is not None
assert result["throughput_mbps"] is not None
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.requires_direct_s3
def test_upload_100mb_file(self, integration_client, test_package, sized_content):
"""Test uploading a 100MB file (triggers multipart upload)."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100MB, seed=300)
result = upload_test_file(
integration_client, project, package, content, tag="large-100mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_100MB
# Verify S3 object exists
assert s3_object_exists(expected_hash)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.large
def test_upload_1gb_file(self, integration_client, test_package, sized_content):
"""Test uploading a 1GB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1GB, seed=400)
result = upload_test_file(
integration_client, project, package, content, tag="large-1gb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_1GB
# Should have measurable throughput
assert result["throughput_mbps"] is not None
assert result["throughput_mbps"] > 0
@pytest.mark.integration
def test_large_file_deduplication(
self, integration_client, test_package, sized_content, unique_test_id
):
"""Test deduplication works for large files."""
project, package = test_package
# Use unique_test_id to ensure unique content per test run
seed = hash(unique_test_id) % 10000
content, expected_hash = sized_content(SIZE_10MB, seed=seed)
# First upload
result1 = upload_test_file(
integration_client, project, package, content, tag=f"dedup-{unique_test_id}-1"
)
# Note: may be True if previous test uploaded same content
first_dedupe = result1["deduplicated"]
# Second upload of same content
result2 = upload_test_file(
integration_client, project, package, content, tag=f"dedup-{unique_test_id}-2"
)
assert result2["artifact_id"] == expected_hash
# Second upload MUST be deduplicated
assert result2["deduplicated"] is True
class TestUploadProgress:
"""Tests for upload progress tracking endpoint."""
@pytest.mark.integration
def test_progress_endpoint_returns_not_found_for_invalid_id(
self, integration_client, test_package
):
"""Test progress endpoint returns not_found status for invalid upload ID."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
)
assert response.status_code == 200
data = response.json()
assert data["status"] == "not_found"
assert data["upload_id"] == "invalid-upload-id"
@pytest.mark.integration
def test_progress_endpoint_requires_valid_project(
self, integration_client, unique_test_id
):
"""Test progress endpoint validates project exists."""
response = integration_client.get(
f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
)
assert response.status_code == 404
@pytest.mark.integration
def test_progress_endpoint_requires_valid_package(
self, integration_client, test_project, unique_test_id
):
"""Test progress endpoint validates package exists."""
response = integration_client.get(
f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
)
assert response.status_code == 404
class TestResumableUploadProgress:
"""Tests for progress tracking during resumable uploads."""
@pytest.mark.integration
def test_resumable_upload_init_and_progress(
self, integration_client, test_package, sized_content
):
"""Test initializing resumable upload and checking progress."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=600)
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "progress-test-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
# Initialize resumable upload
init_response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload/init",
json={
"expected_hash": expected_hash,
"filename": "progress-test.bin",
"size": SIZE_100KB,
},
headers={"Authorization": f"Bearer {api_key}"},
)
assert init_response.status_code == 200
upload_id = init_response.json().get("upload_id")
if upload_id:
# Check initial progress
progress_response = integration_client.get(
f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
headers={"Authorization": f"Bearer {api_key}"},
)
assert progress_response.status_code == 200
progress = progress_response.json()
assert progress["status"] == "in_progress"
assert progress["bytes_uploaded"] == 0
assert progress["bytes_total"] == SIZE_100KB
# Abort to clean up
integration_client.delete(
f"/api/v1/project/{project}/{package}/upload/{upload_id}",
headers={"Authorization": f"Bearer {api_key}"},
)
class TestUploadSizeLimits:
"""Tests for upload size limit enforcement."""
@pytest.mark.integration
def test_empty_file_rejected(self, integration_client, test_package):
"""Test empty files are rejected."""
project, package = test_package
files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code in [400, 422]
@pytest.mark.integration
def test_minimum_size_accepted(self, integration_client, test_package):
"""Test 1-byte file is accepted."""
project, package = test_package
content = b"X"
result = upload_test_file(
integration_client, project, package, content, tag="min-size"
)
assert result["size"] == 1
@pytest.mark.integration
def test_content_length_header_used_in_response(self, integration_client, test_package):
"""Test that upload response size matches Content-Length."""
project, package = test_package
content = b"content length verification test"
result = upload_test_file(
integration_client, project, package, content, tag="content-length-test"
)
# Size in response should match actual content length
assert result["size"] == len(content)
class TestUploadErrorHandling:
"""Tests for upload error handling."""
@pytest.mark.integration
def test_upload_to_nonexistent_project_returns_404(
self, integration_client, unique_test_id
):
"""Test upload to nonexistent project returns 404."""
content = b"test content"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
files=files,
)
assert response.status_code == 404
@pytest.mark.integration
def test_upload_to_nonexistent_package_returns_404(
self, integration_client, test_project, unique_test_id
):
"""Test upload to nonexistent package returns 404."""
content = b"test content"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
files=files,
)
assert response.status_code == 404
@pytest.mark.integration
def test_upload_without_file_returns_422(self, integration_client, test_package):
"""Test upload without file field returns 422."""
project, package = test_package
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
data={"tag": "no-file"},
)
assert response.status_code == 422
@pytest.mark.integration
def test_upload_with_invalid_checksum_rejected(
self, integration_client, test_package
):
"""Test upload with invalid checksum header format is rejected."""
project, package = test_package
content = b"checksum test"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": "invalid-checksum"},
)
assert response.status_code == 400
@pytest.mark.integration
def test_upload_with_mismatched_checksum_rejected(
self, integration_client, test_package
):
"""Test upload with wrong checksum is rejected."""
project, package = test_package
content = b"mismatch test"
wrong_hash = "0" * 64
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
assert "verification failed" in response.json().get("detail", "").lower()
class TestResumableUploadCancellation:
"""Tests for resumable upload cancellation."""
@pytest.mark.integration
def test_abort_resumable_upload(self, integration_client, test_package, sized_content):
"""Test aborting a resumable upload cleans up properly."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=700)
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "abort-test-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
# Initialize resumable upload
init_response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload/init",
json={
"expected_hash": expected_hash,
"filename": "abort-test.bin",
"size": SIZE_100KB,
},
headers={"Authorization": f"Bearer {api_key}"},
)
assert init_response.status_code == 200
upload_id = init_response.json().get("upload_id")
if upload_id:
# Abort the upload (without uploading any parts)
abort_response = integration_client.delete(
f"/api/v1/project/{project}/{package}/upload/{upload_id}",
headers={"Authorization": f"Bearer {api_key}"},
)
assert abort_response.status_code in [200, 204]
# Verify progress shows not_found after abort
progress_response = integration_client.get(
f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
headers={"Authorization": f"Bearer {api_key}"},
)
assert progress_response.status_code == 200
assert progress_response.json()["status"] == "not_found"
@pytest.mark.integration
def test_abort_nonexistent_upload(self, integration_client, test_package):
"""Test aborting nonexistent upload returns appropriate error."""
project, package = test_package
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "abort-nonexistent-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
response = integration_client.delete(
f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
headers={"Authorization": f"Bearer {api_key}"},
)
# Should return 404, or 200/204 if the delete is idempotent
assert response.status_code in [200, 204, 404]
class TestUploadTimeout:
"""Tests for upload timeout handling."""
@pytest.mark.integration
def test_upload_with_short_timeout_succeeds_for_small_file(
self, integration_client, test_package
):
"""Test small file upload succeeds with reasonable timeout."""
project, package = test_package
content = b"small timeout test"
# httpx client should handle this quickly
result = upload_test_file(
integration_client, project, package, content, tag="timeout-small"
)
assert result["artifact_id"] is not None
@pytest.mark.integration
def test_upload_response_duration_under_timeout(
self, integration_client, test_package, sized_content
):
"""Test upload completes within reasonable time."""
project, package = test_package
content, _ = sized_content(SIZE_1MB, seed=800)
start = time.time()
result = upload_test_file(
integration_client, project, package, content, tag="timeout-check"
)
duration = time.time() - start
# 1MB should upload in well under 60 seconds on local
assert duration < 60
assert result["artifact_id"] is not None
class TestConcurrentUploads:
"""Tests for concurrent upload handling."""
@pytest.mark.integration
def test_concurrent_different_files(
self, integration_client, test_package, sized_content
):
"""Test concurrent uploads of different files succeed."""
from concurrent.futures import ThreadPoolExecutor, as_completed
project, package = test_package
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "concurrent-diff-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
num_uploads = 3
results = []
errors = []
def upload_unique_file(idx):
try:
from httpx import Client
content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=30.0) as client:
files = {
"file": (
f"concurrent-{idx}.bin",
io.BytesIO(content),
"application/octet-stream",
)
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"concurrent-diff-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
results.append((idx, response.json(), expected_hash))
else:
errors.append(f"Upload {idx}: {response.status_code} - {response.text}")
except Exception as e:
errors.append(f"Upload {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_uploads) as executor:
futures = [executor.submit(upload_unique_file, i) for i in range(num_uploads)]
for future in as_completed(futures):
future.result()  # propagate any exception not recorded by the worker
assert len(errors) == 0, f"Concurrent upload errors: {errors}"
assert len(results) == num_uploads
# Each upload should have unique artifact ID
artifact_ids = set(r[1]["artifact_id"] for r in results)
assert len(artifact_ids) == num_uploads
# Each should match expected hash
for idx, result, expected_hash in results:
assert result["artifact_id"] == expected_hash

View File

@@ -0,0 +1,583 @@
"""
Integration tests for upload/download with various file sizes.
Tests cover:
- Small files (0B - 100KB)
- Medium files (1MB - 50MB)
- Large files (100MB - 1GB) - marked as slow/large
- Exact chunk boundaries
- Data integrity verification across all sizes
"""
import pytest
import io
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.factories import (
compute_sha256,
upload_test_file,
generate_content,
generate_content_with_hash,
)
from tests.conftest import (
SIZE_1B,
SIZE_1KB,
SIZE_10KB,
SIZE_100KB,
SIZE_1MB,
SIZE_5MB,
SIZE_10MB,
SIZE_50MB,
SIZE_100MB,
SIZE_250MB,
SIZE_500MB,
SIZE_1GB,
CHUNK_SIZE,
MULTIPART_THRESHOLD,
)
class TestSmallFileSizes:
"""Tests for small file uploads/downloads (0B - 100KB)."""
@pytest.mark.integration
def test_upload_download_1_byte(self, integration_client, test_package, sized_content):
"""Test upload/download of 1 byte file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1B, seed=1)
result = upload_test_file(
integration_client, project, package, content,
filename="1byte.bin", tag="1byte"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_1B
# Download and verify
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/1byte",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
assert len(response.content) == SIZE_1B
@pytest.mark.integration
def test_upload_download_1kb(self, integration_client, test_package, sized_content):
"""Test upload/download of 1KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1KB, seed=2)
result = upload_test_file(
integration_client, project, package, content,
filename="1kb.bin", tag="1kb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_1KB
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/1kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_upload_download_10kb(self, integration_client, test_package, sized_content):
"""Test upload/download of 10KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_10KB, seed=3)
result = upload_test_file(
integration_client, project, package, content,
filename="10kb.bin", tag="10kb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_10KB
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/10kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_upload_download_100kb(self, integration_client, test_package, sized_content):
"""Test upload/download of 100KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=4)
result = upload_test_file(
integration_client, project, package, content,
filename="100kb.bin", tag="100kb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_100KB
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/100kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
class TestMediumFileSizes:
"""Tests for medium file uploads/downloads (1MB - 50MB)."""
@pytest.mark.integration
def test_upload_download_1mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 1MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1MB, seed=10)
result = upload_test_file(
integration_client, project, package, content,
filename="1mb.bin", tag="1mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_1MB
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/1mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert len(response.content) == SIZE_1MB
assert compute_sha256(response.content) == expected_hash
@pytest.mark.integration
def test_upload_download_5mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 5MB file (multipart threshold boundary area)."""
project, package = test_package
content, expected_hash = sized_content(SIZE_5MB, seed=11)
result = upload_test_file(
integration_client, project, package, content,
filename="5mb.bin", tag="5mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_5MB
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/5mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert len(response.content) == SIZE_5MB
assert compute_sha256(response.content) == expected_hash
@pytest.mark.integration
@pytest.mark.slow
def test_upload_download_10mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 10MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_10MB, seed=12)
result = upload_test_file(
integration_client, project, package, content,
filename="10mb.bin", tag="10mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_10MB
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/10mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert len(response.content) == SIZE_10MB
assert compute_sha256(response.content) == expected_hash
@pytest.mark.integration
@pytest.mark.slow
def test_upload_download_50mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 50MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_50MB, seed=13)
start_time = time.time()
result = upload_test_file(
integration_client, project, package, content,
filename="50mb.bin", tag="50mb"
)
upload_time = time.time() - start_time
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_50MB
start_time = time.time()
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/50mb",
params={"mode": "proxy"},
)
download_time = time.time() - start_time
assert response.status_code == 200
assert len(response.content) == SIZE_50MB
assert compute_sha256(response.content) == expected_hash
# Log timing for performance tracking
print(f"\n50MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")
class TestLargeFileSizes:
"""Tests for large file uploads/downloads (100MB - 1GB).
These tests are marked slow and large and are skipped by default.
Run with `pytest -m "large"` to include them.
"""
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.large
def test_upload_download_100mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 100MB file (multipart threshold)."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100MB, seed=100)
start_time = time.time()
result = upload_test_file(
integration_client, project, package, content,
filename="100mb.bin", tag="100mb"
)
upload_time = time.time() - start_time
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_100MB
start_time = time.time()
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/100mb",
params={"mode": "proxy"},
)
download_time = time.time() - start_time
assert response.status_code == 200
assert len(response.content) == SIZE_100MB
assert compute_sha256(response.content) == expected_hash
print(f"\n100MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.large
def test_upload_download_250mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 250MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_250MB, seed=250)
start_time = time.time()
result = upload_test_file(
integration_client, project, package, content,
filename="250mb.bin", tag="250mb"
)
upload_time = time.time() - start_time
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_250MB
start_time = time.time()
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/250mb",
params={"mode": "proxy"},
)
download_time = time.time() - start_time
assert response.status_code == 200
assert len(response.content) == SIZE_250MB
assert compute_sha256(response.content) == expected_hash
print(f"\n250MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.large
def test_upload_download_500mb(self, integration_client, test_package, sized_content):
"""Test upload/download of 500MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_500MB, seed=500)
start_time = time.time()
result = upload_test_file(
integration_client, project, package, content,
filename="500mb.bin", tag="500mb"
)
upload_time = time.time() - start_time
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_500MB
start_time = time.time()
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/500mb",
params={"mode": "proxy"},
)
download_time = time.time() - start_time
assert response.status_code == 200
assert len(response.content) == SIZE_500MB
assert compute_sha256(response.content) == expected_hash
print(f"\n500MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.large
def test_upload_download_1gb(self, integration_client, test_package, sized_content):
"""Test upload/download of 1GB file.
This test may take several minutes depending on network/disk speed.
"""
project, package = test_package
content, expected_hash = sized_content(SIZE_1GB, seed=1024)
start_time = time.time()
result = upload_test_file(
integration_client, project, package, content,
filename="1gb.bin", tag="1gb"
)
upload_time = time.time() - start_time
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_1GB
start_time = time.time()
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/1gb",
params={"mode": "proxy"},
)
download_time = time.time() - start_time
assert response.status_code == 200
assert len(response.content) == SIZE_1GB
assert compute_sha256(response.content) == expected_hash
print(f"\n1GB upload: {upload_time:.2f}s, download: {download_time:.2f}s")
class TestChunkBoundaries:
"""Tests for exact chunk size boundaries."""
@pytest.mark.integration
def test_upload_download_at_chunk_size(self, integration_client, test_package, sized_content):
"""Test upload/download at exact chunk size (64KB)."""
project, package = test_package
content, expected_hash = sized_content(CHUNK_SIZE, seed=64)
result = upload_test_file(
integration_client, project, package, content,
filename="chunk.bin", tag="chunk-exact"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == CHUNK_SIZE
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/chunk-exact",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_upload_download_chunk_size_plus_1(self, integration_client, test_package, sized_content):
"""Test upload/download at chunk size + 1 byte."""
project, package = test_package
size = CHUNK_SIZE + 1
content, expected_hash = sized_content(size, seed=65)
result = upload_test_file(
integration_client, project, package, content,
filename="chunk_plus.bin", tag="chunk-plus"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == size
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/chunk-plus",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_upload_download_chunk_size_minus_1(self, integration_client, test_package, sized_content):
"""Test upload/download at chunk size - 1 byte."""
project, package = test_package
size = CHUNK_SIZE - 1
content, expected_hash = sized_content(size, seed=63)
result = upload_test_file(
integration_client, project, package, content,
filename="chunk_minus.bin", tag="chunk-minus"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == size
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/chunk-minus",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_upload_download_multiple_chunks(self, integration_client, test_package, sized_content):
"""Test upload/download spanning multiple chunks."""
project, package = test_package
size = CHUNK_SIZE * 3 + 1000 # 3 full chunks + partial
content, expected_hash = sized_content(size, seed=300)
result = upload_test_file(
integration_client, project, package, content,
filename="multi_chunk.bin", tag="multi-chunk"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == size
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/multi-chunk",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
class TestDataIntegrity:
"""Tests for data integrity with various content types."""
@pytest.mark.integration
def test_binary_content_integrity(self, integration_client, test_package):
"""Test binary content (all byte values 0-255) integrity."""
project, package = test_package
# Content with all 256 possible byte values
content = bytes(range(256)) * 100 # 25.6KB
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content,
filename="binary.bin", tag="binary"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/binary",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_text_content_integrity(self, integration_client, test_package):
"""Test UTF-8 text content integrity."""
project, package = test_package
content = "Hello, World! 你好世界 🌍 مرحبا العالم".encode("utf-8")
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content,
filename="text.txt", tag="text"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/text",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
assert response.content.decode("utf-8") == "Hello, World! 你好世界 🌍 مرحبا العالم"
@pytest.mark.integration
def test_null_bytes_content_integrity(self, integration_client, test_package):
"""Test content with null bytes."""
project, package = test_package
content = b"before\x00null\x00bytes\x00after"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content,
filename="nulls.bin", tag="nulls"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/nulls",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
assert b"\x00" in response.content
@pytest.mark.integration
def test_unicode_filename_integrity(self, integration_client, test_package):
"""Test file with unicode filename."""
project, package = test_package
content = b"unicode filename test"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content,
filename="文件名.txt", tag="unicode-name"
)
assert result["artifact_id"] == expected_hash
assert result["original_name"] == "文件名.txt"
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/unicode-name",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_compressed_content_integrity(self, integration_client, test_package):
"""Test gzip-compressed content integrity."""
import gzip
project, package = test_package
original = b"This is some text that will be compressed " * 100
content = gzip.compress(original)
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content,
filename="data.gz", tag="compressed"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/compressed",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
# Verify we can decompress
assert gzip.decompress(response.content) == original
@pytest.mark.integration
def test_hash_verification_matches(self, integration_client, test_package, sized_content):
"""Test that computed hash matches artifact_id for various sizes."""
project, package = test_package
sizes = [SIZE_1B, SIZE_1KB, SIZE_10KB, SIZE_100KB, SIZE_1MB]
for i, size in enumerate(sizes):
content, expected_hash = sized_content(size, seed=1000 + i)
result = upload_test_file(
integration_client, project, package, content,
filename=f"hash_test_{size}.bin", tag=f"hash-{size}"
)
# Verify artifact_id matches expected hash
assert result["artifact_id"] == expected_hash
# Download and verify hash of downloaded content
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/hash-{size}",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash == expected_hash

View File

@@ -0,0 +1,535 @@
"""
Integration tests for streaming download functionality.
Tests cover:
- HTTP Range requests (partial downloads, resume)
- Conditional requests (If-None-Match, If-Modified-Since)
- Caching headers (Cache-Control, Last-Modified, Accept-Ranges)
- Large file streaming
- Download modes (proxy, redirect, presigned)
"""
import pytest
import io
import time
from email.utils import formatdate
from tests.factories import (
compute_sha256,
upload_test_file,
)
from tests.conftest import (
SIZE_1KB,
SIZE_100KB,
SIZE_1MB,
)
class TestRangeRequests:
"""Tests for HTTP Range request support (partial downloads)."""
@pytest.mark.integration
def test_range_request_first_bytes(self, integration_client, test_package):
"""Test range request for first N bytes."""
project, package = test_package
content = b"0123456789" * 100 # 1000 bytes
upload_test_file(integration_client, project, package, content, tag="range-test")
# Request first 10 bytes
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-test",
params={"mode": "proxy"},
headers={"Range": "bytes=0-9"},
)
assert response.status_code == 206 # Partial Content
assert response.content == b"0123456789"
assert "Content-Range" in response.headers
assert response.headers["Content-Range"].startswith("bytes 0-9/")
@pytest.mark.integration
def test_range_request_middle_bytes(self, integration_client, test_package):
"""Test range request for bytes in the middle."""
project, package = test_package
content = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
upload_test_file(integration_client, project, package, content, tag="range-mid")
# Request bytes 10-19 (KLMNOPQRST)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-mid",
params={"mode": "proxy"},
headers={"Range": "bytes=10-19"},
)
assert response.status_code == 206
assert response.content == b"KLMNOPQRST"
@pytest.mark.integration
def test_range_request_suffix_bytes(self, integration_client, test_package):
"""Test range request for last N bytes (suffix range)."""
project, package = test_package
content = b"0123456789ABCDEF" # 16 bytes
upload_test_file(integration_client, project, package, content, tag="range-suffix")
# Request last 4 bytes
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-suffix",
params={"mode": "proxy"},
headers={"Range": "bytes=-4"},
)
assert response.status_code == 206
assert response.content == b"CDEF"
@pytest.mark.integration
def test_range_request_open_ended(self, integration_client, test_package):
"""Test range request from offset to end."""
project, package = test_package
content = b"0123456789"
upload_test_file(integration_client, project, package, content, tag="range-open")
# Request from byte 5 to end
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-open",
params={"mode": "proxy"},
headers={"Range": "bytes=5-"},
)
assert response.status_code == 206
assert response.content == b"56789"
@pytest.mark.integration
def test_range_request_includes_accept_ranges_header(
self, integration_client, test_package
):
"""Test that range requests include Accept-Ranges header."""
project, package = test_package
content = b"test content"
upload_test_file(integration_client, project, package, content, tag="accept-ranges")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/accept-ranges",
params={"mode": "proxy"},
headers={"Range": "bytes=0-4"},
)
assert response.status_code == 206
assert response.headers.get("Accept-Ranges") == "bytes"
@pytest.mark.integration
def test_full_download_advertises_accept_ranges(
self, integration_client, test_package
):
"""Test that full downloads advertise range support."""
project, package = test_package
content = b"test content"
upload_test_file(integration_client, project, package, content, tag="full-accept")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/full-accept",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.headers.get("Accept-Ranges") == "bytes"
class TestConditionalRequests:
"""Tests for conditional request handling (304 Not Modified)."""
@pytest.mark.integration
def test_if_none_match_returns_304(self, integration_client, test_package):
"""Test If-None-Match with matching ETag returns 304."""
project, package = test_package
content = b"conditional request test content"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="cond-etag")
# Request with matching ETag
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-etag",
params={"mode": "proxy"},
headers={"If-None-Match": f'"{expected_hash}"'},
)
assert response.status_code == 304
assert response.content == b"" # No body for 304
@pytest.mark.integration
def test_if_none_match_without_quotes(self, integration_client, test_package):
"""Test If-None-Match works with or without quotes."""
project, package = test_package
content = b"etag no quotes test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="cond-noquote")
# Request with ETag without quotes
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-noquote",
params={"mode": "proxy"},
headers={"If-None-Match": expected_hash},
)
assert response.status_code == 304
@pytest.mark.integration
def test_if_none_match_mismatch_returns_200(self, integration_client, test_package):
"""Test If-None-Match with non-matching ETag returns 200."""
project, package = test_package
content = b"etag mismatch test"
upload_test_file(integration_client, project, package, content, tag="cond-mismatch")
# Request with different ETag
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-mismatch",
params={"mode": "proxy"},
headers={"If-None-Match": '"different-etag-value"'},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_if_modified_since_returns_304(self, integration_client, test_package):
"""Test If-Modified-Since with future date returns 304."""
project, package = test_package
content = b"modified since test"
upload_test_file(integration_client, project, package, content, tag="cond-modified")
# Request with future date (artifact was definitely created before this)
future_date = formatdate(time.time() + 86400, usegmt=True) # Tomorrow
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-modified",
params={"mode": "proxy"},
headers={"If-Modified-Since": future_date},
)
assert response.status_code == 304
@pytest.mark.integration
def test_if_modified_since_old_date_returns_200(
self, integration_client, test_package
):
"""Test If-Modified-Since with old date returns 200."""
project, package = test_package
content = b"old date test"
upload_test_file(integration_client, project, package, content, tag="cond-old")
# Request with old date (2020-01-01)
old_date = "Wed, 01 Jan 2020 00:00:00 GMT"
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-old",
params={"mode": "proxy"},
headers={"If-Modified-Since": old_date},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_304_includes_etag(self, integration_client, test_package):
"""Test 304 response includes ETag header."""
project, package = test_package
content = b"304 etag test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="304-etag")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/304-etag",
params={"mode": "proxy"},
headers={"If-None-Match": f'"{expected_hash}"'},
)
assert response.status_code == 304
assert response.headers.get("ETag") == f'"{expected_hash}"'
@pytest.mark.integration
def test_304_includes_cache_control(self, integration_client, test_package):
"""Test 304 response includes Cache-Control header."""
project, package = test_package
content = b"304 cache test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="304-cache")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/304-cache",
params={"mode": "proxy"},
headers={"If-None-Match": f'"{expected_hash}"'},
)
assert response.status_code == 304
assert "immutable" in response.headers.get("Cache-Control", "")
class TestCachingHeaders:
"""Tests for caching headers on download responses."""
@pytest.mark.integration
def test_download_includes_cache_control(self, integration_client, test_package):
"""Test download response includes Cache-Control header."""
project, package = test_package
content = b"cache control test"
upload_test_file(integration_client, project, package, content, tag="cache-ctl")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cache-ctl",
params={"mode": "proxy"},
)
assert response.status_code == 200
cache_control = response.headers.get("Cache-Control", "")
assert "public" in cache_control
assert "immutable" in cache_control
assert "max-age" in cache_control
@pytest.mark.integration
def test_download_includes_last_modified(self, integration_client, test_package):
"""Test download response includes Last-Modified header."""
project, package = test_package
content = b"last modified test"
upload_test_file(integration_client, project, package, content, tag="last-mod")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/last-mod",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "Last-Modified" in response.headers
# Should be in RFC 7231 format
last_modified = response.headers["Last-Modified"]
assert "GMT" in last_modified
@pytest.mark.integration
def test_download_includes_etag(self, integration_client, test_package):
"""Test download response includes ETag header."""
project, package = test_package
content = b"etag header test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="etag-hdr")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-hdr",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.headers.get("ETag") == f'"{expected_hash}"'
class TestDownloadResume:
"""Tests for download resume functionality using range requests."""
@pytest.mark.integration
def test_resume_download_after_partial(self, integration_client, test_package):
"""Test resuming download from where it left off."""
project, package = test_package
content = b"ABCDEFGHIJ" * 100 # 1000 bytes
upload_test_file(integration_client, project, package, content, tag="resume-test")
# Simulate partial download (first 500 bytes)
response1 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-test",
params={"mode": "proxy"},
headers={"Range": "bytes=0-499"},
)
assert response1.status_code == 206
first_half = response1.content
assert len(first_half) == 500
# Resume from byte 500
response2 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-test",
params={"mode": "proxy"},
headers={"Range": "bytes=500-"},
)
assert response2.status_code == 206
second_half = response2.content
assert len(second_half) == 500
# Combine and verify
combined = first_half + second_half
assert combined == content
@pytest.mark.integration
def test_resume_with_etag_verification(self, integration_client, test_package):
"""Test that resumed download can verify content hasn't changed."""
project, package = test_package
content = b"resume etag verification test content"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="resume-etag")
# Get ETag from first request
response1 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-etag",
params={"mode": "proxy"},
headers={"Range": "bytes=0-9"},
)
assert response1.status_code == 206
etag = response1.headers.get("ETag")
assert etag == f'"{expected_hash}"'
# Resume the remainder and re-check the ETag; a real client could also
# send If-Match with the saved ETag to fail fast (412) if content changed
response2 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-etag",
params={"mode": "proxy"},
headers={"Range": "bytes=10-"},
)
assert response2.status_code == 206
# ETag should be the same
assert response2.headers.get("ETag") == etag
class TestLargeFileStreaming:
"""Tests for streaming large files."""
@pytest.mark.integration
def test_stream_1mb_file(self, integration_client, test_package, sized_content):
"""Test streaming a 1MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1MB, seed=500)
upload_test_file(integration_client, project, package, content, tag="stream-1mb")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/stream-1mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert len(response.content) == SIZE_1MB
assert compute_sha256(response.content) == expected_hash
@pytest.mark.integration
def test_stream_large_file_has_correct_headers(
self, integration_client, test_package, sized_content
):
"""Test that large file streaming has correct headers."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=501)
upload_test_file(integration_client, project, package, content, tag="stream-hdr")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/stream-hdr",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert int(response.headers.get("Content-Length", 0)) == SIZE_100KB
assert response.headers.get("X-Checksum-SHA256") == expected_hash
assert response.headers.get("Accept-Ranges") == "bytes"
@pytest.mark.integration
def test_range_request_on_large_file(
self, integration_client, test_package, sized_content
):
"""Test range request on a larger file."""
project, package = test_package
content, _ = sized_content(SIZE_100KB, seed=502)
upload_test_file(integration_client, project, package, content, tag="range-large")
# Request a slice from the middle
start = 50000
end = 50999
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-large",
params={"mode": "proxy"},
headers={"Range": f"bytes={start}-{end}"},
)
assert response.status_code == 206
assert len(response.content) == 1000
assert response.content == content[start : end + 1]
class TestDownloadModes:
"""Tests for different download modes."""
@pytest.mark.integration
def test_proxy_mode_streams_content(self, integration_client, test_package):
"""Test proxy mode streams content through backend."""
project, package = test_package
content = b"proxy mode test content"
upload_test_file(integration_client, project, package, content, tag="mode-proxy")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/mode-proxy",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_presigned_mode_returns_url(self, integration_client, test_package):
"""Test presigned mode returns JSON with URL."""
project, package = test_package
content = b"presigned mode test"
upload_test_file(integration_client, project, package, content, tag="mode-presign")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/mode-presign",
params={"mode": "presigned"},
)
assert response.status_code == 200
data = response.json()
assert "url" in data
assert "expires_at" in data
assert data["url"].startswith("http")
@pytest.mark.integration
def test_redirect_mode_returns_302(self, integration_client, test_package):
"""Test redirect mode returns 302 to presigned URL."""
project, package = test_package
content = b"redirect mode test"
upload_test_file(integration_client, project, package, content, tag="mode-redir")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/mode-redir",
params={"mode": "redirect"},
follow_redirects=False,
)
assert response.status_code == 302
assert "Location" in response.headers
class TestIntegrityDuringStreaming:
"""Tests for data integrity during streaming downloads."""
@pytest.mark.integration
def test_checksum_header_matches_content(self, integration_client, test_package):
"""Test X-Checksum-SHA256 header matches actual downloaded content."""
project, package = test_package
content = b"integrity check content"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="integrity")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/integrity",
params={"mode": "proxy"},
)
assert response.status_code == 200
header_hash = response.headers.get("X-Checksum-SHA256")
actual_hash = compute_sha256(response.content)
assert header_hash == expected_hash
assert actual_hash == expected_hash
assert header_hash == actual_hash
@pytest.mark.integration
def test_etag_matches_content_hash(self, integration_client, test_package):
"""Test ETag header matches content hash."""
project, package = test_package
content = b"etag integrity test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="etag-int")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-int",
params={"mode": "proxy"},
)
assert response.status_code == 200
etag = response.headers.get("ETag", "").strip('"')
actual_hash = compute_sha256(response.content)
assert etag == expected_hash
assert actual_hash == expected_hash
@pytest.mark.integration
def test_digest_header_present(self, integration_client, test_package):
"""Test Digest header is present in RFC 3230 format."""
project, package = test_package
content = b"digest header test"
upload_test_file(integration_client, project, package, content, tag="digest")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/digest",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "Digest" in response.headers
assert response.headers["Digest"].startswith("sha-256=")

View File

@@ -10,6 +10,7 @@ Tests cover:
- S3 storage verification
"""
import os
import pytest
import io
import threading
@@ -25,6 +26,19 @@ from tests.factories import (
class TestUploadBasics:
"""Tests for basic upload functionality."""
@pytest.mark.integration
def test_upload_returns_200(self, integration_client, test_package):
"""Test upload with valid file returns 200."""
project, package = test_package
content = b"valid file upload test"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code == 200
@pytest.mark.integration
def test_upload_returns_artifact_id(self, integration_client, test_package):
"""Test upload returns the artifact ID (SHA256 hash)."""
@@ -101,6 +115,83 @@ class TestUploadBasics:
assert "created_at" in result
assert result["created_at"] is not None
@pytest.mark.integration
def test_upload_without_tag_succeeds(self, integration_client, test_package):
"""Test upload without tag succeeds (no tag created)."""
project, package = test_package
content = b"upload without tag test"
expected_hash = compute_sha256(content)
files = {"file": ("no_tag.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
# No tag parameter
)
assert response.status_code == 200
result = response.json()
assert result["artifact_id"] == expected_hash
# Verify no tag was created - list tags and check
tags_response = integration_client.get(
f"/api/v1/project/{project}/{package}/tags"
)
assert tags_response.status_code == 200
tags = tags_response.json()
# Filter for tags pointing to this artifact
artifact_tags = [t for t in tags.get("items", tags) if t.get("artifact_id") == expected_hash]
assert len(artifact_tags) == 0, "Tag should not be created when not specified"
@pytest.mark.integration
def test_upload_creates_artifact_in_database(self, integration_client, test_package):
"""Test upload creates artifact record in database."""
project, package = test_package
content = b"database artifact test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content)
# Verify artifact exists via API
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
artifact = response.json()
assert artifact["id"] == expected_hash
assert artifact["size"] == len(content)
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_upload_creates_object_in_s3(self, integration_client, test_package):
"""Test upload creates object in S3 storage."""
project, package = test_package
content = b"s3 object creation test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content)
# Verify S3 object exists
assert s3_object_exists(expected_hash), "S3 object should exist after upload"
@pytest.mark.integration
def test_upload_with_tag_creates_tag_record(self, integration_client, test_package):
"""Test upload with tag creates tag record."""
project, package = test_package
content = b"tag creation test"
expected_hash = compute_sha256(content)
tag_name = "my-tag-v1"
upload_test_file(
integration_client, project, package, content, tag=tag_name
)
# Verify tag exists
tags_response = integration_client.get(
f"/api/v1/project/{project}/{package}/tags"
)
assert tags_response.status_code == 200
tags = tags_response.json()
tag_names = [t["name"] for t in tags.get("items", tags)]
assert tag_name in tag_names
class TestDuplicateUploads:
"""Tests for duplicate upload deduplication behavior."""
@@ -248,6 +339,23 @@ class TestDownload:
assert response.status_code == 200
assert response.content == original_content
@pytest.mark.integration
def test_download_by_tag_prefix(self, integration_client, test_package):
"""Test downloading artifact using tag: prefix."""
project, package = test_package
original_content = b"download by tag prefix test"
upload_test_file(
integration_client, project, package, original_content, tag="prefix-tag"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/tag:prefix-tag",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == original_content
@pytest.mark.integration
def test_download_nonexistent_tag(self, integration_client, test_package):
"""Test downloading nonexistent tag returns 404."""
@@ -258,6 +366,33 @@ class TestDownload:
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_nonexistent_artifact(self, integration_client, test_package):
"""Test downloading nonexistent artifact ID returns 404."""
project, package = test_package
fake_hash = "0" * 64
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/artifact:{fake_hash}"
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_from_nonexistent_project(self, integration_client, unique_test_id):
"""Test downloading from nonexistent project returns 404."""
response = integration_client.get(
f"/api/v1/project/nonexistent-project-{unique_test_id}/somepackage/+/sometag"
)
assert response.status_code == 404
@pytest.mark.integration
def test_download_from_nonexistent_package(self, integration_client, test_project, unique_test_id):
"""Test downloading from nonexistent package returns 404."""
response = integration_client.get(
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/+/sometag"
)
assert response.status_code == 404
@pytest.mark.integration
def test_content_matches_original(self, integration_client, test_package):
"""Test downloaded content matches original exactly."""
@@ -275,6 +410,111 @@ class TestDownload:
assert response.content == original_content
class TestDownloadHeaders:
"""Tests for download response headers."""
@pytest.mark.integration
def test_download_content_type_header(self, integration_client, test_package):
"""Test download returns correct Content-Type header."""
project, package = test_package
content = b"content type header test"
upload_test_file(
integration_client, project, package, content,
filename="test.txt", tag="content-type-test"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/content-type-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Content-Type should be set (either text/plain or application/octet-stream)
assert "content-type" in response.headers
@pytest.mark.integration
def test_download_content_length_header(self, integration_client, test_package):
"""Test download returns correct Content-Length header."""
project, package = test_package
content = b"content length header test - exactly 41 bytes!"
expected_length = len(content)
upload_test_file(
integration_client, project, package, content, tag="content-length-test"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/content-length-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "content-length" in response.headers
assert int(response.headers["content-length"]) == expected_length
@pytest.mark.integration
def test_download_content_disposition_header(self, integration_client, test_package):
"""Test download returns correct Content-Disposition header."""
project, package = test_package
content = b"content disposition test"
filename = "my-test-file.bin"
upload_test_file(
integration_client, project, package, content,
filename=filename, tag="disposition-test"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/disposition-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "content-disposition" in response.headers
disposition = response.headers["content-disposition"]
assert "attachment" in disposition
assert filename in disposition
@pytest.mark.integration
def test_download_checksum_headers(self, integration_client, test_package):
"""Test download returns checksum headers."""
project, package = test_package
content = b"checksum header test content"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="checksum-headers"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/checksum-headers",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Check for checksum headers
assert "x-checksum-sha256" in response.headers
assert response.headers["x-checksum-sha256"] == expected_hash
@pytest.mark.integration
def test_download_etag_header(self, integration_client, test_package):
"""Test download returns ETag header (artifact ID)."""
project, package = test_package
content = b"etag header test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="etag-test"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "etag" in response.headers
# ETag should contain the artifact ID (hash)
etag = response.headers["etag"].strip('"')
assert etag == expected_hash
class TestConcurrentUploads:
"""Tests for concurrent upload handling."""
@@ -301,7 +541,7 @@ class TestConcurrentUploads:
try:
from httpx import Client
base_url = "http://localhost:8080"
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=30.0) as client:
files = {
"file": (
@@ -397,6 +637,7 @@ class TestUploadFailureCleanup:
"""Tests for cleanup when uploads fail."""
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_upload_failure_invalid_project_no_orphaned_s3(
self, integration_client, unique_test_id
):
@@ -419,6 +660,7 @@ class TestUploadFailureCleanup:
)
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_upload_failure_invalid_package_no_orphaned_s3(
self, integration_client, test_project, unique_test_id
):
@@ -466,6 +708,7 @@ class TestS3StorageVerification:
"""Tests to verify S3 storage behavior."""
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_s3_single_object_after_duplicates(
self, integration_client, test_package, unique_test_id
):
@@ -521,6 +764,7 @@ class TestSecurityPathTraversal:
"""
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_path_traversal_in_filename_stored_safely(
self, integration_client, test_package
):

View File

@@ -0,0 +1,347 @@
"""
Integration tests for package version API endpoints.
Tests cover:
- Version creation via upload
- Version auto-detection from filename
- Version listing and retrieval
- Download by version prefix
- Version deletion
"""
import pytest
import io
from tests.factories import (
compute_sha256,
upload_test_file,
)
class TestVersionCreation:
"""Tests for creating versions via upload."""
@pytest.mark.integration
def test_upload_with_explicit_version(self, integration_client, test_package):
"""Test upload with explicit version parameter creates version record."""
project, package = test_package
content = b"version creation test"
expected_hash = compute_sha256(content)
files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": "1.0.0"},
)
assert response.status_code == 200
result = response.json()
assert result["artifact_id"] == expected_hash
assert result.get("version") == "1.0.0"
assert result.get("version_source") == "explicit"
@pytest.mark.integration
def test_upload_with_version_and_tag(self, integration_client, test_package):
"""Test upload with both version and tag creates both records."""
project, package = test_package
content = b"version and tag test"
files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": "2.0.0", "tag": "latest"},
)
assert response.status_code == 200
result = response.json()
assert result.get("version") == "2.0.0"
# Verify tag was also created
tags_response = integration_client.get(
f"/api/v1/project/{project}/{package}/tags"
)
assert tags_response.status_code == 200
tags = tags_response.json()
tag_names = [t["name"] for t in tags.get("items", tags)]
assert "latest" in tag_names
@pytest.mark.integration
def test_duplicate_version_same_content_succeeds(self, integration_client, test_package):
"""Test uploading same version with same content succeeds (deduplication)."""
project, package = test_package
content = b"version dedup test"
# First upload with version
files1 = {"file": ("app1.tar.gz", io.BytesIO(content), "application/octet-stream")}
response1 = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files1,
data={"version": "3.0.0"},
)
assert response1.status_code == 200
# Second upload with same version and same content succeeds
files2 = {"file": ("app2.tar.gz", io.BytesIO(content), "application/octet-stream")}
response2 = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files2,
data={"version": "3.0.0"},
)
# This succeeds because it's the same artifact (deduplication)
assert response2.status_code == 200
class TestVersionAutoDetection:
"""Tests for automatic version detection from filename."""
@pytest.mark.integration
def test_version_detected_from_filename_tarball(self, integration_client, test_package):
"""Test version is auto-detected from tarball filename or metadata."""
project, package = test_package
content = b"auto detect version tarball"
files = {"file": ("myapp-1.2.3.tar.gz", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code == 200
result = response.json()
assert result.get("version") == "1.2.3"
# Version source can be 'filename' or 'metadata' depending on detection order
assert result.get("version_source") in ["filename", "metadata"]
@pytest.mark.integration
def test_version_detected_from_filename_zip(self, integration_client, test_package):
"""Test version is auto-detected from zip filename."""
project, package = test_package
content = b"auto detect version zip"
files = {"file": ("package-2.0.0.zip", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code == 200
result = response.json()
assert result.get("version") == "2.0.0"
assert result.get("version_source") == "filename"
@pytest.mark.integration
def test_explicit_version_overrides_filename(self, integration_client, test_package):
"""Test explicit version parameter overrides filename detection."""
project, package = test_package
content = b"explicit override test"
files = {"file": ("myapp-1.0.0.tar.gz", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": "9.9.9"},
)
assert response.status_code == 200
result = response.json()
assert result.get("version") == "9.9.9"
assert result.get("version_source") == "explicit"
@pytest.mark.integration
def test_no_version_detected_from_plain_filename(self, integration_client, test_package):
"""Test no version is created for filenames without version pattern."""
project, package = test_package
content = b"no version in filename"
files = {"file": ("plain-file.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code == 200
result = response.json()
# Version should be None or not present
assert result.get("version") is None
class TestVersionListing:
"""Tests for listing and retrieving versions."""
@pytest.mark.integration
def test_list_versions(self, integration_client, test_package):
"""Test listing all versions for a package."""
project, package = test_package
# Create multiple versions
for ver in ["1.0.0", "1.1.0", "2.0.0"]:
content = f"version {ver} content".encode()
files = {"file": (f"app-{ver}.tar.gz", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": ver},
)
assert response.status_code == 200
# List versions
response = integration_client.get(
f"/api/v1/project/{project}/{package}/versions"
)
assert response.status_code == 200
data = response.json()
versions = [v["version"] for v in data.get("items", data)]
assert "1.0.0" in versions
assert "1.1.0" in versions
assert "2.0.0" in versions
@pytest.mark.integration
def test_get_specific_version(self, integration_client, test_package):
"""Test getting details for a specific version."""
project, package = test_package
content = b"specific version test"
expected_hash = compute_sha256(content)
# Create version
files = {"file": ("app-4.0.0.tar.gz", io.BytesIO(content), "application/octet-stream")}
integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": "4.0.0"},
)
# Get version details
response = integration_client.get(
f"/api/v1/project/{project}/{package}/versions/4.0.0"
)
assert response.status_code == 200
data = response.json()
assert data["version"] == "4.0.0"
assert data["artifact_id"] == expected_hash
@pytest.mark.integration
def test_get_nonexistent_version_returns_404(self, integration_client, test_package):
"""Test getting nonexistent version returns 404."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/versions/99.99.99"
)
assert response.status_code == 404
class TestDownloadByVersion:
"""Tests for downloading artifacts by version."""
@pytest.mark.integration
def test_download_by_version_prefix(self, integration_client, test_package):
"""Test downloading artifact using version: prefix."""
project, package = test_package
content = b"download by version test"
expected_hash = compute_sha256(content)
# Upload with version
files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": "5.0.0"},
)
# Download by version prefix
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/version:5.0.0",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_download_nonexistent_version_returns_404(self, integration_client, test_package):
"""Test downloading nonexistent version returns 404."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/version:99.0.0"
)
assert response.status_code == 404
@pytest.mark.integration
def test_version_resolution_priority(self, integration_client, test_package):
"""Test that version: prefix explicitly resolves to version, not tag."""
project, package = test_package
version_content = b"this is the version content"
tag_content = b"this is the tag content"
# Create a version 6.0.0
files1 = {"file": ("app-v.tar.gz", io.BytesIO(version_content), "application/octet-stream")}
integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files1,
data={"version": "6.0.0"},
)
# Create a tag named "6.0.0" pointing to different content
files2 = {"file": ("app-t.tar.gz", io.BytesIO(tag_content), "application/octet-stream")}
integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files2,
data={"tag": "6.0.0"},
)
# Download with version: prefix should get version content
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/version:6.0.0",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == version_content
# Download with tag: prefix should get tag content
response2 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/tag:6.0.0",
params={"mode": "proxy"},
)
assert response2.status_code == 200
assert response2.content == tag_content
class TestVersionDeletion:
"""Tests for deleting versions."""
@pytest.mark.integration
def test_delete_version(self, integration_client, test_package):
"""Test deleting a version."""
project, package = test_package
content = b"delete version test"
# Create version
files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"version": "7.0.0"},
)
# Verify version exists
response = integration_client.get(
f"/api/v1/project/{project}/{package}/versions/7.0.0"
)
assert response.status_code == 200
# Delete version - returns 204 No Content on success
delete_response = integration_client.delete(
f"/api/v1/project/{project}/{package}/versions/7.0.0"
)
assert delete_response.status_code == 204
# Verify version no longer exists
response2 = integration_client.get(
f"/api/v1/project/{project}/{package}/versions/7.0.0"
)
assert response2.status_code == 404
@pytest.mark.integration
def test_delete_nonexistent_version_returns_404(self, integration_client, test_package):
"""Test deleting nonexistent version returns 404."""
project, package = test_package
response = integration_client.delete(
f"/api/v1/project/{project}/{package}/versions/99.0.0"
)
assert response.status_code == 404

View File

@@ -0,0 +1,294 @@
# Integrity Verification
Orchard uses content-addressable storage with SHA256 hashing to ensure artifact integrity. This document describes how integrity verification works and how to use it.
## How It Works
### Content-Addressable Storage
Orchard stores artifacts using their SHA256 hash as the unique identifier. This provides several benefits:
1. **Automatic deduplication**: Identical content is stored only once
2. **Built-in integrity**: The artifact ID *is* the content hash
3. **Tamper detection**: Any modification changes the hash, making corruption detectable
When you upload a file:
1. Orchard computes the SHA256 hash of the content
2. The hash becomes the artifact ID (64-character hex string)
3. The file is stored in S3 at `fruits/{hash[0:2]}/{hash[2:4]}/{hash}`
4. The hash and metadata are recorded in the database
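For illustration, the derivation above can be sketched in a few lines of Python (the helper names here are ours, not part of the Orchard API):
```python
import hashlib

def artifact_id(content: bytes) -> str:
    # The artifact ID is simply the SHA256 hex digest of the content
    return hashlib.sha256(content).hexdigest()

def s3_key(aid: str) -> str:
    # Objects are sharded by the first two hex-byte pairs of the hash
    return f"fruits/{aid[0:2]}/{aid[2:4]}/{aid}"

aid = artifact_id(b"hello orchard")
print(aid)          # 64-character lowercase hex string
print(s3_key(aid))  # fruits/<xx>/<yy>/<full hash>
```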
### Hash Format
- Algorithm: SHA256
- Format: 64-character lowercase hexadecimal string
- Example: `dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f`
## Client-Side Verification
### Before Upload
Compute the hash locally before uploading to verify the server received your content correctly:
```python
import hashlib
import requests

def compute_sha256(content: bytes) -> str:
    return hashlib.sha256(content).hexdigest()

# Compute the hash before upload
with open("myfile.tar.gz", "rb") as f:
    content = f.read()
local_hash = compute_sha256(content)

# Upload the file
response = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
)
result = response.json()

# Verify the server computed the same hash
assert result["artifact_id"] == local_hash, "Hash mismatch!"
```
### Providing Expected Hash on Upload
You can provide the expected hash in the upload request. The server will reject the upload if the computed hash doesn't match:
```python
response = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
    headers={"X-Checksum-SHA256": local_hash},
)

# Returns 422 if the computed hash doesn't match
if response.status_code == 422:
    print("Checksum mismatch - upload rejected")
```
### After Download
Verify downloaded content matches the expected hash using response headers:
```python
response = requests.get(
    f"{base_url}/api/v1/project/{project}/{package}/+/{tag}",
    params={"mode": "proxy"},
)

# Get expected hash from header
expected_hash = response.headers.get("X-Checksum-SHA256")

# Compute hash of downloaded content
actual_hash = compute_sha256(response.content)

# Verify
if actual_hash != expected_hash:
    raise Exception(f"Integrity check failed! Expected {expected_hash}, got {actual_hash}")
```
### Response Headers for Verification
Download responses include multiple headers for verification:
| Header | Format | Description |
|--------|--------|-------------|
| `X-Checksum-SHA256` | Hex string | SHA256 hash (64 chars) |
| `ETag` | `"<hash>"` | SHA256 hash in quotes |
| `Digest` | `sha-256=<base64>` | RFC 3230 format (base64-encoded) |
| `Content-Length` | Integer | File size in bytes |
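Note that the `Digest` value is the base64 encoding of the raw 32-byte digest, not of the hex string. A short sketch of how the two forms relate:
```python
import base64
import hashlib

raw = hashlib.sha256(b"example artifact content").digest()
print(f"X-Checksum-SHA256: {raw.hex()}")                    # hex form (also the ETag)
print(f"Digest: sha-256={base64.b64encode(raw).decode()}")  # RFC 3230 form
```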
### Server-Side Verification on Download
Request server-side verification during download:
```bash
# Pre-verification: Server verifies before streaming (returns 500 if corrupt)
curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=pre"
# Stream verification: Server verifies while streaming (logs error if corrupt)
curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=stream"
```
The `X-Verified` header indicates whether server-side verification was performed:
- `X-Verified: true` - Content was verified by the server
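A client requesting pre-verification might look like the following sketch (using `requests`; the query parameters are those documented above):
```python
import requests

response = requests.get(
    f"{base_url}/api/v1/project/{project}/{package}/+/{tag}",
    params={"mode": "proxy", "verify": "true", "verify_mode": "pre"},
)
response.raise_for_status()  # a 500 here means the server detected corruption
assert response.headers.get("X-Verified") == "true"
```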
## Server-Side Consistency Check
### Consistency Check Endpoint
Administrators can run a consistency check to verify all stored artifacts:
```bash
curl "${base_url}/api/v1/admin/consistency-check"
```
Response:
```json
{
  "total_artifacts_checked": 1234,
  "healthy": true,
  "orphaned_s3_objects": 0,
  "missing_s3_objects": 0,
  "size_mismatches": 0,
  "orphaned_s3_keys": [],
  "missing_s3_keys": [],
  "size_mismatch_artifacts": []
}
```
### What the Check Verifies
1. **Missing S3 objects**: Database records with no corresponding S3 object
2. **Orphaned S3 objects**: S3 objects with no database record
3. **Size mismatches**: S3 object size doesn't match database record
### Running Consistency Checks
**Manual check:**
```bash
# Check all artifacts
curl "${base_url}/api/v1/admin/consistency-check"
# Limit results (for large deployments)
curl "${base_url}/api/v1/admin/consistency-check?limit=100"
```
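For automation outside the cluster, the same check can be scripted. A minimal sketch that exits non-zero when the report is unhealthy (field names as in the response shown above; any required admin credentials are assumed to be handled separately):
```python
import sys
import requests

resp = requests.get(f"{base_url}/api/v1/admin/consistency-check", timeout=300)
resp.raise_for_status()
report = resp.json()
if not report["healthy"]:
    print(f"ALERT: {report['missing_s3_objects']} missing, "
          f"{report['orphaned_s3_objects']} orphaned, "
          f"{report['size_mismatches']} size mismatches")
    sys.exit(1)
print(f"OK: checked {report['total_artifacts_checked']} artifacts")
```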
**Scheduled checks (recommended):**
Set up a cron job or Kubernetes CronJob to run periodic checks:
```yaml
# Kubernetes CronJob example
apiVersion: batch/v1
kind: CronJob
metadata:
  name: orchard-consistency-check
spec:
  schedule: "0 2 * * *"  # Daily at 2 AM
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: check
            image: curlimages/curl
            env:
            - name: ORCHARD_URL
              value: "http://orchard:8080"  # assumption: adjust to your in-cluster service name
            command:
            - /bin/sh
            - -c
            - |
              response=$(curl -s "${ORCHARD_URL}/api/v1/admin/consistency-check")
              # The curl image ships no jq, so extract the flag with grep instead
              healthy=$(echo "$response" | tr -d ' ' | grep -o '"healthy":[a-z]*' | cut -d: -f2)
              if [ "$healthy" != "true" ]; then
                echo "ALERT: Consistency check failed!"
                echo "$response"
                exit 1
              fi
              echo "Consistency check passed"
          restartPolicy: OnFailure
```
## Recovery Procedures
### Corrupted Artifact (Size Mismatch)
If the consistency check reports size mismatches:
1. **Identify affected artifacts:**
```bash
curl "${base_url}/api/v1/admin/consistency-check" | jq '.size_mismatch_artifacts'
```
2. **Check if artifact can be re-uploaded:**
- If the original content is available, delete the corrupted artifact and re-upload
- The same content will produce the same artifact ID (see the sketch after this list)
3. **If original content is lost:**
- The artifact data is corrupted and cannot be recovered
- Delete the artifact record and notify affected users
- Consider restoring from backup if available
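Because storage is content-addressed, re-uploading the original bytes recreates the artifact under the same ID, as this sketch shows (assuming the original file is still at hand):
```python
import requests

with open("myfile.tar.gz", "rb") as f:
    content = f.read()
resp = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
)
# Same bytes -> same SHA256 -> same artifact ID as before the corruption
print(resp.json()["artifact_id"])
```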
### Missing S3 Object
If database records exist but S3 objects are missing:
1. **Identify affected artifacts:**
```bash
curl "${base_url}/api/v1/admin/consistency-check" | jq '.missing_s3_keys'
```
2. **Check S3 bucket:**
- Verify the S3 bucket exists and is accessible
- Check S3 access logs for deletion events
- Check if objects were moved or lifecycle-deleted
3. **Recovery options:**
- Restore from S3 versioning (if enabled)
- Restore from backup
- Re-upload original content (if available)
- Delete orphaned database records
### Orphaned S3 Objects
If S3 objects exist without database records:
1. **Identify orphaned objects:**
```bash
curl "${base_url}/api/v1/admin/consistency-check" | jq '.orphaned_s3_keys'
```
2. **Investigate cause:**
- Upload interrupted before database commit?
- Database record deleted but S3 cleanup failed?
3. **Resolution:**
- If content is needed, create database record manually
- If content is not needed, delete the S3 object to reclaim storage
### Preventive Measures
1. **Enable S3 versioning** to recover from accidental deletions
2. **Regular backups** of both database and S3 bucket
3. **Scheduled consistency checks** to detect issues early
4. **Monitoring and alerting** on consistency check failures
5. **Audit logging** to track all artifact operations
## Verification in CI/CD
### Verifying Artifacts in Pipelines
```bash
#!/bin/bash
# Download and verify artifact in CI pipeline
ARTIFACT_URL="${ORCHARD_URL}/api/v1/project/${PROJECT}/${PACKAGE}/+/${TAG}"
# Download with verification headers
response=$(curl -s -D - "${ARTIFACT_URL}?mode=proxy" -o artifact.tar.gz)
expected_hash=$(echo "$response" | grep -i "X-Checksum-SHA256" | cut -d: -f2 | tr -d ' \r')
# Compute actual hash
actual_hash=$(sha256sum artifact.tar.gz | cut -d' ' -f1)
# Verify
if [ "$actual_hash" != "$expected_hash" ]; then
echo "ERROR: Integrity check failed!"
echo "Expected: $expected_hash"
echo "Actual: $actual_hash"
exit 1
fi
echo "Integrity verified: $actual_hash"
```
### Using Server-Side Verification
For critical deployments, use server-side pre-verification:
```bash
# Server verifies before streaming - returns 500 if corrupt
curl -f "${ARTIFACT_URL}?mode=proxy&verify=true&verify_mode=pre" -o artifact.tar.gz
```
This ensures the artifact is verified before any bytes are streamed to your pipeline.

View File

@@ -110,6 +110,12 @@ spec:
value: {{ .Values.orchard.download.mode | quote }}
- name: ORCHARD_PRESIGNED_URL_EXPIRY
value: {{ .Values.orchard.download.presignedUrlExpiry | quote }}
{{- if .Values.orchard.rateLimit }}
{{- if .Values.orchard.rateLimit.login }}
- name: ORCHARD_LOGIN_RATE_LIMIT
value: {{ .Values.orchard.rateLimit.login | quote }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:

View File

@@ -42,6 +42,7 @@ ingress:
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
hosts:
- host: orchard-dev.common.global.bsf.tools # Overridden by CI
paths:
@@ -113,6 +114,10 @@ orchard:
mode: "presigned"
presignedUrlExpiry: 3600
# Relaxed rate limits for dev/feature environments (allows integration tests to run)
rateLimit:
login: "1000/minute" # Default is 5/minute, relaxed for CI integration tests
# PostgreSQL - ephemeral, no persistence
postgresql:
enabled: true

View File

@@ -41,6 +41,7 @@ ingress:
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
hosts:
- host: orchard-stage.common.global.bsf.tools
paths:
@@ -120,6 +121,10 @@ orchard:
mode: "presigned" # presigned, redirect, or proxy
presignedUrlExpiry: 3600 # Presigned URL expiry in seconds
# Relaxed rate limits for stage (allows CI integration tests to run)
rateLimit:
login: "1000/minute" # Default is 5/minute, relaxed for CI integration tests
# PostgreSQL subchart configuration
postgresql:
enabled: true