4 Commits

Author SHA1 Message Date
Mondo Diaz
2bb619975e Add version API tests for new package_versions feature
- Add tests for version creation via upload with explicit version parameter
- Add tests for version auto-detection from filename/metadata
- Add tests for version listing and retrieval
- Add tests for download by version: prefix
- Add tests for version deletion
- Test version resolution priority (version: vs tag: prefixes)
2026-01-16 17:46:38 +00:00
Mondo Diaz
e7ae94b1e1 Merge main into feature/upload-download-tests 2026-01-16 17:39:35 +00:00
Mondo Diaz
4deadc708f Add comprehensive upload/download API tests and error handling tests
- Add upload API tests: upload without tag, artifact creation, S3 storage
- Add download tests: tag: prefix, Content-Type/Length/Disposition headers
- Add download tests: 404 for nonexistent project/package/artifact
- Add checksum header tests: ETag, X-Checksum-SHA256
- Add error handling tests: timeout behavior, checksum validation
- Add resource cleanup tests: verify no orphans on failed uploads
- Add graceful error response tests: JSON format, detail messages
2026-01-16 17:37:09 +00:00
Mondo Diaz
9106e79aac Add upload/download tests for size boundaries and concurrency
- Add size boundary tests: 1B, 1KB, 10KB, 100KB, 1MB, 5MB, 10MB, 50MB
- Add large file tests (100MB-1GB) marked with @pytest.mark.large
- Add chunk boundary tests at 64KB boundaries
- Add concurrent upload/download tests (2, 5, 10 parallel)
- Add data integrity tests (binary, text, unicode, compressed)
- Add generate_content() and sized_content fixture for test helpers
- Add @pytest.mark.large and @pytest.mark.concurrent markers
- Fix Content-Disposition header encoding for non-ASCII filenames (RFC 5987)
2026-01-16 17:22:16 +00:00
20 changed files with 636 additions and 3394 deletions

View File

@@ -8,17 +8,6 @@ variables:
PROSPER_VERSION: v0.64.1
# Use internal PyPI proxy instead of public internet
PIP_INDEX_URL: https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple
# Environment URLs (used by deploy and test jobs)
STAGE_URL: https://orchard-stage.common.global.bsf.tools
PROD_URL: https://orchard.common.global.bsf.tools
# Stage environment AWS resources (used by reset job)
STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
STAGE_RDS_DBNAME: postgres
STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
STAGE_S3_BUCKET: orchard-artifacts-stage
AWS_REGION: us-gov-west-1
# Shared pip cache directory
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
# Prevent duplicate pipelines for MRs
workflow:
@@ -40,47 +29,11 @@ kics:
variables:
KICS_CONFIG: kics.config
# Full integration test suite template (for feature/stage deployments)
# Runs the complete pytest integration test suite against the deployed environment
# Post-deployment integration tests template
.integration_test_template: &integration_test_template
stage: deploy # Runs in deploy stage, but after deployment due to 'needs'
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 20m # Full suite takes longer than smoke tests
interruptible: true # Cancel if new pipeline starts
retry: 1 # Retry once on failure (network flakiness)
cache:
key: pip-$CI_COMMIT_REF_SLUG
paths:
- .pip-cache/
policy: pull-push
before_script:
- pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt
- pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio httpx
script:
- cd backend
# Run full integration test suite, excluding:
# - large/slow tests
# - requires_direct_s3 tests (can't access MinIO from outside K8s cluster)
# ORCHARD_TEST_URL tells the tests which server to connect to
# Note: Auth tests work because dev/stage deployments have relaxed rate limits
- |
python -m pytest tests/integration/ -v \
--junitxml=integration-report.xml \
-m "not large and not slow and not requires_direct_s3" \
--tb=short
artifacts:
when: always
expire_in: 1 week
paths:
- backend/integration-report.xml
reports:
junit: backend/integration-report.xml
# Lightweight smoke test template (for production - no test data creation)
.smoke_test_template: &smoke_test_template
stage: deploy
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 5m
timeout: 10m
before_script:
- pip install --index-url "$PIP_INDEX_URL" httpx
script:
@@ -90,12 +43,12 @@ kics:
import os
import sys
BASE_URL = os.environ.get("ORCHARD_TEST_URL")
BASE_URL = os.environ.get("BASE_URL")
if not BASE_URL:
print("ERROR: ORCHARD_TEST_URL not set")
print("ERROR: BASE_URL not set")
sys.exit(1)
print(f"Running smoke tests against {BASE_URL}")
print(f"Running integration tests against {BASE_URL}")
client = httpx.Client(base_url=BASE_URL, timeout=30.0)
errors = []
@@ -133,100 +86,38 @@ kics:
print(f" FAIL: {e}")
sys.exit(1)
else:
print("SUCCESS: All smoke tests passed!")
print("SUCCESS: All integration tests passed!")
sys.exit(0)
PYTEST_SCRIPT
# Integration tests for stage deployment (full suite)
# Integration tests for stage deployment
integration_test_stage:
<<: *integration_test_template
needs: [deploy_stage]
variables:
ORCHARD_TEST_URL: $STAGE_URL
BASE_URL: https://orchard-stage.common.global.bsf.tools
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage environment after integration tests (clean slate for next run)
# Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
reset_stage:
stage: deploy
needs: [integration_test_stage]
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 5m
before_script:
- pip install --index-url "$PIP_INDEX_URL" httpx
script:
- |
python - <<'RESET_SCRIPT'
import httpx
import sys
import os
BASE_URL = os.environ.get("STAGE_URL", "")
ADMIN_USER = "admin"
ADMIN_PASS = "changeme123" # Default admin password
if not BASE_URL:
print("ERROR: STAGE_URL environment variable not set")
sys.exit(1)
print(f"=== Resetting stage environment at {BASE_URL} ===")
client = httpx.Client(base_url=BASE_URL, timeout=60.0)
# Login as admin
print("Logging in as admin...")
login_response = client.post(
"/api/v1/auth/login",
json={"username": ADMIN_USER, "password": ADMIN_PASS},
)
if login_response.status_code != 200:
print(f"Login failed: {login_response.status_code} - {login_response.text}")
sys.exit(1)
print("Login successful")
# Call factory reset endpoint
print("Calling factory reset endpoint...")
reset_response = client.post(
"/api/v1/admin/factory-reset",
headers={"X-Confirm-Reset": "yes-delete-all-data"},
)
if reset_response.status_code == 200:
result = reset_response.json()
print(f"Factory reset successful!")
print(f" Database tables dropped: {result['results']['database_tables_dropped']}")
print(f" S3 objects deleted: {result['results']['s3_objects_deleted']}")
print(f" Database reinitialized: {result['results']['database_reinitialized']}")
print(f" Seeded: {result['results']['seeded']}")
sys.exit(0)
else:
print(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
sys.exit(1)
RESET_SCRIPT
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
allow_failure: true # Don't fail pipeline if reset has issues
# Integration tests for feature deployment (full suite)
# Integration tests for feature deployment
integration_test_feature:
<<: *integration_test_template
needs: [deploy_feature]
variables:
ORCHARD_TEST_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
BASE_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
rules:
- if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
when: on_success
# Run Python backend unit tests
python_unit_tests:
# Run Python backend tests
python_tests:
stage: test
needs: [] # Run in parallel with build
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 15m
interruptible: true # Cancel if new pipeline starts
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
cache:
key: pip-$CI_COMMIT_REF_SLUG
paths:
@@ -237,7 +128,7 @@ python_unit_tests:
- pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio pytest-cov httpx
script:
- cd backend
# Run unit tests (integration tests run post-deployment against live environment)
# Only run unit tests - integration tests require Docker Compose services
- python -m pytest tests/unit/ -v --cov=app --cov-report=term --cov-report=xml:coverage.xml --cov-report=html:coverage_html --junitxml=pytest-report.xml
artifacts:
when: always
@@ -259,7 +150,6 @@ frontend_tests:
needs: [] # Run in parallel with build
image: deps.global.bsf.tools/docker/node:20-alpine
timeout: 15m
interruptible: true # Cancel if new pipeline starts
cache:
key: npm-$CI_COMMIT_REF_SLUG
paths:
@@ -285,7 +175,7 @@ frontend_tests:
# Shared deploy configuration
.deploy_template: &deploy_template
stage: deploy
needs: [build_image, test_image, kics, hadolint, python_unit_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
needs: [build_image, test_image, kics, hadolint, python_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
.helm_setup: &helm_setup
@@ -294,21 +184,47 @@ frontend_tests:
# OCI-based charts from internal registry - no repo add needed
- helm dependency update
# Simplified deployment verification - just health check
# Full API/frontend checks are done by integration tests post-deployment
.verify_deployment: &verify_deployment |
echo "=== Waiting for health endpoint (certs may take a few minutes) ==="
for i in $(seq 1 30); do
if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then
echo "Health check passed!"
echo "Deployment URL: $BASE_URL"
exit 0
break
fi
echo "Attempt $i/30 - waiting 10s..."
sleep 10
done
echo "Health check failed after 30 attempts"
exit 1
# Verify health endpoint
echo ""
echo "=== Health Check ==="
curl -sf "$BASE_URL/health" || { echo "Health check failed"; exit 1; }
echo ""
# Verify API is responding
echo ""
echo "=== API Check (GET /api/v1/projects) ==="
HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" "$BASE_URL/api/v1/projects")
if [ "$HTTP_CODE" = "200" ]; then
echo "API responding: HTTP $HTTP_CODE"
else
echo "API check failed: HTTP $HTTP_CODE"
exit 1
fi
# Verify frontend is served
echo ""
echo "=== Frontend Check ==="
if curl -sf "$BASE_URL/" | grep -q "</html>"; then
echo "Frontend is being served"
else
echo "Frontend check failed"
exit 1
fi
echo ""
echo "=== All checks passed! ==="
echo "Deployment URL: $BASE_URL"
# Deploy to stage (main branch)
deploy_stage:
@@ -316,7 +232,7 @@ deploy_stage:
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-stage.yaml
BASE_URL: $STAGE_URL
BASE_URL: https://orchard-stage.common.global.bsf.tools
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
@@ -335,7 +251,7 @@ deploy_stage:
- *verify_deployment
environment:
name: stage
url: $STAGE_URL
url: https://orchard-stage.common.global.bsf.tools
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
@@ -381,12 +297,10 @@ deploy_feature:
- if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
when: on_success
# Cleanup feature branch deployment (standalone - doesn't need deploy dependencies)
# Cleanup feature branch deployment
cleanup_feature:
stage: deploy
<<: *deploy_template
needs: []
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
timeout: 5m
variables:
NAMESPACE: orch-dev-namespace
GIT_STRATEGY: none # No source needed, branch may be deleted
@@ -405,7 +319,7 @@ cleanup_feature:
when: manual
allow_failure: true
# Deploy to production (version tags only)
# Deploy to production (version tags only, manual approval required)
deploy_prod:
stage: deploy
# For tag pipelines, most jobs don't run (trusting main was tested)
@@ -415,7 +329,7 @@ deploy_prod:
variables:
NAMESPACE: orch-prod-namespace
VALUES_FILE: helm/orchard/values-prod.yaml
BASE_URL: $PROD_URL
BASE_URL: https://orchard.common.global.bsf.tools
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
- *helm_setup
@@ -434,21 +348,21 @@ deploy_prod:
- *verify_deployment
environment:
name: production
url: $PROD_URL
url: https://orchard.common.global.bsf.tools
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
rules:
# Only run on semantic version tags (v1.0.0, v1.2.3, etc.)
- if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
when: on_success
when: manual # Require manual approval for prod
allow_failure: false
# Smoke tests for production deployment (read-only, no test data creation)
smoke_test_prod:
<<: *smoke_test_template
# Integration tests for production deployment
integration_test_prod:
<<: *integration_test_template
needs: [deploy_prod]
variables:
ORCHARD_TEST_URL: $PROD_URL
BASE_URL: https://orchard.common.global.bsf.tools
rules:
- if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
when: on_success

View File

@@ -11,7 +11,3 @@ bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:gene
90bb2a3a393d2361dc3136ee8d761debb0726d8a:backend/tests/unit/test_storage.py:generic-api-key:381
37666e41a72d2a4f34447c0d1a8728e1d7271d24:backend/tests/unit/test_storage.py:generic-api-key:381
0cc4f253621a9601c5193f6ae1e7ae33f0e7fc9b:backend/tests/unit/test_storage.py:generic-api-key:381
35fda65d381acc5ab59bc592ee3013f75906c197:backend/tests/unit/test_storage.py:generic-api-key:381
08dce6cbb836b687002751fed4159bfc2da61f8b:backend/tests/unit/test_storage.py:generic-api-key:381
617bcbe89cff9a009d77e4f1f1864efed1820e63:backend/tests/unit/test_storage.py:generic-api-key:381
1cbd33544388e0fe6db752fa8886fab33cf9ce7c:backend/tests/unit/test_storage.py:generic-api-key:381

View File

@@ -7,58 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added factory reset endpoint `POST /api/v1/admin/factory-reset` for test environment cleanup (#54)
- Requires admin authentication and `X-Confirm-Reset: yes-delete-all-data` header
- Drops all database tables, clears S3 bucket, reinitializes schema, re-seeds default data
- CI pipeline automatically calls this after integration tests on stage
- Added `delete_all()` method to storage backend for bulk S3 object deletion (#54)
- Added AWS Secrets Manager CSI driver support for database credentials (#54)
- Added SecretProviderClass template for Secrets Manager integration (#54)
- Added IRSA service account annotations for prod and stage environments (#54)
### Changed
- Configured stage and prod to use AWS RDS instead of PostgreSQL subchart (#54)
- Configured stage and prod to use AWS S3 instead of MinIO subchart (#54)
- Changed prod deployment from manual to automatic on version tags (#54)
- Updated S3 client to support IRSA credentials when no explicit keys provided (#54)
- Changed prod image pullPolicy to Always (#54)
- Added proxy-body-size annotation to prod ingress for large uploads (#54)
### Removed
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)
### Added
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
- Added data integrity tests (binary, text, unicode, compressed content) (#38)
- Added chunk boundary tests for edge cases (#38)
- Added `@pytest.mark.large` and `@pytest.mark.concurrent` test markers (#38)
- Added `generate_content()` and `generate_content_with_hash()` test helpers (#38)
- Added `sized_content` fixture for generating test content of specific sizes (#38)
- Added upload API tests: upload without tag, artifact creation verification, S3 object creation (#38)
- Added download API tests: tag: prefix resolution, 404 for nonexistent project/package/artifact (#38)
- Added download header tests: Content-Type, Content-Length, Content-Disposition, ETag, X-Checksum-SHA256 (#38)
- Added error handling tests: timeout behavior, checksum validation, resource cleanup, graceful error responses (#38)
- Added version API tests: version creation, auto-detection, listing, download by version prefix (#38)
- Added integrity verification tests: round-trip hash verification, client-side verification workflow, size variants (1KB-10MB) (#40)
- Added consistency check endpoint tests with response format validation (#40)
- Added corruption detection tests: bit flip, truncation, appended content, size mismatch, missing S3 objects (#40)
- Added Digest header tests (RFC 3230) and verification mode tests (#40)
- Added integrity verification documentation (`docs/integrity-verification.md`) (#40)
- Added conditional request support for downloads (If-None-Match, If-Modified-Since) returning 304 Not Modified (#42)
- Added caching headers to downloads: Cache-Control (immutable), Last-Modified (#42)
- Added 416 Range Not Satisfiable response for invalid range requests (#42)
- Added download completion logging with bytes transferred and throughput (#42)
- Added client disconnect handling during streaming downloads (#42)
- Added streaming download tests: range requests, conditional requests, caching headers, download resume (#42)
- Added upload duration and throughput metrics (`duration_ms`, `throughput_mbps`) to upload response (#43)
- Added upload progress logging for large files (hash computation and multipart upload phases) (#43)
- Added client disconnect handling during uploads with proper cleanup (#43)
- Added upload progress tracking endpoint `GET /upload/{upload_id}/progress` for resumable uploads (#43)
- Added large file upload tests (10MB, 100MB, 1GB) with multipart upload verification (#43)
- Added upload cancellation and timeout handling tests (#43)
- Added comprehensive API documentation for upload endpoints with curl, Python, and JavaScript examples (#43)
- Added comprehensive upload/download tests for size boundaries (1B to 1GB)
- Added concurrent upload/download tests (2, 5, 10 parallel operations)
- Added data integrity tests (binary, text, unicode, compressed content)
- Added chunk boundary tests for edge cases
- Added `@pytest.mark.large` and `@pytest.mark.concurrent` test markers
- Added `generate_content()` and `generate_content_with_hash()` test helpers
- Added `sized_content` fixture for generating test content of specific sizes
- Added upload API tests: upload without tag, artifact creation verification, S3 object creation
- Added download API tests: tag: prefix resolution, 404 for nonexistent project/package/artifact
- Added download header tests: Content-Type, Content-Length, Content-Disposition, ETag, X-Checksum-SHA256
- Added error handling tests: timeout behavior, checksum validation, resource cleanup, graceful error responses
- Added version API tests: version creation, auto-detection, listing, download by version prefix, deletion
- Added `package_versions` table for immutable version tracking separate from mutable tags (#56)
- Versions are set at upload time via explicit `version` parameter or auto-detected from filename/metadata
- Version detection priority: explicit parameter > package metadata > filename pattern
@@ -83,13 +43,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added internal proxy configuration for npm, pip, helm, and apt (#51)
### Changed
- CI integration tests now run full pytest suite (~350 tests) against deployed environment instead of 3 smoke tests
- CI production deployment uses lightweight smoke tests only (no test data creation in prod)
- CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests
- Simplified deploy verification to health check only (full checks done by integration tests)
- Extracted environment URLs to global variables for maintainability
- Made `cleanup_feature` job standalone (no longer inherits deploy template dependencies)
- Renamed `integration_test_prod` to `smoke_test_prod` for clarity
- Updated download ref resolution to check versions before tags (version → tag → artifact ID) (#56)
- Deploy jobs now require all security scans to pass before deployment (added test_image, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis to dependencies) (#63)
- Increased deploy job timeout from 5m to 10m (#63)
@@ -103,13 +56,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improved pod naming: Orchard pods now named `orchard-{env}-server-*` for clarity (#51)
### Fixed
- Fixed CI integration test rate limiting: added configurable `ORCHARD_LOGIN_RATE_LIMIT` env var, relaxed to 1000/minute for dev/stage
- Fixed duplicate `TestSecurityEdgeCases` class definition in test_auth_api.py
- Fixed integration tests auth: session-scoped client, configurable credentials via env vars, fail-fast on auth errors
- Fixed 413 Request Entity Too Large errors on uploads by adding `proxy-body-size: "0"` nginx annotation to Orchard ingress
- Fixed CI tests that require direct S3 access: added `@pytest.mark.requires_direct_s3` marker and excluded from CI
- Fixed ref_count triggers not being created: added auto-migration for tags ref_count trigger functions
- Fixed Content-Disposition header encoding for non-ASCII filenames using RFC 5987 (#38)
- Fixed Content-Disposition header encoding for non-ASCII filenames using RFC 5987
- Fixed deploy jobs running even when tests or security scans fail (changed rules from `when: always` to `when: on_success`) (#63)
- Fixed python_tests job not using internal PyPI proxy (#63)
- Fixed `cleanup_feature` job failing when branch is deleted (`GIT_STRATEGY: none`) (#51)
@@ -119,7 +66,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed deploy jobs running when secrets scan fails (added `secrets` to deploy dependencies)
- Fixed dev environment memory requests to equal limits per cluster Kyverno policy
- Fixed init containers missing resource limits (Kyverno policy compliance)
- Fixed Python SyntaxWarning for invalid escape sequence in database migration regex pattern
### Removed
- Removed unused `store_streaming()` method from storage.py (#51)

View File

@@ -170,62 +170,6 @@ def _run_migrations():
END IF;
END $$;
""",
# Create ref_count trigger functions for tags (ensures triggers exist even if initial migration wasn't run)
"""
CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
""",
"""
CREATE OR REPLACE FUNCTION update_artifact_ref_count()
RETURNS TRIGGER AS $$
BEGIN
IF OLD.artifact_id != NEW.artifact_id THEN
UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
""",
# Create triggers for tags ref_count management
"""
DO $$
BEGIN
-- Drop and recreate triggers to ensure they're current
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
CREATE TRIGGER tags_ref_count_insert_trigger
AFTER INSERT ON tags
FOR EACH ROW
EXECUTE FUNCTION increment_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
CREATE TRIGGER tags_ref_count_delete_trigger
AFTER DELETE ON tags
FOR EACH ROW
EXECUTE FUNCTION decrement_artifact_ref_count();
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
CREATE TRIGGER tags_ref_count_update_trigger
AFTER UPDATE ON tags
FOR EACH ROW
WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
EXECUTE FUNCTION update_artifact_ref_count();
END $$;
""",
# Create ref_count trigger functions for package_versions
"""
CREATE OR REPLACE FUNCTION increment_version_ref_count()
@@ -266,7 +210,7 @@ def _run_migrations():
END $$;
""",
# Migrate existing semver tags to package_versions
r"""
"""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN

View File

@@ -82,7 +82,6 @@ from .schemas import (
ResumableUploadCompleteRequest,
ResumableUploadCompleteResponse,
ResumableUploadStatusResponse,
UploadProgressResponse,
GlobalSearchResponse,
SearchResultProject,
SearchResultPackage,
@@ -2284,56 +2283,10 @@ def upload_artifact(
"""
Upload an artifact to a package.
**Size Limits:**
- Minimum: 1 byte (empty files rejected)
- Maximum: 10GB (configurable via ORCHARD_MAX_FILE_SIZE)
- Files > 100MB automatically use S3 multipart upload
**Headers:**
- `X-Checksum-SHA256`: Optional SHA256 hash for server-side verification
- `Content-Length`: File size (required for early rejection of oversized files)
- `Authorization`: Bearer <api-key> for authentication
**Deduplication:**
Content-addressable storage automatically deduplicates identical files.
If the same content is uploaded multiple times, only one copy is stored.
**Response Metrics:**
- `duration_ms`: Upload duration in milliseconds
- `throughput_mbps`: Upload throughput in MB/s
- `deduplicated`: True if content already existed
**Example (curl):**
```bash
curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload" \\
-H "Authorization: Bearer <api-key>" \\
-F "file=@myfile.tar.gz" \\
-F "tag=v1.0.0"
```
**Example (Python requests):**
```python
import requests
with open('myfile.tar.gz', 'rb') as f:
response = requests.post(
'http://localhost:8080/api/v1/project/myproject/mypackage/upload',
files={'file': f},
data={'tag': 'v1.0.0'},
headers={'Authorization': 'Bearer <api-key>'}
)
```
**Example (JavaScript fetch):**
```javascript
const formData = new FormData();
formData.append('file', fileInput.files[0]);
formData.append('tag', 'v1.0.0');
const response = await fetch('/api/v1/project/myproject/mypackage/upload', {
method: 'POST',
headers: { 'Authorization': 'Bearer <api-key>' },
body: formData
});
```
Headers:
- X-Checksum-SHA256: Optional client-provided SHA256 for verification
- User-Agent: Captured for audit purposes
- Authorization: Bearer <api-key> for authentication
"""
start_time = time.time()
settings = get_settings()
@@ -2435,30 +2388,6 @@ def upload_artifact(
except StorageError as e:
logger.error(f"Storage error during upload: {e}")
raise HTTPException(status_code=500, detail="Internal storage error")
except (ConnectionResetError, BrokenPipeError) as e:
# Client disconnected during upload
logger.warning(
f"Client disconnected during upload: project={project_name} "
f"package={package_name} filename={file.filename} error={e}"
)
raise HTTPException(
status_code=499, # Client Closed Request (nginx convention)
detail="Client disconnected during upload",
)
except Exception as e:
# Catch-all for unexpected errors including client disconnects
error_str = str(e).lower()
if "connection" in error_str or "broken pipe" in error_str or "reset" in error_str:
logger.warning(
f"Client connection error during upload: project={project_name} "
f"package={package_name} filename={file.filename} error={e}"
)
raise HTTPException(
status_code=499,
detail="Client connection error during upload",
)
logger.error(f"Unexpected error during upload: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal server error during upload")
# Verify client-provided checksum if present
checksum_verified = True
@@ -2651,12 +2580,6 @@ def upload_artifact(
detail="Failed to save upload record. Please retry.",
)
# Calculate throughput
throughput_mbps = None
if duration_ms > 0:
duration_seconds = duration_ms / 1000.0
throughput_mbps = round((storage_result.size / (1024 * 1024)) / duration_seconds, 2)
return UploadResponse(
artifact_id=storage_result.sha256,
sha256=storage_result.sha256,
@@ -2676,8 +2599,6 @@ def upload_artifact(
content_type=artifact.content_type,
original_name=artifact.original_name,
created_at=artifact.created_at,
duration_ms=duration_ms,
throughput_mbps=throughput_mbps,
)
@@ -2695,46 +2616,8 @@ def init_resumable_upload(
storage: S3Storage = Depends(get_storage),
):
"""
Initialize a resumable upload session for large files.
Resumable uploads allow uploading large files in chunks, with the ability
to resume after interruption. The client must compute the SHA256 hash
of the entire file before starting.
**Workflow:**
1. POST /upload/init - Initialize upload session (this endpoint)
2. PUT /upload/{upload_id}/part/{part_number} - Upload each part
3. GET /upload/{upload_id}/progress - Check upload progress (optional)
4. POST /upload/{upload_id}/complete - Finalize upload
5. DELETE /upload/{upload_id} - Abort upload (if needed)
**Chunk Size:**
Use the `chunk_size` returned in the response (10MB default).
Each part except the last must be exactly this size.
**Deduplication:**
If the expected_hash already exists in storage, the response will include
`already_exists: true` and no upload session is created.
**Example (curl):**
```bash
# Step 1: Initialize
curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/init" \\
-H "Authorization: Bearer <api-key>" \\
-H "Content-Type: application/json" \\
-d '{"expected_hash": "<sha256>", "filename": "large.tar.gz", "size": 104857600}'
# Step 2: Upload parts
curl -X PUT "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/part/1" \\
-H "Authorization: Bearer <api-key>" \\
--data-binary @part1.bin
# Step 3: Complete
curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/complete" \\
-H "Authorization: Bearer <api-key>" \\
-H "Content-Type: application/json" \\
-d '{"tag": "v1.0.0"}'
```
Initialize a resumable upload session.
Client must provide the SHA256 hash of the file in advance.
"""
user_id = get_user_id(request)
@@ -2828,10 +2711,6 @@ def init_resumable_upload(
# Initialize resumable upload
session = storage.initiate_resumable_upload(init_request.expected_hash)
# Set expected size for progress tracking
if session["upload_id"] and init_request.size:
storage.set_upload_expected_size(session["upload_id"], init_request.size)
return ResumableUploadInitResponse(
upload_id=session["upload_id"],
already_exists=False,
@@ -2898,64 +2777,6 @@ def upload_part(
raise HTTPException(status_code=404, detail=str(e))
@router.get(
"/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/progress",
response_model=UploadProgressResponse,
)
def get_upload_progress(
project_name: str,
package_name: str,
upload_id: str,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
):
"""
Get progress information for an in-flight resumable upload.
Returns progress metrics including bytes uploaded, percent complete,
elapsed time, and throughput.
"""
# Validate project and package exist
project = db.query(Project).filter(Project.name == project_name).first()
if not project:
raise HTTPException(status_code=404, detail="Project not found")
package = (
db.query(Package)
.filter(Package.project_id == project.id, Package.name == package_name)
.first()
)
if not package:
raise HTTPException(status_code=404, detail="Package not found")
progress = storage.get_upload_progress(upload_id)
if not progress:
# Return not_found status instead of 404 to allow polling
return UploadProgressResponse(
upload_id=upload_id,
status="not_found",
bytes_uploaded=0,
)
from datetime import datetime, timezone
started_at_dt = None
if progress.get("started_at"):
started_at_dt = datetime.fromtimestamp(progress["started_at"], tz=timezone.utc)
return UploadProgressResponse(
upload_id=upload_id,
status=progress.get("status", "in_progress"),
bytes_uploaded=progress.get("bytes_uploaded", 0),
bytes_total=progress.get("bytes_total"),
percent_complete=progress.get("percent_complete"),
parts_uploaded=progress.get("parts_uploaded", 0),
parts_total=progress.get("parts_total"),
started_at=started_at_dt,
elapsed_seconds=progress.get("elapsed_seconds"),
throughput_mbps=progress.get("throughput_mbps"),
)
@router.post(
"/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete"
)
@@ -3151,8 +2972,6 @@ def download_artifact(
storage: S3Storage = Depends(get_storage),
current_user: Optional[User] = Depends(get_current_user_optional),
range: Optional[str] = Header(None),
if_none_match: Optional[str] = Header(None, alias="If-None-Match"),
if_modified_since: Optional[str] = Header(None, alias="If-Modified-Since"),
mode: Optional[Literal["proxy", "redirect", "presigned"]] = Query(
default=None,
description="Download mode: proxy (stream through backend), redirect (302 to presigned URL), presigned (return JSON with URL)",
@@ -3169,15 +2988,6 @@ def download_artifact(
"""
Download an artifact by reference (tag name, artifact:hash, tag:name).
Supports conditional requests:
- If-None-Match: Returns 304 Not Modified if ETag matches
- If-Modified-Since: Returns 304 Not Modified if not modified since date
Supports range requests for partial downloads and resume:
- Range: bytes=0-1023 (first 1KB)
- Range: bytes=-1024 (last 1KB)
- Returns 206 Partial Content with Content-Range header
Verification modes:
- verify=false (default): No verification, maximum performance
- verify=true&verify_mode=stream: Compute hash while streaming, verify after completion.
@@ -3190,9 +3000,6 @@ def download_artifact(
- X-Content-Length: File size in bytes
- ETag: Artifact ID (SHA256)
- Digest: RFC 3230 format sha-256 hash
- Last-Modified: Artifact creation timestamp
- Cache-Control: Immutable caching for content-addressable storage
- Accept-Ranges: bytes (advertises range request support)
When verify=true:
- X-Verified: 'true' if verified, 'false' if verification failed
@@ -3217,52 +3024,6 @@ def download_artifact(
filename = sanitize_filename(artifact.original_name or f"{artifact.id}")
# Format Last-Modified header (RFC 7231 format)
last_modified = None
last_modified_str = None
if artifact.created_at:
last_modified = artifact.created_at
if last_modified.tzinfo is None:
last_modified = last_modified.replace(tzinfo=timezone.utc)
last_modified_str = last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")
# Handle conditional requests (If-None-Match, If-Modified-Since)
# Return 304 Not Modified if content hasn't changed
artifact_etag = f'"{artifact.id}"'
if if_none_match:
# Strip quotes and compare with artifact ETag
client_etag = if_none_match.strip().strip('"')
if client_etag == artifact.id or if_none_match == artifact_etag:
return Response(
status_code=304,
headers={
"ETag": artifact_etag,
"Cache-Control": "public, max-age=31536000, immutable",
**({"Last-Modified": last_modified_str} if last_modified_str else {}),
},
)
if if_modified_since and last_modified:
try:
# Parse If-Modified-Since header
from email.utils import parsedate_to_datetime
client_date = parsedate_to_datetime(if_modified_since)
if client_date.tzinfo is None:
client_date = client_date.replace(tzinfo=timezone.utc)
# If artifact hasn't been modified since client's date, return 304
if last_modified <= client_date:
return Response(
status_code=304,
headers={
"ETag": artifact_etag,
"Cache-Control": "public, max-age=31536000, immutable",
**({"Last-Modified": last_modified_str} if last_modified_str else {}),
},
)
except (ValueError, TypeError):
pass # Invalid date format, ignore and continue with download
# Audit log download
user_id = get_user_id(request)
_log_audit(
@@ -3280,28 +3041,22 @@ def download_artifact(
)
db.commit()
# Build common headers (always included)
common_headers = {
# Build common checksum headers (always included)
checksum_headers = {
"X-Checksum-SHA256": artifact.id,
"X-Content-Length": str(artifact.size),
"ETag": artifact_etag,
# Cache-Control: content-addressable storage is immutable
"Cache-Control": "public, max-age=31536000, immutable",
"ETag": f'"{artifact.id}"',
}
# Add Last-Modified header
if last_modified_str:
common_headers["Last-Modified"] = last_modified_str
# Add RFC 3230 Digest header
try:
digest_base64 = sha256_to_base64(artifact.id)
common_headers["Digest"] = f"sha-256={digest_base64}"
checksum_headers["Digest"] = f"sha-256={digest_base64}"
except Exception:
pass # Skip if conversion fails
# Add MD5 checksum if available
if artifact.checksum_md5:
common_headers["X-Checksum-MD5"] = artifact.checksum_md5
checksum_headers["X-Checksum-MD5"] = artifact.checksum_md5
# Determine download mode (query param overrides server default)
download_mode = mode or settings.download_mode
@@ -3341,29 +3096,15 @@ def download_artifact(
# Proxy mode (default fallback) - stream through backend
# Handle range requests (verification not supported for partial downloads)
if range:
try:
stream, content_length, content_range = storage.get_stream(
artifact.s3_key, range
)
except Exception as e:
# S3 returns InvalidRange error for unsatisfiable ranges
error_str = str(e).lower()
if "invalidrange" in error_str or "range" in error_str:
raise HTTPException(
status_code=416,
detail="Range Not Satisfiable",
headers={
"Content-Range": f"bytes */{artifact.size}",
"Accept-Ranges": "bytes",
},
)
raise
stream, content_length, content_range = storage.get_stream(
artifact.s3_key, range
)
headers = {
"Content-Disposition": build_content_disposition(filename),
"Accept-Ranges": "bytes",
"Content-Length": str(content_length),
**common_headers,
**checksum_headers,
}
if content_range:
headers["Content-Range"] = content_range
@@ -3380,7 +3121,7 @@ def download_artifact(
base_headers = {
"Content-Disposition": build_content_disposition(filename),
"Accept-Ranges": "bytes",
**common_headers,
**checksum_headers,
}
# Pre-verification mode: verify before streaming
@@ -3448,42 +3189,11 @@ def download_artifact(
},
)
# No verification - direct streaming with completion logging
# No verification - direct streaming
stream, content_length, _ = storage.get_stream(artifact.s3_key)
def logged_stream():
"""Generator that yields chunks and logs completion/disconnection."""
import time
start_time = time.time()
bytes_sent = 0
try:
for chunk in stream:
bytes_sent += len(chunk)
yield chunk
# Download completed successfully
duration = time.time() - start_time
throughput_mbps = (bytes_sent / (1024 * 1024)) / duration if duration > 0 else 0
logger.info(
f"Download completed: artifact={artifact.id[:16]}... "
f"bytes={bytes_sent} duration={duration:.2f}s throughput={throughput_mbps:.2f}MB/s"
)
except GeneratorExit:
# Client disconnected before download completed
duration = time.time() - start_time
logger.warning(
f"Download interrupted: artifact={artifact.id[:16]}... "
f"bytes_sent={bytes_sent}/{content_length} duration={duration:.2f}s"
)
except Exception as e:
duration = time.time() - start_time
logger.error(
f"Download error: artifact={artifact.id[:16]}... "
f"bytes_sent={bytes_sent} duration={duration:.2f}s error={e}"
)
raise
return StreamingResponse(
logged_stream(),
stream,
media_type=artifact.content_type or "application/octet-stream",
headers={
**base_headers,
@@ -6390,110 +6100,3 @@ def get_artifact_provenance(
tags=tag_list,
uploads=upload_history,
)
# =============================================================================
# Factory Reset Endpoint (Admin Only)
# =============================================================================
@router.post("/api/v1/admin/factory-reset", tags=["admin"])
def factory_reset(
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
current_user: User = Depends(require_admin),
):
"""
Factory reset - delete all data and restore to initial state.
This endpoint:
1. Drops all database tables
2. Deletes all objects from S3 storage
3. Recreates the database schema
4. Re-seeds with default admin user
Requires:
- Admin authentication
- X-Confirm-Reset header set to "yes-delete-all-data"
WARNING: This is a destructive operation that cannot be undone.
"""
# Require explicit confirmation header
confirm_header = request.headers.get("X-Confirm-Reset")
if confirm_header != "yes-delete-all-data":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Factory reset requires X-Confirm-Reset header set to 'yes-delete-all-data'",
)
logger.warning(f"Factory reset initiated by admin user: {current_user.username}")
results = {
"database_tables_dropped": 0,
"s3_objects_deleted": 0,
"database_reinitialized": False,
"seeded": False,
}
try:
# Step 1: Drop all tables in public schema
logger.info("Dropping all database tables...")
drop_result = db.execute(
text("""
DO $$
DECLARE
r RECORD;
table_count INT := 0;
BEGIN
SET session_replication_role = 'replica';
FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
table_count := table_count + 1;
END LOOP;
SET session_replication_role = 'origin';
RAISE NOTICE 'Dropped % tables', table_count;
END $$;
""")
)
db.commit()
# Count tables that were dropped
count_result = db.execute(
text("SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'public'")
)
remaining_tables = count_result.scalar()
results["database_tables_dropped"] = "all"
logger.info(f"Database tables dropped, remaining: {remaining_tables}")
# Step 2: Delete all S3 objects
logger.info("Deleting all S3 objects...")
results["s3_objects_deleted"] = storage.delete_all()
# Step 3: Reinitialize database schema
logger.info("Reinitializing database schema...")
from .database import init_db
init_db()
results["database_reinitialized"] = True
# Step 4: Re-seed with default data
logger.info("Seeding database with defaults...")
from .seed import seed_database
seed_database()
results["seeded"] = True
logger.warning(f"Factory reset completed by {current_user.username}")
return {
"status": "success",
"message": "Factory reset completed successfully",
"results": results,
}
except Exception as e:
logger.error(f"Factory reset failed: {e}")
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Factory reset failed: {str(e)}",
)

View File

@@ -412,9 +412,6 @@ class UploadResponse(BaseModel):
content_type: Optional[str] = None
original_name: Optional[str] = None
created_at: Optional[datetime] = None
# Upload metrics (Issue #43)
duration_ms: Optional[int] = None # Upload duration in milliseconds
throughput_mbps: Optional[float] = None # Upload throughput in MB/s
# Resumable upload schemas
@@ -481,21 +478,6 @@ class ResumableUploadStatusResponse(BaseModel):
total_uploaded_bytes: int
class UploadProgressResponse(BaseModel):
"""Progress information for an in-flight upload"""
upload_id: str
status: str # 'in_progress', 'completed', 'failed', 'not_found'
bytes_uploaded: int = 0
bytes_total: Optional[int] = None
percent_complete: Optional[float] = None
parts_uploaded: int = 0
parts_total: Optional[int] = None
started_at: Optional[datetime] = None
elapsed_seconds: Optional[float] = None
throughput_mbps: Optional[float] = None
# Consumer schemas
class ConsumerResponse(BaseModel):
id: UUID

View File

@@ -242,19 +242,15 @@ class S3Storage:
},
)
# Build client kwargs - only include credentials if explicitly provided
# This allows IRSA/IAM role credentials to be used when no explicit creds are set
client_kwargs = {
"endpoint_url": settings.s3_endpoint if settings.s3_endpoint else None,
"region_name": settings.s3_region,
"config": config,
"verify": settings.s3_verify_ssl,
}
if settings.s3_access_key_id and settings.s3_secret_access_key:
client_kwargs["aws_access_key_id"] = settings.s3_access_key_id
client_kwargs["aws_secret_access_key"] = settings.s3_secret_access_key
self.client = boto3.client("s3", **client_kwargs)
self.client = boto3.client(
"s3",
endpoint_url=settings.s3_endpoint if settings.s3_endpoint else None,
region_name=settings.s3_region,
aws_access_key_id=settings.s3_access_key_id,
aws_secret_access_key=settings.s3_secret_access_key,
config=config,
verify=settings.s3_verify_ssl, # SSL/TLS verification
)
self.bucket = settings.s3_bucket
# Store active multipart uploads for resumable support
self._active_uploads: Dict[str, Dict[str, Any]] = {}
@@ -382,16 +378,10 @@ class S3Storage:
"""
# First pass: compute all hashes by streaming through file
try:
import time
sha256_hasher = hashlib.sha256()
md5_hasher = hashlib.md5()
sha1_hasher = hashlib.sha1()
size = 0
hash_start_time = time.time()
last_log_time = hash_start_time
log_interval_seconds = 5 # Log progress every 5 seconds
logger.info(f"Computing hashes for large file: expected_size={content_length}")
# Read file in chunks to compute hashes
while True:
@@ -403,18 +393,6 @@ class S3Storage:
sha1_hasher.update(chunk)
size += len(chunk)
# Log hash computation progress periodically
current_time = time.time()
if current_time - last_log_time >= log_interval_seconds:
elapsed = current_time - hash_start_time
percent = (size / content_length) * 100 if content_length > 0 else 0
throughput = (size / (1024 * 1024)) / elapsed if elapsed > 0 else 0
logger.info(
f"Hash computation progress: bytes={size}/{content_length} ({percent:.1f}%) "
f"throughput={throughput:.2f}MB/s"
)
last_log_time = current_time
# Enforce file size limit during streaming (protection against spoofing)
if size > settings.max_file_size:
raise FileSizeExceededError(
@@ -427,14 +405,6 @@ class S3Storage:
sha256_hash = sha256_hasher.hexdigest()
md5_hash = md5_hasher.hexdigest()
sha1_hash = sha1_hasher.hexdigest()
# Log hash computation completion
hash_elapsed = time.time() - hash_start_time
hash_throughput = (size / (1024 * 1024)) / hash_elapsed if hash_elapsed > 0 else 0
logger.info(
f"Hash computation completed: hash={sha256_hash[:16]}... "
f"size={size} duration={hash_elapsed:.2f}s throughput={hash_throughput:.2f}MB/s"
)
except (HashComputationError, FileSizeExceededError):
raise
except Exception as e:
@@ -488,19 +458,8 @@ class S3Storage:
upload_id = mpu["UploadId"]
try:
import time
parts = []
part_number = 1
bytes_uploaded = 0
upload_start_time = time.time()
last_log_time = upload_start_time
log_interval_seconds = 5 # Log progress every 5 seconds
total_parts = (content_length + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE
logger.info(
f"Starting multipart upload: hash={sha256_hash[:16]}... "
f"size={content_length} parts={total_parts}"
)
while True:
chunk = file.read(MULTIPART_CHUNK_SIZE)
@@ -520,32 +479,8 @@ class S3Storage:
"ETag": response["ETag"],
}
)
bytes_uploaded += len(chunk)
# Log progress periodically
current_time = time.time()
if current_time - last_log_time >= log_interval_seconds:
elapsed = current_time - upload_start_time
percent = (bytes_uploaded / content_length) * 100
throughput = (bytes_uploaded / (1024 * 1024)) / elapsed if elapsed > 0 else 0
logger.info(
f"Upload progress: hash={sha256_hash[:16]}... "
f"part={part_number}/{total_parts} "
f"bytes={bytes_uploaded}/{content_length} ({percent:.1f}%) "
f"throughput={throughput:.2f}MB/s"
)
last_log_time = current_time
part_number += 1
# Log completion
total_elapsed = time.time() - upload_start_time
final_throughput = (content_length / (1024 * 1024)) / total_elapsed if total_elapsed > 0 else 0
logger.info(
f"Multipart upload completed: hash={sha256_hash[:16]}... "
f"size={content_length} duration={total_elapsed:.2f}s throughput={final_throughput:.2f}MB/s"
)
# Complete multipart upload
complete_response = self.client.complete_multipart_upload(
Bucket=self.bucket,
@@ -567,28 +502,12 @@ class S3Storage:
except Exception as e:
# Abort multipart upload on failure
error_str = str(e).lower()
is_client_disconnect = (
isinstance(e, (ConnectionResetError, BrokenPipeError)) or
"connection" in error_str or "broken pipe" in error_str or "reset" in error_str
logger.error(f"Multipart upload failed: {e}")
self.client.abort_multipart_upload(
Bucket=self.bucket,
Key=s3_key,
UploadId=upload_id,
)
if is_client_disconnect:
logger.warning(
f"Multipart upload aborted (client disconnect): hash={sha256_hash[:16]}... "
f"parts_uploaded={len(parts)} bytes_uploaded={bytes_uploaded}"
)
else:
logger.error(f"Multipart upload failed: hash={sha256_hash[:16]}... error={e}")
try:
self.client.abort_multipart_upload(
Bucket=self.bucket,
Key=s3_key,
UploadId=upload_id,
)
logger.info(f"Multipart upload aborted and cleaned up: upload_id={upload_id[:16]}...")
except Exception as abort_error:
logger.error(f"Failed to abort multipart upload: {abort_error}")
raise
def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
@@ -610,17 +529,12 @@ class S3Storage:
mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
upload_id = mpu["UploadId"]
import time
session = {
"upload_id": upload_id,
"s3_key": s3_key,
"already_exists": False,
"parts": [],
"expected_hash": expected_hash,
"started_at": time.time(),
"bytes_uploaded": 0,
"expected_size": None, # Set when init provides size
"status": "in_progress",
}
self._active_uploads[upload_id] = session
return session
@@ -647,57 +561,10 @@ class S3Storage:
part_info = {
"PartNumber": part_number,
"ETag": response["ETag"],
"size": len(data),
}
session["parts"].append(part_info)
session["bytes_uploaded"] = session.get("bytes_uploaded", 0) + len(data)
return part_info
def get_upload_progress(self, upload_id: str) -> Optional[Dict[str, Any]]:
"""
Get progress information for a resumable upload.
Returns None if upload not found.
"""
import time
session = self._active_uploads.get(upload_id)
if not session:
return None
bytes_uploaded = session.get("bytes_uploaded", 0)
expected_size = session.get("expected_size")
started_at = session.get("started_at")
progress = {
"upload_id": upload_id,
"status": session.get("status", "in_progress"),
"bytes_uploaded": bytes_uploaded,
"bytes_total": expected_size,
"parts_uploaded": len(session.get("parts", [])),
"parts_total": None,
"started_at": started_at,
"elapsed_seconds": None,
"percent_complete": None,
"throughput_mbps": None,
}
if expected_size and expected_size > 0:
progress["percent_complete"] = round((bytes_uploaded / expected_size) * 100, 2)
progress["parts_total"] = (expected_size + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE
if started_at:
elapsed = time.time() - started_at
progress["elapsed_seconds"] = round(elapsed, 2)
if elapsed > 0 and bytes_uploaded > 0:
progress["throughput_mbps"] = round((bytes_uploaded / (1024 * 1024)) / elapsed, 2)
return progress
def set_upload_expected_size(self, upload_id: str, size: int):
"""Set the expected size for an upload (for progress tracking)."""
session = self._active_uploads.get(upload_id)
if session:
session["expected_size"] = size
def complete_resumable_upload(self, upload_id: str) -> Tuple[str, str]:
"""
Complete a resumable upload.
@@ -835,36 +702,6 @@ class S3Storage:
except ClientError:
return False
def delete_all(self) -> int:
"""
Delete all objects in the bucket.
Returns:
Number of objects deleted
"""
deleted_count = 0
try:
paginator = self.client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=self.bucket):
objects = page.get("Contents", [])
if not objects:
continue
# Delete objects in batches of 1000 (S3 limit)
delete_keys = [{"Key": obj["Key"]} for obj in objects]
if delete_keys:
self.client.delete_objects(
Bucket=self.bucket, Delete={"Objects": delete_keys}
)
deleted_count += len(delete_keys)
logger.info(f"Deleted {len(delete_keys)} objects from S3")
logger.info(f"Total objects deleted from S3: {deleted_count}")
return deleted_count
except ClientError as e:
logger.error(f"Failed to delete all S3 objects: {e}")
raise
def generate_presigned_url(
self,
s3_key: str,

View File

@@ -9,37 +9,6 @@ This module provides:
import os
import pytest
# =============================================================================
# Pytest Markers
# =============================================================================
def pytest_configure(config):
"""Register custom pytest markers."""
config.addinivalue_line(
"markers",
"auth_intensive: marks tests that make many login requests (excluded from CI integration tests due to rate limiting)",
)
config.addinivalue_line(
"markers",
"integration: marks tests as integration tests",
)
config.addinivalue_line(
"markers",
"large: marks tests that handle large files (slow)",
)
config.addinivalue_line(
"markers",
"slow: marks tests as slow running",
)
config.addinivalue_line(
"markers",
"requires_direct_s3: marks tests that require direct S3/MinIO access (skipped in CI where S3 is not directly accessible)",
)
import io
from typing import Generator
from unittest.mock import MagicMock
@@ -212,64 +181,29 @@ def test_app():
# =============================================================================
@pytest.fixture(scope="session")
@pytest.fixture
def integration_client():
"""
Create an authenticated test client for integration tests.
Uses the real database and MinIO from docker-compose.local.yml or deployed environment.
Authenticates as admin for write operations. Session-scoped to reuse login across tests.
Environment variables:
ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
ORCHARD_TEST_USERNAME: Admin username for authentication (default: admin)
ORCHARD_TEST_PASSWORD: Admin password for authentication (default: changeme123)
Uses the real database and MinIO from docker-compose.local.yml.
Authenticates as admin for write operations.
"""
import httpx
from httpx import Client
# Connect to the running orchard-server container or deployed environment
# Connect to the running orchard-server container
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
username = os.environ.get("ORCHARD_TEST_USERNAME", "admin")
password = os.environ.get("ORCHARD_TEST_PASSWORD", "changeme123")
with httpx.Client(base_url=base_url, timeout=30.0) as client:
with Client(base_url=base_url, timeout=30.0) as client:
# Login as admin to enable write operations
login_response = client.post(
"/api/v1/auth/login",
json={"username": username, "password": password},
json={"username": "admin", "password": "changeme123"},
)
# If login fails, tests will fail - that's expected if auth is broken
if login_response.status_code != 200:
pytest.fail(
f"Authentication failed against {base_url}: {login_response.status_code} - {login_response.text}. "
f"Set ORCHARD_TEST_USERNAME and ORCHARD_TEST_PASSWORD environment variables if using non-default credentials."
)
# Verify cookie was set
if not client.cookies:
pytest.fail(
f"Login succeeded but no session cookie was set. Response headers: {login_response.headers}"
)
yield client
@pytest.fixture
def auth_client():
"""
Create a function-scoped test client for authentication tests.
Unlike integration_client (session-scoped), this creates a fresh client
for each test. Use this for tests that manipulate authentication state
(login, logout, cookie clearing) to avoid polluting other tests.
Environment variables:
ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
"""
import httpx
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with httpx.Client(base_url=base_url, timeout=30.0) as client:
# Try to continue without auth for backward compatibility
pass
yield client

View File

@@ -1,25 +1,16 @@
"""Integration tests for authentication API endpoints.
Note: These tests are marked as auth_intensive because they make many login
requests. Dev/stage deployments have relaxed rate limits (1000/minute) to
allow these tests to run. Production uses strict rate limits (5/minute).
"""
"""Integration tests for authentication API endpoints."""
import pytest
from uuid import uuid4
# Mark all tests in this module as auth_intensive (informational, not excluded from CI)
pytestmark = pytest.mark.auth_intensive
class TestAuthLogin:
"""Tests for login endpoint."""
@pytest.mark.integration
def test_login_success(self, auth_client):
def test_login_success(self, integration_client):
"""Test successful login with default admin credentials."""
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
@@ -30,9 +21,9 @@ class TestAuthLogin:
assert "orchard_session" in response.cookies
@pytest.mark.integration
def test_login_invalid_password(self, auth_client):
def test_login_invalid_password(self, integration_client):
"""Test login with wrong password."""
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "wrongpassword"},
)
@@ -40,9 +31,9 @@ class TestAuthLogin:
assert "Invalid username or password" in response.json()["detail"]
@pytest.mark.integration
def test_login_nonexistent_user(self, auth_client):
def test_login_nonexistent_user(self, integration_client):
"""Test login with non-existent user."""
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/login",
json={"username": "nonexistent", "password": "password"},
)
@@ -53,24 +44,24 @@ class TestAuthLogout:
"""Tests for logout endpoint."""
@pytest.mark.integration
def test_logout_success(self, auth_client):
def test_logout_success(self, integration_client):
"""Test successful logout."""
# First login
login_response = auth_client.post(
login_response = integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
assert login_response.status_code == 200
# Then logout
logout_response = auth_client.post("/api/v1/auth/logout")
logout_response = integration_client.post("/api/v1/auth/logout")
assert logout_response.status_code == 200
assert "Logged out successfully" in logout_response.json()["message"]
@pytest.mark.integration
def test_logout_without_session(self, auth_client):
def test_logout_without_session(self, integration_client):
"""Test logout without being logged in."""
response = auth_client.post("/api/v1/auth/logout")
response = integration_client.post("/api/v1/auth/logout")
# Should succeed even without session
assert response.status_code == 200
@@ -79,15 +70,15 @@ class TestAuthMe:
"""Tests for get current user endpoint."""
@pytest.mark.integration
def test_get_me_authenticated(self, auth_client):
def test_get_me_authenticated(self, integration_client):
"""Test getting current user when authenticated."""
# Login first
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = auth_client.get("/api/v1/auth/me")
response = integration_client.get("/api/v1/auth/me")
assert response.status_code == 200
data = response.json()
assert data["username"] == "admin"
@@ -96,88 +87,67 @@ class TestAuthMe:
assert "created_at" in data
@pytest.mark.integration
def test_get_me_unauthenticated(self, auth_client):
def test_get_me_unauthenticated(self, integration_client):
"""Test getting current user without authentication."""
# Clear any existing cookies
auth_client.cookies.clear()
integration_client.cookies.clear()
response = auth_client.get("/api/v1/auth/me")
response = integration_client.get("/api/v1/auth/me")
assert response.status_code == 401
assert "Not authenticated" in response.json()["detail"]
class TestAuthChangePassword:
"""Tests for change password endpoint.
Note: These tests use dedicated test users instead of admin to avoid
invalidating the integration_client session (which uses admin).
"""
"""Tests for change password endpoint."""
@pytest.mark.integration
def test_change_password_success(self, auth_client):
def test_change_password_success(self, integration_client):
"""Test successful password change."""
# Login as admin to create a test user
auth_client.post(
# Login first
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"pwchange_{uuid4().hex[:8]}"
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "oldpassword123"},
)
# Login as test user
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "oldpassword123"},
)
# Change password
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "oldpassword123", "new_password": "newpassword123"},
json={"current_password": "changeme123", "new_password": "newpassword123"},
)
assert response.status_code == 200
# Verify old password no longer works
auth_client.cookies.clear()
response = auth_client.post(
integration_client.cookies.clear()
response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "oldpassword123"},
json={"username": "admin", "password": "changeme123"},
)
assert response.status_code == 401
# Verify new password works
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "newpassword123"},
json={"username": "admin", "password": "newpassword123"},
)
assert response.status_code == 200
# Reset password back to original for other tests
reset_response = integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "newpassword123", "new_password": "changeme123"},
)
assert reset_response.status_code == 200, "Failed to reset admin password back to default"
@pytest.mark.integration
def test_change_password_wrong_current(self, auth_client):
def test_change_password_wrong_current(self, integration_client):
"""Test password change with wrong current password."""
# Login as admin to create a test user
auth_client.post(
# Login first
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"pwwrong_{uuid4().hex[:8]}"
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as test user
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "wrongpassword", "new_password": "newpassword"},
)
@@ -189,16 +159,16 @@ class TestAPIKeys:
"""Tests for API key management endpoints."""
@pytest.mark.integration
def test_create_and_list_api_key(self, auth_client):
def test_create_and_list_api_key(self, integration_client):
"""Test creating and listing API keys."""
# Login first
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create API key
create_response = auth_client.post(
create_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "test-key", "description": "Test API key"},
)
@@ -212,23 +182,23 @@ class TestAPIKeys:
api_key = data["key"]
# List API keys
list_response = auth_client.get("/api/v1/auth/keys")
list_response = integration_client.get("/api/v1/auth/keys")
assert list_response.status_code == 200
keys = list_response.json()
assert any(k["id"] == key_id for k in keys)
# Clean up - delete the key
auth_client.delete(f"/api/v1/auth/keys/{key_id}")
integration_client.delete(f"/api/v1/auth/keys/{key_id}")
@pytest.mark.integration
def test_use_api_key_for_auth(self, auth_client):
def test_use_api_key_for_auth(self, integration_client):
"""Test using API key for authentication."""
# Login and create API key
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = auth_client.post(
create_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "auth-test-key"},
)
@@ -236,8 +206,8 @@ class TestAPIKeys:
key_id = create_response.json()["id"]
# Clear cookies and use API key
auth_client.cookies.clear()
response = auth_client.get(
integration_client.cookies.clear()
response = integration_client.get(
"/api/v1/auth/me",
headers={"Authorization": f"Bearer {api_key}"},
)
@@ -245,21 +215,21 @@ class TestAPIKeys:
assert response.json()["username"] == "admin"
# Clean up
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
auth_client.delete(f"/api/v1/auth/keys/{key_id}")
integration_client.delete(f"/api/v1/auth/keys/{key_id}")
@pytest.mark.integration
def test_delete_api_key(self, auth_client):
def test_delete_api_key(self, integration_client):
"""Test revoking an API key."""
# Login and create API key
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = auth_client.post(
create_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "delete-test-key"},
)
@@ -267,12 +237,12 @@ class TestAPIKeys:
api_key = create_response.json()["key"]
# Delete the key
delete_response = auth_client.delete(f"/api/v1/auth/keys/{key_id}")
delete_response = integration_client.delete(f"/api/v1/auth/keys/{key_id}")
assert delete_response.status_code == 200
# Verify key no longer works
auth_client.cookies.clear()
response = auth_client.get(
integration_client.cookies.clear()
response = integration_client.get(
"/api/v1/auth/me",
headers={"Authorization": f"Bearer {api_key}"},
)
@@ -283,32 +253,32 @@ class TestAdminUserManagement:
"""Tests for admin user management endpoints."""
@pytest.mark.integration
def test_list_users(self, auth_client):
def test_list_users(self, integration_client):
"""Test listing users as admin."""
# Login as admin
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = auth_client.get("/api/v1/admin/users")
response = integration_client.get("/api/v1/admin/users")
assert response.status_code == 200
users = response.json()
assert len(users) >= 1
assert any(u["username"] == "admin" for u in users)
@pytest.mark.integration
def test_create_user(self, auth_client):
def test_create_user(self, integration_client):
"""Test creating a new user as admin."""
# Login as admin
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create new user
test_username = f"testuser_{uuid4().hex[:8]}"
response = auth_client.post(
response = integration_client.post(
"/api/v1/admin/users",
json={
"username": test_username,
@@ -323,31 +293,31 @@ class TestAdminUserManagement:
assert data["is_admin"] is False
# Verify new user can login
auth_client.cookies.clear()
login_response = auth_client.post(
integration_client.cookies.clear()
login_response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "testpassword"},
)
assert login_response.status_code == 200
@pytest.mark.integration
def test_update_user(self, auth_client):
def test_update_user(self, integration_client):
"""Test updating a user as admin."""
# Login as admin
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user
test_username = f"updateuser_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password"},
)
# Update the user
response = auth_client.put(
response = integration_client.put(
f"/api/v1/admin/users/{test_username}",
json={"email": "updated@example.com", "is_admin": True},
)
@@ -357,59 +327,59 @@ class TestAdminUserManagement:
assert data["is_admin"] is True
@pytest.mark.integration
def test_reset_user_password(self, auth_client):
def test_reset_user_password(self, integration_client):
"""Test resetting a user's password as admin."""
# Login as admin
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user
test_username = f"resetuser_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "oldpassword"},
)
# Reset password
response = auth_client.post(
response = integration_client.post(
f"/api/v1/admin/users/{test_username}/reset-password",
json={"new_password": "newpassword"},
)
assert response.status_code == 200
# Verify new password works
auth_client.cookies.clear()
login_response = auth_client.post(
integration_client.cookies.clear()
login_response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "newpassword"},
)
assert login_response.status_code == 200
@pytest.mark.integration
def test_non_admin_cannot_access_admin_endpoints(self, auth_client):
def test_non_admin_cannot_access_admin_endpoints(self, integration_client):
"""Test that non-admin users cannot access admin endpoints."""
# Login as admin and create non-admin user
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"nonadmin_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password", "is_admin": False},
)
# Login as non-admin
auth_client.cookies.clear()
auth_client.post(
integration_client.cookies.clear()
integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password"},
)
# Try to access admin endpoints
response = auth_client.get("/api/v1/admin/users")
response = integration_client.get("/api/v1/admin/users")
assert response.status_code == 403
assert "Admin privileges required" in response.json()["detail"]
@@ -418,28 +388,28 @@ class TestSecurityEdgeCases:
"""Tests for security edge cases and validation."""
@pytest.mark.integration
def test_login_inactive_user(self, auth_client):
def test_login_inactive_user(self, integration_client):
"""Test that inactive users cannot login."""
# Login as admin and create a user
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"inactive_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Deactivate the user
auth_client.put(
integration_client.put(
f"/api/v1/admin/users/{test_username}",
json={"is_active": False},
)
# Try to login as inactive user
auth_client.cookies.clear()
response = auth_client.post(
integration_client.cookies.clear()
response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
@@ -447,14 +417,14 @@ class TestSecurityEdgeCases:
assert "Invalid username or password" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_create(self, auth_client):
def test_password_too_short_on_create(self, integration_client):
"""Test that short passwords are rejected when creating users."""
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = auth_client.post(
response = integration_client.post(
"/api/v1/admin/users",
json={"username": f"shortpw_{uuid4().hex[:8]}", "password": "short"},
)
@@ -462,49 +432,36 @@ class TestSecurityEdgeCases:
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_change(self, auth_client):
def test_password_too_short_on_change(self, integration_client):
"""Test that short passwords are rejected when changing password."""
# Create test user
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"shortchange_{uuid4().hex[:8]}"
auth_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as test user
auth_client.cookies.clear()
auth_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
response = auth_client.post(
response = integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "password123", "new_password": "short"},
json={"current_password": "changeme123", "new_password": "short"},
)
assert response.status_code == 400
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_reset(self, auth_client):
def test_password_too_short_on_reset(self, integration_client):
"""Test that short passwords are rejected when resetting password."""
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user first
test_username = f"resetshort_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
response = auth_client.post(
response = integration_client.post(
f"/api/v1/admin/users/{test_username}/reset-password",
json={"new_password": "short"},
)
@@ -512,23 +469,23 @@ class TestSecurityEdgeCases:
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_duplicate_username_rejected(self, auth_client):
def test_duplicate_username_rejected(self, integration_client):
"""Test that duplicate usernames are rejected."""
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"duplicate_{uuid4().hex[:8]}"
# Create user first time
response1 = auth_client.post(
response1 = integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
assert response1.status_code == 200
# Try to create same username again
response2 = auth_client.post(
response2 = integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password456"},
)
@@ -536,14 +493,14 @@ class TestSecurityEdgeCases:
assert "already exists" in response2.json()["detail"]
@pytest.mark.integration
def test_cannot_delete_other_users_api_key(self, auth_client):
def test_cannot_delete_other_users_api_key(self, integration_client):
"""Test that users cannot delete API keys owned by other users."""
# Login as admin and create an API key
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = auth_client.post(
create_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "admin-key"},
)
@@ -551,65 +508,253 @@ class TestSecurityEdgeCases:
# Create a non-admin user
test_username = f"nonadmin_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as non-admin
auth_client.cookies.clear()
auth_client.post(
integration_client.cookies.clear()
integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
# Try to delete admin's API key
response = auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
response = integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
assert response.status_code == 403
assert "Cannot delete another user's API key" in response.json()["detail"]
# Cleanup: login as admin and delete the key
auth_client.cookies.clear()
auth_client.post(
integration_client.cookies.clear()
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
@pytest.mark.integration
def test_sessions_invalidated_on_password_change(self, auth_client):
def test_sessions_invalidated_on_password_change(self, integration_client):
"""Test that all sessions are invalidated when password is changed."""
# Create a test user
auth_client.post(
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"sessiontest_{uuid4().hex[:8]}"
auth_client.post(
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as test user
auth_client.cookies.clear()
login_response = auth_client.post(
integration_client.cookies.clear()
login_response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
assert login_response.status_code == 200
# Verify session works
me_response = auth_client.get("/api/v1/auth/me")
me_response = integration_client.get("/api/v1/auth/me")
assert me_response.status_code == 200
# Change password
auth_client.post(
integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "password123", "new_password": "newpassword123"},
)
# Old session should be invalidated - try to access /me
# (note: the change-password call itself may have cleared the session cookie)
me_response2 = auth_client.get("/api/v1/auth/me")
me_response2 = integration_client.get("/api/v1/auth/me")
# This should fail because all sessions were invalidated
assert me_response2.status_code == 401
class TestSecurityEdgeCases:
"""Tests for security edge cases and validation."""
@pytest.mark.integration
def test_login_inactive_user(self, integration_client):
"""Test that inactive users cannot login."""
# Login as admin and create a user
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"inactive_{uuid4().hex[:8]}"
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Deactivate the user
integration_client.put(
f"/api/v1/admin/users/{test_username}",
json={"is_active": False},
)
# Try to login as inactive user
integration_client.cookies.clear()
response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
assert response.status_code == 401
assert "Invalid username or password" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_create(self, integration_client):
"""Test that short passwords are rejected when creating users."""
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = integration_client.post(
"/api/v1/admin/users",
json={"username": f"shortpw_{uuid4().hex[:8]}", "password": "short"},
)
assert response.status_code == 400
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_change(self, integration_client):
"""Test that short passwords are rejected when changing password."""
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
response = integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "changeme123", "new_password": "short"},
)
assert response.status_code == 400
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_password_too_short_on_reset(self, integration_client):
"""Test that short passwords are rejected when resetting password."""
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
# Create a test user first
test_username = f"resetshort_{uuid4().hex[:8]}"
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
response = integration_client.post(
f"/api/v1/admin/users/{test_username}/reset-password",
json={"new_password": "short"},
)
assert response.status_code == 400
assert "at least 8 characters" in response.json()["detail"]
@pytest.mark.integration
def test_duplicate_username_rejected(self, integration_client):
"""Test that duplicate usernames are rejected."""
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"duplicate_{uuid4().hex[:8]}"
# Create user first time
response1 = integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
assert response1.status_code == 200
# Try to create same username again
response2 = integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password456"},
)
assert response2.status_code == 409
assert "already exists" in response2.json()["detail"]
@pytest.mark.integration
def test_cannot_delete_other_users_api_key(self, integration_client):
"""Test that users cannot delete API keys owned by other users."""
# Login as admin and create an API key
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
create_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "admin-key"},
)
admin_key_id = create_response.json()["id"]
# Create a non-admin user
test_username = f"nonadmin_{uuid4().hex[:8]}"
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as non-admin
integration_client.cookies.clear()
integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
# Try to delete admin's API key
response = integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
assert response.status_code == 403
assert "Cannot delete another user's API key" in response.json()["detail"]
# Cleanup: login as admin and delete the key
integration_client.cookies.clear()
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
integration_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
@pytest.mark.integration
def test_sessions_invalidated_on_password_change(self, integration_client):
"""Test that all sessions are invalidated when password is changed."""
# Create a test user
integration_client.post(
"/api/v1/auth/login",
json={"username": "admin", "password": "changeme123"},
)
test_username = f"sessiontest_{uuid4().hex[:8]}"
integration_client.post(
"/api/v1/admin/users",
json={"username": test_username, "password": "password123"},
)
# Login as test user
integration_client.cookies.clear()
login_response = integration_client.post(
"/api/v1/auth/login",
json={"username": test_username, "password": "password123"},
)
assert login_response.status_code == 200
# Verify session works
me_response = integration_client.get("/api/v1/auth/me")
assert me_response.status_code == 200
# Change password
integration_client.post(
"/api/v1/auth/change-password",
json={"current_password": "password123", "new_password": "newpassword123"},
)
# Old session should be invalidated - try to access /me
# (note: the change-password call itself may have cleared the session cookie)
me_response2 = integration_client.get("/api/v1/auth/me")
# This should fail because all sessions were invalidated
assert me_response2.status_code == 401

View File

@@ -1,768 +0,0 @@
"""
Integration tests for artifact integrity verification.
Tests cover:
- Round-trip verification (upload -> download -> verify hash)
- Consistency check endpoint
- Header-based verification
- Integrity verification across file sizes
- Client-side verification workflow
"""
import pytest
import io
import hashlib
from tests.factories import (
compute_sha256,
upload_test_file,
generate_content_with_hash,
s3_object_exists,
get_s3_client,
get_s3_bucket,
)
from tests.conftest import (
SIZE_1KB,
SIZE_10KB,
SIZE_100KB,
SIZE_1MB,
SIZE_10MB,
)
class TestRoundTripVerification:
"""Tests for complete round-trip integrity verification."""
@pytest.mark.integration
def test_upload_download_hash_matches(self, integration_client, test_package):
"""Test that upload -> download round trip preserves content integrity."""
project, package = test_package
content = b"Round trip integrity test content"
expected_hash = compute_sha256(content)
# Upload and capture returned hash
result = upload_test_file(
integration_client, project, package, content, tag="roundtrip"
)
uploaded_hash = result["artifact_id"]
# Verify upload returned correct hash
assert uploaded_hash == expected_hash
# Download artifact
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/roundtrip",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Compute hash of downloaded content
downloaded_hash = compute_sha256(response.content)
# All three hashes should match
assert downloaded_hash == expected_hash
assert downloaded_hash == uploaded_hash
@pytest.mark.integration
def test_upload_response_contains_hash(self, integration_client, test_package):
"""Test upload response contains artifact_id which is the SHA256 hash."""
project, package = test_package
content = b"Upload response hash test"
expected_hash = compute_sha256(content)
result = upload_test_file(integration_client, project, package, content)
assert "artifact_id" in result
assert result["artifact_id"] == expected_hash
assert len(result["artifact_id"]) == 64
assert all(c in "0123456789abcdef" for c in result["artifact_id"])
@pytest.mark.integration
def test_download_header_matches_artifact_id(self, integration_client, test_package):
"""Test X-Checksum-SHA256 header matches artifact ID."""
project, package = test_package
content = b"Header verification test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="header-check"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/header-check",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
def test_etag_matches_artifact_id(self, integration_client, test_package):
"""Test ETag header matches artifact ID."""
project, package = test_package
content = b"ETag verification test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="etag-check"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-check",
params={"mode": "proxy"},
)
assert response.status_code == 200
etag = response.headers.get("ETag", "").strip('"')
assert etag == expected_hash
@pytest.mark.integration
def test_artifact_endpoint_returns_correct_hash(self, integration_client, test_package):
"""Test artifact endpoint returns correct hash/ID."""
project, package = test_package
content = b"Artifact endpoint hash test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content)
# Query artifact directly
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
assert response.status_code == 200
data = response.json()
assert data["id"] == expected_hash
assert data.get("sha256") == expected_hash
class TestClientSideVerificationWorkflow:
"""Tests for client-side verification workflow."""
@pytest.mark.integration
def test_client_can_verify_before_upload(self, integration_client, test_package):
"""Test client can compute hash before upload and verify response matches."""
project, package = test_package
content = b"Client pre-upload verification test"
# Client computes hash locally before upload
client_hash = compute_sha256(content)
# Upload
result = upload_test_file(integration_client, project, package, content)
# Client verifies server returned the same hash
assert result["artifact_id"] == client_hash
@pytest.mark.integration
def test_client_can_provide_checksum_header(self, integration_client, test_package):
"""Test client can provide X-Checksum-SHA256 header for verification."""
project, package = test_package
content = b"Client checksum header test"
client_hash = compute_sha256(content)
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": client_hash},
)
assert response.status_code == 200
assert response.json()["artifact_id"] == client_hash
@pytest.mark.integration
def test_checksum_mismatch_rejected(self, integration_client, test_package):
"""Test upload with wrong client checksum is rejected."""
project, package = test_package
content = b"Checksum mismatch test"
wrong_hash = "0" * 64
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
@pytest.mark.integration
def test_client_can_verify_after_download(self, integration_client, test_package):
"""Test client can verify downloaded content matches header hash."""
project, package = test_package
content = b"Client post-download verification"
upload_test_file(
integration_client, project, package, content, tag="verify-after"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/verify-after",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Client gets hash from header
header_hash = response.headers.get("X-Checksum-SHA256")
# Client computes hash of downloaded content
downloaded_hash = compute_sha256(response.content)
# Client verifies they match
assert downloaded_hash == header_hash
class TestIntegritySizeVariants:
"""Tests for integrity verification across different file sizes."""
@pytest.mark.integration
def test_integrity_1kb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 1KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1KB, seed=100)
result = upload_test_file(
integration_client, project, package, content, tag="int-1kb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-1kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
def test_integrity_100kb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 100KB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=101)
result = upload_test_file(
integration_client, project, package, content, tag="int-100kb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-100kb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
def test_integrity_1mb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 1MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1MB, seed=102)
result = upload_test_file(
integration_client, project, package, content, tag="int-1mb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-1mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
@pytest.mark.integration
@pytest.mark.slow
def test_integrity_10mb(self, integration_client, test_package, sized_content):
"""Test integrity verification for 10MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_10MB, seed=103)
result = upload_test_file(
integration_client, project, package, content, tag="int-10mb"
)
assert result["artifact_id"] == expected_hash
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/int-10mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert compute_sha256(response.content) == expected_hash
assert response.headers.get("X-Checksum-SHA256") == expected_hash
class TestConsistencyCheck:
"""Tests for the admin consistency check endpoint."""
@pytest.mark.integration
def test_consistency_check_returns_200(self, integration_client):
"""Test consistency check endpoint returns 200."""
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
@pytest.mark.integration
def test_consistency_check_response_format(self, integration_client):
"""Test consistency check returns expected response format."""
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Check expected fields
assert "total_artifacts_checked" in data
assert "orphaned_s3_objects" in data
assert "missing_s3_objects" in data
assert "size_mismatches" in data
assert "healthy" in data
assert "orphaned_s3_keys" in data
assert "missing_s3_keys" in data
assert "size_mismatch_artifacts" in data
# Verify types
assert isinstance(data["total_artifacts_checked"], int)
assert isinstance(data["orphaned_s3_objects"], int)
assert isinstance(data["missing_s3_objects"], int)
assert isinstance(data["size_mismatches"], int)
assert isinstance(data["healthy"], bool)
assert isinstance(data["orphaned_s3_keys"], list)
assert isinstance(data["missing_s3_keys"], list)
assert isinstance(data["size_mismatch_artifacts"], list)
@pytest.mark.integration
def test_consistency_check_after_upload(self, integration_client, test_package):
"""Test consistency check passes after valid upload."""
project, package = test_package
content = b"Consistency check test content"
# Upload artifact
upload_test_file(integration_client, project, package, content)
# Run consistency check
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Verify check ran and no issues
assert data["total_artifacts_checked"] >= 1
assert data["healthy"] is True
@pytest.mark.integration
def test_consistency_check_limit_parameter(self, integration_client):
"""Test consistency check respects limit parameter."""
response = integration_client.get(
"/api/v1/admin/consistency-check",
params={"limit": 10}
)
assert response.status_code == 200
data = response.json()
# Lists should not exceed limit
assert len(data["orphaned_s3_keys"]) <= 10
assert len(data["missing_s3_keys"]) <= 10
assert len(data["size_mismatch_artifacts"]) <= 10
class TestDigestHeader:
"""Tests for RFC 3230 Digest header."""
@pytest.mark.integration
def test_download_includes_digest_header(self, integration_client, test_package):
"""Test download includes Digest header in RFC 3230 format."""
project, package = test_package
content = b"Digest header test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="digest-test"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/digest-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "Digest" in response.headers
# Verify Digest format (sha-256=base64hash)
digest = response.headers["Digest"]
assert digest.startswith("sha-256=")
@pytest.mark.integration
def test_digest_header_base64_valid(self, integration_client, test_package):
"""Test Digest header contains valid base64 encoding."""
import base64
project, package = test_package
content = b"Digest base64 test"
expected_hash = compute_sha256(content)
upload_test_file(
integration_client, project, package, content, tag="digest-b64"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/digest-b64",
params={"mode": "proxy"},
)
assert response.status_code == 200
digest = response.headers["Digest"]
base64_part = digest.split("=", 1)[1]
# Should be valid base64
try:
decoded = base64.b64decode(base64_part)
assert len(decoded) == 32 # SHA256 is 32 bytes
except Exception as e:
pytest.fail(f"Invalid base64 in Digest header: {e}")
class TestVerificationModes:
"""Tests for download verification modes."""
@pytest.mark.integration
def test_pre_verification_mode(self, integration_client, test_package):
"""Test pre-verification mode verifies before streaming."""
project, package = test_package
content = b"Pre-verification mode test"
upload_test_file(
integration_client, project, package, content, tag="pre-verify"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/pre-verify",
params={"mode": "proxy", "verify": "true", "verify_mode": "pre"},
)
assert response.status_code == 200
assert response.content == content
# X-Verified header should be true
assert response.headers.get("X-Verified") == "true"
@pytest.mark.integration
def test_stream_verification_mode(self, integration_client, test_package):
"""Test streaming verification mode."""
project, package = test_package
content = b"Stream verification mode test"
upload_test_file(
integration_client, project, package, content, tag="stream-verify"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/stream-verify",
params={"mode": "proxy", "verify": "true", "verify_mode": "stream"},
)
assert response.status_code == 200
assert response.content == content
class TestArtifactIntegrityEndpoint:
"""Tests for artifact-specific integrity operations."""
@pytest.mark.integration
def test_artifact_size_matches(self, integration_client, test_package):
"""Test artifact endpoint returns correct size."""
project, package = test_package
content = b"Artifact size test content"
expected_size = len(content)
result = upload_test_file(integration_client, project, package, content)
artifact_id = result["artifact_id"]
response = integration_client.get(f"/api/v1/artifact/{artifact_id}")
assert response.status_code == 200
data = response.json()
assert data["size"] == expected_size
@pytest.mark.integration
def test_content_length_header_matches_size(self, integration_client, test_package):
"""Test Content-Length header matches artifact size."""
project, package = test_package
content = b"Content-Length header test"
expected_size = len(content)
upload_test_file(
integration_client, project, package, content, tag="content-len"
)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/content-len",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert int(response.headers.get("Content-Length", 0)) == expected_size
assert len(response.content) == expected_size
@pytest.mark.requires_direct_s3
class TestCorruptionDetection:
"""Tests for detecting corrupted S3 objects.
These tests directly manipulate S3 objects to simulate corruption
and verify that the system can detect hash mismatches.
Note: These tests require direct S3/MinIO access and are skipped in CI
where S3 is not directly accessible from the test runner.
"""
@pytest.mark.integration
def test_detection_of_corrupted_content(self, integration_client, test_package):
"""Test that corrupted S3 content is detected via hash mismatch.
Uploads content, then directly modifies the S3 object, then
verifies that the downloaded content hash doesn't match.
"""
project, package = test_package
content = b"Original content for corruption test"
expected_hash = compute_sha256(content)
# Upload original content
result = upload_test_file(
integration_client, project, package, content, tag="corrupt-test"
)
assert result["artifact_id"] == expected_hash
# Get the S3 object and corrupt it
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
# Replace with corrupted content
corrupted_content = b"Corrupted content - different from original!"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)
# Download via proxy (bypasses hash verification)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/corrupt-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Verify the downloaded content doesn't match original hash
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Corruption was not detected - hashes match"
assert response.content == corrupted_content
# The X-Checksum-SHA256 header should still show the original hash (from DB)
# but the actual content hash is different
header_hash = response.headers.get("X-Checksum-SHA256")
assert header_hash == expected_hash # Header shows expected hash
assert downloaded_hash != header_hash # But content is corrupted
# Restore original content for cleanup
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_detection_of_single_bit_flip(self, integration_client, test_package):
"""Test detection of a single bit flip in S3 object content."""
project, package = test_package
content = b"Content for single bit flip detection test"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="bitflip-test"
)
assert result["artifact_id"] == expected_hash
# Get S3 object and flip a single bit
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
# Flip the first bit of the first byte
corrupted_content = bytearray(content)
corrupted_content[0] ^= 0x01
corrupted_content = bytes(corrupted_content)
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)
# Download and verify hash mismatch
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/bitflip-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Single bit flip not detected"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_detection_of_truncated_content(self, integration_client, test_package):
"""Test detection of truncated S3 object."""
project, package = test_package
content = b"This is content that will be truncated for testing purposes"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="truncate-test"
)
assert result["artifact_id"] == expected_hash
# Get S3 object and truncate it
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
# Truncate to half the original size
truncated_content = content[: len(content) // 2]
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=truncated_content)
# Download and verify hash mismatch
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/truncate-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Truncation not detected"
assert len(response.content) < len(content), "Content was not truncated"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_detection_of_appended_content(self, integration_client, test_package):
"""Test detection of content with extra bytes appended."""
project, package = test_package
content = b"Original content"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="append-test"
)
assert result["artifact_id"] == expected_hash
# Get S3 object and append extra bytes
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
appended_content = content + b" - extra bytes appended"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=appended_content)
# Download and verify hash mismatch
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/append-test",
params={"mode": "proxy"},
)
assert response.status_code == 200
downloaded_hash = compute_sha256(response.content)
assert downloaded_hash != expected_hash, "Appended content not detected"
assert len(response.content) > len(content), "Content was not extended"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_client_detects_hash_mismatch_post_download(
self, integration_client, test_package
):
"""Test that a client can detect hash mismatch after downloading corrupted content.
This simulates the full client verification workflow:
1. Download content
2. Get expected hash from header
3. Compute actual hash of content
4. Verify they match (or detect corruption)
"""
project, package = test_package
content = b"Content for client-side corruption detection"
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="client-detect"
)
# Corrupt the S3 object
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
corrupted = b"This is completely different content"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted)
# Simulate client download and verification
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/client-detect",
params={"mode": "proxy"},
)
assert response.status_code == 200
# Client gets expected hash from header
header_hash = response.headers.get("X-Checksum-SHA256")
# Client computes hash of downloaded content
actual_hash = compute_sha256(response.content)
# Client detects the mismatch
corruption_detected = actual_hash != header_hash
assert corruption_detected, "Client should detect hash mismatch"
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_consistency_check_detects_size_mismatch(
self, integration_client, test_package, unique_test_id
):
"""Test that consistency check detects size mismatches.
Uploads content, modifies S3 object size, then runs consistency check.
"""
project, package = test_package
content = b"Content for size mismatch consistency check test " + unique_test_id.encode()
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="size-mismatch"
)
# Modify S3 object to have different size
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
different_size_content = content + b"extra extra extra"
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=different_size_content)
# Run consistency check
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Should detect the size mismatch
assert data["size_mismatches"] >= 1 or len(data["size_mismatch_artifacts"]) >= 1
# Restore original
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
@pytest.mark.integration
def test_consistency_check_detects_missing_s3_object(
self, integration_client, test_package, unique_test_id
):
"""Test that consistency check detects missing S3 objects.
Uploads content, deletes S3 object, then runs consistency check.
"""
project, package = test_package
content = b"Content for missing S3 object test " + unique_test_id.encode()
expected_hash = compute_sha256(content)
result = upload_test_file(
integration_client, project, package, content, tag="missing-s3"
)
# Delete the S3 object
s3_client = get_s3_client()
bucket = get_s3_bucket()
s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
s3_client.delete_object(Bucket=bucket, Key=s3_key)
# Run consistency check
response = integration_client.get("/api/v1/admin/consistency-check")
assert response.status_code == 200
data = response.json()
# Should detect the missing S3 object
assert data["missing_s3_objects"] >= 1 or len(data["missing_s3_keys"]) >= 1
# Restore the object for cleanup
s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

View File

@@ -1,552 +0,0 @@
"""
Integration tests for large file upload functionality.
Tests cover:
- Large file uploads (100MB, 1GB)
- Multipart upload behavior
- Upload metrics (duration, throughput)
- Memory efficiency during uploads
- Upload progress tracking
Note: Large tests are marked with @pytest.mark.slow and will be skipped
by default. Run with `pytest --run-slow` to include them.
"""
import os
import pytest
import io
import time
from tests.factories import (
compute_sha256,
upload_test_file,
s3_object_exists,
)
from tests.conftest import (
SIZE_1KB,
SIZE_100KB,
SIZE_1MB,
SIZE_10MB,
SIZE_100MB,
SIZE_1GB,
)
class TestUploadMetrics:
"""Tests for upload duration and throughput metrics."""
@pytest.mark.integration
def test_upload_response_includes_duration_ms(self, integration_client, test_package):
"""Test upload response includes duration_ms field."""
project, package = test_package
content = b"duration test content"
result = upload_test_file(
integration_client, project, package, content, tag="duration-test"
)
assert "duration_ms" in result
assert result["duration_ms"] is not None
assert result["duration_ms"] >= 0
@pytest.mark.integration
def test_upload_response_includes_throughput(self, integration_client, test_package):
"""Test upload response includes throughput_mbps field."""
project, package = test_package
content = b"throughput test content"
result = upload_test_file(
integration_client, project, package, content, tag="throughput-test"
)
assert "throughput_mbps" in result
# For small files throughput may be very high or None
# Just verify the field exists
@pytest.mark.integration
def test_upload_duration_reasonable(
self, integration_client, test_package, sized_content
):
"""Test upload duration is reasonable for file size."""
project, package = test_package
content, _ = sized_content(SIZE_1MB, seed=100)
start = time.time()
result = upload_test_file(
integration_client, project, package, content, tag="duration-check"
)
actual_duration = (time.time() - start) * 1000 # ms
# Reported duration should be close to actual
assert result["duration_ms"] is not None
# Allow some variance (network overhead)
assert result["duration_ms"] <= actual_duration + 1000 # Within 1s
class TestLargeFileUploads:
"""Tests for large file uploads using multipart."""
@pytest.mark.integration
def test_upload_10mb_file(self, integration_client, test_package, sized_content):
"""Test uploading a 10MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_10MB, seed=200)
result = upload_test_file(
integration_client, project, package, content, tag="large-10mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_10MB
assert result["duration_ms"] is not None
assert result["throughput_mbps"] is not None
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.requires_direct_s3
def test_upload_100mb_file(self, integration_client, test_package, sized_content):
"""Test uploading a 100MB file (triggers multipart upload)."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100MB, seed=300)
result = upload_test_file(
integration_client, project, package, content, tag="large-100mb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_100MB
# Verify S3 object exists
assert s3_object_exists(expected_hash)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.large
def test_upload_1gb_file(self, integration_client, test_package, sized_content):
"""Test uploading a 1GB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1GB, seed=400)
result = upload_test_file(
integration_client, project, package, content, tag="large-1gb"
)
assert result["artifact_id"] == expected_hash
assert result["size"] == SIZE_1GB
# Should have measurable throughput
assert result["throughput_mbps"] is not None
assert result["throughput_mbps"] > 0
@pytest.mark.integration
def test_large_file_deduplication(
self, integration_client, test_package, sized_content, unique_test_id
):
"""Test deduplication works for large files."""
project, package = test_package
# Use unique_test_id to ensure unique content per test run
seed = hash(unique_test_id) % 10000
content, expected_hash = sized_content(SIZE_10MB, seed=seed)
# First upload
result1 = upload_test_file(
integration_client, project, package, content, tag=f"dedup-{unique_test_id}-1"
)
# Note: may be True if previous test uploaded same content
first_dedupe = result1["deduplicated"]
# Second upload of same content
result2 = upload_test_file(
integration_client, project, package, content, tag=f"dedup-{unique_test_id}-2"
)
assert result2["artifact_id"] == expected_hash
# Second upload MUST be deduplicated
assert result2["deduplicated"] is True
class TestUploadProgress:
"""Tests for upload progress tracking endpoint."""
@pytest.mark.integration
def test_progress_endpoint_returns_not_found_for_invalid_id(
self, integration_client, test_package
):
"""Test progress endpoint returns not_found status for invalid upload ID."""
project, package = test_package
response = integration_client.get(
f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
)
assert response.status_code == 200
data = response.json()
assert data["status"] == "not_found"
assert data["upload_id"] == "invalid-upload-id"
@pytest.mark.integration
def test_progress_endpoint_requires_valid_project(
self, integration_client, unique_test_id
):
"""Test progress endpoint validates project exists."""
response = integration_client.get(
f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
)
assert response.status_code == 404
@pytest.mark.integration
def test_progress_endpoint_requires_valid_package(
self, integration_client, test_project, unique_test_id
):
"""Test progress endpoint validates package exists."""
response = integration_client.get(
f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
)
assert response.status_code == 404
class TestResumableUploadProgress:
"""Tests for progress tracking during resumable uploads."""
@pytest.mark.integration
def test_resumable_upload_init_and_progress(
self, integration_client, test_package, sized_content
):
"""Test initializing resumable upload and checking progress."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=600)
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "progress-test-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
# Initialize resumable upload
init_response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload/init",
json={
"expected_hash": expected_hash,
"filename": "progress-test.bin",
"size": SIZE_100KB,
},
headers={"Authorization": f"Bearer {api_key}"},
)
assert init_response.status_code == 200
upload_id = init_response.json().get("upload_id")
if upload_id:
# Check initial progress
progress_response = integration_client.get(
f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
headers={"Authorization": f"Bearer {api_key}"},
)
assert progress_response.status_code == 200
progress = progress_response.json()
assert progress["status"] == "in_progress"
assert progress["bytes_uploaded"] == 0
assert progress["bytes_total"] == SIZE_100KB
# Abort to clean up
integration_client.delete(
f"/api/v1/project/{project}/{package}/upload/{upload_id}",
headers={"Authorization": f"Bearer {api_key}"},
)
class TestUploadSizeLimits:
"""Tests for upload size limit enforcement."""
@pytest.mark.integration
def test_empty_file_rejected(self, integration_client, test_package):
"""Test empty files are rejected."""
project, package = test_package
files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
)
assert response.status_code in [400, 422]
@pytest.mark.integration
def test_minimum_size_accepted(self, integration_client, test_package):
"""Test 1-byte file is accepted."""
project, package = test_package
content = b"X"
result = upload_test_file(
integration_client, project, package, content, tag="min-size"
)
assert result["size"] == 1
@pytest.mark.integration
def test_content_length_header_used_in_response(self, integration_client, test_package):
"""Test that upload response size matches Content-Length."""
project, package = test_package
content = b"content length verification test"
result = upload_test_file(
integration_client, project, package, content, tag="content-length-test"
)
# Size in response should match actual content length
assert result["size"] == len(content)
class TestUploadErrorHandling:
"""Tests for upload error handling."""
@pytest.mark.integration
def test_upload_to_nonexistent_project_returns_404(
self, integration_client, unique_test_id
):
"""Test upload to nonexistent project returns 404."""
content = b"test content"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
files=files,
)
assert response.status_code == 404
@pytest.mark.integration
def test_upload_to_nonexistent_package_returns_404(
self, integration_client, test_project, unique_test_id
):
"""Test upload to nonexistent package returns 404."""
content = b"test content"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
files=files,
)
assert response.status_code == 404
@pytest.mark.integration
def test_upload_without_file_returns_422(self, integration_client, test_package):
"""Test upload without file field returns 422."""
project, package = test_package
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
data={"tag": "no-file"},
)
assert response.status_code == 422
@pytest.mark.integration
def test_upload_with_invalid_checksum_rejected(
self, integration_client, test_package
):
"""Test upload with invalid checksum header format is rejected."""
project, package = test_package
content = b"checksum test"
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": "invalid-checksum"},
)
assert response.status_code == 400
@pytest.mark.integration
def test_upload_with_mismatched_checksum_rejected(
self, integration_client, test_package
):
"""Test upload with wrong checksum is rejected."""
project, package = test_package
content = b"mismatch test"
wrong_hash = "0" * 64
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
headers={"X-Checksum-SHA256": wrong_hash},
)
assert response.status_code == 422
assert "verification failed" in response.json().get("detail", "").lower()
class TestResumableUploadCancellation:
"""Tests for resumable upload cancellation."""
@pytest.mark.integration
def test_abort_resumable_upload(self, integration_client, test_package, sized_content):
"""Test aborting a resumable upload cleans up properly."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=700)
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "abort-test-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
# Initialize resumable upload
init_response = integration_client.post(
f"/api/v1/project/{project}/{package}/upload/init",
json={
"expected_hash": expected_hash,
"filename": "abort-test.bin",
"size": SIZE_100KB,
},
headers={"Authorization": f"Bearer {api_key}"},
)
assert init_response.status_code == 200
upload_id = init_response.json().get("upload_id")
if upload_id:
# Abort the upload (without uploading any parts)
abort_response = integration_client.delete(
f"/api/v1/project/{project}/{package}/upload/{upload_id}",
headers={"Authorization": f"Bearer {api_key}"},
)
assert abort_response.status_code in [200, 204]
# Verify progress shows not_found after abort
progress_response = integration_client.get(
f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
headers={"Authorization": f"Bearer {api_key}"},
)
assert progress_response.status_code == 200
assert progress_response.json()["status"] == "not_found"
@pytest.mark.integration
def test_abort_nonexistent_upload(self, integration_client, test_package):
"""Test aborting nonexistent upload returns appropriate error."""
project, package = test_package
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "abort-nonexistent-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
response = integration_client.delete(
f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
headers={"Authorization": f"Bearer {api_key}"},
)
# Should return 404 or 200 (idempotent delete)
assert response.status_code in [200, 204, 404]
class TestUploadTimeout:
"""Tests for upload timeout handling."""
@pytest.mark.integration
def test_upload_with_short_timeout_succeeds_for_small_file(
self, integration_client, test_package
):
"""Test small file upload succeeds with reasonable timeout."""
project, package = test_package
content = b"small timeout test"
# httpx client should handle this quickly
result = upload_test_file(
integration_client, project, package, content, tag="timeout-small"
)
assert result["artifact_id"] is not None
@pytest.mark.integration
def test_upload_response_duration_under_timeout(
self, integration_client, test_package, sized_content
):
"""Test upload completes within reasonable time."""
project, package = test_package
content, _ = sized_content(SIZE_1MB, seed=800)
start = time.time()
result = upload_test_file(
integration_client, project, package, content, tag="timeout-check"
)
duration = time.time() - start
# 1MB should upload in well under 60 seconds on local
assert duration < 60
assert result["artifact_id"] is not None
class TestConcurrentUploads:
"""Tests for concurrent upload handling."""
@pytest.mark.integration
def test_concurrent_different_files(
self, integration_client, test_package, sized_content
):
"""Test concurrent uploads of different files succeed."""
from concurrent.futures import ThreadPoolExecutor, as_completed
project, package = test_package
# Get API key for auth
api_key_response = integration_client.post(
"/api/v1/auth/keys",
json={"name": "concurrent-diff-key"},
)
assert api_key_response.status_code == 200
api_key = api_key_response.json()["key"]
num_uploads = 3
results = []
errors = []
def upload_unique_file(idx):
try:
from httpx import Client
content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
with Client(base_url=base_url, timeout=30.0) as client:
files = {
"file": (
f"concurrent-{idx}.bin",
io.BytesIO(content),
"application/octet-stream",
)
}
response = client.post(
f"/api/v1/project/{project}/{package}/upload",
files=files,
data={"tag": f"concurrent-diff-{idx}"},
headers={"Authorization": f"Bearer {api_key}"},
)
if response.status_code == 200:
results.append((idx, response.json(), expected_hash))
else:
errors.append(f"Upload {idx}: {response.status_code} - {response.text}")
except Exception as e:
errors.append(f"Upload {idx}: {str(e)}")
with ThreadPoolExecutor(max_workers=num_uploads) as executor:
futures = [executor.submit(upload_unique_file, i) for i in range(num_uploads)]
for future in as_completed(futures):
pass
assert len(errors) == 0, f"Concurrent upload errors: {errors}"
assert len(results) == num_uploads
# Each upload should have unique artifact ID
artifact_ids = set(r[1]["artifact_id"] for r in results)
assert len(artifact_ids) == num_uploads
# Each should match expected hash
for idx, result, expected_hash in results:
assert result["artifact_id"] == expected_hash

View File

@@ -1,535 +0,0 @@
"""
Integration tests for streaming download functionality.
Tests cover:
- HTTP Range requests (partial downloads, resume)
- Conditional requests (If-None-Match, If-Modified-Since)
- Caching headers (Cache-Control, Last-Modified, Accept-Ranges)
- Large file streaming
- Download modes (proxy, redirect, presigned)
"""
import pytest
import io
import time
from email.utils import formatdate
from tests.factories import (
compute_sha256,
upload_test_file,
)
from tests.conftest import (
SIZE_1KB,
SIZE_100KB,
SIZE_1MB,
)
class TestRangeRequests:
"""Tests for HTTP Range request support (partial downloads)."""
@pytest.mark.integration
def test_range_request_first_bytes(self, integration_client, test_package):
"""Test range request for first N bytes."""
project, package = test_package
content = b"0123456789" * 100 # 1000 bytes
upload_test_file(integration_client, project, package, content, tag="range-test")
# Request first 10 bytes
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-test",
params={"mode": "proxy"},
headers={"Range": "bytes=0-9"},
)
assert response.status_code == 206 # Partial Content
assert response.content == b"0123456789"
assert "Content-Range" in response.headers
assert response.headers["Content-Range"].startswith("bytes 0-9/")
@pytest.mark.integration
def test_range_request_middle_bytes(self, integration_client, test_package):
"""Test range request for bytes in the middle."""
project, package = test_package
content = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
upload_test_file(integration_client, project, package, content, tag="range-mid")
# Request bytes 10-19 (KLMNOPQRST)
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-mid",
params={"mode": "proxy"},
headers={"Range": "bytes=10-19"},
)
assert response.status_code == 206
assert response.content == b"KLMNOPQRST"
@pytest.mark.integration
def test_range_request_suffix_bytes(self, integration_client, test_package):
"""Test range request for last N bytes (suffix range)."""
project, package = test_package
content = b"0123456789ABCDEF" # 16 bytes
upload_test_file(integration_client, project, package, content, tag="range-suffix")
# Request last 4 bytes
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-suffix",
params={"mode": "proxy"},
headers={"Range": "bytes=-4"},
)
assert response.status_code == 206
assert response.content == b"CDEF"
@pytest.mark.integration
def test_range_request_open_ended(self, integration_client, test_package):
"""Test range request from offset to end."""
project, package = test_package
content = b"0123456789"
upload_test_file(integration_client, project, package, content, tag="range-open")
# Request from byte 5 to end
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-open",
params={"mode": "proxy"},
headers={"Range": "bytes=5-"},
)
assert response.status_code == 206
assert response.content == b"56789"
@pytest.mark.integration
def test_range_request_includes_accept_ranges_header(
self, integration_client, test_package
):
"""Test that range requests include Accept-Ranges header."""
project, package = test_package
content = b"test content"
upload_test_file(integration_client, project, package, content, tag="accept-ranges")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/accept-ranges",
params={"mode": "proxy"},
headers={"Range": "bytes=0-4"},
)
assert response.status_code == 206
assert response.headers.get("Accept-Ranges") == "bytes"
@pytest.mark.integration
def test_full_download_advertises_accept_ranges(
self, integration_client, test_package
):
"""Test that full downloads advertise range support."""
project, package = test_package
content = b"test content"
upload_test_file(integration_client, project, package, content, tag="full-accept")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/full-accept",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.headers.get("Accept-Ranges") == "bytes"
class TestConditionalRequests:
"""Tests for conditional request handling (304 Not Modified)."""
@pytest.mark.integration
def test_if_none_match_returns_304(self, integration_client, test_package):
"""Test If-None-Match with matching ETag returns 304."""
project, package = test_package
content = b"conditional request test content"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="cond-etag")
# Request with matching ETag
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-etag",
params={"mode": "proxy"},
headers={"If-None-Match": f'"{expected_hash}"'},
)
assert response.status_code == 304
assert response.content == b"" # No body for 304
@pytest.mark.integration
def test_if_none_match_without_quotes(self, integration_client, test_package):
"""Test If-None-Match works with or without quotes."""
project, package = test_package
content = b"etag no quotes test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="cond-noquote")
# Request with ETag without quotes
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-noquote",
params={"mode": "proxy"},
headers={"If-None-Match": expected_hash},
)
assert response.status_code == 304
@pytest.mark.integration
def test_if_none_match_mismatch_returns_200(self, integration_client, test_package):
"""Test If-None-Match with non-matching ETag returns 200."""
project, package = test_package
content = b"etag mismatch test"
upload_test_file(integration_client, project, package, content, tag="cond-mismatch")
# Request with different ETag
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-mismatch",
params={"mode": "proxy"},
headers={"If-None-Match": '"different-etag-value"'},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_if_modified_since_returns_304(self, integration_client, test_package):
"""Test If-Modified-Since with future date returns 304."""
project, package = test_package
content = b"modified since test"
upload_test_file(integration_client, project, package, content, tag="cond-modified")
# Request with future date (artifact was definitely created before this)
future_date = formatdate(time.time() + 86400, usegmt=True) # Tomorrow
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-modified",
params={"mode": "proxy"},
headers={"If-Modified-Since": future_date},
)
assert response.status_code == 304
@pytest.mark.integration
def test_if_modified_since_old_date_returns_200(
self, integration_client, test_package
):
"""Test If-Modified-Since with old date returns 200."""
project, package = test_package
content = b"old date test"
upload_test_file(integration_client, project, package, content, tag="cond-old")
# Request with old date (2020-01-01)
old_date = "Wed, 01 Jan 2020 00:00:00 GMT"
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cond-old",
params={"mode": "proxy"},
headers={"If-Modified-Since": old_date},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_304_includes_etag(self, integration_client, test_package):
"""Test 304 response includes ETag header."""
project, package = test_package
content = b"304 etag test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="304-etag")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/304-etag",
params={"mode": "proxy"},
headers={"If-None-Match": f'"{expected_hash}"'},
)
assert response.status_code == 304
assert response.headers.get("ETag") == f'"{expected_hash}"'
@pytest.mark.integration
def test_304_includes_cache_control(self, integration_client, test_package):
"""Test 304 response includes Cache-Control header."""
project, package = test_package
content = b"304 cache test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="304-cache")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/304-cache",
params={"mode": "proxy"},
headers={"If-None-Match": f'"{expected_hash}"'},
)
assert response.status_code == 304
assert "immutable" in response.headers.get("Cache-Control", "")
class TestCachingHeaders:
"""Tests for caching headers on download responses."""
@pytest.mark.integration
def test_download_includes_cache_control(self, integration_client, test_package):
"""Test download response includes Cache-Control header."""
project, package = test_package
content = b"cache control test"
upload_test_file(integration_client, project, package, content, tag="cache-ctl")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/cache-ctl",
params={"mode": "proxy"},
)
assert response.status_code == 200
cache_control = response.headers.get("Cache-Control", "")
assert "public" in cache_control
assert "immutable" in cache_control
assert "max-age" in cache_control
@pytest.mark.integration
def test_download_includes_last_modified(self, integration_client, test_package):
"""Test download response includes Last-Modified header."""
project, package = test_package
content = b"last modified test"
upload_test_file(integration_client, project, package, content, tag="last-mod")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/last-mod",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "Last-Modified" in response.headers
# Should be in RFC 7231 format
last_modified = response.headers["Last-Modified"]
assert "GMT" in last_modified
@pytest.mark.integration
def test_download_includes_etag(self, integration_client, test_package):
"""Test download response includes ETag header."""
project, package = test_package
content = b"etag header test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="etag-hdr")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-hdr",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.headers.get("ETag") == f'"{expected_hash}"'
class TestDownloadResume:
"""Tests for download resume functionality using range requests."""
@pytest.mark.integration
def test_resume_download_after_partial(self, integration_client, test_package):
"""Test resuming download from where it left off."""
project, package = test_package
content = b"ABCDEFGHIJ" * 100 # 1000 bytes
upload_test_file(integration_client, project, package, content, tag="resume-test")
# Simulate partial download (first 500 bytes)
response1 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-test",
params={"mode": "proxy"},
headers={"Range": "bytes=0-499"},
)
assert response1.status_code == 206
first_half = response1.content
assert len(first_half) == 500
# Resume from byte 500
response2 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-test",
params={"mode": "proxy"},
headers={"Range": "bytes=500-"},
)
assert response2.status_code == 206
second_half = response2.content
assert len(second_half) == 500
# Combine and verify
combined = first_half + second_half
assert combined == content
@pytest.mark.integration
def test_resume_with_etag_verification(self, integration_client, test_package):
"""Test that resumed download can verify content hasn't changed."""
project, package = test_package
content = b"resume etag verification test content"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="resume-etag")
# Get ETag from first request
response1 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-etag",
params={"mode": "proxy"},
headers={"Range": "bytes=0-9"},
)
assert response1.status_code == 206
etag = response1.headers.get("ETag")
assert etag == f'"{expected_hash}"'
# Resume with If-Match to ensure content hasn't changed
# (Note: If-Match would fail and return 412 if content changed)
response2 = integration_client.get(
f"/api/v1/project/{project}/{package}/+/resume-etag",
params={"mode": "proxy"},
headers={"Range": "bytes=10-"},
)
assert response2.status_code == 206
# ETag should be the same
assert response2.headers.get("ETag") == etag
class TestLargeFileStreaming:
"""Tests for streaming large files."""
@pytest.mark.integration
def test_stream_1mb_file(self, integration_client, test_package, sized_content):
"""Test streaming a 1MB file."""
project, package = test_package
content, expected_hash = sized_content(SIZE_1MB, seed=500)
upload_test_file(integration_client, project, package, content, tag="stream-1mb")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/stream-1mb",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert len(response.content) == SIZE_1MB
assert compute_sha256(response.content) == expected_hash
@pytest.mark.integration
def test_stream_large_file_has_correct_headers(
self, integration_client, test_package, sized_content
):
"""Test that large file streaming has correct headers."""
project, package = test_package
content, expected_hash = sized_content(SIZE_100KB, seed=501)
upload_test_file(integration_client, project, package, content, tag="stream-hdr")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/stream-hdr",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert int(response.headers.get("Content-Length", 0)) == SIZE_100KB
assert response.headers.get("X-Checksum-SHA256") == expected_hash
assert response.headers.get("Accept-Ranges") == "bytes"
@pytest.mark.integration
def test_range_request_on_large_file(
self, integration_client, test_package, sized_content
):
"""Test range request on a larger file."""
project, package = test_package
content, _ = sized_content(SIZE_100KB, seed=502)
upload_test_file(integration_client, project, package, content, tag="range-large")
# Request a slice from the middle
start = 50000
end = 50999
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/range-large",
params={"mode": "proxy"},
headers={"Range": f"bytes={start}-{end}"},
)
assert response.status_code == 206
assert len(response.content) == 1000
assert response.content == content[start : end + 1]
class TestDownloadModes:
"""Tests for different download modes."""
@pytest.mark.integration
def test_proxy_mode_streams_content(self, integration_client, test_package):
"""Test proxy mode streams content through backend."""
project, package = test_package
content = b"proxy mode test content"
upload_test_file(integration_client, project, package, content, tag="mode-proxy")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/mode-proxy",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert response.content == content
@pytest.mark.integration
def test_presigned_mode_returns_url(self, integration_client, test_package):
"""Test presigned mode returns JSON with URL."""
project, package = test_package
content = b"presigned mode test"
upload_test_file(integration_client, project, package, content, tag="mode-presign")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/mode-presign",
params={"mode": "presigned"},
)
assert response.status_code == 200
data = response.json()
assert "url" in data
assert "expires_at" in data
assert data["url"].startswith("http")
@pytest.mark.integration
def test_redirect_mode_returns_302(self, integration_client, test_package):
"""Test redirect mode returns 302 to presigned URL."""
project, package = test_package
content = b"redirect mode test"
upload_test_file(integration_client, project, package, content, tag="mode-redir")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/mode-redir",
params={"mode": "redirect"},
follow_redirects=False,
)
assert response.status_code == 302
assert "Location" in response.headers
class TestIntegrityDuringStreaming:
"""Tests for data integrity during streaming downloads."""
@pytest.mark.integration
def test_checksum_header_matches_content(self, integration_client, test_package):
"""Test X-Checksum-SHA256 header matches actual downloaded content."""
project, package = test_package
content = b"integrity check content"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="integrity")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/integrity",
params={"mode": "proxy"},
)
assert response.status_code == 200
header_hash = response.headers.get("X-Checksum-SHA256")
actual_hash = compute_sha256(response.content)
assert header_hash == expected_hash
assert actual_hash == expected_hash
assert header_hash == actual_hash
@pytest.mark.integration
def test_etag_matches_content_hash(self, integration_client, test_package):
"""Test ETag header matches content hash."""
project, package = test_package
content = b"etag integrity test"
expected_hash = compute_sha256(content)
upload_test_file(integration_client, project, package, content, tag="etag-int")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/etag-int",
params={"mode": "proxy"},
)
assert response.status_code == 200
etag = response.headers.get("ETag", "").strip('"')
actual_hash = compute_sha256(response.content)
assert etag == expected_hash
assert actual_hash == expected_hash
@pytest.mark.integration
def test_digest_header_present(self, integration_client, test_package):
"""Test Digest header is present in RFC 3230 format."""
project, package = test_package
content = b"digest header test"
upload_test_file(integration_client, project, package, content, tag="digest")
response = integration_client.get(
f"/api/v1/project/{project}/{package}/+/digest",
params={"mode": "proxy"},
)
assert response.status_code == 200
assert "Digest" in response.headers
assert response.headers["Digest"].startswith("sha-256=")

View File

@@ -10,7 +10,6 @@ Tests cover:
- S3 storage verification
"""
import os
import pytest
import io
import threading
@@ -159,7 +158,6 @@ class TestUploadBasics:
assert artifact["size"] == len(content)
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_upload_creates_object_in_s3(self, integration_client, test_package):
"""Test upload creates object in S3 storage."""
project, package = test_package
@@ -541,7 +539,7 @@ class TestConcurrentUploads:
try:
from httpx import Client
base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
base_url = "http://localhost:8080"
with Client(base_url=base_url, timeout=30.0) as client:
files = {
"file": (
@@ -637,7 +635,6 @@ class TestUploadFailureCleanup:
"""Tests for cleanup when uploads fail."""
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_upload_failure_invalid_project_no_orphaned_s3(
self, integration_client, unique_test_id
):
@@ -660,7 +657,6 @@ class TestUploadFailureCleanup:
)
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_upload_failure_invalid_package_no_orphaned_s3(
self, integration_client, test_project, unique_test_id
):
@@ -708,7 +704,6 @@ class TestS3StorageVerification:
"""Tests to verify S3 storage behavior."""
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_s3_single_object_after_duplicates(
self, integration_client, test_package, unique_test_id
):
@@ -764,7 +759,6 @@ class TestSecurityPathTraversal:
"""
@pytest.mark.integration
@pytest.mark.requires_direct_s3
def test_path_traversal_in_filename_stored_safely(
self, integration_client, test_package
):

View File

@@ -1,294 +0,0 @@
# Integrity Verification
Orchard uses content-addressable storage with SHA256 hashing to ensure artifact integrity. This document describes how integrity verification works and how to use it.
## How It Works
### Content-Addressable Storage
Orchard stores artifacts using their SHA256 hash as the unique identifier. This provides several benefits:
1. **Automatic deduplication**: Identical content is stored only once
2. **Built-in integrity**: The artifact ID *is* the content hash
3. **Tamper detection**: Any modification changes the hash, making corruption detectable
When you upload a file:
1. Orchard computes the SHA256 hash of the content
2. The hash becomes the artifact ID (64-character hex string)
3. The file is stored in S3 at `fruits/{hash[0:2]}/{hash[2:4]}/{hash}`
4. The hash and metadata are recorded in the database
### Hash Format
- Algorithm: SHA256
- Format: 64-character lowercase hexadecimal string
- Example: `dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f`
## Client-Side Verification
### Before Upload
Compute the hash locally before uploading to verify the server received your content correctly:
```python
import hashlib
def compute_sha256(content: bytes) -> str:
return hashlib.sha256(content).hexdigest()
# Compute hash before upload
content = open("myfile.tar.gz", "rb").read()
local_hash = compute_sha256(content)
# Upload the file
response = requests.post(
f"{base_url}/api/v1/project/{project}/{package}/upload",
files={"file": ("myfile.tar.gz", content)},
)
result = response.json()
# Verify server computed the same hash
assert result["artifact_id"] == local_hash, "Hash mismatch!"
```
### Providing Expected Hash on Upload
You can provide the expected hash in the upload request. The server will reject the upload if the computed hash doesn't match:
```python
response = requests.post(
f"{base_url}/api/v1/project/{project}/{package}/upload",
files={"file": ("myfile.tar.gz", content)},
headers={"X-Checksum-SHA256": local_hash},
)
# Returns 422 if hash doesn't match
if response.status_code == 422:
print("Checksum mismatch - upload rejected")
```
### After Download
Verify downloaded content matches the expected hash using response headers:
```python
response = requests.get(
f"{base_url}/api/v1/project/{project}/{package}/+/{tag}",
params={"mode": "proxy"},
)
# Get expected hash from header
expected_hash = response.headers.get("X-Checksum-SHA256")
# Compute hash of downloaded content
actual_hash = compute_sha256(response.content)
# Verify
if actual_hash != expected_hash:
raise Exception(f"Integrity check failed! Expected {expected_hash}, got {actual_hash}")
```
### Response Headers for Verification
Download responses include multiple headers for verification:
| Header | Format | Description |
|--------|--------|-------------|
| `X-Checksum-SHA256` | Hex string | SHA256 hash (64 chars) |
| `ETag` | `"<hash>"` | SHA256 hash in quotes |
| `Digest` | `sha-256=<base64>` | RFC 3230 format (base64-encoded) |
| `Content-Length` | Integer | File size in bytes |
### Server-Side Verification on Download
Request server-side verification during download:
```bash
# Pre-verification: Server verifies before streaming (returns 500 if corrupt)
curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=pre"
# Stream verification: Server verifies while streaming (logs error if corrupt)
curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=stream"
```
The `X-Verified` header indicates whether server-side verification was performed:
- `X-Verified: true` - Content was verified by the server
## Server-Side Consistency Check
### Consistency Check Endpoint
Administrators can run a consistency check to verify all stored artifacts:
```bash
curl "${base_url}/api/v1/admin/consistency-check"
```
Response:
```json
{
"total_artifacts_checked": 1234,
"healthy": true,
"orphaned_s3_objects": 0,
"missing_s3_objects": 0,
"size_mismatches": 0,
"orphaned_s3_keys": [],
"missing_s3_keys": [],
"size_mismatch_artifacts": []
}
```
### What the Check Verifies
1. **Missing S3 objects**: Database records with no corresponding S3 object
2. **Orphaned S3 objects**: S3 objects with no database record
3. **Size mismatches**: S3 object size doesn't match database record
### Running Consistency Checks
**Manual check:**
```bash
# Check all artifacts
curl "${base_url}/api/v1/admin/consistency-check"
# Limit results (for large deployments)
curl "${base_url}/api/v1/admin/consistency-check?limit=100"
```
**Scheduled checks (recommended):**
Set up a cron job or Kubernetes CronJob to run periodic checks:
```yaml
# Kubernetes CronJob example
apiVersion: batch/v1
kind: CronJob
metadata:
name: orchard-consistency-check
spec:
schedule: "0 2 * * *" # Daily at 2 AM
jobTemplate:
spec:
template:
spec:
containers:
- name: check
image: curlimages/curl
command:
- /bin/sh
- -c
- |
response=$(curl -s "${ORCHARD_URL}/api/v1/admin/consistency-check")
healthy=$(echo "$response" | jq -r '.healthy')
if [ "$healthy" != "true" ]; then
echo "ALERT: Consistency check failed!"
echo "$response"
exit 1
fi
echo "Consistency check passed"
restartPolicy: OnFailure
```
## Recovery Procedures
### Corrupted Artifact (Size Mismatch)
If the consistency check reports size mismatches:
1. **Identify affected artifacts:**
```bash
curl "${base_url}/api/v1/admin/consistency-check" | jq '.size_mismatch_artifacts'
```
2. **Check if artifact can be re-uploaded:**
- If the original content is available, delete the corrupted artifact and re-upload
- The same content will produce the same artifact ID
3. **If original content is lost:**
- The artifact data is corrupted and cannot be recovered
- Delete the artifact record and notify affected users
- Consider restoring from backup if available
### Missing S3 Object
If database records exist but S3 objects are missing:
1. **Identify affected artifacts:**
```bash
curl "${base_url}/api/v1/admin/consistency-check" | jq '.missing_s3_keys'
```
2. **Check S3 bucket:**
- Verify the S3 bucket exists and is accessible
- Check S3 access logs for deletion events
- Check if objects were moved or lifecycle-deleted
3. **Recovery options:**
- Restore from S3 versioning (if enabled)
- Restore from backup
- Re-upload original content (if available)
- Delete orphaned database records
### Orphaned S3 Objects
If S3 objects exist without database records:
1. **Identify orphaned objects:**
```bash
curl "${base_url}/api/v1/admin/consistency-check" | jq '.orphaned_s3_keys'
```
2. **Investigate cause:**
- Upload interrupted before database commit?
- Database record deleted but S3 cleanup failed?
3. **Resolution:**
- If content is needed, create database record manually
- If content is not needed, delete the S3 object to reclaim storage
### Preventive Measures
1. **Enable S3 versioning** to recover from accidental deletions
2. **Regular backups** of both database and S3 bucket
3. **Scheduled consistency checks** to detect issues early
4. **Monitoring and alerting** on consistency check failures
5. **Audit logging** to track all artifact operations
## Verification in CI/CD
### Verifying Artifacts in Pipelines
```bash
#!/bin/bash
# Download and verify artifact in CI pipeline
ARTIFACT_URL="${ORCHARD_URL}/api/v1/project/${PROJECT}/${PACKAGE}/+/${TAG}"
# Download with verification headers
response=$(curl -s -D - "${ARTIFACT_URL}?mode=proxy" -o artifact.tar.gz)
expected_hash=$(echo "$response" | grep -i "X-Checksum-SHA256" | cut -d: -f2 | tr -d ' \r')
# Compute actual hash
actual_hash=$(sha256sum artifact.tar.gz | cut -d' ' -f1)
# Verify
if [ "$actual_hash" != "$expected_hash" ]; then
echo "ERROR: Integrity check failed!"
echo "Expected: $expected_hash"
echo "Actual: $actual_hash"
exit 1
fi
echo "Integrity verified: $actual_hash"
```
### Using Server-Side Verification
For critical deployments, use server-side pre-verification:
```bash
# Server verifies before streaming - returns 500 if corrupt
curl -f "${ARTIFACT_URL}?mode=proxy&verify=true&verify_mode=pre" -o artifact.tar.gz
```
This ensures the artifact is verified before any bytes are streamed to your pipeline.

View File

@@ -77,8 +77,6 @@ PostgreSQL secret name
{{- define "orchard.postgresql.secretName" -}}
{{- if .Values.orchard.database.existingSecret }}
{{- .Values.orchard.database.existingSecret }}
{{- else if and .Values.orchard.database.secretsManager .Values.orchard.database.secretsManager.enabled }}
{{- printf "%s-db-credentials" (include "orchard.fullname" .) }}
{{- else if .Values.postgresql.enabled }}
{{- printf "%s-postgresql" .Release.Name }}
{{- else }}
@@ -92,8 +90,6 @@ PostgreSQL password key in secret
{{- define "orchard.postgresql.passwordKey" -}}
{{- if .Values.orchard.database.existingSecret -}}
{{- .Values.orchard.database.existingSecretPasswordKey -}}
{{- else if and .Values.orchard.database.secretsManager .Values.orchard.database.secretsManager.enabled -}}
password
{{- else if .Values.postgresql.enabled -}}
password
{{- else -}}

View File

@@ -77,16 +77,8 @@ spec:
value: {{ include "orchard.postgresql.host" . | quote }}
- name: ORCHARD_DATABASE_PORT
value: {{ .Values.orchard.database.port | quote }}
{{- if and .Values.orchard.database.secretsManager .Values.orchard.database.secretsManager.enabled }}
- name: ORCHARD_DATABASE_USER
valueFrom:
secretKeyRef:
name: {{ include "orchard.postgresql.secretName" . }}
key: username
{{- else }}
- name: ORCHARD_DATABASE_USER
value: {{ .Values.orchard.database.user | default .Values.postgresql.auth.username | quote }}
{{- end }}
- name: ORCHARD_DATABASE_DBNAME
value: {{ .Values.orchard.database.dbname | default .Values.postgresql.auth.database | quote }}
- name: ORCHARD_DATABASE_SSLMODE
@@ -104,7 +96,6 @@ spec:
value: {{ .Values.orchard.s3.bucket | quote }}
- name: ORCHARD_S3_USE_PATH_STYLE
value: {{ .Values.orchard.s3.usePathStyle | quote }}
{{- if or .Values.minio.enabled .Values.orchard.s3.existingSecret .Values.orchard.s3.accessKeyId }}
- name: ORCHARD_S3_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
@@ -115,38 +106,16 @@ spec:
secretKeyRef:
name: {{ include "orchard.minio.secretName" . }}
key: {{ if .Values.minio.enabled }}root-password{{ else }}{{ .Values.orchard.s3.existingSecretSecretKeyKey }}{{ end }}
{{- end }}
- name: ORCHARD_DOWNLOAD_MODE
value: {{ .Values.orchard.download.mode | quote }}
- name: ORCHARD_PRESIGNED_URL_EXPIRY
value: {{ .Values.orchard.download.presignedUrlExpiry | quote }}
{{- if .Values.orchard.rateLimit }}
{{- if .Values.orchard.rateLimit.login }}
- name: ORCHARD_LOGIN_RATE_LIMIT
value: {{ .Values.orchard.rateLimit.login | quote }}
{{- end }}
{{- end }}
{{- if and .Values.orchard.database.secretsManager .Values.orchard.database.secretsManager.enabled }}
volumeMounts:
- name: db-secrets
mountPath: /mnt/secrets-store
readOnly: true
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- if and .Values.orchard.database.secretsManager .Values.orchard.database.secretsManager.enabled }}
volumes:
- name: db-secrets
csi:
driver: secrets-store.csi.k8s.io
readOnly: true
volumeAttributes:
secretProviderClass: {{ include "orchard.fullname" . }}-db-secret
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}

View File

@@ -1,27 +0,0 @@
{{- if and .Values.orchard.database.secretsManager .Values.orchard.database.secretsManager.enabled }}
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
name: {{ include "orchard.fullname" . }}-db-secret
labels:
{{- include "orchard.labels" . | nindent 4 }}
spec:
provider: aws
parameters:
objects: |
- objectName: "{{ .Values.orchard.database.secretsManager.secretArn }}"
objectType: "secretsmanager"
jmesPath:
- path: username
objectAlias: db-username
- path: password
objectAlias: db-password
secretObjects:
- secretName: {{ include "orchard.fullname" . }}-db-credentials
type: Opaque
data:
- objectName: db-username
key: username
- objectName: db-password
key: password
{{- end }}

View File

@@ -42,7 +42,6 @@ ingress:
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
hosts:
- host: orchard-dev.common.global.bsf.tools # Overridden by CI
paths:
@@ -114,10 +113,6 @@ orchard:
mode: "presigned"
presignedUrlExpiry: 3600
# Relaxed rate limits for dev/feature environments (allows integration tests to run)
rateLimit:
login: "1000/minute" # Default is 5/minute, relaxed for CI integration tests
# PostgreSQL - ephemeral, no persistence
postgresql:
enabled: true

View File

@@ -4,7 +4,7 @@ replicaCount: 1
image:
repository: registry.global.bsf.tools/esv/bsf/bsf-integration/orchard/orchard-mvp
pullPolicy: Always
pullPolicy: IfNotPresent # Don't always pull in prod
tag: "latest" # Overridden by CI
imagePullSecrets:
@@ -19,8 +19,7 @@ initContainer:
serviceAccount:
create: true
automount: true
annotations:
eks.amazonaws.com/role-arn: arn:aws-us-gov:iam::052673043337:role/service-orchard-prd
annotations: {}
name: "orchard"
podAnnotations: {}
@@ -42,7 +41,6 @@ ingress:
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
hosts:
- host: orchard.common.global.bsf.tools
paths:
@@ -92,38 +90,122 @@ orchard:
host: "0.0.0.0"
port: 8080
# Database configuration - uses AWS Secrets Manager via CSI driver
# Database configuration (used when postgresql.enabled is false)
# TODO: Configure for managed PostgreSQL when ready
database:
host: "orchard-prd.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com"
host: ""
port: 5432
dbname: orchard_prod
sslmode: require
secretsManager:
enabled: true
secretArn: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-0afc8af5-f644-4284-92fb-2ed545490f92-3v9uXV"
user: orchard
password: ""
dbname: orchard
sslmode: disable
existingSecret: ""
existingSecretPasswordKey: "password"
# S3 configuration - uses IRSA for credentials
# S3 configuration (used when minio.enabled is false)
# TODO: Configure for real S3 when ready
s3:
endpoint: "" # Empty = use AWS default
region: us-gov-west-1
bucket: orchard-artifacts-prod
usePathStyle: false # Real S3 uses virtual-hosted style
endpoint: ""
region: us-east-1
bucket: orchard-artifacts
accessKeyId: ""
secretAccessKey: ""
usePathStyle: true
existingSecret: ""
existingSecretAccessKeyKey: "access-key-id"
existingSecretSecretKeyKey: "secret-access-key"
download:
mode: "presigned"
presignedUrlExpiry: 3600
# PostgreSQL subchart - disabled in prod, using RDS
# PostgreSQL subchart - MVP uses subchart, switch to managed later
postgresql:
enabled: false
enabled: true
image:
registry: containers.global.bsf.tools
repository: bitnami/postgresql
tag: "15"
pullPolicy: IfNotPresent
auth:
username: orchard
password: orchard-prod-password # TODO: Use existingSecret
database: orchard
primary:
persistence:
enabled: true # Enable persistence for prod
size: 20Gi
resourcesPreset: "none"
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 512Mi
volumePermissions:
resourcesPreset: "none"
resources:
limits:
cpu: 100m
memory: 128Mi
requests:
cpu: 50m
memory: 128Mi
# MinIO subchart - disabled in prod, using real S3
# MinIO subchart - MVP uses subchart, switch to real S3 later
minio:
enabled: false
enabled: true
image:
registry: containers.global.bsf.tools
repository: bitnami/minio
tag: "latest"
pullPolicy: IfNotPresent
auth:
rootUser: minioadmin
rootPassword: minioadmin-prod # TODO: Use existingSecret
defaultBuckets: "orchard-artifacts"
persistence:
enabled: true # Enable persistence for prod
size: 100Gi
resourcesPreset: "none"
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 512Mi
defaultInitContainers:
volumePermissions:
resourcesPreset: "none"
resources:
limits:
cpu: 100m
memory: 128Mi
requests:
cpu: 50m
memory: 128Mi
provisioning:
resources:
limits:
cpu: 200m
memory: 256Mi
requests:
cpu: 100m
memory: 256Mi
# MinIO ingress - disabled in prod, using real S3
# MinIO external ingress for presigned URL access
minioIngress:
enabled: false
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0"
host: "minio-orchard.common.global.bsf.tools"
tls:
enabled: true
secretName: minio-prod-tls
redis:
enabled: false

View File

@@ -19,8 +19,7 @@ initContainer:
serviceAccount:
create: true
automount: true
annotations:
eks.amazonaws.com/role-arn: arn:aws-us-gov:iam::052673043337:role/service-orchard-stage
annotations: {}
name: "orchard"
podAnnotations: {}
@@ -42,7 +41,6 @@ ingress:
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
hosts:
- host: orchard-stage.common.global.bsf.tools
paths:
@@ -94,43 +92,126 @@ orchard:
host: "0.0.0.0"
port: 8080
# Database configuration - uses AWS Secrets Manager via CSI driver
# Database configuration (used when postgresql.enabled is false)
database:
host: "orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com"
host: ""
port: 5432
dbname: postgres
sslmode: require
secretsManager:
enabled: true
secretArn: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
user: orchard
password: ""
dbname: orchard
sslmode: disable
existingSecret: ""
existingSecretPasswordKey: "password"
# S3 configuration - uses IRSA for credentials
# S3 configuration (used when minio.enabled is false)
s3:
endpoint: "" # Empty = use AWS default
region: us-gov-west-1
bucket: orchard-artifacts-stage
usePathStyle: false # Real S3 uses virtual-hosted style
endpoint: ""
region: us-east-1
bucket: orchard-artifacts
accessKeyId: ""
secretAccessKey: ""
usePathStyle: true
existingSecret: ""
existingSecretAccessKeyKey: "access-key-id"
existingSecretSecretKeyKey: "secret-access-key"
# Download configuration
download:
mode: "presigned" # presigned, redirect, or proxy
presignedUrlExpiry: 3600 # Presigned URL expiry in seconds
# Relaxed rate limits for stage (allows CI integration tests to run)
rateLimit:
login: "1000/minute" # Default is 5/minute, relaxed for CI integration tests
# PostgreSQL subchart - disabled in stage, using RDS
# PostgreSQL subchart configuration
postgresql:
enabled: false
enabled: true
image:
registry: containers.global.bsf.tools
repository: bitnami/postgresql
tag: "15"
pullPolicy: IfNotPresent
auth:
username: orchard
password: orchard-password
database: orchard
primary:
persistence:
enabled: false
size: 10Gi
# Resources with memory requests = limits per cluster policy
resourcesPreset: "none"
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 512Mi
# Volume permissions init container
volumePermissions:
resourcesPreset: "none"
resources:
limits:
cpu: 100m
memory: 128Mi
requests:
cpu: 50m
memory: 128Mi
# MinIO subchart - disabled in stage, using real S3
# MinIO subchart configuration
minio:
enabled: false
enabled: true
image:
registry: containers.global.bsf.tools
repository: bitnami/minio
tag: "latest"
pullPolicy: IfNotPresent
auth:
rootUser: minioadmin
rootPassword: minioadmin
defaultBuckets: "orchard-artifacts"
persistence:
enabled: false
size: 50Gi
# Resources with memory requests = limits per cluster policy
resourcesPreset: "none" # Disable preset to use explicit resources
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 250m
memory: 512Mi
# Init container resources
defaultInitContainers:
volumePermissions:
resourcesPreset: "none"
resources:
limits:
cpu: 100m
memory: 128Mi
requests:
cpu: 50m
memory: 128Mi
# Provisioning job resources
provisioning:
resources:
limits:
cpu: 200m
memory: 256Mi
requests:
cpu: 100m
memory: 256Mi
# MinIO ingress - disabled in stage, using real S3
# MinIO external ingress for presigned URL access (separate from subchart ingress)
minioIngress:
enabled: false
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt"
nginx.ingress.kubernetes.io/proxy-body-size: "0" # Disable body size limit for uploads
host: "minio-orch-stage.common.global.bsf.tools"
tls:
enabled: true
secretName: minio-tls
# Redis subchart configuration (for future caching)
redis: