Compare commits: cb3d62b02a ... feature/tr (124 commits)
Commits (SHA1):

20e5a2948e b4e23d9899 aa3bd05d46 810e024d09 9e3eea4d08 a9de32d922 e8cf2462b7 038ad4ed1b
858b45d434 95470b2bf6 c512d85f9e 82f67539bd e93e7e7021 1d51c856b0 c92895ffe9 b147af43d2
aed48bb4a2 0e67ebf94f 0a69910e8b 576791d19e a5796f5437 284945ba33 fe07638485 7120cf64f1
718e6e7193 abba90ebac 6c8b922818 99d28cf9c6 b5579f1643 fafa03e4ce d4b2da3232 7b04bbdf05
3a807870a3 f966fde7df 133d9cbfd6 276b4f2743 67ac6bb3f8 b0bb3ed569 1ac75e1017 693613f111
9da4ae8c0d 7ffdc64364 6abc0c88b0 e96dc5cde8 cba5bac383 535280a783 c9026e1950 fedbd95cf4
255e25d66d 427d2fec70 199821b34d 584acd1e90 f7ffc1c877 b93d5a9c68 a98ac154d5 823dfcb400
0ad106a141 5d5a054452 f3a817f8a5 f212864647 e8f26e9976 32162c4ec7 1bb0c4e911 179503c68b
2f3c44b58e 4b3d2fd41d 7cfad28f67 37666e41a7 0cc4f25362 5c9da9003b 90bb2a3a39 617bcbe89c
1cbd335443 10d3694794 bccbc71c13 35fda65d38 08dce6cbb8 2f1891cf01 81458b3bcb 7e68baed08
66622caf5d 96d79e4127 accba9e404 64e420fb58 994f166de8 8999552949 2df97ae94a caa0c5af0c
3fd2747ae4 96367da448 2686fdcb89 0eb2deb4ca 3fe421f31d 68660eacf6 b52c8840f1 4afcdf5cda
bc3da14d50 2843335f6d 2097865874 0e1474bf6c 9604540dd3 a6df5aba5a 096887d4da 7d80bef39a
96198dc127 fd06dfb3ce 11852adc66 21555d64a3 b83f19aa52 5d0122fc36 81b423e0ea e89947f3d3
459867abdb 2b5bc60a69 8b7b523aa8 dea03c4a12 1793fd3a8f c119ab4a04 e9404a4425 b896ad1fad
b3861894cb 71bb7d96b3 e0802444c0 c08d1082eb
.env.example (new file, 7 lines)
@@ -0,0 +1,7 @@
# Orchard Local Development Environment
# Copy this file to .env and customize as needed
# Note: .env is gitignored and will not be committed

# Admin account password (required for local development)
# This sets the initial admin password when the database is first created
ORCHARD_ADMIN_PASSWORD=changeme123
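The comments in this template, together with the `.env` support that the CHANGELOG below adds to docker-compose.local.yml (#87), suggest a local setup flow along these lines; a minimal sketch, not part of the diff:

```sh
# Hypothetical local-setup sketch based on the .env.example comments above;
# docker-compose.local.yml is the compose file named in the CHANGELOG (#87).
cp .env.example .env   # .env is gitignored, safe to customize
# edit ORCHARD_ADMIN_PASSWORD before first boot; it seeds the admin account
docker compose -f docker-compose.local.yml up -d
```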
.gitignore (vendored, 16 lines changed)
@@ -1,3 +1,11 @@
# Python
__pycache__/
*.py[cod]
*.pyo
.Python
*.egg-info/
.eggs/

# Binaries
/bin/
*.exe
@@ -29,6 +37,10 @@ Thumbs.db
# Build
/build/
/dist/
frontend/dist/

# Node
node_modules/

# Local config overrides
config.local.yaml
@@ -49,6 +61,8 @@ logs/
tmp/
temp/

# Claude Code
# AI Agent Instructions
.claude/
CLAUDE.md
AGENTS.md
PROSPER-NOTES.md
.gitlab-ci.yml (606 lines changed)
@@ -1,95 +1,545 @@
stages:
  - test
  - build
  - publish
  # - deploy
include:
  - project: 'esv/bsf/pypi/prosper'
    ref: v0.64.1
    file: '/prosper/templates/projects/docker.yml'

variables:
  # Container registry settings
  REGISTRY: ${CI_REGISTRY}
  IMAGE_NAME: ${CI_REGISTRY_IMAGE}
  # Buildah settings
  STORAGE_DRIVER: vfs
  BUILDAH_FORMAT: docker
  BUILDAH_ISOLATION: chroot
  # renovate: datasource=gitlab-tags depName=esv/bsf/pypi/prosper versioning=semver registryUrl=https://gitlab.global.bsf.tools
  PROSPER_VERSION: v0.64.1
  # Use internal PyPI proxy instead of public internet
  PIP_INDEX_URL: https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple
  # Environment URLs (used by deploy and test jobs)
  STAGE_URL: https://orchard-stage.common.global.bsf.tools
  PROD_URL: https://orchard.common.global.bsf.tools
  # Shared pip cache directory
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"

.buildah-base:
  image: deps.global.bsf.tools/quay.io/buildah/stable:latest
  before_script:
    - buildah version
    - buildah login -u ${CI_REGISTRY_USER} -p ${CI_REGISTRY_PASSWORD} ${CI_REGISTRY}
# Prevent duplicate pipelines for MRs
workflow:
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
      when: never
    - when: always

# Run Python tests
test:
  stage: test
# Define stages - extends Prosper's stages with our custom ones
stages:
  - .pre
  - lint
  - build
  - test
  - analyze
  - deploy

# Override Prosper template jobs to exclude tag pipelines
# Tags only run deploy_prod and smoke_test_prod (image already built on main)
build_image:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

test_image:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

hadolint:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

kics:
  variables:
    KICS_CONFIG: kics.config
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

secrets:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

app_deps_scan:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

cve_scan:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

app_sbom_analysis:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

cve_sbom_analysis:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

# Disable prosper_setup for tag pipelines since no build/analysis jobs run
# (image is already built when commit was on main, and deploy uses helm directly)
prosper_setup:
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

# Override release job to wait for stage deployment and smoke tests before creating tag
# This ensures the tag (which triggers prod deploy) is only created after stage passes
release:
  needs: [smoke_test_stage, changelog]

# Full integration test suite template (for feature/stage deployments)
# Runs the complete pytest integration test suite against the deployed environment
.integration_test_template: &integration_test_template
  stage: deploy  # Runs in deploy stage, but after deployment due to 'needs'
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 20m  # Full suite takes longer than smoke tests
  interruptible: true  # Cancel if new pipeline starts
  retry: 1  # Retry once on failure (network flakiness)
  cache:
    key: pip-$CI_COMMIT_REF_SLUG
    paths:
      - .pip-cache/
    policy: pull-push
  before_script:
    - pip install -r backend/requirements.txt
    - pip install pytest pytest-asyncio httpx
    - pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt
    - pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio httpx
  script:
    - cd backend
    - python -m pytest -v || echo "No tests yet"
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    # Debug: Print environment variables for test configuration
    - echo "ORCHARD_TEST_URL=$ORCHARD_TEST_URL"
    - echo "ORCHARD_TEST_PASSWORD is set to '${ORCHARD_TEST_PASSWORD:-NOT SET}'"
    # Run full integration test suite, excluding:
    # - large/slow tests
    # - requires_direct_s3 tests (can't access MinIO from outside K8s cluster)
    # ORCHARD_TEST_URL tells the tests which server to connect to
    # Note: Auth tests work because dev/stage deployments have relaxed rate limits
    - |
      python -m pytest tests/integration/ -v \
        --junitxml=integration-report.xml \
        -m "not large and not slow and not requires_direct_s3" \
        --tb=short
  artifacts:
    when: always
    expire_in: 1 week
    paths:
      - backend/integration-report.xml
    reports:
      junit: backend/integration-report.xml

# Build container image for merge requests (no push)
build:
  stage: build
  extends: .buildah-base
# Lightweight smoke test template (for production - no test data creation)
.smoke_test_template: &smoke_test_template
  stage: deploy
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
    - |
      buildah build \
        --build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
        --tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
        --label org.opencontainers.image.source=${CI_PROJECT_URL} \
        --label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
        --label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
        .
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - |
      python - <<'PYTEST_SCRIPT'
      import httpx
      import os
      import sys

# Build and push on main branch
publish:
  stage: publish
  extends: .buildah-base
      BASE_URL = os.environ.get("ORCHARD_TEST_URL")
      if not BASE_URL:
          print("ERROR: ORCHARD_TEST_URL not set")
          sys.exit(1)

      print(f"Running smoke tests against {BASE_URL}")
      client = httpx.Client(base_url=BASE_URL, timeout=30.0)

      errors = []

      # Test 1: Health endpoint
      print("\n=== Test 1: Health endpoint ===")
      r = client.get("/health")
      if r.status_code == 200:
          print("PASS: Health check passed")
      else:
          errors.append(f"Health check failed: {r.status_code}")

      # Test 2: API responds (list projects)
      print("\n=== Test 2: API responds ===")
      r = client.get("/api/v1/projects")
      if r.status_code == 200:
          projects = r.json()
          print(f"PASS: API responding, found {len(projects)} project(s)")
      else:
          errors.append(f"API check failed: {r.status_code}")

      # Test 3: Frontend served
      print("\n=== Test 3: Frontend served ===")
      r = client.get("/")
      if r.status_code == 200 and "</html>" in r.text:
          print("PASS: Frontend is being served")
      else:
          errors.append(f"Frontend check failed: {r.status_code}")

      # Report results
      print("\n" + "=" * 50)
      if errors:
          print(f"FAILED: {len(errors)} error(s)")
          for e in errors:
              print(f"  FAIL: {e}")
          sys.exit(1)
      else:
          print("SUCCESS: All smoke tests passed!")
          sys.exit(0)
      PYTEST_SCRIPT

# Integration tests for feature deployment (full suite)
# Uses DEV_ADMIN_PASSWORD CI variable (same as deploy_feature)
integration_test_feature:
  <<: *integration_test_template
  needs: [deploy_feature]
  variables:
    ORCHARD_TEST_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
    ORCHARD_TEST_PASSWORD: $DEV_ADMIN_PASSWORD
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success

# Run Python backend unit tests
python_unit_tests:
  stage: test
  needs: []  # Run in parallel with build
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 15m
  interruptible: true  # Cancel if new pipeline starts
  cache:
    key: pip-$CI_COMMIT_REF_SLUG
    paths:
      - .pip-cache/
    policy: pull-push
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt
    - pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio pytest-cov httpx
  script:
    - |
      buildah build \
        --build-arg NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org/ \
        --tag ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} \
        --tag ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG} \
        --tag ${IMAGE_NAME}:latest \
        --label org.opencontainers.image.source=${CI_PROJECT_URL} \
        --label org.opencontainers.image.revision=${CI_COMMIT_SHA} \
        --label org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ) \
        .
    - buildah push ${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}
    - buildah push ${IMAGE_NAME}:${CI_COMMIT_REF_SLUG}
    - buildah push ${IMAGE_NAME}:latest
    - cd backend
    # Run unit tests (integration tests run post-deployment against live environment)
    - python -m pytest tests/unit/ -v --cov=app --cov-report=term --cov-report=xml:coverage.xml --cov-report=html:coverage_html --junitxml=pytest-report.xml
  artifacts:
    when: always
    expire_in: 1 week
    paths:
      - backend/coverage.xml
      - backend/coverage_html/
      - backend/pytest-report.xml
    reports:
      junit: backend/pytest-report.xml
      coverage_report:
        coverage_format: cobertura
        path: backend/coverage.xml
  coverage: '/TOTAL.*\s+(\d+%)/'
  rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

# Run frontend tests
frontend_tests:
  stage: test
  needs: []  # Run in parallel with build
  image: deps.global.bsf.tools/docker/node:20-alpine
  timeout: 15m
  interruptible: true  # Cancel if new pipeline starts
  cache:
    key: npm-$CI_COMMIT_REF_SLUG
    paths:
      - frontend/node_modules/
    policy: pull-push
  before_script:
    - cd frontend
    - npm config set registry https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org
    - npm ci --verbose
  script:
    - npm run test -- --run --reporter=verbose --coverage
  artifacts:
    when: always
    expire_in: 1 week
    paths:
      - frontend/coverage/
    reports:
      coverage_report:
        coverage_format: cobertura
        path: frontend/coverage/cobertura-coverage.xml
  coverage: '/All files[^|]*\|[^|]*\s+([\d\.]+)/'
  rules:
    - if: '$CI_COMMIT_TAG'
      when: never
    - when: on_success

# deploy_helm_charts:
#   stage: deploy
#   image:
#     name: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
#   parallel:
#     matrix:
#       # - ENV: "prod"
#       #   VALUES_FILE: "helm/values-prod.yaml"
#       #   CONTEXT: "esv/bsf/bsf-services/gitlab-kaas-agent-config:services-prod-agent"
#       #   NAMESPACE: "bsf-services-namespace"
#       #   ONLY: "main"
#       - ENV: "dev"
#         VALUES_FILE: "helm/orchard/values.yaml"
#         CONTEXT: "esv/bsf/bsf-services/gitlab-kaas-agent-config:services-prod-agent"
#         NAMESPACE: "bsf-services-dev-namespace"
#         # ONLY: ["branches", "!main"]
#   script:
#     - kubectl config use-context $CONTEXT
#     - echo "Deploy - buildah push ${IMAGE_NAME}:latest"
#     - |
#       helm upgrade --install orchard-dev ./helm/orchard --namespace $NAMESPACE -f $VALUES_FILE
# Shared deploy configuration
.deploy_template: &deploy_template
  stage: deploy
  needs: [build_image, test_image, kics, hadolint, python_unit_tests, frontend_tests, secrets, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis]
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12

.helm_setup: &helm_setup
  - helm version
  - cd helm/orchard
  # OCI-based charts from internal registry - no repo add needed
  - helm dependency update

# Simplified deployment verification - just health check
# Full API/frontend checks are done by integration tests post-deployment
.verify_deployment: &verify_deployment |
  echo "=== Waiting for health endpoint (certs may take a few minutes) ==="
  for i in $(seq 1 30); do
    if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then
      echo "Health check passed!"
      echo "Deployment URL: $BASE_URL"
      exit 0
    fi
    echo "Attempt $i/30 - waiting 10s..."
    sleep 10
  done
  echo "Health check failed after 30 attempts"
  exit 1

# Ephemeral test deployment in stage namespace (main branch only)
# Runs integration tests before promoting to long-running stage
deploy_test:
  <<: *deploy_template
  variables:
    NAMESPACE: orch-stage-namespace
    VALUES_FILE: helm/orchard/values-dev.yaml
    BASE_URL: https://orchard-test.common.global.bsf.tools
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
    - *helm_setup
  script:
    - echo "Deploying ephemeral test environment"
    - cd $CI_PROJECT_DIR
    - |
      helm upgrade --install orchard-test ./helm/orchard \
        --namespace $NAMESPACE \
        -f $VALUES_FILE \
        --set image.tag=git.linux-amd64-$CI_COMMIT_SHA \
        --set orchard.auth.adminPassword=$STAGE_ADMIN_PASSWORD \
        --set ingress.hosts[0].host=orchard-test.common.global.bsf.tools \
        --set ingress.tls[0].hosts[0]=orchard-test.common.global.bsf.tools \
        --set ingress.tls[0].secretName=orchard-test-tls \
        --set minioIngress.host=minio-test.common.global.bsf.tools \
        --set minioIngress.tls.secretName=minio-test-tls \
        --wait \
        --atomic \
        --timeout 10m
    - kubectl rollout status deployment/orchard-test-server -n $NAMESPACE --timeout=10m
    - *verify_deployment
  environment:
    name: test
    url: https://orchard-test.common.global.bsf.tools
    on_stop: cleanup_test
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Integration tests for ephemeral test deployment (main branch)
# Runs against orchard-test before promoting to long-running stage
integration_test_main:
  <<: *integration_test_template
  needs: [deploy_test]
  variables:
    ORCHARD_TEST_URL: https://orchard-test.common.global.bsf.tools
    ORCHARD_TEST_PASSWORD: $STAGE_ADMIN_PASSWORD
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Cleanup ephemeral test deployment after integration tests
cleanup_test:
  stage: deploy
  needs: [integration_test_main]
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  timeout: 5m
  variables:
    NAMESPACE: orch-stage-namespace
    GIT_STRATEGY: none
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  script:
    - echo "Cleaning up ephemeral test deployment orchard-test"
    - helm uninstall orchard-test --namespace $NAMESPACE || true
  environment:
    name: test
    action: stop
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
  allow_failure: true

# Deploy to long-running stage (main branch, after ephemeral tests pass)
deploy_stage:
  stage: deploy
  # Wait for ephemeral test to pass before promoting to long-running stage
  needs: [cleanup_test]
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  variables:
    NAMESPACE: orch-stage-namespace
    VALUES_FILE: helm/orchard/values-stage.yaml
    BASE_URL: $STAGE_URL
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
    - *helm_setup
  script:
    - echo "Deploying to long-running stage environment"
    - cd $CI_PROJECT_DIR
    - |
      helm upgrade --install orchard-stage ./helm/orchard \
        --namespace $NAMESPACE \
        -f $VALUES_FILE \
        --set image.tag=git.linux-amd64-$CI_COMMIT_SHA \
        --set orchard.auth.adminPassword=$STAGE_ADMIN_PASSWORD \
        --wait \
        --atomic \
        --timeout 10m
    - kubectl rollout status deployment/orchard-stage-server -n $NAMESPACE --timeout=10m
    - *verify_deployment
  environment:
    name: stage
    url: $STAGE_URL
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Smoke test for long-running stage (after promotion)
smoke_test_stage:
  <<: *smoke_test_template
  needs: [deploy_stage]
  variables:
    ORCHARD_TEST_URL: $STAGE_URL
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Deploy feature branch to dev namespace
deploy_feature:
  <<: *deploy_template
  variables:
    NAMESPACE: orch-dev-namespace
    VALUES_FILE: helm/orchard/values-dev.yaml
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard
    - *helm_setup
  script:
    - echo "Deploying feature branch $CI_COMMIT_REF_SLUG"
    - cd $CI_PROJECT_DIR
    - |
      helm upgrade --install orchard-$CI_COMMIT_REF_SLUG ./helm/orchard \
        --namespace $NAMESPACE \
        -f $VALUES_FILE \
        --set image.tag=git.linux-amd64-$CI_COMMIT_SHA \
        --set orchard.auth.adminPassword=$DEV_ADMIN_PASSWORD \
        --set ingress.hosts[0].host=orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools \
        --set ingress.tls[0].hosts[0]=orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools \
        --set ingress.tls[0].secretName=orchard-$CI_COMMIT_REF_SLUG-tls \
        --set minioIngress.host=minio-$CI_COMMIT_REF_SLUG.common.global.bsf.tools \
        --set minioIngress.tls.secretName=minio-$CI_COMMIT_REF_SLUG-tls \
        --wait \
        --atomic \
        --timeout 10m
    - kubectl rollout status deployment/orchard-$CI_COMMIT_REF_SLUG-server -n $NAMESPACE --timeout=10m
    - export BASE_URL="https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools"
    - *verify_deployment
  environment:
    name: review/$CI_COMMIT_REF_SLUG
    url: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
    on_stop: cleanup_feature
    auto_stop_in: 1 week
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success

# Cleanup feature branch deployment (standalone - doesn't need deploy dependencies)
cleanup_feature:
  stage: deploy
  needs: []
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  timeout: 5m
  variables:
    NAMESPACE: orch-dev-namespace
    GIT_STRATEGY: none  # No source needed, branch may be deleted
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard
  script:
    - echo "Cleaning up feature deployment orchard-$CI_COMMIT_REF_SLUG"
    - helm uninstall orchard-$CI_COMMIT_REF_SLUG --namespace $NAMESPACE || true
  environment:
    name: review/$CI_COMMIT_REF_SLUG
    action: stop
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: manual
  allow_failure: true

# Deploy to production (version tags only)
deploy_prod:
  stage: deploy
  # For tag pipelines, no other jobs run - image was already built when commit was on main
  needs: []
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  variables:
    NAMESPACE: orch-namespace
    VALUES_FILE: helm/orchard/values-prod.yaml
    BASE_URL: $PROD_URL
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
    - *helm_setup
  script:
    - echo "Deploying to PRODUCTION - version $CI_COMMIT_TAG"
    - cd $CI_PROJECT_DIR
    - |
      helm upgrade --install orchard-prod ./helm/orchard \
        --namespace $NAMESPACE \
        -f $VALUES_FILE \
        --set image.tag=git.linux-amd64-$CI_COMMIT_SHA \
        --wait \
        --atomic \
        --timeout 10m
    - kubectl rollout status deployment/orchard-prod-server -n $NAMESPACE --timeout=10m
    - *verify_deployment
  environment:
    name: production
    url: $PROD_URL
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
  rules:
    # Only run on semantic version tags (v1.0.0, v1.2.3, etc.)
    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
      when: on_success
  allow_failure: false

# Smoke tests for production deployment (read-only, no test data creation)
smoke_test_prod:
  <<: *smoke_test_template
  needs: [deploy_prod]
  variables:
    ORCHARD_TEST_URL: $PROD_URL
  rules:
    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
      when: on_success
.gitlab/.gitkeep (new file, empty)
.gitlab/agents/orchard-stage/config.yaml (new file, 4 lines)
@@ -0,0 +1,4 @@
# GitLab Agent configuration for stage deployments
ci_access:
  projects:
    - id: esv/bsf/bsf-integration/orchard/orchard-mvp
.gitlab/agents/orchard/config.yaml (new file, 4 lines)
@@ -0,0 +1,4 @@
# GitLab Agent configuration for dev/feature deployments
ci_access:
  projects:
    - id: esv/bsf/bsf-integration/orchard/orchard-mvp
.gitlab/orchard/.gitkeep (new file, empty)
.gitlab/orchard/config.yaml (new file, 3 lines)
@@ -0,0 +1,3 @@
ci_access:
  projects:
    - id: esv/bsf/bsf-integration/orchard/orchard-mvp
.gitleaks.toml (new file, 8 lines)
@@ -0,0 +1,8 @@
# Gitleaks configuration
# https://github.com/gitleaks/gitleaks#configuration

[allowlist]
# Test files that contain variable names matching secret patterns (e.g., s3_key)
paths = [
    '''backend/tests/.*\.py''',
]
.gitleaksignore (new file, 19 lines)
@@ -0,0 +1,19 @@
# Gitleaks ignore file
# https://github.com/gitleaks/gitleaks#gitleaksignore
#
# False positive: s3_key is an attribute name in test assertions, not a secret
# These are historical commits - files have since been deleted or updated with inline comments
7e68baed0886a3c928644cd01aa3b39f92d4f976:backend/tests/test_duplicate_detection.py:generic-api-key:154
81458b3bcb5ace97109ba4c16f4afa6e55b1b8bd:backend/tests/test_duplicate_detection.py:generic-api-key:154
2f1891cf0126ec0e7d4c789d872a2cb2dd3a1745:backend/tests/unit/test_storage.py:generic-api-key:381
10d36947948de796f0bacea3827f4531529c405d:backend/tests/unit/test_storage.py:generic-api-key:381
bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:generic-api-key:381
5c9da9003b844a2d655cce74a7c82c57e74f27c4:backend/tests/unit/test_storage.py:generic-api-key:381
90bb2a3a393d2361dc3136ee8d761debb0726d8a:backend/tests/unit/test_storage.py:generic-api-key:381
37666e41a72d2a4f34447c0d1a8728e1d7271d24:backend/tests/unit/test_storage.py:generic-api-key:381
0cc4f253621a9601c5193f6ae1e7ae33f0e7fc9b:backend/tests/unit/test_storage.py:generic-api-key:381
35fda65d381acc5ab59bc592ee3013f75906c197:backend/tests/unit/test_storage.py:generic-api-key:381
08dce6cbb836b687002751fed4159bfc2da61f8b:backend/tests/unit/test_storage.py:generic-api-key:381
617bcbe89cff9a009d77e4f1f1864efed1820e63:backend/tests/unit/test_storage.py:generic-api-key:381
1cbd33544388e0fe6db752fa8886fab33cf9ce7c:backend/tests/unit/test_storage.py:generic-api-key:381
7cfad28f678f5a5b8b927d694a17b9ba446b7138:backend/tests/unit/test_storage.py:generic-api-key:381
CHANGELOG.md (new file, 568 lines)
@@ -0,0 +1,568 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- Added transparent PyPI proxy implementing PEP 503 Simple API (#108)
  - `GET /pypi/simple/` - package index (proxied from upstream)
  - `GET /pypi/simple/{package}/` - version list with rewritten download links
  - `GET /pypi/simple/{package}/{filename}` - download with automatic caching
  - Allows `pip install --index-url https://orchard.../pypi/simple/ <package>`
  - Artifacts cached on first access through configured upstream sources
- Added `POST /api/v1/cache/resolve` endpoint to cache packages by coordinates instead of URL (#108)

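For illustration, a minimal sketch of using the PEP 503 endpoints listed in the #108 entry; `orchard.example.com` is a placeholder host (the real deployment host is elided above):

```sh
# Hypothetical usage of the PEP 503 Simple API endpoints above;
# orchard.example.com stands in for the elided deployment host.
pip install --index-url https://orchard.example.com/pypi/simple/ requests

# Inspect the rewritten version list for a package directly:
curl https://orchard.example.com/pypi/simple/requests/
```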
### Changed
- Upstream sources table text is now centered under column headers (#108)
- ENV badge now appears inline with source name instead of separate column (#108)
- Test and Edit buttons now have more prominent button styling (#108)
- Reduced footer padding for cleaner layout (#108)

### Fixed
- Fixed purge_seed_data crash when deleting access permissions - was comparing UUID to VARCHAR column (#107)

### Changed
- Upstream source connectivity test no longer follows redirects, fixing "Exceeded maximum allowed redirects" error with Artifactory proxies (#107)
- Test runs automatically after saving a new or updated upstream source (#107)
- Test status now shows as colored dots (green=success, red=error) instead of text badges (#107)
- Clicking red dot shows error details in a modal (#107)
- Source name column no longer wraps text for better table layout (#107)
- Renamed "Cache Management" page to "Upstream Sources" (#107)
- Moved Delete button from table row to edit modal for cleaner table layout (#107)

### Removed
- Removed `is_public` field from upstream sources - all sources are now treated as internal/private (#107)
- Removed `allow_public_internet` (air-gap mode) setting from cache settings - not needed for enterprise proxy use case (#107)
- Removed seeding of public registry URLs (npm-public, pypi-public, maven-central, docker-hub) (#107)
- Removed "Public" badge and checkbox from upstream sources UI (#107)
- Removed "Allow Public Internet" toggle from cache settings UI (#107)
- Removed "Global Settings" section from cache management UI - auto-create system projects is always enabled (#107)
- Removed unused CacheSettings frontend types and API functions (#107)

### Added
- Added `ORCHARD_PURGE_SEED_DATA` environment variable support to stage helm values to remove seed data from long-running deployments (#107)
- Added frontend system projects visual distinction (#105)
  - "Cache" badge for system projects in project list
  - "System Cache" badge on project detail page
  - Added `is_system` field to Project type
- Added frontend admin page for upstream sources and cache settings (#75)
  - New `/admin/cache` page accessible from user menu (admin only)
  - Upstream sources table with create/edit/delete/test connectivity
  - Cache settings section with air-gap mode and auto-create system projects toggles
  - Visual indicators for env-defined sources (locked, cannot be modified)
  - Environment variable override badges when settings are overridden
  - API client functions for all cache admin operations
- Added environment variable overrides for cache configuration (#74)
  - `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET` - Override allow_public_internet (air-gap mode)
  - `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS` - Override auto_create_system_projects
  - `ORCHARD_UPSTREAM__{NAME}__*` - Define upstream sources via env vars
  - Env-defined sources appear in API with `source: "env"` marker
  - Env-defined sources cannot be modified/deleted via API (400 error)
  - Cache settings response includes `*_env_override` fields when overridden
  - 7 unit tests for env var parsing and configuration
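A minimal sketch of setting the #74 overrides; the first two variable names come from the entry above, while the `__URL` suffix on the upstream-source pattern is a hypothetical example (the diff only shows the `ORCHARD_UPSTREAM__{NAME}__*` wildcard):

```sh
# Documented override variables (#74); values are illustrative.
export ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET=false        # air-gap mode
export ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS=true
# ORCHARD_UPSTREAM__{NAME}__* defines a source via env vars; the URL
# field suffix below is hypothetical - only the wildcard is documented.
export ORCHARD_UPSTREAM__INTERNAL_PYPI__URL=https://deps.example.com/pypi/simple
```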
- Added Global Cache Settings Admin API (#73)
  - `GET /api/v1/admin/cache-settings` - Retrieve current cache settings
  - `PUT /api/v1/admin/cache-settings` - Update cache settings (partial updates)
  - Admin-only access with audit logging
  - Controls `allow_public_internet` (air-gap mode) and `auto_create_system_projects`
  - 7 integration tests for settings management
- Added Upstream Sources Admin API for managing cache sources (#72)
  - `GET /api/v1/admin/upstream-sources` - List sources with filtering
  - `POST /api/v1/admin/upstream-sources` - Create source with auth configuration
  - `GET /api/v1/admin/upstream-sources/{id}` - Get source details
  - `PUT /api/v1/admin/upstream-sources/{id}` - Update source (partial updates)
  - `DELETE /api/v1/admin/upstream-sources/{id}` - Delete source
  - `POST /api/v1/admin/upstream-sources/{id}/test` - Test connectivity
  - Admin-only access with audit logging
  - Credentials never exposed (only has_password/has_headers flags)
  - 13 integration tests for all CRUD operations
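A hedged sketch of calling the #72 admin endpoints; the host and the bearer-token auth style are assumptions (the diff documents admin-only access but not how credentials are passed):

```sh
# Hypothetical calls to the documented admin endpoints; host and the
# Authorization scheme are placeholders, not confirmed by this diff.
curl -s -H "Authorization: Bearer $ORCHARD_API_KEY" \
  https://orchard.example.com/api/v1/admin/upstream-sources

# Test connectivity of one source by id:
curl -s -X POST -H "Authorization: Bearer $ORCHARD_API_KEY" \
  https://orchard.example.com/api/v1/admin/upstream-sources/$SOURCE_ID/test
```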
- Added system project restrictions and management (#71)
  - System projects (`_npm`, `_pypi`, etc.) cannot be deleted (returns 403)
  - System projects cannot be made private (must remain public)
  - `GET /api/v1/system-projects` endpoint to list all system cache projects
  - 5 integration tests for system project restrictions
- Added Cache API endpoint for fetching and storing artifacts from upstream URLs (#70)
  - `POST /api/v1/cache` endpoint to cache artifacts from upstream registries
  - URL parsing helpers to extract package name/version from npm, PyPI, Maven URLs
  - Automatic system project creation (`_npm`, `_pypi`, `_maven`, etc.)
  - URL-to-artifact provenance tracking via `cached_urls` table
  - Optional user project cross-referencing for custom organization
  - Cache hit returns existing artifact without re-fetching
  - Air-gap mode enforcement (blocks public URLs when disabled)
  - Hash verification for downloaded artifacts
  - 21 unit tests for URL parsing and cache endpoint
- Added HTTP client for fetching artifacts from upstream sources (#69)
  - `UpstreamClient` class in `backend/app/upstream.py` with streaming downloads
  - SHA256 hash computation while streaming (doesn't load large files into memory)
  - Auth support: none, basic auth, bearer token, API key (custom headers)
  - URL-to-source matching by URL prefix with priority ordering
  - Configuration options: timeouts, retries with exponential backoff, redirect limits, max file size
  - Air-gap mode enforcement via `allow_public_internet` setting
  - Response header capture for provenance tracking
  - Proper error handling with custom exception types
  - Connection test method for upstream source validation
  - 33 unit tests for client functionality
- Added upstream artifact caching schema for hermetic builds (#68)
  - `upstream_sources` table for configuring upstream registries (npm, PyPI, Maven, etc.)
  - `cache_settings` table for global settings including air-gap mode
  - `cached_urls` table for URL-to-artifact provenance tracking
  - `is_system` column on projects for system cache projects (_npm, _pypi, etc.)
  - Support for multiple auth types: none, basic auth, bearer token, API key
  - Fernet encryption for credentials using `ORCHARD_CACHE_ENCRYPTION_KEY`
  - Default upstream sources seeded (npm-public, pypi-public, maven-central, docker-hub) - disabled by default
  - Migration `010_upstream_caching.sql`
- Added team-based multi-tenancy for organizing projects and collaboration (#88-#104)
  - Teams serve as organizational containers for projects
  - Users can belong to multiple teams with different roles (owner, admin, member)
  - Projects can optionally belong to a team
- Added database schema for teams (#88):
  - `teams` table with id, name, slug, description, settings, timestamps
  - `team_memberships` table mapping users to teams with roles
  - `team_id` column on projects table for team association
  - Migrations `009_teams.sql` and `009b_migrate_projects.sql`
- Added Team and TeamMembership ORM models with relationships (#89)
- Added TeamAuthorizationService for team-level access control (#90):
  - Team owner/admin gets admin access to all team projects
  - Team member gets read access to team projects (upgradeable by explicit permission)
  - Role hierarchy: owner > admin > member
- Added Team API endpoints (#92, #93, #94, #95):
  - `GET /api/v1/teams` - List teams user belongs to (paginated)
  - `POST /api/v1/teams` - Create team (creator becomes owner)
  - `GET /api/v1/teams/{slug}` - Get team details
  - `PUT /api/v1/teams/{slug}` - Update team (requires admin)
  - `DELETE /api/v1/teams/{slug}` - Delete team (requires owner)
  - `GET /api/v1/teams/{slug}/members` - List team members
  - `POST /api/v1/teams/{slug}/members` - Add member (requires admin)
  - `PUT /api/v1/teams/{slug}/members/{username}` - Update member role
  - `DELETE /api/v1/teams/{slug}/members/{username}` - Remove member
  - `GET /api/v1/teams/{slug}/projects` - List team projects (paginated)
- Updated project creation to support optional team assignment (#95)
- Updated project responses to include team info (team_id, team_slug, team_name)
- Added frontend team management (#97-#104):
  - TeamContext provider for managing current team selection
  - TeamSelector dropdown component (persists selection in localStorage)
  - Teams list page at `/teams`
  - Team dashboard page at `/teams/{slug}` with inline project creation
  - Team settings page at `/teams/{slug}/settings`
  - Team members page at `/teams/{slug}/members`
  - Teams navigation link in header (authenticated users only)
- Updated seed data to create a "Demo Team" and assign all seed projects to it
- Added TypeScript types and API client functions for teams
- Access management now shows team-based permissions alongside explicit permissions
  - Team-based access displayed as read-only with "Source" column indicating origin
  - Team members with access show team slug and role
- Added integration tests for team CRUD, membership, and project operations
- Redesigned teams portal with modern card-based layout
  - Card grid view with team avatar, name, slug, role badge, and stats
  - Stats bar showing total teams, owned teams, and total projects
  - Search functionality for filtering teams (appears when >3 teams)
  - Empty states for no teams and no search results
- Added user autocomplete component for team member invitations
  - `GET /api/v1/users/search` endpoint for username prefix search
  - Dropdown shows matching users as you type
  - Keyboard navigation support (arrow keys, enter, escape)
  - Debounced search to reduce API calls
- Added unit tests for TeamAuthorizationService
- Added `ORCHARD_ADMIN_PASSWORD` environment variable to configure initial admin password (#87)
  - When set, admin user is created with the specified password (no password change required)
  - When not set, defaults to `changeme123` and requires password change on first login
- Added Helm chart support for admin password via multiple sources (#87):
  - `orchard.auth.adminPassword` - plain value (creates K8s secret)
  - `orchard.auth.existingSecret` - reference existing K8s secret
  - `orchard.auth.secretsManager` - AWS Secrets Manager integration
- Added `.env.example` template for local development (#87)
- Added `.env` file support in docker-compose.local.yml (#87)
- Added Project Settings page accessible to project admins (#65)
  - General settings section for editing description and visibility
  - Access Management section (moved from project page)
  - Danger Zone section with inline delete confirmation requiring project name
  - Settings button (gear icon) on project page header for admins
- Added artifact dependency management system (#76, #77, #78, #79, #80, #81)
  - `artifact_dependencies` table with version/tag constraints and check constraints
  - `ArtifactDependency` SQLAlchemy model with indexes for fast lookups
  - Ensure file parsing (`orchard.ensure` YAML format) during artifact upload
  - Circular dependency detection at upload time (rejected with 400)
  - Dependency conflict detection at resolution time (409 with conflict details)
- Added dependency API endpoints (#78, #79):
  - `GET /api/v1/artifact/{artifact_id}/dependencies` - Get dependencies by artifact ID
  - `GET /api/v1/project/{project}/{package}/+/{ref}/dependencies` - Get dependencies by ref
  - `GET /api/v1/project/{project}/{package}/reverse-dependencies` - Get reverse dependencies (paginated)
  - `GET /api/v1/project/{project}/{package}/+/{ref}/resolve` - Resolve full dependency tree
- Added dependency resolution with topological sorting (#79)
  - Returns flat list of all artifacts needed in dependency order
  - Includes download URLs, sizes, and version info for each artifact
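A minimal sketch of resolving a dependency tree through the #79 endpoint listed above; the host and the `demo/mylib` project/package names are placeholders, and the `tag:` ref prefix is the one documented elsewhere in this changelog (#38):

```sh
# Hypothetical resolve call; project, package, and host are placeholders.
curl -s https://orchard.example.com/api/v1/project/demo/mylib/+/tag:latest/resolve
# Per the changelog: returns a flat, topologically ordered artifact list with
# download URLs, sizes, and version info; conflicts produce a 409 response.
```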
- Added frontend dependency visualization (#84, #85, #86):
  - Dependencies section on package page showing direct dependencies for selected tag
  - Tag/version selector to switch between artifacts
  - "Used By" section showing reverse dependencies with pagination
  - Interactive dependency graph modal with:
    - Tree visualization with collapsible nodes
    - Zoom (mouse wheel + buttons) and pan (click-drag)
    - Click to navigate to package
    - Hover tooltip with package details
  - Error display for circular dependencies and conflicts
- Added migration `008_artifact_dependencies.sql` for dependency schema
- Added `dependencies.py` module with parsing, validation, and resolution logic
- Added comprehensive integration tests for all dependency features

### Changed
- Added pre-test stage reset to ensure known environment state before integration tests (#54)
- Upload endpoint now accepts optional `ensure` file parameter for declaring dependencies
- Updated upload API documentation with ensure file format and examples
- Converted teams list and team projects to use DataTable component for consistent styling
- Centered team members and team settings page content
- Added orchard logo icon and dot separator to footer

### Fixed
- Fixed dark theme styling for team pages - modals, forms, and dropdowns now use correct theme variables
- Fixed UserAutocomplete and TeamSelector dropdown backgrounds for dark theme

## [0.5.1] - 2026-01-23
### Changed
- Simplified tag pipeline to only run deploy and smoke tests (image already built on main) (#54)

### Fixed
- Fixed production CI deployment namespace to use correct `orch-namespace` (#54)
- Added gitleaks config to allowlist test files from secret scanning (#54)

## [0.5.0] - 2026-01-23
### Added
- Added factory reset endpoint `POST /api/v1/admin/factory-reset` for test environment cleanup (#54)
  - Requires admin authentication and `X-Confirm-Reset: yes-delete-all-data` header
  - Drops all database tables, clears S3 bucket, reinitializes schema, re-seeds default data
  - CI pipeline automatically calls this after integration tests on stage
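A sketch of the factory-reset call described in the entry above; the confirmation header is documented there, while the host and bearer-token auth style are assumptions:

```sh
# Hypothetical factory-reset invocation; the X-Confirm-Reset header value is
# documented above, the host and Authorization scheme are placeholders.
curl -X POST https://orchard-stage.example.com/api/v1/admin/factory-reset \
  -H "Authorization: Bearer $ORCHARD_ADMIN_KEY" \
  -H "X-Confirm-Reset: yes-delete-all-data"
```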
- Added `delete_all()` method to storage backend for bulk S3 object deletion (#54)
- Added AWS Secrets Manager CSI driver support for database credentials (#54)
- Added SecretProviderClass template for Secrets Manager integration (#54)
- Added IRSA service account annotations for prod and stage environments (#54)
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
- Added data integrity tests (binary, text, unicode, compressed content) (#38)
- Added chunk boundary tests for edge cases (#38)
- Added `@pytest.mark.large` and `@pytest.mark.concurrent` test markers (#38)
- Added `generate_content()` and `generate_content_with_hash()` test helpers (#38)
- Added `sized_content` fixture for generating test content of specific sizes (#38)
- Added upload API tests: upload without tag, artifact creation verification, S3 object creation (#38)
- Added download API tests: tag: prefix resolution, 404 for nonexistent project/package/artifact (#38)
- Added download header tests: Content-Type, Content-Length, Content-Disposition, ETag, X-Checksum-SHA256 (#38)
- Added error handling tests: timeout behavior, checksum validation, resource cleanup, graceful error responses (#38)
- Added version API tests: version creation, auto-detection, listing, download by version prefix (#38)
- Added integrity verification tests: round-trip hash verification, client-side verification workflow, size variants (1KB-10MB) (#40)
- Added consistency check endpoint tests with response format validation (#40)
- Added corruption detection tests: bit flip, truncation, appended content, size mismatch, missing S3 objects (#40)
- Added Digest header tests (RFC 3230) and verification mode tests (#40)
- Added integrity verification documentation (`docs/integrity-verification.md`) (#40)
- Added conditional request support for downloads (If-None-Match, If-Modified-Since) returning 304 Not Modified (#42)
- Added caching headers to downloads: Cache-Control (immutable), Last-Modified (#42)
- Added 416 Range Not Satisfiable response for invalid range requests (#42)
- Added download completion logging with bytes transferred and throughput (#42)
- Added client disconnect handling during streaming downloads (#42)
- Added streaming download tests: range requests, conditional requests, caching headers, download resume (#42)
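A sketch of exercising the #42 download behaviors; the headers are the ones named in the entries above, while the host and the download URL path are placeholders (this diff does not spell out the download route):

```sh
# Conditional re-download: expect 304 Not Modified while the ETag matches.
curl -i -H 'If-None-Match: "<etag-from-previous-download>"' \
  https://orchard.example.com/api/v1/project/demo/mylib/+/tag:latest

# Resume a partial download; an invalid range should return 416.
curl -i -H "Range: bytes=1024-" \
  https://orchard.example.com/api/v1/project/demo/mylib/+/tag:latest
```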
- Added upload duration and throughput metrics (`duration_ms`, `throughput_mbps`) to upload response (#43)
- Added upload progress logging for large files (hash computation and multipart upload phases) (#43)
- Added client disconnect handling during uploads with proper cleanup (#43)
- Added upload progress tracking endpoint `GET /upload/{upload_id}/progress` for resumable uploads (#43)
- Added large file upload tests (10MB, 100MB, 1GB) with multipart upload verification (#43)
- Added upload cancellation and timeout handling tests (#43)
- Added comprehensive API documentation for upload endpoints with curl, Python, and JavaScript examples (#43)
- Added `package_versions` table for immutable version tracking separate from mutable tags (#56)
  - Versions are set at upload time via explicit `version` parameter or auto-detected from filename/metadata
  - Version detection priority: explicit parameter > package metadata > filename pattern
  - Versions are immutable once created (unlike tags which can be moved)
- Added version API endpoints (#56):
  - `GET /api/v1/project/{project}/{package}/versions` - List all versions for a package
  - `GET /api/v1/project/{project}/{package}/versions/{version}` - Get specific version details
  - `DELETE /api/v1/project/{project}/{package}/versions/{version}` - Delete a version (admin only)
- Added version support to upload endpoint via `version` form parameter (#56)
- Added `version:X.Y.Z` prefix for explicit version resolution in download refs (#56)
- Added version field to tag responses (shows which version the artifact has, if any) (#56)
- Added migration `007_package_versions.sql` with ref_count triggers and data migration from semver tags (#56)
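A minimal sketch of the #56 version endpoints and the `version:` ref prefix; host, project, package, and version number are placeholders:

```sh
# List and inspect versions via the documented endpoints:
curl -s https://orchard.example.com/api/v1/project/demo/mylib/versions
curl -s https://orchard.example.com/api/v1/project/demo/mylib/versions/1.2.0
# Download by explicit version ref; per this changelog, ref resolution
# checks versions before tags (version -> tag -> artifact ID).
curl -LO https://orchard.example.com/api/v1/project/demo/mylib/+/version:1.2.0
```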
- Added production deployment job triggered by semantic version tags (v1.0.0) with manual approval gate (#63)
- Added production Helm values file with persistence enabled (20Gi PostgreSQL, 100Gi MinIO) (#63)
- Added integration tests for production deployment (#63)
- Added GitLab CI pipeline for feature branch deployments to dev namespace (#51)
- Added `deploy_feature` job with dynamic hostnames and unique release names (#51)
- Added `cleanup_feature` job with `on_stop` for automatic cleanup on merge (#51)
- Added `values-dev.yaml` Helm values for lightweight ephemeral environments (#51)
- Added main branch deployment to stage environment (#51)
- Added post-deployment integration tests (#51)
- Added internal proxy configuration for npm, pip, helm, and apt (#51)

### Changed
- Configured stage and prod to use AWS RDS instead of PostgreSQL subchart (#54)
- Configured stage and prod to use AWS S3 instead of MinIO subchart (#54)
- Changed prod deployment from manual to automatic on version tags (#54)
- Updated S3 client to support IRSA credentials when no explicit keys provided (#54)
- Changed prod image pullPolicy to Always (#54)
- Added proxy-body-size annotation to prod ingress for large uploads (#54)
- CI integration tests now run full pytest suite (~350 tests) against deployed environment instead of 3 smoke tests
- CI production deployment uses lightweight smoke tests only (no test data creation in prod)
- CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests
- Simplified deploy verification to health check only (full checks done by integration tests)
- Extracted environment URLs to global variables for maintainability
- Made `cleanup_feature` job standalone (no longer inherits deploy template dependencies)
- Renamed `integration_test_prod` to `smoke_test_prod` for clarity
- Updated download ref resolution to check versions before tags (version → tag → artifact ID) (#56)
- Deploy jobs now require all security scans to pass before deployment (added test_image, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis to dependencies) (#63)
- Increased deploy job timeout from 5m to 10m (#63)
- Added `--atomic` flag to Helm deployments for automatic rollback on failure
- Adjusted dark mode color palette to use lighter background tones for better readability and reduced eye strain (#52)
- Replaced project card grid with sortable data table on Home page for better handling of large project lists
- Replaced package card grid with sortable data table on Project page for consistency
- Replaced SortDropdown with table header sorting on Package page for consistency
- Enabled sorting on supported table columns (name, created, updated) via clickable headers
- Updated browser tab title to "Orchard" with custom favicon
- Improved pod naming: Orchard pods now named `orchard-{env}-server-*` for clarity (#51)

### Fixed
- Fixed factory reset not creating default admin user after reset (#60)
  - Admin user was only created at server startup, not after factory reset
  - CI reset job would fail to login because admin user didn't exist
- Improved reset_stage CI job reliability (#60)
  - Added application-level retry logic (3 attempts with 5s delay)
  - Added job-level retry for transient failures
  - Fixed httpx client to use proper context manager
  - Increased timeout to 120s for reset operations
- Fixed CI integration test rate limiting: added configurable `ORCHARD_LOGIN_RATE_LIMIT` env var, relaxed to 1000/minute for dev/stage
- Fixed duplicate `TestSecurityEdgeCases` class definition in test_auth_api.py
- Fixed integration tests auth: session-scoped client, configurable credentials via env vars, fail-fast on auth errors
- Fixed 413 Request Entity Too Large errors on uploads by adding `proxy-body-size: "0"` nginx annotation to Orchard ingress
- Fixed CI tests that require direct S3 access: added `@pytest.mark.requires_direct_s3` marker and excluded from CI
- Fixed ref_count triggers not being created: added auto-migration for tags ref_count trigger functions
- Fixed Content-Disposition header encoding for non-ASCII filenames using RFC 5987 (#38)
- Fixed deploy jobs running even when tests or security scans fail (changed rules from `when: always` to `when: on_success`) (#63)
- Fixed python_tests job not using internal PyPI proxy (#63)
- Fixed `cleanup_feature` job failing when branch is deleted (`GIT_STRATEGY: none`) (#51)
- Fixed gitleaks false positives with fingerprints for historical commits (#51)
- Fixed integration tests running when deploy fails (`when: on_success`) (#51)
- Fixed static file serving for favicon and other files in frontend dist root
- Fixed deploy jobs running when secrets scan fails (added `secrets` to deploy dependencies)
- Fixed dev environment memory requests to equal limits per cluster Kyverno policy
- Fixed init containers missing resource limits (Kyverno policy compliance)
- Fixed Python SyntaxWarning for invalid escape sequence in database migration regex pattern

### Removed
- Removed unused `store_streaming()` method from storage.py (#51)
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)

## [0.4.0] - 2026-01-12
### Added
- Added user authentication system with session-based login (#50)
  - `users` table with password hashing (bcrypt), admin flag, active status
  - `sessions` table for web login sessions (24-hour expiry)
  - `auth_settings` table for future OIDC configuration
  - Default admin user created on first boot (username: admin, password: admin)
- Added auth API endpoints (#50)
  - `POST /api/v1/auth/login` - Login with username/password
  - `POST /api/v1/auth/logout` - Logout and clear session
  - `GET /api/v1/auth/me` - Get current user info
  - `POST /api/v1/auth/change-password` - Change own password
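A minimal sketch of the session login flow listed above; the host is a placeholder and the JSON body shape is an assumption (the endpoint is documented only as username/password login with a session):

```sh
# Hypothetical login call; the default first-boot credentials come from the
# entry above, the request body field names are assumed.
curl -s -c cookies.txt -X POST https://orchard.example.com/api/v1/auth/login \
  -H "Content-Type: application/json" \
  -d '{"username": "admin", "password": "admin"}'
# Reuse the session cookie to fetch the current user:
curl -s -b cookies.txt https://orchard.example.com/api/v1/auth/me
```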
- Added API key management with user ownership (#50)
  - `POST /api/v1/auth/keys` - Create API key (format: `orch_<random>`)
  - `GET /api/v1/auth/keys` - List user's API keys
  - `DELETE /api/v1/auth/keys/{id}` - Revoke API key
  - Added `owner_id`, `scopes`, `description` columns to `api_keys` table
- Added admin user management endpoints (#50)
  - `GET /api/v1/admin/users` - List all users
  - `POST /api/v1/admin/users` - Create user
  - `GET /api/v1/admin/users/{username}` - Get user details
  - `PUT /api/v1/admin/users/{username}` - Update user (admin/active status)
  - `POST /api/v1/admin/users/{username}/reset-password` - Reset password
- Added `auth.py` module with AuthService class and FastAPI dependencies (#50)
- Added auth schemas: LoginRequest, LoginResponse, UserResponse, APIKeyResponse (#50)
- Added migration `006_auth_tables.sql` for auth database tables (#50)
- Added frontend Login page with session management (#50)
- Added frontend API Keys management page (#50)
- Added frontend Admin Users page (admin-only) (#50)
- Added AuthContext for frontend session state (#50)
- Added user menu to Layout header with login/logout (#50)
- Added 15 integration tests for auth system (#50)
- Added reusable `DragDropUpload` component for artifact uploads (#8)
  - Drag-and-drop file selection with visual feedback
  - Click-to-browse fallback
  - Multiple file upload support with queue management
  - Real-time progress indicators with speed and ETA
  - File type and size validation (configurable)
  - Concurrent upload handling (configurable max concurrent)
  - Automatic retry with exponential backoff for network errors
  - Individual file status (pending, uploading, complete, failed)
  - Retry and remove actions per file
  - Auto-dismiss success messages after 5 seconds
- Integrated DragDropUpload into PackagePage replacing basic file input (#8)
- Added frontend testing infrastructure with Vitest and React Testing Library (#14)
  - Configured Vitest for React/TypeScript with jsdom
  - Added 24 unit tests for DragDropUpload component
  - Tests cover: rendering, drag-drop events, file validation, upload queue, progress, errors
- Added chunked upload support for large files (#9)
  - Files >100MB automatically use chunked upload API (10MB chunks)
  - Client-side SHA256 hash computation via Web Crypto API
  - localStorage persistence for resume after browser close
  - Deduplication check at upload init phase
- Added offline detection and network resilience (#12)
  - Automatic pause when browser goes offline
  - Auto-resume when connection restored
  - Offline banner UI with status message
  - XHR abort on network loss to prevent hung requests
- Added download by artifact ID feature (#10)
  - Direct artifact ID input field on package page
  - Hex-only input validation with character count
  - File size and filename displayed in tag list
- Added backend security tests (#15)
  - Path traversal prevention tests for upload/download
  - Malformed request handling tests
  - Checksum validation tests
  - 10 new security-focused integration tests
- Added download verification with `verify` and `verify_mode` query parameters (#26)
  - `?verify=true&verify_mode=pre` - Pre-verification: verify before streaming (guaranteed no corrupt data)
  - `?verify=true&verify_mode=stream` - Streaming verification: verify while streaming (logs error if mismatch)
- Added checksum response headers to all download endpoints (#27)
|
||||
- `X-Checksum-SHA256` - SHA256 hash of the artifact
|
||||
- `X-Content-Length` - File size in bytes
|
||||
- `X-Checksum-MD5` - MD5 hash (if available)
|
||||
- `ETag` - Artifact ID (SHA256)
|
||||
- `Digest` - RFC 3230 format sha-256 hash (base64)
|
||||
- `X-Verified` - Verification status (true/false/pending)
|
||||
- Added `checksum.py` module with SHA256 utilities (#26)
|
||||
- `compute_sha256()` and `compute_sha256_stream()` functions
|
||||
- `HashingStreamWrapper` for incremental hash computation
|
||||
- `VerifyingStreamWrapper` for stream verification
|
||||
- `verify_checksum()` and `verify_checksum_strict()` functions
|
||||
- `ChecksumMismatchError` exception with context
|
||||
- Added `get_verified()` and `get_stream_verified()` methods to storage layer (#26)
|
||||
- Added `logging_config.py` module with structured logging (#28)
|
||||
- JSON logging format for production
|
||||
- Request ID tracking via context variables
|
||||
- Verification failure logging with full context
|
||||
- Added `log_level` and `log_format` settings to configuration (#28)
|
||||
- Added 62 unit tests for checksum utilities and verification (#29)
|
||||
- Added 17 integration tests for download verification API (#29)
|
||||
- Added global artifacts endpoint `GET /api/v1/artifacts` with project/package/tag/size/date filters (#18)
|
||||
- Added global tags endpoint `GET /api/v1/tags` with project/package/search/date filters (#18)
|
||||
- Added wildcard pattern matching (`*`) for tag filters across all endpoints (#18)
|
||||
- Added comma-separated multi-value support for tag filters (#18)
|
||||
- Added `search` parameter to `/api/v1/uploads` for filename search (#18)
|
||||
- Added `tag` filter to `/api/v1/uploads` endpoint (#18)
|
||||
- Added `sort` and `order` parameters to `/api/v1/uploads` endpoint (#18)
|
||||
- Added `min_size` and `max_size` filters to package artifacts endpoint (#18)
|
||||
- Added `sort` and `order` parameters to package artifacts endpoint (#18)
|
||||
- Added `from` and `to` date filters to package tags endpoint (#18)
|
||||
- Added `GlobalArtifactResponse` and `GlobalTagResponse` schemas (#18)
|
||||
- Added S3 object verification before database commit during upload (#19)
|
||||
- Added S3 object cleanup on database commit failure (#19)
|
||||
- Added upload duration tracking (`duration_ms` field) (#19)
|
||||
- Added `User-Agent` header capture during uploads (#19)
|
||||
- Added `X-Checksum-SHA256` header support for client-side checksum verification (#19)
|
||||
- Added `status`, `error_message`, `client_checksum` columns to uploads table (#19)
|
||||
- Added `upload_locks` table for future concurrent upload conflict detection (#19)
|
||||
- Added consistency check endpoint `GET /api/v1/admin/consistency-check` (#19)
|
||||
- Added `PUT /api/v1/projects/{project}` endpoint for project updates with audit logging (#20)
|
||||
- Added `PUT /api/v1/project/{project}/packages/{package}` endpoint for package updates with audit logging (#20)
|
||||
- Added `artifact.download` audit logging to download endpoint (#20)
|
||||
- Added `ProjectHistory` and `PackageHistory` models with database triggers (#20)
|
||||
- Added migration `004_history_tables.sql` for project/package history (#20)
|
||||
- Added migration `005_upload_enhancements.sql` for upload status tracking (#19)
|
||||
- Added 9 integration tests for global artifacts/tags endpoints (#18)
|
||||
- Added global uploads query endpoint `GET /api/v1/uploads` with project/package/user/date filters (#18)
|
||||
- Added project-level uploads endpoint `GET /api/v1/project/{project}/uploads` (#18)
|
||||
- Added `has_more` field to pagination metadata for easier pagination UI (#18)
|
||||
- Added `upload_id`, `content_type`, `original_name`, `created_at` fields to upload response (#19)
|
||||
- Added audit log API endpoints with filtering and pagination (#20)
|
||||
- `GET /api/v1/audit-logs` - list all audit logs with action/resource/user/date filters
|
||||
- `GET /api/v1/projects/{project}/audit-logs` - project-scoped audit logs
|
||||
- `GET /api/v1/project/{project}/{package}/audit-logs` - package-scoped audit logs
|
||||
- Added upload history API endpoints (#20)
|
||||
- `GET /api/v1/project/{project}/{package}/uploads` - list upload events for a package
|
||||
- `GET /api/v1/artifact/{id}/uploads` - list all uploads of a specific artifact
|
||||
- Added artifact provenance endpoint `GET /api/v1/artifact/{id}/history` (#20)
|
||||
- Returns full artifact history including packages, tags, and upload events
|
||||
- Added audit logging for project.create, package.create, tag.create, tag.update, artifact.upload actions (#20)
|
||||
- Added `AuditLogResponse`, `UploadHistoryResponse`, `ArtifactProvenanceResponse` schemas (#20)
|
||||
- Added `TagHistoryDetailResponse` schema with artifact metadata (#20)
|
||||
- Added 31 integration tests for audit log, history, and upload query endpoints (#22)
|
||||
### Changed

- Standardized audit action naming to `{entity}.{action}` pattern (project.delete, package.delete, tag.delete) (#20)
- Added `StorageBackend` protocol/interface for backend-agnostic storage (#33)
- Added `health_check()` method to storage backend with `/health` endpoint integration (#33)
- Added `verify_integrity()` method for post-upload hash validation (#33)
- Added S3 configuration options: `s3_verify_ssl`, `s3_connect_timeout`, `s3_read_timeout`, `s3_max_retries` (#33)
- Added `S3StorageUnavailableError` and `HashCollisionError` exception types (#33)
- Added hash collision detection by comparing file sizes during deduplication (#33)
- Added garbage collection endpoint `POST /api/v1/admin/garbage-collect` for orphaned artifacts (#36)
- Added orphaned artifacts listing endpoint `GET /api/v1/admin/orphaned-artifacts` (#36)
- Added global storage statistics endpoint `GET /api/v1/stats` (#34)
- Added storage breakdown endpoint `GET /api/v1/stats/storage` (#34)
- Added deduplication metrics endpoint `GET /api/v1/stats/deduplication` (#34)
- Added per-project statistics endpoint `GET /api/v1/projects/{project}/stats` (#34)
- Added per-package statistics endpoint `GET /api/v1/project/{project}/packages/{package}/stats` (#34)
- Added per-artifact statistics endpoint `GET /api/v1/artifact/{id}/stats` (#34)
- Added cross-project deduplication endpoint `GET /api/v1/stats/cross-project` (#34)
- Added timeline statistics endpoint `GET /api/v1/stats/timeline` with daily/weekly/monthly periods (#34)
- Added stats export endpoint `GET /api/v1/stats/export` with JSON/CSV formats (#34)
- Added summary report endpoint `GET /api/v1/stats/report` with markdown/JSON formats (#34)
- Added Dashboard page at `/dashboard` with storage and deduplication visualizations (#34)
- Added pytest infrastructure with mock S3 client for unit testing (#35)
- Added unit tests for SHA256 hash calculation (#35)
- Added unit tests for duplicate detection and deduplication behavior (#35)
- Added integration tests for upload scenarios and ref_count management (#35)
- Added integration tests for S3 verification and failure cleanup (#35)
- Added integration tests for all stats endpoints (#35)
- Added integration tests for cascade deletion ref_count behavior (package/project delete) (#35)
- Added integration tests for tag update ref_count adjustments (#35)
- Added integration tests for garbage collection endpoints (#35)
- Added integration tests for file size validation (#35)
- Added test dependencies to requirements.txt (pytest, pytest-asyncio, pytest-cov, httpx, moto) (#35)
- Added `ORCHARD_MAX_FILE_SIZE` config option (default: 10GB) for upload size limits (#37)
- Added `ORCHARD_MIN_FILE_SIZE` config option (default: 1 byte, rejects empty files) (#37)
- Added file size validation to upload and resumable upload endpoints (#37)
- Added comprehensive deduplication design document (`docs/design/deduplication-design.md`) (#37)

### Fixed

- Fixed Helm chart `minio.ingress` conflicting with Bitnami MinIO subchart by renaming to `minioIngress` (#48)
- Fixed JSON report serialization error for Decimal types in `GET /api/v1/stats/report` (#34)
- Fixed resumable upload double-counting ref_count when tag provided (removed manual increment, SQL triggers handle it) (#35)

## [0.3.0] - 2025-12-15

### Changed

- Changed default download mode from `proxy` to `presigned` for better performance (#48)

### Added

- Added presigned URL support for direct S3 downloads (#48)
- Added `ORCHARD_DOWNLOAD_MODE` config option (`presigned`, `redirect`, `proxy`) (#48)
- Added `ORCHARD_PRESIGNED_URL_EXPIRY` config option (default: 3600 seconds) (#48)
- Added `?mode=` query parameter to override download mode per-request (#48)
- Added `/api/v1/project/{project}/{package}/+/{ref}/url` endpoint for getting presigned URLs (#48)
- Added `PresignedUrlResponse` schema with URL, expiry, checksums, and artifact metadata (#48)
- Added MinIO ingress support in Helm chart for presigned URL access (#48)
- Added `orchard.download.mode` and `orchard.download.presignedUrlExpiry` Helm values (#48)
- Added integrity verification workflow design document (#24)
- Added `sha256` field to API responses for clarity (alias of `id`) (#25)
- Added `checksum_sha1` field to artifacts table for compatibility (#25)
- Added `s3_etag` field to artifacts table for S3 verification (#25)
- Compute and store MD5, SHA1, and S3 ETag alongside SHA256 during upload (#25)
- Added `Dockerfile.local` and `docker-compose.local.yml` for local development (#25)
- Added migration script `003_checksum_fields.sql` for existing databases (#25)

## [0.2.0] - 2025-12-15

### Added

- Added `format` and `platform` fields to packages table (#16)
- Added `checksum_md5` and `metadata` JSONB fields to artifacts table (#16)
- Added `updated_at` field to tags table (#16)
- Added `tag_name`, `user_agent`, `duration_ms`, `deduplicated`, `checksum_verified` fields to uploads table (#16)
- Added `change_type` field to tag_history table (#16)
- Added composite indexes for common query patterns (#16)
- Added GIN indexes on JSONB fields for efficient JSON queries (#16)
- Added partial index for public projects (#16)
- Added database triggers for `updated_at` timestamps (#16)
- Added database triggers for maintaining artifact `ref_count` accuracy (#16)
- Added CHECK constraints for data integrity (`size > 0`, `ref_count >= 0`) (#16)
- Added migration script `002_schema_enhancements.sql` for existing databases (#16)

### Changed

- Updated images to use internal container BSF proxy (#46)

## [0.1.0] - 2025-12-12

### Added

- Added Prosper docker template config (#45)

### Changed

- Changed the Dockerfile npm build arg to use the deps.global.bsf.tools URL as the default registry (#45)

Dockerfile (19 lines changed)

@@ -1,7 +1,7 @@
 # Frontend build stage
-FROM node:20-alpine AS frontend-builder
+FROM containers.global.bsf.tools/node:20-alpine AS frontend-builder

-ARG NPM_REGISTRY
+ARG NPM_REGISTRY=https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org

 WORKDIR /app/frontend

@@ -19,9 +19,20 @@ COPY frontend/ ./
 RUN npm run build

 # Runtime stage
-FROM python:3.12-slim
+FROM containers.global.bsf.tools/python:3.12-slim
+
+ARG PIP_INDEX_URL=https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple
+
+# Configure apt to use internal Debian mirrors only (trixie = Debian testing)
+RUN printf 'deb https://deps.global.bsf.tools/artifactory/deb.debian.org-debian trixie main\n\
+deb https://deps.global.bsf.tools/artifactory/security.debian.org-debian-security trixie-security main\n' > /etc/apt/sources.list \
+    && rm -rf /etc/apt/sources.list.d/* /var/lib/apt/lists/*
+
+# Disable proxy cache
+RUN printf 'Acquire::http::Pipeline-Depth 0;\nAcquire::http::No-Cache true;\nAcquire::BrokenProxy true;\n' > /etc/apt/apt.conf.d/99fixbadproxy

 # Install system dependencies
+# hadolint ignore=DL3008
 RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
     && rm -rf /var/lib/apt/lists/*

@@ -34,7 +45,7 @@ WORKDIR /app
 # Copy requirements and install Python dependencies
 COPY backend/requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir --index-url "$PIP_INDEX_URL" -r requirements.txt

 # Copy backend source
 COPY backend/ ./backend/

Dockerfile.local (new file, 50 lines)

@@ -0,0 +1,50 @@
# Frontend build stage
FROM node:20-alpine AS frontend-builder

WORKDIR /app/frontend

# Copy package files
COPY frontend/package*.json ./
RUN npm install

# Copy frontend source
COPY frontend/ ./

# Build frontend
RUN npm run build

# Runtime stage
FROM python:3.12-slim

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user
RUN groupadd -g 1000 orchard && \
    useradd -u 1000 -g orchard -s /bin/bash -m orchard

WORKDIR /app

# Copy requirements and install Python dependencies
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy backend source
COPY backend/ ./backend/

# Copy frontend build
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist

# Set ownership
RUN chown -R orchard:orchard /app

USER orchard

EXPOSE 8080

HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8080"]

README.md (467 lines changed)

@@ -22,12 +22,37 @@ Orchard is a centralized binary artifact storage system that provides content-ad
- **Package** - Named collection within a project
- **Artifact** - Specific content instance identified by SHA256
- **Tags** - Alias system for referencing artifacts by human-readable names (e.g., `v1.0.0`, `latest`, `stable`)
- **Versions** - Immutable version records set at upload time (explicit or auto-detected from filename/metadata), separate from mutable tags
- **Package Formats & Platforms** - Packages can be tagged with format (npm, pypi, docker, deb, rpm, etc.) and platform (linux, darwin, windows, etc.)
- **Rich Package Metadata** - Package listings include aggregated stats (tag count, artifact count, total size, latest tag)
- **S3-Compatible Backend** - Uses MinIO (or any S3-compatible storage) for artifact storage
- **PostgreSQL Metadata** - Relational database for metadata, access control, and audit trails
- **REST API** - Full HTTP API for all operations
- **Web UI** - React-based interface for managing artifacts
- **Web UI** - React-based interface for managing artifacts with:
  - Hierarchical navigation (Projects → Packages → Tags/Artifacts)
  - Search, sort, and filter capabilities on all list views
  - URL-based state persistence for filters and pagination
  - Keyboard navigation (Backspace to go up hierarchy)
  - Copy-to-clipboard for artifact IDs
  - Responsive design for mobile and desktop
- **Docker Compose Setup** - Easy local development environment
- **Helm Chart** - Kubernetes deployment with PostgreSQL, MinIO, and Redis subcharts
- **Multipart Upload** - Automatic multipart upload for files larger than 100MB
- **Resumable Uploads** - API for resumable uploads with part-by-part upload support
- **Range Requests** - HTTP range request support for partial downloads
- **Format-Specific Metadata** - Automatic extraction of metadata from package formats:
  - `.deb` - Debian packages (name, version, architecture, maintainer)
  - `.rpm` - RPM packages (name, version, release, architecture)
  - `.tar.gz/.tgz` - Tarballs (name, version from filename)
  - `.whl` - Python wheels (name, version, author)
  - `.jar` - Java JARs (manifest info, Maven coordinates)
  - `.zip` - ZIP files (file count, uncompressed size)
- **Authentication** - Multiple authentication methods:
  - Session-based login with username/password
  - API keys for programmatic access (`orch_` prefixed tokens)
  - OIDC integration for SSO
  - Admin user management
- **Garbage Collection** - Clean up orphaned artifacts (ref_count=0) via admin API

### API Endpoints

@@ -38,25 +63,48 @@ Orchard is a centralized binary artifact storage system that provides content-ad
| `GET` | `/api/v1/projects` | List all projects |
| `POST` | `/api/v1/projects` | Create a new project |
| `GET` | `/api/v1/projects/:project` | Get project details |
| `GET` | `/api/v1/project/:project/packages` | List packages in a project |
| `GET` | `/api/v1/project/:project/packages` | List packages (with pagination, search, filtering) |
| `GET` | `/api/v1/project/:project/packages/:package` | Get single package with metadata |
| `POST` | `/api/v1/project/:project/packages` | Create a new package |
| `POST` | `/api/v1/project/:project/:package/upload` | Upload an artifact |
| `GET` | `/api/v1/project/:project/:package/+/:ref` | Download an artifact |
| `GET` | `/api/v1/project/:project/:package/tags` | List all tags |
| `GET` | `/api/v1/project/:project/:package/+/:ref` | Download an artifact (supports Range header, mode param) |
| `GET` | `/api/v1/project/:project/:package/+/:ref/url` | Get presigned URL for direct S3 download |
| `HEAD` | `/api/v1/project/:project/:package/+/:ref` | Get artifact metadata without downloading |
| `GET` | `/api/v1/project/:project/:package/tags` | List tags (with pagination, search, sorting, artifact metadata) |
| `POST` | `/api/v1/project/:project/:package/tags` | Create a tag |
| `GET` | `/api/v1/project/:project/:package/tags/:tag_name` | Get single tag with artifact metadata |
| `GET` | `/api/v1/project/:project/:package/tags/:tag_name/history` | Get tag change history |
| `GET` | `/api/v1/project/:project/:package/versions` | List all versions for a package |
| `GET` | `/api/v1/project/:project/:package/versions/:version` | Get specific version details |
| `DELETE` | `/api/v1/project/:project/:package/versions/:version` | Delete a version (admin only) |
| `GET` | `/api/v1/project/:project/:package/artifacts` | List artifacts in package (with filtering) |
| `GET` | `/api/v1/project/:project/:package/consumers` | List consumers of a package |
| `GET` | `/api/v1/artifact/:id` | Get artifact metadata by hash |
| `GET` | `/api/v1/artifact/:id` | Get artifact metadata with referencing tags |

#### Resumable Upload Endpoints

For large files, use the resumable upload API:

| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/api/v1/project/:project/:package/upload/init` | Initialize resumable upload |
| `PUT` | `/api/v1/project/:project/:package/upload/:upload_id/part/:part_number` | Upload a part |
| `POST` | `/api/v1/project/:project/:package/upload/:upload_id/complete` | Complete upload |
| `DELETE` | `/api/v1/project/:project/:package/upload/:upload_id` | Abort upload |
| `GET` | `/api/v1/project/:project/:package/upload/:upload_id/status` | Get upload status |

### Reference Formats

When downloading artifacts, the `:ref` parameter supports multiple formats:

- `latest` - Tag name directly
- `v1.0.0` - Version tag
- `latest` - Implicit lookup (checks version first, then tag, then artifact ID)
- `v1.0.0` - Implicit lookup (version takes precedence over tag with same name)
- `version:1.0.0` - Explicit version reference
- `tag:stable` - Explicit tag reference
- `version:2024.1` - Version reference
- `artifact:a3f5d8e12b4c6789...` - Direct SHA256 hash reference

**Resolution order for implicit refs:** version → tag → artifact ID

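The resolution rules are easiest to see with explicit refs. A minimal sketch, reusing the `my-project`/`releases` names from the Quick Start examples below; the `1.0.0` tag and version here are hypothetical:

```bash
# Ambiguous name "1.0.0": implicit lookup checks versions first, then tags
curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/1.0.0

# Force tag resolution even if a version with the same name exists
curl -OJ "http://localhost:8080/api/v1/project/my-project/releases/+/tag:1.0.0"

# Force version resolution
curl -OJ "http://localhost:8080/api/v1/project/my-project/releases/+/version:1.0.0"
```
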
## Quick Start

### Prerequisites

@@ -128,15 +176,76 @@ curl -X POST http://localhost:8080/api/v1/projects \

```bash
curl -X POST http://localhost:8080/api/v1/project/my-project/packages \
  -H "Content-Type: application/json" \
  -d '{"name": "releases", "description": "Release builds"}'
  -d '{"name": "releases", "description": "Release builds", "format": "generic", "platform": "any"}'
```

Supported formats: `generic`, `npm`, `pypi`, `docker`, `deb`, `rpm`, `maven`, `nuget`, `helm`

Supported platforms: `any`, `linux`, `darwin`, `windows`, `linux-amd64`, `linux-arm64`, `darwin-amd64`, `darwin-arm64`, `windows-amd64`

### List Packages

```bash
# Basic listing
curl http://localhost:8080/api/v1/project/my-project/packages

# With pagination
curl "http://localhost:8080/api/v1/project/my-project/packages?page=1&limit=10"

# With search
curl "http://localhost:8080/api/v1/project/my-project/packages?search=release"

# With sorting
curl "http://localhost:8080/api/v1/project/my-project/packages?sort=created_at&order=desc"

# Filter by format/platform
curl "http://localhost:8080/api/v1/project/my-project/packages?format=npm&platform=linux"
```

Response includes aggregated metadata:
```json
{
  "items": [
    {
      "id": "uuid",
      "name": "releases",
      "description": "Release builds",
      "format": "generic",
      "platform": "any",
      "tag_count": 5,
      "artifact_count": 3,
      "total_size": 1048576,
      "latest_tag": "v1.0.0",
      "latest_upload_at": "2025-01-01T00:00:00Z",
      "recent_tags": [...]
    }
  ],
  "pagination": {"page": 1, "limit": 20, "total": 1, "total_pages": 1}
}
```

### Get Single Package

```bash
curl http://localhost:8080/api/v1/project/my-project/packages/releases

# Include all tags (not just recent 5)
curl "http://localhost:8080/api/v1/project/my-project/packages/releases?include_tags=true"
```

### Upload an Artifact

```bash
# Upload with tag only (version auto-detected from filename)
curl -X POST http://localhost:8080/api/v1/project/my-project/releases/upload \
  -F "file=@./build/app-v1.0.0.tar.gz" \
  -F "tag=v1.0.0"
  -F "tag=latest"

# Upload with explicit version and tag
curl -X POST http://localhost:8080/api/v1/project/my-project/releases/upload \
  -F "file=@./build/app-v1.0.0.tar.gz" \
  -F "tag=latest" \
  -F "version=1.0.0"
```

Response:

@@ -146,23 +255,106 @@ Response:

```json
{
  "size": 1048576,
  "project": "my-project",
  "package": "releases",
  "tag": "v1.0.0"
  "tag": "latest",
  "version": "1.0.0",
  "version_source": "explicit",
  "format_metadata": {
    "format": "tarball",
    "package_name": "app",
    "version": "1.0.0"
  },
  "deduplicated": false
}
```

### Resumable Upload (for large files)

For files larger than 100MB, use the resumable upload API:

```bash
# 1. Initialize upload (client must compute SHA256 hash first)
curl -X POST http://localhost:8080/api/v1/project/my-project/releases/upload/init \
  -H "Content-Type: application/json" \
  -d '{
    "expected_hash": "a3f5d8e12b4c67890abcdef1234567890abcdef1234567890abcdef12345678",
    "filename": "large-file.tar.gz",
    "size": 524288000,
    "tag": "v2.0.0"
  }'

# Response: {"upload_id": "abc123", "already_exists": false, "chunk_size": 10485760}

# 2. Upload parts (10MB chunks recommended)
curl -X PUT http://localhost:8080/api/v1/project/my-project/releases/upload/abc123/part/1 \
  --data-binary @chunk1.bin

# 3. Complete the upload
curl -X POST http://localhost:8080/api/v1/project/my-project/releases/upload/abc123/complete \
  -H "Content-Type: application/json" \
  -d '{"tag": "v2.0.0"}'
```

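Step 2 is easy to script for many parts. A minimal sketch that cuts the file into the 10 MB chunks suggested by the init response and uploads them in order; the `abc123` upload ID is the example value from the response above, and `sha256sum` is assumed for the init hash:

```bash
# Hash for the init call
sha256sum large-file.tar.gz

# Cut the file into 10MB parts: chunk-aa, chunk-ab, ...
split -b 10485760 large-file.tar.gz chunk-

# Upload each part in order, numbering from 1
part=1
for f in chunk-*; do
  curl -X PUT \
    "http://localhost:8080/api/v1/project/my-project/releases/upload/abc123/part/$part" \
    --data-binary "@$f"
  part=$((part + 1))
done
```
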
### Download an Artifact

```bash
# By tag
curl -O http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0
# By tag (use -OJ to save with the correct filename from Content-Disposition header)
curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0

# By artifact ID
curl -O http://localhost:8080/api/v1/project/my-project/releases/+/artifact:a3f5d8e12b4c6789...
curl -OJ http://localhost:8080/api/v1/project/my-project/releases/+/artifact:a3f5d8e12b4c6789...

# Using the short URL pattern
curl -O http://localhost:8080/project/my-project/releases/+/latest
curl -OJ http://localhost:8080/project/my-project/releases/+/latest

# Save to a specific filename
curl -o myfile.tar.gz http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0

# Partial download (range request)
curl -H "Range: bytes=0-1023" http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0

# Check file info without downloading (HEAD request)
curl -I http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0

# Download with specific mode (presigned, redirect, or proxy)
curl "http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0?mode=proxy"

# Get presigned URL for direct S3 download
curl http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0/url
```

> **Note on curl flags:**
> - `-O` saves the file using the URL path as the filename (e.g., `latest`, `v1.0.0`)
> - `-J` tells curl to use the filename from the `Content-Disposition` header (e.g., `app-v1.0.0.tar.gz`)
> - `-OJ` combines both: download to a file using the server-provided filename
> - `-o <filename>` saves to a specific filename you choose

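Download responses also carry checksum headers such as `X-Checksum-SHA256` (see the changelog entries above), which makes end-to-end verification a short script. A minimal sketch using proxy mode so the bytes stream through the backend, assuming GNU coreutils' `sha256sum`:

```bash
# Download the artifact and capture the response headers
curl -sD headers.txt -o app-v1.0.0.tar.gz \
  "http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0?mode=proxy"

# Compare the server-reported hash against the local file
expected=$(awk 'tolower($1) == "x-checksum-sha256:" {print $2}' headers.txt | tr -d '\r')
echo "$expected  app-v1.0.0.tar.gz" | sha256sum -c -
```
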
#### Download Modes

Orchard supports three download modes, configurable via `ORCHARD_DOWNLOAD_MODE` or per-request with `?mode=`:

| Mode | Description | Use Case |
|------|-------------|----------|
| `presigned` (default) | Returns JSON with a presigned S3 URL | Clients that handle redirects themselves, web UIs |
| `redirect` | Returns HTTP 302 redirect to presigned S3 URL | Simple clients, browsers, wget |
| `proxy` | Streams content through the backend | When S3 isn't directly accessible to clients |

**Presigned URL Response:**
```json
{
  "url": "https://minio.example.com/bucket/...",
  "expires_at": "2025-01-01T01:00:00Z",
  "method": "GET",
  "artifact_id": "a3f5d8e...",
  "size": 1048576,
  "content_type": "application/gzip",
  "original_name": "app-v1.0.0.tar.gz",
  "checksum_sha256": "a3f5d8e...",
  "checksum_md5": "d41d8cd..."
}
```

> **Note:** For presigned URLs to work, clients must be able to reach the S3 endpoint directly. In Kubernetes, this requires exposing MinIO via ingress (see Helm configuration below).

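The presigned flow is easy to script as a two-step fetch. A sketch that reads the `url` and `original_name` fields from the response above, assuming `jq` is installed:

```bash
# 1. Ask Orchard for a presigned S3 URL (response schema shown above)
presigned=$(curl -s http://localhost:8080/api/v1/project/my-project/releases/+/v1.0.0/url)

# 2. Download straight from S3 using the returned URL and original filename
url=$(echo "$presigned" | jq -r .url)
name=$(echo "$presigned" | jq -r .original_name)
curl -o "$name" "$url"
```
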
### Create a Tag

@@ -171,12 +363,151 @@ curl -X POST http://localhost:8080/api/v1/project/my-project/releases/tags \

```bash
curl -X POST http://localhost:8080/api/v1/project/my-project/releases/tags \
  -d '{"name": "stable", "artifact_id": "a3f5d8e12b4c6789..."}'
```

### List Tags

```bash
# Basic listing with artifact metadata
curl http://localhost:8080/api/v1/project/my-project/releases/tags

# With pagination
curl "http://localhost:8080/api/v1/project/my-project/releases/tags?page=1&limit=10"

# Search by tag name
curl "http://localhost:8080/api/v1/project/my-project/releases/tags?search=v1"

# Sort by created_at descending
curl "http://localhost:8080/api/v1/project/my-project/releases/tags?sort=created_at&order=desc"
```

Response includes artifact metadata:
```json
{
  "items": [
    {
      "id": "uuid",
      "package_id": "uuid",
      "name": "v1.0.0",
      "artifact_id": "a3f5d8e...",
      "created_at": "2025-01-01T00:00:00Z",
      "created_by": "user",
      "artifact_size": 1048576,
      "artifact_content_type": "application/gzip",
      "artifact_original_name": "app-v1.0.0.tar.gz",
      "artifact_created_at": "2025-01-01T00:00:00Z",
      "artifact_format_metadata": {}
    }
  ],
  "pagination": {"page": 1, "limit": 20, "total": 1, "total_pages": 1}
}
```

### Get Single Tag

```bash
curl http://localhost:8080/api/v1/project/my-project/releases/tags/v1.0.0
```

### Get Tag History

```bash
curl http://localhost:8080/api/v1/project/my-project/releases/tags/latest/history
```

Returns list of artifact changes for the tag (most recent first).

### List Versions

```bash
# Basic listing
curl http://localhost:8080/api/v1/project/my-project/releases/versions

# With pagination and sorting
curl "http://localhost:8080/api/v1/project/my-project/releases/versions?sort=version&order=desc"
```

Response includes tags pointing to each version's artifact:
```json
{
  "items": [
    {
      "id": "uuid",
      "package_id": "uuid",
      "version": "1.0.0",
      "version_source": "explicit",
      "artifact_id": "a3f5d8e...",
      "size": 1048576,
      "content_type": "application/gzip",
      "original_name": "app-v1.0.0.tar.gz",
      "created_at": "2025-01-01T00:00:00Z",
      "created_by": "user",
      "tags": ["latest", "stable"]
    }
  ],
  "pagination": {"page": 1, "limit": 20, "total": 1, "total_pages": 1}
}
```

### List Artifacts in Package

```bash
# Basic listing
curl http://localhost:8080/api/v1/project/my-project/releases/artifacts

# Filter by content type
curl "http://localhost:8080/api/v1/project/my-project/releases/artifacts?content_type=application/gzip"

# Filter by date range
curl "http://localhost:8080/api/v1/project/my-project/releases/artifacts?created_after=2025-01-01T00:00:00Z"
```

Response includes tags pointing to each artifact:
```json
{
  "items": [
    {
      "id": "a3f5d8e...",
      "size": 1048576,
      "content_type": "application/gzip",
      "original_name": "app-v1.0.0.tar.gz",
      "created_at": "2025-01-01T00:00:00Z",
      "created_by": "user",
      "format_metadata": {},
      "tags": ["v1.0.0", "latest", "stable"]
    }
  ],
  "pagination": {"page": 1, "limit": 20, "total": 1, "total_pages": 1}
}
```

### Get Artifact by ID

```bash
curl http://localhost:8080/api/v1/artifact/a3f5d8e12b4c67890abcdef1234567890abcdef1234567890abcdef12345678
```

Response includes all tags/packages referencing the artifact:
```json
{
  "id": "a3f5d8e...",
  "size": 1048576,
  "content_type": "application/gzip",
  "original_name": "app-v1.0.0.tar.gz",
  "created_at": "2025-01-01T00:00:00Z",
  "created_by": "user",
  "ref_count": 2,
  "format_metadata": {},
  "tags": [
    {
      "id": "uuid",
      "name": "v1.0.0",
      "package_id": "uuid",
      "package_name": "releases",
      "project_name": "my-project"
    }
  ]
}
```

## Project Structure

@@ -185,19 +516,31 @@ orchard/

```
orchard/
│ ├── app/
│ │ ├── __init__.py
│ │ ├── config.py          # Pydantic settings
│ │ ├── database.py        # SQLAlchemy setup
│ │ ├── database.py        # SQLAlchemy setup and migrations
│ │ ├── main.py            # FastAPI application
│ │ ├── metadata.py        # Format-specific metadata extraction
│ │ ├── models.py          # SQLAlchemy models
│ │ ├── routes.py          # API endpoints
│ │ ├── schemas.py         # Pydantic schemas
│ │ └── storage.py         # S3 storage layer
│ │ └── storage.py         # S3 storage layer with multipart support
│ └── requirements.txt
├── frontend/
│ ├── src/
│ │ ├── components/        # React components
│ │ ├── components/        # Reusable UI components
│ │ │ ├── Badge.tsx        # Status/type badges
│ │ │ ├── Breadcrumb.tsx   # Navigation breadcrumbs
│ │ │ ├── Card.tsx         # Card containers
│ │ │ ├── DataTable.tsx    # Sortable data tables
│ │ │ ├── FilterChip.tsx   # Active filter chips
│ │ │ ├── Pagination.tsx   # Page navigation
│ │ │ ├── SearchInput.tsx  # Debounced search
│ │ │ └── SortDropdown.tsx # Sort field selector
│ │ ├── pages/             # Page components
│ │ ├── api.ts             # API client
│ │ ├── types.ts           # TypeScript types
│ │ │ ├── Home.tsx         # Project list
│ │ │ ├── ProjectPage.tsx  # Package list within project
│ │ │ └── PackagePage.tsx  # Tag/artifact list within package
│ │ ├── api.ts             # API client with pagination support
│ │ ├── types.ts           # TypeScript interfaces
│ │ ├── App.tsx
│ │ └── main.tsx
│ ├── index.html
```

@@ -229,16 +572,51 @@ Configuration is provided via environment variables prefixed with `ORCHARD_`:

| `ORCHARD_S3_BUCKET` | S3 bucket name | `orchard-artifacts` |
| `ORCHARD_S3_ACCESS_KEY_ID` | S3 access key | - |
| `ORCHARD_S3_SECRET_ACCESS_KEY` | S3 secret key | - |
| `ORCHARD_DOWNLOAD_MODE` | Download mode: `presigned`, `redirect`, or `proxy` | `presigned` |
| `ORCHARD_PRESIGNED_URL_EXPIRY` | Presigned URL expiry in seconds | `3600` |

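For local testing these can simply be exported in the shell (or placed in a compose file). A small sketch using only the variables from the table above; the values are illustrative:

```bash
export ORCHARD_S3_BUCKET=orchard-artifacts
export ORCHARD_DOWNLOAD_MODE=proxy          # stream downloads through the backend
export ORCHARD_PRESIGNED_URL_EXPIRY=7200    # 2-hour presigned URLs
```
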
## CI/CD Pipeline

The GitLab CI/CD pipeline automates building, testing, and deploying Orchard.

### Pipeline Stages

| Stage | Jobs | Description |
|-------|------|-------------|
| lint | `kics`, `hadolint`, `secrets` | Security and code quality scanning |
| build | `build_image` | Build and push Docker image |
| test | `python_tests`, `frontend_tests` | Run unit tests with coverage |
| deploy | `deploy_stage`, `deploy_feature` | Deploy to Kubernetes |
| deploy | `integration_test_*` | Post-deployment integration tests |

### Environments

| Environment | Branch | Namespace | URL |
|-------------|--------|-----------|-----|
| Stage | `main` | `orch-stage-namespace` | `orchard-stage.common.global.bsf.tools` |
| Feature | `*` (non-main) | `orch-dev-namespace` | `orchard-{branch}.common.global.bsf.tools` |

### Feature Branch Workflow

1. Push a feature branch
2. Pipeline builds, tests, and deploys to isolated environment
3. Integration tests run against the deployed environment
4. GitLab UI shows environment link for manual testing
5. On merge to main, environment is automatically cleaned up
6. Environments also auto-expire after 1 week if branch is not deleted

### Manual Cleanup

Feature environments can be manually cleaned up via:
- GitLab UI: Environments → Stop environment
- CLI: `helm uninstall orchard-{branch} -n orch-dev-namespace`

## Kubernetes Deployment

### Using Helm

```bash
# Add Bitnami repo for dependencies
helm repo add bitnami https://charts.bitnami.com/bitnami

# Update dependencies
# Update dependencies (uses internal OCI registry)
cd helm/orchard
helm dependency update
```

@@ -249,6 +627,31 @@ helm install orchard ./helm/orchard -n orchard --create-namespace

```bash
helm install orchard ./helm/orchard -f my-values.yaml
```

### Helm Configuration

Key configuration options in `values.yaml`:

```yaml
orchard:
  # Download configuration
  download:
    mode: "presigned"  # presigned, redirect, or proxy
    presignedUrlExpiry: 3600

# MinIO ingress (required for presigned URL downloads)
minioIngress:
  enabled: true
  className: "nginx"
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt"
  host: "minio.your-domain.com"
  tls:
    enabled: true
    secretName: minio-tls
```

When `minioIngress.enabled` is `true`, the S3 endpoint automatically uses the external URL (`https://minio.your-domain.com`), making presigned URLs accessible to external clients.

See `helm/orchard/values.yaml` for all configuration options.

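Individual values can also be overridden at install time with `--set` instead of a values file. A sketch using the keys shown above, for a cluster where clients cannot reach S3 directly:

```bash
# Proxy downloads through the backend and skip the MinIO ingress
helm upgrade --install orchard ./helm/orchard -n orchard \
  --set orchard.download.mode=proxy \
  --set minioIngress.enabled=false
```
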
## Database Schema

@@ -258,7 +661,8 @@ See `helm/orchard/values.yaml` for all configuration options.
- **projects** - Top-level organizational containers
- **packages** - Collections within projects
- **artifacts** - Content-addressable artifacts (SHA256)
- **tags** - Aliases pointing to artifacts
- **tags** - Mutable aliases pointing to artifacts
- **package_versions** - Immutable version records (set at upload time)
- **tag_history** - Audit trail for tag changes
- **uploads** - Upload event records
- **consumers** - Dependency tracking

@@ -276,11 +680,16 @@ The following features are planned but not yet implemented:
- [ ] Export/Import for air-gapped systems
- [ ] Consumer notification
- [ ] Automated update propagation
- [ ] OIDC/SAML authentication
- [ ] API key management
- [ ] Package format detection
- [ ] Multipart upload for large files
- [ ] SAML authentication
- [ ] Redis caching layer
- [ ] Download integrity verification (see `docs/design/integrity-verification.md`)

### Recently Implemented

- [x] OIDC authentication
- [x] API key management
- [x] Garbage collection for orphaned artifacts
- [x] User authentication with sessions

## License

backend/alembic.ini (new file, 83 lines)

@@ -0,0 +1,83 @@
# Alembic Configuration File

[alembic]
# path to migration scripts
script_location = alembic

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library
# timezone =

# max length of characters to apply to the "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during the 'revision' command,
# regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without a source .py file
# to be detected as revisions in the versions/ directory
# sourceless = false

# version location specification
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions

# version path separator
# version_path_separator = :

# set to 'true' to search source files recursively
# in each "version_locations" directory
# recursive_version_locations = false

# the output encoding used when revision files are written from script.py.mako
# output_encoding = utf-8

# Database URL - will be overridden by env.py
sqlalchemy.url = driver://user:pass@localhost/dbname


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts.

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

backend/alembic/README (new file, 27 lines)

@@ -0,0 +1,27 @@
Alembic Migrations for Orchard

This directory contains database migration scripts managed by Alembic.

Common Commands:
  # Generate a new migration (autogenerate from model changes)
  alembic revision --autogenerate -m "description of changes"

  # Apply all pending migrations
  alembic upgrade head

  # Rollback one migration
  alembic downgrade -1

  # Show current migration status
  alembic current

  # Show migration history
  alembic history

  # Generate SQL without applying (for review)
  alembic upgrade head --sql

Notes:
- Always review autogenerated migrations before applying
- Test migrations in development before applying to production
- Migrations are stored in the versions/ directory

backend/alembic/env.py (new file, 95 lines)

@@ -0,0 +1,95 @@
"""
Alembic migration environment configuration.
"""

from logging.config import fileConfig
import sys
from pathlib import Path

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context

# Add the app directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.config import get_settings
from app.models import Base

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Get database URL from settings
settings = get_settings()
config.set_main_option("sqlalchemy.url", settings.database_url)

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            compare_type=True,  # Detect column type changes
            compare_server_default=True,  # Detect default value changes
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()

backend/alembic/script.py.mako (new file, 26 lines)

@@ -0,0 +1,26 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}

backend/app/auth.py (new file, 1476 lines)

File diff suppressed because it is too large.

backend/app/cache.py (new file, 316 lines)

@@ -0,0 +1,316 @@
"""
Cache service for upstream artifact caching.

Provides URL parsing, system project management, and caching logic
for the upstream caching feature.
"""

import logging
import re
from dataclasses import dataclass
from typing import Optional
from urllib.parse import urlparse, unquote

logger = logging.getLogger(__name__)


# System project names for each source type
SYSTEM_PROJECT_NAMES = {
    "npm": "_npm",
    "pypi": "_pypi",
    "maven": "_maven",
    "docker": "_docker",
    "helm": "_helm",
    "nuget": "_nuget",
    "deb": "_deb",
    "rpm": "_rpm",
    "generic": "_generic",
}

# System project descriptions
SYSTEM_PROJECT_DESCRIPTIONS = {
    "npm": "System cache for npm packages",
    "pypi": "System cache for PyPI packages",
    "maven": "System cache for Maven packages",
    "docker": "System cache for Docker images",
    "helm": "System cache for Helm charts",
    "nuget": "System cache for NuGet packages",
    "deb": "System cache for Debian packages",
    "rpm": "System cache for RPM packages",
    "generic": "System cache for generic artifacts",
}


@dataclass
class ParsedUrl:
    """Parsed URL information for caching."""

    package_name: str
    version: Optional[str] = None
    filename: Optional[str] = None


def parse_npm_url(url: str) -> Optional[ParsedUrl]:
    """
    Parse npm registry URL to extract package name and version.

    Formats:
    - https://registry.npmjs.org/{package}/-/{package}-{version}.tgz
    - https://registry.npmjs.org/@{scope}/{package}/-/{package}-{version}.tgz

    Examples:
    - https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz
    - https://registry.npmjs.org/@types/node/-/node-18.0.0.tgz
    """
    parsed = urlparse(url)
    path = unquote(parsed.path)

    # Pattern for scoped packages: /@scope/package/-/package-version.tgz
    scoped_pattern = r"^/@([^/]+)/([^/]+)/-/\2-(.+)\.tgz$"
    match = re.match(scoped_pattern, path)
    if match:
        scope, name, version = match.groups()
        return ParsedUrl(
            package_name=f"@{scope}/{name}",
            version=version,
            filename=f"{name}-{version}.tgz",
        )

    # Pattern for unscoped packages: /package/-/package-version.tgz
    unscoped_pattern = r"^/([^/@]+)/-/\1-(.+)\.tgz$"
    match = re.match(unscoped_pattern, path)
    if match:
        name, version = match.groups()
        return ParsedUrl(
            package_name=name,
            version=version,
            filename=f"{name}-{version}.tgz",
        )

    return None

def parse_pypi_url(url: str) -> Optional[ParsedUrl]:
    """
    Parse PyPI URL to extract package name and version.

    Formats:
    - https://files.pythonhosted.org/packages/.../package-version.tar.gz
    - https://files.pythonhosted.org/packages/.../package-version-py3-none-any.whl
    - https://pypi.org/packages/.../package-version.tar.gz

    Examples:
    - https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0.tar.gz
    - https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0-py3-none-any.whl
    """
    parsed = urlparse(url)
    path = unquote(parsed.path)

    # Get the filename from the path
    filename = path.split("/")[-1]
    if not filename:
        return None

    # Handle wheel files: package-version-py3-none-any.whl
    wheel_pattern = r"^([a-zA-Z0-9_-]+)-(\d+[^-]*)-.*\.whl$"
    match = re.match(wheel_pattern, filename)
    if match:
        name, version = match.groups()
        # Normalize package name (PyPI uses underscores internally)
        name = name.replace("_", "-").lower()
        return ParsedUrl(
            package_name=name,
            version=version,
            filename=filename,
        )

    # Handle source distributions: package-version.tar.gz or package-version.zip
    sdist_pattern = r"^([a-zA-Z0-9_-]+)-(\d+(?:\.\d+)*(?:[a-zA-Z0-9_.+-]*)?)(?:\.tar\.gz|\.zip|\.tar\.bz2)$"
    match = re.match(sdist_pattern, filename)
    if match:
        name, version = match.groups()
        name = name.replace("_", "-").lower()
        return ParsedUrl(
            package_name=name,
            version=version,
            filename=filename,
        )

    return None


def parse_maven_url(url: str) -> Optional[ParsedUrl]:
    """
    Parse Maven repository URL to extract artifact info.

    Format:
    - https://repo1.maven.org/maven2/{group}/{artifact}/{version}/{artifact}-{version}.jar

    Examples:
    - https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar
    - https://repo1.maven.org/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar
    """
    parsed = urlparse(url)
    path = unquote(parsed.path)

    # Find /maven2/ or similar repository path
    maven2_idx = path.find("/maven2/")
    if maven2_idx >= 0:
        path = path[maven2_idx + 8:]  # Remove /maven2/
    elif path.startswith("/"):
        path = path[1:]

    parts = path.split("/")
    if len(parts) < 4:
        return None

    # Last part is filename, before that is version, before that is artifact
    filename = parts[-1]
    version = parts[-2]
    artifact = parts[-3]
    group = ".".join(parts[:-3])

    # Verify filename matches expected pattern
    if not filename.startswith(f"{artifact}-{version}"):
        return None

    return ParsedUrl(
        package_name=f"{group}:{artifact}",
        version=version,
        filename=filename,
    )

def parse_docker_url(url: str) -> Optional[ParsedUrl]:
|
||||
"""
|
||||
Parse Docker registry URL to extract image info.
|
||||
|
||||
Note: Docker registries are more complex (manifests, blobs, etc.)
|
||||
This handles basic blob/manifest URLs.
|
||||
|
||||
Examples:
|
||||
- https://registry-1.docker.io/v2/library/nginx/blobs/sha256:abc123
|
||||
- https://registry-1.docker.io/v2/myuser/myimage/manifests/latest
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
path = unquote(parsed.path)
|
||||
|
||||
# Pattern: /v2/{namespace}/{image}/blobs/{digest} or /manifests/{tag}
|
||||
pattern = r"^/v2/([^/]+(?:/[^/]+)?)/([^/]+)/(blobs|manifests)/(.+)$"
|
||||
match = re.match(pattern, path)
|
||||
if match:
|
||||
namespace, image, artifact_type, reference = match.groups()
|
||||
if namespace == "library":
|
||||
package_name = image
|
||||
else:
|
||||
package_name = f"{namespace}/{image}"
|
||||
|
||||
# For manifests, the reference is the tag
|
||||
version = reference if artifact_type == "manifests" else None
|
||||
|
||||
return ParsedUrl(
|
||||
package_name=package_name,
|
||||
version=version,
|
||||
filename=f"{image}-{reference}" if version else reference,
|
||||
)
|
||||
|
||||
return None


def parse_generic_url(url: str) -> ParsedUrl:
    """
    Parse a generic URL to extract filename.

    Attempts to extract meaningful package name and version from filename.

    Examples:
    - https://example.com/downloads/myapp-1.2.3.tar.gz
    - https://github.com/user/repo/releases/download/v1.0/release.zip
    """
    parsed = urlparse(url)
    path = unquote(parsed.path)
    filename = path.split("/")[-1] or "artifact"

    # List of known compound and simple extensions
    known_extensions = [
        ".tar.gz", ".tar.bz2", ".tar.xz",
        ".zip", ".tgz", ".gz", ".jar", ".war", ".deb", ".rpm"
    ]

    # Strip extension from filename first
    base_name = filename
    matched_ext = None
    for ext in known_extensions:
        if filename.endswith(ext):
            base_name = filename[:-len(ext)]
            matched_ext = ext
            break

    if matched_ext is None:
        # Unknown extension, return filename as package name
        return ParsedUrl(
            package_name=filename,
            version=None,
            filename=filename,
        )

    # Try to extract version from base_name
    # Pattern: name-version or name_version
    # Version starts with digit(s) and can include dots, dashes, and alphanumeric suffixes
    version_pattern = r"^(.+?)[-_](v?\d+(?:\.\d+)*(?:[-_][a-zA-Z0-9]+)?)$"
    match = re.match(version_pattern, base_name)
    if match:
        name, version = match.groups()
        return ParsedUrl(
            package_name=name,
            version=version,
            filename=filename,
        )

    # No version found, use base_name as package name
    return ParsedUrl(
        package_name=base_name,
        version=None,
        filename=filename,
    )


def parse_url(url: str, source_type: str) -> ParsedUrl:
    """
    Parse URL to extract package name and version based on source type.

    Args:
        url: The URL to parse.
        source_type: The source type (npm, pypi, maven, docker, etc.)

    Returns:
        ParsedUrl with extracted information.
    """
    parsed = None

    if source_type == "npm":
        parsed = parse_npm_url(url)
    elif source_type == "pypi":
        parsed = parse_pypi_url(url)
    elif source_type == "maven":
        parsed = parse_maven_url(url)
    elif source_type == "docker":
        parsed = parse_docker_url(url)

    # Fall back to generic parsing if type-specific parsing fails
    if parsed is None:
        parsed = parse_generic_url(url)

    return parsed


def get_system_project_name(source_type: str) -> str:
    """Get the system project name for a source type."""
    return SYSTEM_PROJECT_NAMES.get(source_type, "_generic")


def get_system_project_description(source_type: str) -> str:
    """Get the system project description for a source type."""
    return SYSTEM_PROJECT_DESCRIPTIONS.get(
        source_type, "System cache for artifacts"
    )
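

# --- Editor's illustrative sketch (not part of the original file) ---
# Exercising parse_url() with URLs drawn from the docstrings above; the
# lodash URL is a hypothetical instance of the documented npm /-/ layout.
if __name__ == "__main__":
    for example_url, example_type in [
        ("https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "npm"),
        ("https://files.pythonhosted.org/packages/ab/cd/requests-2.28.0.tar.gz", "pypi"),
        ("https://example.com/downloads/myapp-1.2.3.tar.gz", "generic"),
    ]:
        info = parse_url(example_url, example_type)
        # e.g. "npm lodash 4.17.21 lodash-4.17.21.tgz"
        print(example_type, info.package_name, info.version, info.filename)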
477
backend/app/checksum.py
Normal file
@@ -0,0 +1,477 @@
"""
Checksum utilities for download verification.

This module provides functions and classes for computing and verifying
SHA256 checksums during artifact downloads.

Key components:
- compute_sha256(): Compute SHA256 of bytes content
- compute_sha256_stream(): Compute SHA256 from an iterable stream
- HashingStreamWrapper: Wrapper that computes hash while streaming
- VerifyingStreamWrapper: Wrapper that verifies hash after streaming
- verify_checksum(): Verify content against expected hash
- ChecksumMismatchError: Exception for verification failures
"""

import hashlib
import logging
import re
import base64
from typing import (
    Generator,
    Optional,
    Any,
    Callable,
)

logger = logging.getLogger(__name__)

# Default chunk size for streaming operations (8KB)
DEFAULT_CHUNK_SIZE = 8 * 1024

# Regex pattern for valid SHA256 hash (64 hex characters)
SHA256_PATTERN = re.compile(r"^[a-fA-F0-9]{64}$")


class ChecksumError(Exception):
    """Base exception for checksum operations."""

    pass


class ChecksumMismatchError(ChecksumError):
    """
    Raised when computed checksum does not match expected checksum.

    Attributes:
        expected: The expected SHA256 hash
        actual: The actual computed SHA256 hash
        artifact_id: Optional artifact ID for context
        s3_key: Optional S3 key for debugging
        size: Optional file size
    """

    def __init__(
        self,
        expected: str,
        actual: str,
        artifact_id: Optional[str] = None,
        s3_key: Optional[str] = None,
        size: Optional[int] = None,
        message: Optional[str] = None,
    ):
        self.expected = expected
        self.actual = actual
        self.artifact_id = artifact_id
        self.s3_key = s3_key
        self.size = size

        if message:
            self.message = message
        else:
            self.message = (
                f"Checksum verification failed: "
                f"expected {expected[:16]}..., got {actual[:16]}..."
            )
        super().__init__(self.message)

    def to_dict(self) -> dict:
        """Convert to dictionary for logging/API responses."""
        return {
            "error": "checksum_mismatch",
            "expected": self.expected,
            "actual": self.actual,
            "artifact_id": self.artifact_id,
            "s3_key": self.s3_key,
            "size": self.size,
            "message": self.message,
        }


class InvalidHashFormatError(ChecksumError):
    """Raised when a hash string is not valid SHA256 format."""

    def __init__(self, hash_value: str):
        self.hash_value = hash_value
        message = f"Invalid SHA256 hash format: '{hash_value[:32]}...'"
        super().__init__(message)


def is_valid_sha256(hash_value: str) -> bool:
    """
    Check if a string is a valid SHA256 hash (64 hex characters).

    Args:
        hash_value: String to validate

    Returns:
        True if valid SHA256 format, False otherwise
    """
    if not hash_value:
        return False
    return bool(SHA256_PATTERN.match(hash_value))


def compute_sha256(content: bytes) -> str:
    """
    Compute SHA256 hash of bytes content.

    Args:
        content: Bytes content to hash

    Returns:
        Lowercase hexadecimal SHA256 hash (64 characters)

    Raises:
        ChecksumError: If hash computation fails
    """
    if content is None:
        raise ChecksumError("Cannot compute hash of None content")

    try:
        return hashlib.sha256(content).hexdigest().lower()
    except Exception as e:
        raise ChecksumError(f"Hash computation failed: {e}") from e


def compute_sha256_stream(
    stream: Any,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
) -> str:
    """
    Compute SHA256 hash from a stream or file-like object.

    Reads the stream in chunks to minimize memory usage for large files.

    Args:
        stream: Iterator yielding bytes or file-like object with read()
        chunk_size: Size of chunks to read (default 8KB)

    Returns:
        Lowercase hexadecimal SHA256 hash (64 characters)

    Raises:
        ChecksumError: If hash computation fails
    """
    try:
        hasher = hashlib.sha256()

        # Handle file-like objects with read()
        if hasattr(stream, "read"):
            while True:
                chunk = stream.read(chunk_size)
                if not chunk:
                    break
                hasher.update(chunk)
        else:
            # Handle iterators
            for chunk in stream:
                if chunk:
                    hasher.update(chunk)

        return hasher.hexdigest().lower()
    except Exception as e:
        raise ChecksumError(f"Stream hash computation failed: {e}") from e


def verify_checksum(content: bytes, expected: str) -> bool:
    """
    Verify that content matches expected SHA256 hash.

    Args:
        content: Bytes content to verify
        expected: Expected SHA256 hash (case-insensitive)

    Returns:
        True if hash matches, False otherwise

    Raises:
        InvalidHashFormatError: If expected hash is not valid format
        ChecksumError: If hash computation fails
    """
    if not is_valid_sha256(expected):
        raise InvalidHashFormatError(expected)

    actual = compute_sha256(content)
    return actual == expected.lower()


def verify_checksum_strict(
    content: bytes,
    expected: str,
    artifact_id: Optional[str] = None,
    s3_key: Optional[str] = None,
) -> None:
    """
    Verify content matches expected hash, raising exception on mismatch.

    Args:
        content: Bytes content to verify
        expected: Expected SHA256 hash (case-insensitive)
        artifact_id: Optional artifact ID for error context
        s3_key: Optional S3 key for error context

    Raises:
        InvalidHashFormatError: If expected hash is not valid format
        ChecksumMismatchError: If verification fails
        ChecksumError: If hash computation fails
    """
    if not is_valid_sha256(expected):
        raise InvalidHashFormatError(expected)

    actual = compute_sha256(content)
    if actual != expected.lower():
        raise ChecksumMismatchError(
            expected=expected.lower(),
            actual=actual,
            artifact_id=artifact_id,
            s3_key=s3_key,
            size=len(content),
        )


def sha256_to_base64(hex_hash: str) -> str:
    """
    Convert SHA256 hex string to base64 encoding (for RFC 3230 Digest header).

    Args:
        hex_hash: SHA256 hash as 64-character hex string

    Returns:
        Base64-encoded hash string
    """
    if not is_valid_sha256(hex_hash):
        raise InvalidHashFormatError(hex_hash)

    hash_bytes = bytes.fromhex(hex_hash)
    return base64.b64encode(hash_bytes).decode("ascii")


class HashingStreamWrapper:
    """
    Wrapper that computes SHA256 hash incrementally as chunks are read.

    This allows computing the hash while streaming content to a client,
    without buffering the entire content in memory.

    Usage:
        wrapper = HashingStreamWrapper(stream)
        for chunk in wrapper:
            send_to_client(chunk)
        final_hash = wrapper.get_hash()

    Attributes:
        chunk_size: Size of chunks to yield
        bytes_read: Total bytes processed so far
    """

    def __init__(
        self,
        stream: Any,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
    ):
        """
        Initialize the hashing stream wrapper.

        Args:
            stream: Source stream (iterator, file-like, or S3 StreamingBody)
            chunk_size: Size of chunks to yield (default 8KB)
        """
        self._stream = stream
        self._hasher = hashlib.sha256()
        self._chunk_size = chunk_size
        self._bytes_read = 0
        self._finalized = False
        self._final_hash: Optional[str] = None

    @property
    def bytes_read(self) -> int:
        """Total bytes read so far."""
        return self._bytes_read

    @property
    def chunk_size(self) -> int:
        """Chunk size for reading."""
        return self._chunk_size

    def __iter__(self) -> Generator[bytes, None, None]:
        """Iterate over chunks, computing hash as we go."""
        # Handle S3 StreamingBody (has iter_chunks)
        if hasattr(self._stream, "iter_chunks"):
            for chunk in self._stream.iter_chunks(chunk_size=self._chunk_size):
                if chunk:
                    self._hasher.update(chunk)
                    self._bytes_read += len(chunk)
                    yield chunk
        # Handle file-like objects with read()
        elif hasattr(self._stream, "read"):
            while True:
                chunk = self._stream.read(self._chunk_size)
                if not chunk:
                    break
                self._hasher.update(chunk)
                self._bytes_read += len(chunk)
                yield chunk
        # Handle iterators
        else:
            for chunk in self._stream:
                if chunk:
                    self._hasher.update(chunk)
                    self._bytes_read += len(chunk)
                    yield chunk

        self._finalized = True
        self._final_hash = self._hasher.hexdigest().lower()

    def get_hash(self) -> str:
        """
        Get the computed SHA256 hash.

        If stream hasn't been fully consumed, consumes remaining chunks.

        Returns:
            Lowercase hexadecimal SHA256 hash
        """
        if not self._finalized:
            # Consume remaining stream
            for _ in self:
                pass

        return self._final_hash or self._hasher.hexdigest().lower()

    def get_hash_if_complete(self) -> Optional[str]:
        """
        Get hash only if stream has been fully consumed.

        Returns:
            Hash if complete, None otherwise
        """
        if self._finalized:
            return self._final_hash
        return None


class VerifyingStreamWrapper:
    """
    Wrapper that yields chunks and verifies hash after streaming completes.

    IMPORTANT: Because HTTP streams cannot be "un-sent", if verification
    fails after streaming, the client has already received potentially
    corrupt data. This wrapper logs an error but cannot prevent delivery.

    For guaranteed verification before delivery, use pre-verification mode
    which buffers the entire content first.

    Usage:
        wrapper = VerifyingStreamWrapper(stream, expected_hash)
        for chunk in wrapper:
            send_to_client(chunk)
        wrapper.verify()  # Raises ChecksumMismatchError if failed
    """

    def __init__(
        self,
        stream: Any,
        expected_hash: str,
        artifact_id: Optional[str] = None,
        s3_key: Optional[str] = None,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        on_failure: Optional[Callable[[Any], None]] = None,
    ):
        """
        Initialize the verifying stream wrapper.

        Args:
            stream: Source stream
            expected_hash: Expected SHA256 hash to verify against
            artifact_id: Optional artifact ID for error context
            s3_key: Optional S3 key for error context
            chunk_size: Size of chunks to yield
            on_failure: Optional callback called on verification failure
        """
        if not is_valid_sha256(expected_hash):
            raise InvalidHashFormatError(expected_hash)

        self._hashing_wrapper = HashingStreamWrapper(stream, chunk_size)
        self._expected_hash = expected_hash.lower()
        self._artifact_id = artifact_id
        self._s3_key = s3_key
        self._on_failure = on_failure
        self._verified: Optional[bool] = None

    @property
    def bytes_read(self) -> int:
        """Total bytes read so far."""
        return self._hashing_wrapper.bytes_read

    @property
    def is_verified(self) -> Optional[bool]:
        """
        Verification status.

        Returns:
            True if verified successfully, False if failed, None if not yet complete
        """
        return self._verified

    def __iter__(self) -> Generator[bytes, None, None]:
        """Iterate over chunks."""
        yield from self._hashing_wrapper

    def verify(self) -> bool:
        """
        Verify the hash after stream is complete.

        Must be called after fully consuming the iterator.

        Returns:
            True if verification passed

        Raises:
            ChecksumMismatchError: If verification failed
        """
        actual_hash = self._hashing_wrapper.get_hash()

        if actual_hash == self._expected_hash:
            self._verified = True
            logger.debug(
                f"Verification passed for {self._artifact_id or 'unknown'}: {actual_hash[:16]}..."
            )
            return True

        self._verified = False
        error = ChecksumMismatchError(
            expected=self._expected_hash,
            actual=actual_hash,
            artifact_id=self._artifact_id,
            s3_key=self._s3_key,
            size=self._hashing_wrapper.bytes_read,
        )

        # Log the failure
        logger.error(f"Checksum verification FAILED after streaming: {error.to_dict()}")

        # Call failure callback if provided
        if self._on_failure:
            try:
                self._on_failure(error)
            except Exception as e:
                logger.warning(f"Verification failure callback raised exception: {e}")

        raise error

    def verify_silent(self) -> bool:
        """
        Verify the hash without raising exception.

        Returns:
            True if verification passed, False otherwise
        """
        try:
            return self.verify()
        except ChecksumMismatchError:
            return False

    def get_actual_hash(self) -> Optional[str]:
        """Get the actual computed hash (only available after iteration)."""
        return self._hashing_wrapper.get_hash_if_complete()
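

# --- Editor's illustrative sketch (not part of the original file) ---
# Streaming to a client while hashing; `stream` could be an S3 StreamingBody
# and `send_chunk` is a hypothetical per-chunk callback. Verification can only
# happen after the last chunk has already left, as the class docstring warns.
def _example_stream_and_verify(stream: Any, expected_hash: str, send_chunk: Callable[[bytes], None]) -> bool:
    wrapper = VerifyingStreamWrapper(stream, expected_hash, artifact_id="example")
    for chunk in wrapper:
        send_chunk(chunk)
    # verify_silent() returns False on mismatch instead of raising
    return wrapper.verify_silent()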
@@ -1,8 +1,14 @@
from pydantic_settings import BaseSettings
from functools import lru_cache
from typing import Optional
import os
import re


class Settings(BaseSettings):
    # Environment
    env: str = "development"  # "development" or "production"

    # Server
    server_host: str = "0.0.0.0"
    server_port: int = 8080
|
    database_dbname: str = "orchard"
    database_sslmode: str = "disable"

    # Database connection pool settings
    database_pool_size: int = 5  # Number of connections to keep open
    database_max_overflow: int = 10  # Max additional connections beyond pool_size
    database_pool_timeout: int = 30  # Seconds to wait for a connection from pool
    database_pool_recycle: int = (
        1800  # Recycle connections after this many seconds (30 min)
    )
    database_query_timeout: int = 30  # Query timeout in seconds (0 = no timeout)

    # S3
    s3_endpoint: str = ""
    s3_region: str = "us-east-1"
|
    s3_access_key_id: str = ""
    s3_secret_access_key: str = ""
    s3_use_path_style: bool = True
    s3_verify_ssl: bool = True  # Set to False for self-signed certs (dev only)
    s3_connect_timeout: int = 10  # Connection timeout in seconds
    s3_read_timeout: int = 60  # Read timeout in seconds
    s3_max_retries: int = 3  # Max retry attempts for transient failures

    # Upload settings
    max_file_size: int = 10 * 1024 * 1024 * 1024  # 10GB default max file size
    min_file_size: int = 1  # Minimum 1 byte (empty files rejected)

    # Download settings
    download_mode: str = "presigned"  # "presigned", "redirect", or "proxy"
    presigned_url_expiry: int = (
        3600  # Presigned URL expiry in seconds (default: 1 hour)
    )

    # Logging settings
    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
    log_format: str = "auto"  # "json", "standard", or "auto" (json in production)

    # Initial admin user settings
    admin_password: str = ""  # Initial admin password (if empty, uses 'changeme123')

    # Cache settings
    cache_encryption_key: str = ""  # Fernet key for encrypting upstream credentials (auto-generated if empty)
    # Global cache settings override (None = use DB value, True/False = override DB)
    cache_auto_create_system_projects: Optional[bool] = None  # Override auto_create_system_projects

    # JWT Authentication settings (optional, for external identity providers)
    jwt_enabled: bool = False  # Enable JWT token validation
    jwt_secret: str = ""  # Secret key for HS256, or leave empty for RS256 with JWKS
    jwt_algorithm: str = "HS256"  # HS256 or RS256
    jwt_issuer: str = ""  # Expected issuer (iss claim), leave empty to skip validation
    jwt_audience: str = ""  # Expected audience (aud claim), leave empty to skip validation
    jwt_jwks_url: str = ""  # JWKS URL for RS256 (e.g., https://auth.example.com/.well-known/jwks.json)
    jwt_username_claim: str = (
        "sub"  # JWT claim to use as username (sub, email, preferred_username, etc.)
    )

    @property
    def database_url(self) -> str:
        sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else ""
        return f"postgresql://{self.database_user}:{self.database_password}@{self.database_host}:{self.database_port}/{self.database_dbname}{sslmode}"

    @property
    def is_development(self) -> bool:
        return self.env.lower() == "development"

    @property
    def is_production(self) -> bool:
        return self.env.lower() == "production"

    class Config:
        env_prefix = "ORCHARD_"
        case_sensitive = False
@@ -36,3 +96,110 @@ class Settings(BaseSettings):
@lru_cache()
def get_settings() -> Settings:
    return Settings()


class EnvUpstreamSource:
    """Represents an upstream source defined via environment variables."""

    def __init__(
        self,
        name: str,
        url: str,
        source_type: str = "generic",
        enabled: bool = True,
        auth_type: str = "none",
        username: Optional[str] = None,
        password: Optional[str] = None,
        priority: int = 100,
    ):
        self.name = name
        self.url = url
        self.source_type = source_type
        self.enabled = enabled
        self.auth_type = auth_type
        self.username = username
        self.password = password
        self.priority = priority
        self.source = "env"  # Mark as env-defined


def parse_upstream_sources_from_env() -> list[EnvUpstreamSource]:
    """
    Parse upstream sources from environment variables.

    Uses double underscore (__) as separator to allow source names with single underscores.
    Pattern: ORCHARD_UPSTREAM__{NAME}__FIELD

    Example:
        ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com
        ORCHARD_UPSTREAM__NPM_PRIVATE__TYPE=npm
        ORCHARD_UPSTREAM__NPM_PRIVATE__ENABLED=true
        ORCHARD_UPSTREAM__NPM_PRIVATE__AUTH_TYPE=basic
        ORCHARD_UPSTREAM__NPM_PRIVATE__USERNAME=reader
        ORCHARD_UPSTREAM__NPM_PRIVATE__PASSWORD=secret

    Returns:
        List of EnvUpstreamSource objects parsed from environment variables.
    """
    # Pattern: ORCHARD_UPSTREAM__{NAME}__{FIELD}
    pattern = re.compile(r"^ORCHARD_UPSTREAM__([A-Z0-9_]+)__([A-Z_]+)$", re.IGNORECASE)

    # Collect all env vars matching the pattern, grouped by source name
    sources_data: dict[str, dict[str, str]] = {}

    for key, value in os.environ.items():
        match = pattern.match(key)
        if match:
            source_name = match.group(1).lower()  # Normalize to lowercase
            field = match.group(2).upper()
            if source_name not in sources_data:
                sources_data[source_name] = {}
            sources_data[source_name][field] = value

    # Build source objects from collected data
    sources: list[EnvUpstreamSource] = []

    for name, data in sources_data.items():
        # URL is required
        url = data.get("URL")
        if not url:
            continue  # Skip sources without URL

        # Parse boolean fields
        def parse_bool(val: Optional[str], default: bool) -> bool:
            if val is None:
                return default
            return val.lower() in ("true", "1", "yes", "on")

        # Parse integer fields
        def parse_int(val: Optional[str], default: int) -> int:
            if val is None:
                return default
            try:
                return int(val)
            except ValueError:
                return default

        source = EnvUpstreamSource(
            name=name.replace("_", "-"),  # Convert underscores to hyphens for readability
            url=url,
            source_type=data.get("TYPE", "generic").lower(),
            enabled=parse_bool(data.get("ENABLED"), True),
            auth_type=data.get("AUTH_TYPE", "none").lower(),
            username=data.get("USERNAME"),
            password=data.get("PASSWORD"),
            priority=parse_int(data.get("PRIORITY"), 100),
        )
        sources.append(source)

    return sources


@lru_cache()
def get_env_upstream_sources() -> tuple[EnvUpstreamSource, ...]:
    """
    Get cached list of upstream sources from environment variables.

    Returns a tuple for hashability (required by lru_cache).
    """
    return tuple(parse_upstream_sources_from_env())
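

# --- Editor's illustrative sketch (not part of the original file) ---
# With the docstring's example variables exported, one source is produced.
# Note that get_env_upstream_sources() is cached, so environment changes after
# the first call are not picked up without clearing the lru_cache.
def _example_env_source_summary() -> list[str]:
    return [
        f"{s.name} ({s.source_type}, priority={s.priority})"
        for s in get_env_upstream_sources()
    ]
# With ORCHARD_UPSTREAM__NPM_PRIVATE__URL and ..__TYPE=npm set:
# -> ["npm-private (npm, priority=100)"]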
@@ -1,20 +1,614 @@
from sqlalchemy import create_engine, text, event
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.pool import QueuePool
from typing import Generator, NamedTuple, Optional
from contextlib import contextmanager
import logging
import re
import time
import hashlib

from .config import get_settings
from .models import Base
from .purge_seed_data import should_purge_seed_data, purge_seed_data

settings = get_settings()
logger = logging.getLogger(__name__)


class Migration(NamedTuple):
    """A database migration with a unique name and SQL to execute."""
    name: str
    sql: str


# PostgreSQL error codes that indicate "already exists" - safe to skip
SAFE_PG_ERROR_CODES = {
    "42P07",  # duplicate_table
    "42701",  # duplicate_column
    "42710",  # duplicate_object (index, constraint, etc.)
    "42P16",  # invalid_table_definition (e.g., column already exists)
}

# Build connect_args with query timeout if configured
connect_args = {}
if settings.database_query_timeout > 0:
    # PostgreSQL statement_timeout is in milliseconds
    connect_args["options"] = f"-c statement_timeout={settings.database_query_timeout * 1000}"

# Create engine with connection pool configuration
engine = create_engine(
    settings.database_url,
    pool_pre_ping=True,  # Check connection health before using
    poolclass=QueuePool,
    pool_size=settings.database_pool_size,
    max_overflow=settings.database_max_overflow,
    pool_timeout=settings.database_pool_timeout,
    pool_recycle=settings.database_pool_recycle,
    connect_args=connect_args,
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


# Connection pool monitoring
@event.listens_for(engine, "checkout")
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    """Log when a connection is checked out from the pool"""
    logger.debug(f"Connection checked out from pool: {id(dbapi_connection)}")


@event.listens_for(engine, "checkin")
def receive_checkin(dbapi_connection, connection_record):
    """Log when a connection is returned to the pool"""
    logger.debug(f"Connection returned to pool: {id(dbapi_connection)}")


def get_pool_status() -> dict:
    """Get current connection pool status for monitoring"""
    pool = engine.pool
    return {
        "pool_size": pool.size(),
        "checked_out": pool.checkedout(),
        "overflow": pool.overflow(),
        "checked_in": pool.checkedin(),
    }


def init_db():
    """Create all tables and run migrations"""
    Base.metadata.create_all(bind=engine)

    # Run migrations for schema updates
    _run_migrations()

    # Purge seed data if requested (for transitioning to production-like environment)
    if should_purge_seed_data():
        db = SessionLocal()
        try:
            purge_seed_data(db)
        finally:
            db.close()
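

# --- Editor's illustrative sketch (not part of the original file) ---
# get_pool_status() can back a health/metrics endpoint; the keys match the
# dict built above, and "saturated" here is a hypothetical derived field.
def _example_pool_metrics() -> dict:
    status = get_pool_status()
    return {"db_pool": status, "saturated": status["checked_out"] >= status["pool_size"]}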


def _ensure_migrations_table(conn) -> None:
    """Create the migrations tracking table if it doesn't exist."""
    conn.execute(text("""
        CREATE TABLE IF NOT EXISTS _schema_migrations (
            name VARCHAR(255) PRIMARY KEY,
            checksum VARCHAR(64) NOT NULL,
            applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
        );
    """))
    conn.commit()


def _get_applied_migrations(conn) -> dict[str, str]:
    """Get all applied migrations and their checksums."""
    result = conn.execute(text(
        "SELECT name, checksum FROM _schema_migrations"
    ))
    return {row[0]: row[1] for row in result}


def _compute_checksum(sql: str) -> str:
    """Compute a checksum for migration SQL to detect changes."""
    return hashlib.sha256(sql.strip().encode()).hexdigest()[:16]


def _is_safe_error(exception: Exception) -> bool:
    """Check if the error indicates the migration was already applied."""
    # Check for psycopg2 errors with pgcode attribute
    original = getattr(exception, "orig", None)
    if original is not None:
        pgcode = getattr(original, "pgcode", None)
        if pgcode in SAFE_PG_ERROR_CODES:
            return True

    # Fallback: match error message against common "already exists" patterns
    error_str = str(exception).lower()
    safe_patterns = [
        r"already exists",
        r"duplicate key",
        r"relation .* already exists",
        r"column .* already exists",
    ]
    return any(re.search(pattern, error_str) for pattern in safe_patterns)


def _record_migration(conn, name: str, checksum: str) -> None:
    """Record a migration as applied."""
    conn.execute(text(
        "INSERT INTO _schema_migrations (name, checksum) VALUES (:name, :checksum)"
    ), {"name": name, "checksum": checksum})
    conn.commit()


def _run_migrations():
    """Run manual migrations for schema updates with tracking and error detection."""
    migrations = [
        Migration(
            name="001_add_format_metadata",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'artifacts' AND column_name = 'format_metadata'
                ) THEN
                    ALTER TABLE artifacts ADD COLUMN format_metadata JSONB DEFAULT '{}';
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="002_add_package_format",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'packages' AND column_name = 'format'
                ) THEN
                    ALTER TABLE packages ADD COLUMN format VARCHAR(50) DEFAULT 'generic' NOT NULL;
                    CREATE INDEX IF NOT EXISTS idx_packages_format ON packages(format);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="003_add_package_platform",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'packages' AND column_name = 'platform'
                ) THEN
                    ALTER TABLE packages ADD COLUMN platform VARCHAR(50) DEFAULT 'any' NOT NULL;
                    CREATE INDEX IF NOT EXISTS idx_packages_platform ON packages(platform);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="004_add_ref_count_index_constraint",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM pg_indexes WHERE indexname = 'idx_artifacts_ref_count'
                ) THEN
                    CREATE INDEX idx_artifacts_ref_count ON artifacts(ref_count);
                END IF;

                IF NOT EXISTS (
                    SELECT 1 FROM pg_constraint WHERE conname = 'check_ref_count_non_negative'
                ) THEN
                    ALTER TABLE artifacts ADD CONSTRAINT check_ref_count_non_negative CHECK (ref_count >= 0);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="005_add_composite_indexes",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM pg_indexes WHERE indexname = 'idx_packages_project_name'
                ) THEN
                    CREATE UNIQUE INDEX idx_packages_project_name ON packages(project_id, name);
                END IF;

                IF NOT EXISTS (
                    SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_name'
                ) THEN
                    CREATE UNIQUE INDEX idx_tags_package_name ON tags(package_id, name);
                END IF;

                IF NOT EXISTS (
                    SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_created_at'
                ) THEN
                    CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="006_add_package_versions_indexes",
            sql="""
            DO $$
            BEGIN
                IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
                    IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_package_id') THEN
                        CREATE INDEX idx_package_versions_package_id ON package_versions(package_id);
                    END IF;
                    IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_artifact_id') THEN
                        CREATE INDEX idx_package_versions_artifact_id ON package_versions(artifact_id);
                    END IF;
                    IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_package_versions_package_version') THEN
                        CREATE INDEX idx_package_versions_package_version ON package_versions(package_id, version);
                    END IF;
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="007_create_ref_count_trigger_functions",
            sql="""
            CREATE OR REPLACE FUNCTION increment_artifact_ref_count()
            RETURNS TRIGGER AS $$
            BEGIN
                UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            CREATE OR REPLACE FUNCTION decrement_artifact_ref_count()
            RETURNS TRIGGER AS $$
            BEGIN
                UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
                RETURN OLD;
            END;
            $$ LANGUAGE plpgsql;

            CREATE OR REPLACE FUNCTION update_artifact_ref_count()
            RETURNS TRIGGER AS $$
            BEGIN
                IF OLD.artifact_id != NEW.artifact_id THEN
                    UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
                    UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
                END IF;
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;
            """,
        ),
        Migration(
            name="008_create_tags_ref_count_triggers",
            sql="""
            DO $$
            BEGIN
                DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
                CREATE TRIGGER tags_ref_count_insert_trigger
                    AFTER INSERT ON tags
                    FOR EACH ROW
                    EXECUTE FUNCTION increment_artifact_ref_count();

                DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
                CREATE TRIGGER tags_ref_count_delete_trigger
                    AFTER DELETE ON tags
                    FOR EACH ROW
                    EXECUTE FUNCTION decrement_artifact_ref_count();

                DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
                CREATE TRIGGER tags_ref_count_update_trigger
                    AFTER UPDATE ON tags
                    FOR EACH ROW
                    WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
                    EXECUTE FUNCTION update_artifact_ref_count();
            END $$;
            """,
        ),
        Migration(
            name="009_create_version_ref_count_functions",
            sql="""
            CREATE OR REPLACE FUNCTION increment_version_ref_count()
            RETURNS TRIGGER AS $$
            BEGIN
                UPDATE artifacts SET ref_count = ref_count + 1 WHERE id = NEW.artifact_id;
                RETURN NEW;
            END;
            $$ LANGUAGE plpgsql;

            CREATE OR REPLACE FUNCTION decrement_version_ref_count()
            RETURNS TRIGGER AS $$
            BEGIN
                UPDATE artifacts SET ref_count = ref_count - 1 WHERE id = OLD.artifact_id;
                RETURN OLD;
            END;
            $$ LANGUAGE plpgsql;
            """,
        ),
        Migration(
            name="010_create_package_versions_triggers",
            sql="""
            DO $$
            BEGIN
                IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
                    DROP TRIGGER IF EXISTS package_versions_ref_count_insert ON package_versions;
                    CREATE TRIGGER package_versions_ref_count_insert
                        AFTER INSERT ON package_versions
                        FOR EACH ROW
                        EXECUTE FUNCTION increment_version_ref_count();

                    DROP TRIGGER IF EXISTS package_versions_ref_count_delete ON package_versions;
                    CREATE TRIGGER package_versions_ref_count_delete
                        AFTER DELETE ON package_versions
                        FOR EACH ROW
                        EXECUTE FUNCTION decrement_version_ref_count();
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="011_migrate_semver_tags_to_versions",
            sql=r"""
            DO $$
            BEGIN
                IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
                    INSERT INTO package_versions (id, package_id, artifact_id, version, version_source, created_by, created_at)
                    SELECT
                        gen_random_uuid(),
                        t.package_id,
                        t.artifact_id,
                        CASE WHEN t.name LIKE 'v%' THEN substring(t.name from 2) ELSE t.name END,
                        'migrated_from_tag',
                        t.created_by,
                        t.created_at
                    FROM tags t
                    WHERE t.name ~ '^v?[0-9]+\.[0-9]+(\.[0-9]+)?([-.][a-zA-Z0-9]+)?$'
                    ON CONFLICT (package_id, version) DO NOTHING;
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="012_create_teams_table",
            sql="""
            CREATE TABLE IF NOT EXISTS teams (
                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                name VARCHAR(255) NOT NULL,
                slug VARCHAR(255) NOT NULL UNIQUE,
                description TEXT,
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                created_by VARCHAR(255) NOT NULL,
                settings JSONB DEFAULT '{}'
            );
            """,
        ),
        Migration(
            name="013_create_team_memberships_table",
            sql="""
            CREATE TABLE IF NOT EXISTS team_memberships (
                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                team_id UUID NOT NULL REFERENCES teams(id) ON DELETE CASCADE,
                user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
                role VARCHAR(50) NOT NULL DEFAULT 'member',
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                invited_by VARCHAR(255),
                CONSTRAINT team_memberships_unique UNIQUE (team_id, user_id),
                CONSTRAINT team_memberships_role_check CHECK (role IN ('owner', 'admin', 'member'))
            );
            """,
        ),
        Migration(
            name="014_add_team_id_to_projects",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'projects' AND column_name = 'team_id'
                ) THEN
                    ALTER TABLE projects ADD COLUMN team_id UUID REFERENCES teams(id) ON DELETE SET NULL;
                    CREATE INDEX IF NOT EXISTS idx_projects_team_id ON projects(team_id);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="015_add_teams_indexes",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_teams_slug') THEN
                    CREATE INDEX idx_teams_slug ON teams(slug);
                END IF;
                IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_teams_created_by') THEN
                    CREATE INDEX idx_teams_created_by ON teams(created_by);
                END IF;
                IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_team_memberships_team_id') THEN
                    CREATE INDEX idx_team_memberships_team_id ON team_memberships(team_id);
                END IF;
                IF NOT EXISTS (SELECT 1 FROM pg_indexes WHERE indexname = 'idx_team_memberships_user_id') THEN
                    CREATE INDEX idx_team_memberships_user_id ON team_memberships(user_id);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="016_add_is_system_to_projects",
            sql="""
            DO $$
            BEGIN
                IF NOT EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'projects' AND column_name = 'is_system'
                ) THEN
                    ALTER TABLE projects ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT FALSE;
                    CREATE INDEX IF NOT EXISTS idx_projects_is_system ON projects(is_system);
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="017_create_upstream_sources",
            sql="""
            CREATE TABLE IF NOT EXISTS upstream_sources (
                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                name VARCHAR(255) NOT NULL UNIQUE,
                source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
                url VARCHAR(2048) NOT NULL,
                enabled BOOLEAN NOT NULL DEFAULT FALSE,
                auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
                username VARCHAR(255),
                password_encrypted BYTEA,
                headers_encrypted BYTEA,
                priority INTEGER NOT NULL DEFAULT 100,
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                CONSTRAINT check_source_type CHECK (
                    source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')
                ),
                CONSTRAINT check_auth_type CHECK (
                    auth_type IN ('none', 'basic', 'bearer', 'api_key')
                ),
                CONSTRAINT check_priority_positive CHECK (priority > 0)
            );
            CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
            CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
            CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
            """,
        ),
        Migration(
            name="018_create_cache_settings",
            sql="""
            CREATE TABLE IF NOT EXISTS cache_settings (
                id INTEGER PRIMARY KEY DEFAULT 1,
                auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
                CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
            );
            INSERT INTO cache_settings (id, auto_create_system_projects)
            VALUES (1, TRUE)
            ON CONFLICT (id) DO NOTHING;
            """,
        ),
        Migration(
            name="019_create_cached_urls",
            sql="""
            CREATE TABLE IF NOT EXISTS cached_urls (
                id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                url VARCHAR(4096) NOT NULL,
                url_hash VARCHAR(64) NOT NULL UNIQUE,
                artifact_id VARCHAR(64) NOT NULL REFERENCES artifacts(id),
                source_id UUID REFERENCES upstream_sources(id) ON DELETE SET NULL,
                fetched_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
                response_headers JSONB DEFAULT '{}',
                created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
            );
            CREATE INDEX IF NOT EXISTS idx_cached_urls_url_hash ON cached_urls(url_hash);
            CREATE INDEX IF NOT EXISTS idx_cached_urls_artifact_id ON cached_urls(artifact_id);
            CREATE INDEX IF NOT EXISTS idx_cached_urls_source_id ON cached_urls(source_id);
            CREATE INDEX IF NOT EXISTS idx_cached_urls_fetched_at ON cached_urls(fetched_at);
            """,
        ),
        Migration(
            name="020_seed_default_upstream_sources",
            sql="""
            -- Originally seeded public sources, but these are no longer used.
            -- Migration 023 deletes any previously seeded sources.
            -- This migration is now a no-op for fresh installs.
            SELECT 1;
            """,
        ),
        Migration(
            name="021_remove_is_public_from_upstream_sources",
            sql="""
            DO $$
            BEGIN
                -- Drop the index if it exists
                DROP INDEX IF EXISTS idx_upstream_sources_is_public;

                -- Drop the column if it exists
                IF EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'upstream_sources' AND column_name = 'is_public'
                ) THEN
                    ALTER TABLE upstream_sources DROP COLUMN is_public;
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="022_remove_allow_public_internet_from_cache_settings",
            sql="""
            DO $$
            BEGIN
                IF EXISTS (
                    SELECT 1 FROM information_schema.columns
                    WHERE table_name = 'cache_settings' AND column_name = 'allow_public_internet'
                ) THEN
                    ALTER TABLE cache_settings DROP COLUMN allow_public_internet;
                END IF;
            END $$;
            """,
        ),
        Migration(
            name="023_delete_seeded_public_sources",
            sql="""
            -- Delete the seeded public sources that were added by migration 020
            DELETE FROM upstream_sources
            WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
            """,
        ),
    ]

    with engine.connect() as conn:
        # Ensure migrations tracking table exists
        _ensure_migrations_table(conn)

        # Get already-applied migrations
        applied = _get_applied_migrations(conn)

        for migration in migrations:
            checksum = _compute_checksum(migration.sql)

            # Check if migration was already applied
            if migration.name in applied:
                stored_checksum = applied[migration.name]
                if stored_checksum != checksum:
                    logger.warning(
                        f"Migration '{migration.name}' has changed since it was applied! "
                        f"Stored checksum: {stored_checksum}, current: {checksum}"
                    )
                continue

            # Run the migration
            try:
                logger.info(f"Running migration: {migration.name}")
                conn.execute(text(migration.sql))
                conn.commit()
                _record_migration(conn, migration.name, checksum)
                logger.info(f"Migration '{migration.name}' applied successfully")
            except Exception as e:
                conn.rollback()
                if _is_safe_error(e):
                    # Migration was already applied (schema already exists)
                    logger.info(
                        f"Migration '{migration.name}' already applied (schema exists), recording as complete"
                    )
                    _record_migration(conn, migration.name, checksum)
                else:
                    # Real error - fail hard
                    logger.error(f"Migration '{migration.name}' failed: {e}")
                    raise RuntimeError(
                        f"Migration '{migration.name}' failed with error: {e}"
                    ) from e
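

# --- Editor's illustrative sketch (not part of the original file) ---
# Adding a schema change means appending one Migration to the list above; the
# runner records its checksum in _schema_migrations and skips it on later
# startups. The "024" name and the notes column are hypothetical.
_EXAMPLE_MIGRATION = Migration(
    name="024_add_project_notes",
    sql="""
    DO $$
    BEGIN
        IF NOT EXISTS (
            SELECT 1 FROM information_schema.columns
            WHERE table_name = 'projects' AND column_name = 'notes'
        ) THEN
            ALTER TABLE projects ADD COLUMN notes TEXT;
        END IF;
    END $$;
    """,
)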


def get_db() -> Generator[Session, None, None]:
    """Dependency for getting database sessions"""
@@ -23,3 +617,75 @@ def get_db() -> Generator[Session, None, None]:
        yield db
    finally:
        db.close()


@contextmanager
def transaction(db: Session):
    """
    Context manager for explicit transaction management with savepoint support.

    Usage:
        with transaction(db):
            # operations here
        # automatically commits on success, rolls back on exception
    """
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
        raise


@contextmanager
def savepoint(db: Session, name: Optional[str] = None):
    """
    Create a savepoint for partial rollback support.

    Note: `name` is kept for readability at call sites; SQLAlchemy assigns
    savepoint names internally via begin_nested().

    Usage:
        with savepoint(db, "my_savepoint"):
            # operations here
        # rolls back to savepoint on exception, but doesn't roll back the whole transaction
    """
    savepoint_obj = db.begin_nested()
    try:
        yield savepoint_obj
        savepoint_obj.commit()
    except Exception:
        savepoint_obj.rollback()
        raise


def retry_on_deadlock(func, max_retries: int = 3, delay: float = 0.1):
    """
    Decorator/wrapper to retry operations on deadlock detection.

    Usage:
        @retry_on_deadlock
        def my_operation(db):
            ...

    Or:
        retry_on_deadlock(lambda: my_operation(db))()
    """
    import functools
    from sqlalchemy.exc import OperationalError

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        last_exception = None
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except OperationalError as e:
                # Check for deadlock error codes (PostgreSQL: 40P01, MySQL: 1213)
                error_str = str(e).lower()
                if "deadlock" in error_str or "40p01" in error_str:
                    last_exception = e
                    logger.warning(f"Deadlock detected, retrying (attempt {attempt + 1}/{max_retries})")
                    time.sleep(delay * (attempt + 1))  # Linearly increasing backoff
                else:
                    raise
        raise last_exception

    return wrapper
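

# --- Editor's illustrative sketch (not part of the original file) ---
# Combining the helpers above: a deadlock-prone write wrapped in an explicit
# transaction and retried. The Project rename itself is hypothetical;
# Project is imported from .models, as elsewhere in this codebase.
def _example_rename_project(db: Session, project_id, new_name: str) -> None:
    from .models import Project

    def _do_rename():
        with transaction(db):
            project = db.query(Project).filter(Project.id == project_id).one()
            project.name = new_name

    retry_on_deadlock(_do_rename)()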
723
backend/app/dependencies.py
Normal file
@@ -0,0 +1,723 @@
"""
Dependency management module for artifact dependencies.

Handles:
- Parsing orchard.ensure files
- Storing dependencies in the database
- Querying dependencies and reverse dependencies
- Dependency resolution with topological sorting
- Circular dependency detection
- Conflict detection
"""

import yaml
from typing import List, Dict, Any, Optional, Set, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import and_

from .models import (
    Project,
    Package,
    Artifact,
    Tag,
    ArtifactDependency,
    PackageVersion,
)
from .schemas import (
    EnsureFileContent,
    EnsureFileDependency,
    DependencyResponse,
    ArtifactDependenciesResponse,
    DependentInfo,
    ReverseDependenciesResponse,
    ResolvedArtifact,
    DependencyResolutionResponse,
    DependencyConflict,
    PaginationMeta,
)


class DependencyError(Exception):
    """Base exception for dependency errors."""
    pass


class CircularDependencyError(DependencyError):
    """Raised when a circular dependency is detected."""
    def __init__(self, cycle: List[str]):
        self.cycle = cycle
        super().__init__(f"Circular dependency detected: {' -> '.join(cycle)}")


class DependencyConflictError(DependencyError):
    """Raised when conflicting dependency versions are detected."""
    def __init__(self, conflicts: List[DependencyConflict]):
        self.conflicts = conflicts
        super().__init__(f"Dependency conflicts detected: {len(conflicts)} conflict(s)")


class DependencyNotFoundError(DependencyError):
    """Raised when a dependency cannot be resolved."""
    def __init__(self, project: str, package: str, constraint: str):
        self.project = project
        self.package = package
        self.constraint = constraint
        super().__init__(f"Dependency not found: {project}/{package}@{constraint}")


class InvalidEnsureFileError(DependencyError):
    """Raised when the ensure file is invalid."""
    pass


class DependencyDepthExceededError(DependencyError):
    """Raised when dependency resolution exceeds max depth."""
    def __init__(self, max_depth: int):
        self.max_depth = max_depth
        super().__init__(f"Dependency resolution exceeded maximum depth of {max_depth}")


# Safety limits to prevent DoS attacks
MAX_DEPENDENCY_DEPTH = 50  # Maximum levels of nested dependencies
MAX_DEPENDENCIES_PER_ARTIFACT = 200  # Maximum direct dependencies per artifact


def parse_ensure_file(content: bytes) -> EnsureFileContent:
    """
    Parse an orchard.ensure file.

    Args:
        content: Raw bytes of the ensure file

    Returns:
        Parsed EnsureFileContent

    Raises:
        InvalidEnsureFileError: If the file is invalid YAML or has wrong structure
    """
    try:
        data = yaml.safe_load(content.decode('utf-8'))
    except yaml.YAMLError as e:
        raise InvalidEnsureFileError(f"Invalid YAML: {e}")
    except UnicodeDecodeError as e:
        raise InvalidEnsureFileError(f"Invalid encoding: {e}")

    if data is None:
        return EnsureFileContent(dependencies=[])

    if not isinstance(data, dict):
        raise InvalidEnsureFileError("Ensure file must be a YAML dictionary")

    dependencies = []
    deps_data = data.get('dependencies', [])

    if not isinstance(deps_data, list):
        raise InvalidEnsureFileError("'dependencies' must be a list")

    # Safety limit: prevent DoS through excessive dependencies
    if len(deps_data) > MAX_DEPENDENCIES_PER_ARTIFACT:
        raise InvalidEnsureFileError(
            f"Too many dependencies: {len(deps_data)} exceeds maximum of {MAX_DEPENDENCIES_PER_ARTIFACT}"
        )

    for i, dep in enumerate(deps_data):
        if not isinstance(dep, dict):
            raise InvalidEnsureFileError(f"Dependency {i} must be a dictionary")

        project = dep.get('project')
        package = dep.get('package')
        version = dep.get('version')
        tag = dep.get('tag')

        if not project:
            raise InvalidEnsureFileError(f"Dependency {i} missing 'project'")
        if not package:
            raise InvalidEnsureFileError(f"Dependency {i} missing 'package'")
        if not version and not tag:
            raise InvalidEnsureFileError(
                f"Dependency {i} must have either 'version' or 'tag'"
            )
        if version and tag:
            raise InvalidEnsureFileError(
                f"Dependency {i} cannot have both 'version' and 'tag'"
            )

        dependencies.append(EnsureFileDependency(
            project=project,
            package=package,
            version=version,
            tag=tag,
        ))

    return EnsureFileContent(dependencies=dependencies)
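

# --- Editor's illustrative sketch (not part of the original file) ---
# Round-tripping a minimal orchard.ensure document through the parser above.
# The project/package names are hypothetical.
_EXAMPLE_ENSURE = b"""
dependencies:
  - project: shared
    package: libcore
    version: "1.4.0"
  - project: shared
    package: cli-tools
    tag: stable
"""
# parse_ensure_file(_EXAMPLE_ENSURE) -> EnsureFileContent with two entries,
# one pinned by version and one by tag; giving both in a single entry raises
# InvalidEnsureFileError.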
|
||||


def validate_dependencies(
    db: Session,
    dependencies: List[EnsureFileDependency],
) -> List[str]:
    """
    Validate that all dependency projects exist.

    Args:
        db: Database session
        dependencies: List of dependencies to validate

    Returns:
        List of error messages (empty if all valid)
    """
    errors = []

    for dep in dependencies:
        project = db.query(Project).filter(Project.name == dep.project).first()
        if not project:
            errors.append(f"Project '{dep.project}' not found")

    return errors


def store_dependencies(
    db: Session,
    artifact_id: str,
    dependencies: List[EnsureFileDependency],
) -> List[ArtifactDependency]:
    """
    Store dependencies for an artifact.

    Args:
        db: Database session
        artifact_id: The artifact ID that has these dependencies
        dependencies: List of dependencies to store

    Returns:
        List of created ArtifactDependency objects
    """
    created = []

    for dep in dependencies:
        artifact_dep = ArtifactDependency(
            artifact_id=artifact_id,
            dependency_project=dep.project,
            dependency_package=dep.package,
            version_constraint=dep.version,
            tag_constraint=dep.tag,
        )
        db.add(artifact_dep)
        created.append(artifact_dep)

    return created


def get_artifact_dependencies(
    db: Session,
    artifact_id: str,
) -> List[DependencyResponse]:
    """
    Get all dependencies for an artifact.

    Args:
        db: Database session
        artifact_id: The artifact ID

    Returns:
        List of DependencyResponse objects
    """
    deps = db.query(ArtifactDependency).filter(
        ArtifactDependency.artifact_id == artifact_id
    ).all()

    return [DependencyResponse.from_orm_model(dep) for dep in deps]


def get_reverse_dependencies(
    db: Session,
    project_name: str,
    package_name: str,
    page: int = 1,
    limit: int = 50,
) -> ReverseDependenciesResponse:
    """
    Get all artifacts that depend on a given package.

    Args:
        db: Database session
        project_name: Target project name
        package_name: Target package name
        page: Page number (1-indexed)
        limit: Results per page

    Returns:
        ReverseDependenciesResponse with dependents and pagination
    """
    # Query dependencies that point to this project/package
    query = db.query(ArtifactDependency).filter(
        ArtifactDependency.dependency_project == project_name,
        ArtifactDependency.dependency_package == package_name,
    )

    total = query.count()
    offset = (page - 1) * limit
    deps = query.offset(offset).limit(limit).all()

    dependents = []
    for dep in deps:
        # Get artifact info to find the project/package/version
        artifact = db.query(Artifact).filter(Artifact.id == dep.artifact_id).first()
        if not artifact:
            continue

        # Find which package this artifact belongs to via tags or versions
        tag = db.query(Tag).filter(Tag.artifact_id == dep.artifact_id).first()
        if tag:
            pkg = db.query(Package).filter(Package.id == tag.package_id).first()
            if pkg:
                proj = db.query(Project).filter(Project.id == pkg.project_id).first()
                if proj:
                    # Get version if available
                    version_record = db.query(PackageVersion).filter(
                        PackageVersion.artifact_id == dep.artifact_id,
                        PackageVersion.package_id == pkg.id,
                    ).first()

                    dependents.append(DependentInfo(
                        artifact_id=dep.artifact_id,
                        project=proj.name,
                        package=pkg.name,
                        version=version_record.version if version_record else None,
                        constraint_type="version" if dep.version_constraint else "tag",
                        constraint_value=dep.version_constraint or dep.tag_constraint,
                    ))

    total_pages = (total + limit - 1) // limit

    return ReverseDependenciesResponse(
        project=project_name,
        package=package_name,
        dependents=dependents,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
            has_more=page < total_pages,
        ),
    )
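

# Illustrative use (sketch): page through everything that depends on a package.
#
#   resp = get_reverse_dependencies(db, "platform", "base-runtime", page=1, limit=50)
#   for d in resp.dependents:
#       print(d.project, d.package, d.constraint_type, d.constraint_value)
#   # resp.pagination.has_more tells the caller whether to fetch the next page.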


def _resolve_dependency_to_artifact(
    db: Session,
    project_name: str,
    package_name: str,
    version: Optional[str],
    tag: Optional[str],
) -> Optional[Tuple[str, str, int]]:
    """
    Resolve a dependency constraint to an artifact ID.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        version: Version constraint (exact)
        tag: Tag constraint

    Returns:
        Tuple of (artifact_id, resolved_version_or_tag, size) or None if not found
    """
    # Get project and package
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        return None

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        return None

    if version:
        # Look up by version
        pkg_version = db.query(PackageVersion).filter(
            PackageVersion.package_id == package.id,
            PackageVersion.version == version,
        ).first()
        if pkg_version:
            artifact = db.query(Artifact).filter(
                Artifact.id == pkg_version.artifact_id
            ).first()
            if artifact:
                return (artifact.id, version, artifact.size)

        # Also check if there's a tag with this exact name
        tag_record = db.query(Tag).filter(
            Tag.package_id == package.id,
            Tag.name == version,
        ).first()
        if tag_record:
            artifact = db.query(Artifact).filter(
                Artifact.id == tag_record.artifact_id
            ).first()
            if artifact:
                return (artifact.id, version, artifact.size)

    if tag:
        # Look up by tag
        tag_record = db.query(Tag).filter(
            Tag.package_id == package.id,
            Tag.name == tag,
        ).first()
        if tag_record:
            artifact = db.query(Artifact).filter(
                Artifact.id == tag_record.artifact_id
            ).first()
            if artifact:
                return (artifact.id, tag, artifact.size)

    return None


def _detect_package_cycle(
    db: Session,
    project_name: str,
    package_name: str,
    target_project: str,
    target_package: str,
    visiting: Set[str],
    visited: Set[str],
    path: List[str],
) -> Optional[List[str]]:
    """
    Detect cycles at the package level using DFS.

    Args:
        db: Database session
        project_name: Current project being visited
        package_name: Current package being visited
        target_project: The project we're checking for cycles back to
        target_package: The package we're checking for cycles back to
        visiting: Set of package keys currently in the recursion stack
        visited: Set of fully processed package keys
        path: Current path for cycle reporting

    Returns:
        Cycle path if detected, None otherwise
    """
    pkg_key = f"{project_name}/{package_name}"

    # Check if we've reached the target package (cycle detected)
    if project_name == target_project and package_name == target_package:
        return path + [pkg_key]

    if pkg_key in visiting:
        # Unexpected internal cycle
        return None

    if pkg_key in visited:
        return None

    visiting.add(pkg_key)
    path.append(pkg_key)

    # Get the package and find any artifacts with dependencies
    project = db.query(Project).filter(Project.name == project_name).first()
    if project:
        package = db.query(Package).filter(
            Package.project_id == project.id,
            Package.name == package_name,
        ).first()
        if package:
            # Find all artifacts in this package via tags
            tags = db.query(Tag).filter(Tag.package_id == package.id).all()
            artifact_ids = {t.artifact_id for t in tags}

            # Get dependencies from all artifacts in this package
            for artifact_id in artifact_ids:
                deps = db.query(ArtifactDependency).filter(
                    ArtifactDependency.artifact_id == artifact_id
                ).all()

                for dep in deps:
                    cycle = _detect_package_cycle(
                        db,
                        dep.dependency_project,
                        dep.dependency_package,
                        target_project,
                        target_package,
                        visiting,
                        visited,
                        path,
                    )
                    if cycle:
                        return cycle

    path.pop()
    visiting.remove(pkg_key)
    visited.add(pkg_key)

    return None


def check_circular_dependencies(
    db: Session,
    artifact_id: str,
    new_dependencies: List[EnsureFileDependency],
    project_name: Optional[str] = None,
    package_name: Optional[str] = None,
) -> Optional[List[str]]:
    """
    Check if adding the new dependencies would create a circular dependency.

    Args:
        db: Database session
        artifact_id: The artifact that will have these dependencies
        new_dependencies: Dependencies to be added
        project_name: Project name (optional, will try to look up from tag if not provided)
        package_name: Package name (optional, will try to look up from tag if not provided)

    Returns:
        Cycle path if detected, None otherwise
    """
    # First, get the package info for this artifact to build path labels
    if project_name and package_name:
        current_path = f"{project_name}/{package_name}"
    else:
        # Try to look up from tag
        artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
        if not artifact:
            return None

        # Find package for this artifact
        tag = db.query(Tag).filter(Tag.artifact_id == artifact_id).first()
        if not tag:
            return None

        package = db.query(Package).filter(Package.id == tag.package_id).first()
        if not package:
            return None

        project = db.query(Project).filter(Project.id == package.project_id).first()
        if not project:
            return None

        current_path = f"{project.name}/{package.name}"

    # Extract target project and package from current_path
    if "/" in current_path:
        target_project, target_package = current_path.split("/", 1)
    else:
        return None

    # For each new dependency, check if it would create a cycle back to our package
    for dep in new_dependencies:
        # Check if this dependency (transitively) depends on us at the package level
        visiting: Set[str] = set()
        visited: Set[str] = set()
        path: List[str] = [current_path]

        # Check from the dependency's package
        cycle = _detect_package_cycle(
            db,
            dep.project,
            dep.package,
            target_project,
            target_package,
            visiting,
            visited,
            path,
        )
        if cycle:
            return cycle

    return None
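

# Typical wiring at upload time (sketch; the surrounding names are assumptions):
#
#   deps = parse_ensure_file(raw_bytes).dependencies
#   cycle = check_circular_dependencies(db, artifact_id, deps, project_name, package_name)
#   if cycle:
#       raise CircularDependencyError(cycle)
#   store_dependencies(db, artifact_id, deps)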


def resolve_dependencies(
    db: Session,
    project_name: str,
    package_name: str,
    ref: str,
    base_url: str,
) -> DependencyResolutionResponse:
    """
    Resolve all dependencies for an artifact recursively.

    Args:
        db: Database session
        project_name: Project name
        package_name: Package name
        ref: Tag or version reference
        base_url: Base URL for download URLs

    Returns:
        DependencyResolutionResponse with all resolved artifacts

    Raises:
        DependencyNotFoundError: If a dependency cannot be resolved
        CircularDependencyError: If circular dependencies are detected
        DependencyConflictError: If conflicting versions are required
    """
    # Resolve the initial artifact
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise DependencyNotFoundError(project_name, package_name, ref)

    package = db.query(Package).filter(
        Package.project_id == project.id,
        Package.name == package_name,
    ).first()
    if not package:
        raise DependencyNotFoundError(project_name, package_name, ref)

    # Try to find artifact by tag or version
    resolved = _resolve_dependency_to_artifact(
        db, project_name, package_name, ref, ref
    )
    if not resolved:
        raise DependencyNotFoundError(project_name, package_name, ref)

    root_artifact_id, root_version, root_size = resolved

    # Track resolved artifacts and their versions
    resolved_artifacts: Dict[str, ResolvedArtifact] = {}
    # Track version requirements for conflict detection
    version_requirements: Dict[str, List[Dict[str, Any]]] = {}  # pkg_key -> [{"version": ..., "required_by": ...}]
    # Track visiting/visited for cycle detection
    visiting: Set[str] = set()
    visited: Set[str] = set()
    # Resolution order (topological)
    resolution_order: List[str] = []

    def _resolve_recursive(
        artifact_id: str,
        proj_name: str,
        pkg_name: str,
        version_or_tag: str,
        size: int,
        required_by: Optional[str],
        depth: int = 0,
    ):
        """Recursively resolve dependencies with cycle/conflict detection."""
        # Safety limit: prevent DoS through deeply nested dependencies
        if depth > MAX_DEPENDENCY_DEPTH:
            raise DependencyDepthExceededError(MAX_DEPENDENCY_DEPTH)

        pkg_key = f"{proj_name}/{pkg_name}"

        # Cycle detection (at artifact level)
        if artifact_id in visiting:
            # Report a minimal cycle marker (the full path is not reconstructed here)
            raise CircularDependencyError([pkg_key, pkg_key])

        # Conflict detection - check if we've seen this package before with a different version
        if pkg_key in version_requirements:
            existing_versions = {r["version"] for r in version_requirements[pkg_key]}
            if version_or_tag not in existing_versions:
                # Conflict detected - same package, different version
                requirements = version_requirements[pkg_key] + [
                    {"version": version_or_tag, "required_by": required_by}
                ]
                raise DependencyConflictError([
                    DependencyConflict(
                        project=proj_name,
                        package=pkg_name,
                        requirements=[
                            {
                                "version": r["version"],
                                "required_by": [{"path": r["required_by"]}] if r["required_by"] else []
                            }
                            for r in requirements
                        ],
                    )
                ])

        # Same version already resolved - skip
        if artifact_id in visited:
            return

        visiting.add(artifact_id)

        # Track version requirement
        if pkg_key not in version_requirements:
            version_requirements[pkg_key] = []
        version_requirements[pkg_key].append({
            "version": version_or_tag,
            "required_by": required_by,
        })

        # Get dependencies
        deps = db.query(ArtifactDependency).filter(
            ArtifactDependency.artifact_id == artifact_id
        ).all()

        # Resolve each dependency first (depth-first)
        for dep in deps:
            resolved_dep = _resolve_dependency_to_artifact(
                db,
                dep.dependency_project,
                dep.dependency_package,
                dep.version_constraint,
                dep.tag_constraint,
            )

            if not resolved_dep:
                constraint = dep.version_constraint or dep.tag_constraint
                raise DependencyNotFoundError(
                    dep.dependency_project,
                    dep.dependency_package,
                    constraint,
                )

            dep_artifact_id, dep_version, dep_size = resolved_dep
            _resolve_recursive(
                dep_artifact_id,
                dep.dependency_project,
                dep.dependency_package,
                dep_version,
                dep_size,
                pkg_key,
                depth + 1,
            )

        visiting.remove(artifact_id)
        visited.add(artifact_id)

        # Add to resolution order (dependencies before dependents)
        resolution_order.append(artifact_id)

        # Store resolved artifact info
        resolved_artifacts[artifact_id] = ResolvedArtifact(
            artifact_id=artifact_id,
            project=proj_name,
            package=pkg_name,
            version=version_or_tag,
            size=size,
            download_url=f"{base_url}/api/v1/project/{proj_name}/{pkg_name}/+/{version_or_tag}",
        )

    # Start resolution from root
    _resolve_recursive(
        root_artifact_id,
        project_name,
        package_name,
        root_version,
        root_size,
        None,
    )

    # Build response in topological order
    resolved_list = [resolved_artifacts[aid] for aid in resolution_order]
    total_size = sum(r.size for r in resolved_list)

    return DependencyResolutionResponse(
        requested={
            "project": project_name,
            "package": package_name,
            "ref": ref,
        },
        resolved=resolved_list,
        total_size=total_size,
        artifact_count=len(resolved_list),
    )
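

# Example resolution call (sketch; names and URL are hypothetical):
#
#   resp = resolve_dependencies(db, "platform", "base-runtime", "stable",
#                               base_url="https://orchard.example.com")
#   for a in resp.resolved:  # topological order: dependencies come first
#       print(a.project, a.package, a.version, a.download_url)
#   print(f"{resp.artifact_count} artifacts, {resp.total_size} bytes total")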
160  backend/app/encryption.py  Normal file
@@ -0,0 +1,160 @@
"""
Encryption utilities for sensitive data storage.

Uses Fernet symmetric encryption for credentials like upstream passwords.
The encryption key is sourced from the ORCHARD_CACHE_ENCRYPTION_KEY environment variable.
If not set, a random key is generated on startup (with a warning).
"""

import base64
import logging
import os
import secrets
from functools import lru_cache
from typing import Optional

from cryptography.fernet import Fernet, InvalidToken

logger = logging.getLogger(__name__)

# Module-level storage for auto-generated key (only used if env var not set)
_generated_key: Optional[bytes] = None


def _get_key_from_env() -> Optional[bytes]:
    """Get encryption key from environment variable."""
    key_str = os.environ.get("ORCHARD_CACHE_ENCRYPTION_KEY", "")
    if not key_str:
        return None

    # Support both raw base64 and url-safe base64 formats
    try:
        # Try to decode as-is (Fernet keys are url-safe base64)
        key_bytes = key_str.encode("utf-8")
        # Validate it's a valid Fernet key by trying to create a Fernet instance
        Fernet(key_bytes)
        return key_bytes
    except Exception:
        pass

    # Try base64 decoding if it's a raw 32-byte key encoded as base64
    try:
        decoded = base64.urlsafe_b64decode(key_str)
        if len(decoded) == 32:
            # Re-encode as url-safe base64 for Fernet
            key_bytes = base64.urlsafe_b64encode(decoded)
            Fernet(key_bytes)
            return key_bytes
    except Exception:
        pass

    logger.error(
        "ORCHARD_CACHE_ENCRYPTION_KEY is set but invalid. "
        "Must be a valid Fernet key (32 bytes, url-safe base64 encoded). "
        "Generate one with: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
    )
    return None


def get_encryption_key() -> bytes:
    """
    Get the Fernet encryption key.

    Returns the key from ORCHARD_CACHE_ENCRYPTION_KEY if set and valid,
    otherwise generates a random key (with a warning logged).

    The generated key is cached for the lifetime of the process.
    """
    global _generated_key

    # Try to get from environment
    env_key = _get_key_from_env()
    if env_key:
        return env_key

    # Generate a new key if needed
    if _generated_key is None:
        _generated_key = Fernet.generate_key()
        logger.warning(
            "ORCHARD_CACHE_ENCRYPTION_KEY not set - using auto-generated key. "
            "Encrypted credentials will be lost on restart! "
            "Set ORCHARD_CACHE_ENCRYPTION_KEY for persistent encryption. "
            "Generate a key with: python -c \"from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())\""
        )

    return _generated_key


@lru_cache(maxsize=1)
def _get_fernet() -> Fernet:
    """Get a cached Fernet instance."""
    return Fernet(get_encryption_key())


def encrypt_value(plaintext: str) -> bytes:
    """
    Encrypt a string value using Fernet.

    Args:
        plaintext: The string to encrypt

    Returns:
        Encrypted bytes (includes Fernet token with timestamp)
    """
    if not plaintext:
        raise ValueError("Cannot encrypt empty value")

    fernet = _get_fernet()
    return fernet.encrypt(plaintext.encode("utf-8"))


def decrypt_value(ciphertext: bytes) -> str:
    """
    Decrypt a Fernet-encrypted value.

    Args:
        ciphertext: The encrypted bytes

    Returns:
        Decrypted string

    Raises:
        InvalidToken: If decryption fails (wrong key or corrupted data)
    """
    if not ciphertext:
        raise ValueError("Cannot decrypt empty value")

    fernet = _get_fernet()
    return fernet.decrypt(ciphertext).decode("utf-8")


def can_decrypt(ciphertext: bytes) -> bool:
    """
    Check if a value can be decrypted with the current key.

    Useful for checking if credentials are still valid after key rotation.

    Args:
        ciphertext: The encrypted bytes

    Returns:
        True if decryption succeeds, False otherwise
    """
    if not ciphertext:
        return False

    try:
        decrypt_value(ciphertext)
        return True
    except (InvalidToken, ValueError):
        return False


def generate_key() -> str:
    """
    Generate a new Fernet encryption key.

    Returns:
        A valid Fernet key as a string (url-safe base64 encoded)
    """
    return Fernet.generate_key().decode("utf-8")
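

# Round-trip sketch: works with any valid ORCHARD_CACHE_ENCRYPTION_KEY, or the
# auto-generated fallback key within a single process lifetime.
#
#   token = encrypt_value("s3cret-upstream-password")
#   assert decrypt_value(token) == "s3cret-upstream-password"
#   assert can_decrypt(token) is True
#   # After a key rotation (new process, new key), can_decrypt(token) returns
#   # False instead of raising InvalidToken.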
254  backend/app/logging_config.py  Normal file
@@ -0,0 +1,254 @@
"""
Structured logging configuration for Orchard.

This module provides:
- Structured JSON logging for production environments
- Request tracing via X-Request-ID header
- Verification failure logging with context
- Configurable log levels via environment

Usage:
    from app.logging_config import setup_logging, get_request_id

    setup_logging()  # Call once at app startup
    request_id = get_request_id()  # Get current request's ID
"""

import logging
import json
import sys
import uuid
from datetime import datetime, timezone
from typing import Optional, Any, Dict
from contextvars import ContextVar

from .config import get_settings

# Context variable for request ID (thread-safe)
_request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)


def get_request_id() -> Optional[str]:
    """Get the current request's ID from context."""
    return _request_id_var.get()


def set_request_id(request_id: Optional[str] = None) -> str:
    """
    Set the request ID for the current context.

    If no ID provided, generates a new UUID.
    Returns the request ID that was set.
    """
    if request_id is None:
        request_id = str(uuid.uuid4())
    _request_id_var.set(request_id)
    return request_id


def clear_request_id():
    """Clear the request ID from context."""
    _request_id_var.set(None)


class JSONFormatter(logging.Formatter):
    """
    JSON log formatter for structured logging.

    Output format:
        {
            "timestamp": "2025-01-01T00:00:00.000Z",
            "level": "INFO",
            "logger": "app.routes",
            "message": "Request completed",
            "request_id": "abc-123",
            "extra": {...}
        }
    """

    def format(self, record: logging.LogRecord) -> str:
        log_entry: Dict[str, Any] = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Add request ID if available
        request_id = get_request_id()
        if request_id:
            log_entry["request_id"] = request_id

        # Add exception info if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # Add extra fields from record
        extra_fields: Dict[str, Any] = {}
        for key, value in record.__dict__.items():
            if key not in (
                "name",
                "msg",
                "args",
                "created",
                "filename",
                "funcName",
                "levelname",
                "levelno",
                "lineno",
                "module",
                "msecs",
                "pathname",
                "process",
                "processName",
                "relativeCreated",
                "stack_info",
                "exc_info",
                "exc_text",
                "thread",
                "threadName",
                "message",
                "asctime",
            ):
                try:
                    json.dumps(value)  # Ensure serializable
                    extra_fields[key] = value
                except (TypeError, ValueError):
                    extra_fields[key] = str(value)

        if extra_fields:
            log_entry["extra"] = extra_fields

        return json.dumps(log_entry)


class StandardFormatter(logging.Formatter):
    """
    Standard log formatter for development.

    Output format:
        [2025-01-01 00:00:00] INFO [app.routes] [req-abc123] Request completed
    """

    def format(self, record: logging.LogRecord) -> str:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        request_id = get_request_id()
        req_str = f" [req-{request_id[:8]}]" if request_id else ""

        base_msg = f"[{timestamp}] {record.levelname:5} [{record.name}]{req_str} {record.getMessage()}"

        if record.exc_info:
            base_msg += "\n" + self.formatException(record.exc_info)

        return base_msg


def setup_logging(log_level: Optional[str] = None, json_format: Optional[bool] = None):
    """
    Configure logging for the application.

    Args:
        log_level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            Defaults to ORCHARD_LOG_LEVEL env var or INFO.
        json_format: Use JSON format. Defaults to True in production.
    """
    settings = get_settings()

    # Determine log level
    if log_level is None:
        log_level = getattr(settings, "log_level", "INFO")
    effective_level = log_level if log_level else "INFO"
    level = getattr(logging, effective_level.upper(), logging.INFO)

    # Determine format
    if json_format is None:
        json_format = settings.is_production

    # Create handler
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(level)

    # Set formatter
    if json_format:
        handler.setFormatter(JSONFormatter())
    else:
        handler.setFormatter(StandardFormatter())

    # Configure root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Remove existing handlers
    root_logger.handlers.clear()
    root_logger.addHandler(handler)

    # Configure specific loggers
    for logger_name in ["app", "uvicorn", "uvicorn.access", "uvicorn.error"]:
        logger = logging.getLogger(logger_name)
        logger.setLevel(level)
        logger.handlers.clear()
        logger.addHandler(handler)
        logger.propagate = False

    # Quiet down noisy loggers
    logging.getLogger("botocore").setLevel(logging.WARNING)
    logging.getLogger("boto3").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)


def log_verification_failure(
    logger: logging.Logger,
    expected_hash: str,
    actual_hash: str,
    artifact_id: Optional[str] = None,
    s3_key: Optional[str] = None,
    project: Optional[str] = None,
    package: Optional[str] = None,
    size: Optional[int] = None,
    user_id: Optional[str] = None,
    source_ip: Optional[str] = None,
    verification_mode: Optional[str] = None,
):
    """
    Log a verification failure with full context.

    This creates a structured log entry with all relevant details
    for debugging and alerting.
    """
    logger.error(
        "Checksum verification failed",
        extra={
            "event": "verification_failure",
            "expected_hash": expected_hash,
            "actual_hash": actual_hash,
            "artifact_id": artifact_id,
            "s3_key": s3_key,
            "project": project,
            "package": package,
            "size": size,
            "user_id": user_id,
            "source_ip": source_ip,
            "verification_mode": verification_mode,
            "hash_match": expected_hash == actual_hash,
        },
    )


def log_verification_success(
    logger: logging.Logger,
    artifact_id: str,
    size: Optional[int] = None,
    verification_mode: Optional[str] = None,
    duration_ms: Optional[float] = None,
):
    """Log a successful verification."""
    logger.info(
        f"Verification passed for artifact {artifact_id[:16]}...",
        extra={
            "event": "verification_success",
            "artifact_id": artifact_id,
            "size": size,
            "verification_mode": verification_mode,
            "duration_ms": duration_ms,
        },
    )
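

# Startup sketch: JSON logs in production, human-readable logs in development.
#
#   setup_logging()                # level from settings/ORCHARD_LOG_LEVEL, default INFO
#   set_request_id()               # e.g. per request, in HTTP middleware
#   logging.getLogger("app").info("upload complete", extra={"artifact_id": "abc123"})
#   clear_request_id()             # when the request finishes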
@@ -1,20 +1,54 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from contextlib import asynccontextmanager
import logging
import os

from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded

from .config import get_settings
-from .database import init_db
+from .database import init_db, SessionLocal
from .routes import router
from .pypi_proxy import router as pypi_router
from .seed import seed_database
from .auth import create_default_admin
from .rate_limit import limiter

settings = get_settings()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: initialize database
    init_db()

    # Create default admin user if no users exist
    db = SessionLocal()
    try:
        admin = create_default_admin(db)
        if admin:
            logger.warning(
                "Default admin user created with username 'admin' and password 'changeme123'. "
                "CHANGE THIS PASSWORD IMMEDIATELY!"
            )
    finally:
        db.close()

    # Seed test data in development mode
    if settings.is_development:
        logger.info(f"Running in {settings.env} mode - checking for seed data")
        db = SessionLocal()
        try:
            seed_database(db)
        finally:
            db.close()
    else:
        logger.info(f"Running in {settings.env} mode - skipping seed data")

    yield
    # Shutdown: cleanup if needed

@@ -26,13 +60,22 @@ app = FastAPI(
    lifespan=lifespan,
)

# Set up rate limiting
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Include API routes
app.include_router(router)
app.include_router(pypi_router)

# Serve static files (React build) if the directory exists
static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "frontend", "dist")
if os.path.exists(static_dir):
-    app.mount("/assets", StaticFiles(directory=os.path.join(static_dir, "assets")), name="assets")
+    app.mount(
+        "/assets",
+        StaticFiles(directory=os.path.join(static_dir, "assets")),
+        name="assets",
+    )

    @app.get("/")
    async def serve_spa():
@@ -41,14 +84,22 @@ if os.path.exists(static_dir):
    # Catch-all for SPA routing (must be last)
    @app.get("/{full_path:path}")
    async def serve_spa_routes(full_path: str):
-        # Don't catch API routes
-        if full_path.startswith("api/") or full_path.startswith("health") or full_path.startswith("project/"):
+        # Don't catch API routes or health endpoint
+        if full_path.startswith("api/") or full_path.startswith("health"):
            from fastapi import HTTPException

            raise HTTPException(status_code=404, detail="Not found")

        # Check if requesting a static file from dist root (favicon, etc.)
        static_file_path = os.path.join(static_dir, full_path)
        if os.path.isfile(static_file_path) and not full_path.startswith("."):
            return FileResponse(static_file_path)

        # Serve SPA for all other routes (including /project/*)
        index_path = os.path.join(static_dir, "index.html")
        if os.path.exists(index_path):
            return FileResponse(index_path)

        from fastapi import HTTPException

        raise HTTPException(status_code=404, detail="Not found")
355  backend/app/metadata.py  Normal file
@@ -0,0 +1,355 @@
"""
Format-specific metadata extraction for uploaded artifacts.
Supports extracting version info and other metadata from package formats.
"""
import struct
import gzip
import tarfile
import io
import re
import logging
from typing import Dict, Any, Optional, BinaryIO

logger = logging.getLogger(__name__)


def extract_metadata(file: BinaryIO, filename: str, content_type: Optional[str] = None) -> Dict[str, Any]:
    """
    Extract format-specific metadata from an uploaded file.
    Returns a dict with extracted metadata fields.
    """
    metadata = {}

    # Determine format from filename extension
    lower_filename = filename.lower() if filename else ""

    try:
        if lower_filename.endswith(".deb"):
            metadata = extract_deb_metadata(file)
        elif lower_filename.endswith(".rpm"):
            metadata = extract_rpm_metadata(file)
        elif lower_filename.endswith(".tar.gz") or lower_filename.endswith(".tgz"):
            metadata = extract_tarball_metadata(file, filename)
        elif lower_filename.endswith(".whl"):
            metadata = extract_wheel_metadata(file)
        elif lower_filename.endswith(".jar"):
            metadata = extract_jar_metadata(file)
        elif lower_filename.endswith(".zip"):
            metadata = extract_zip_metadata(file)
    except Exception as e:
        logger.warning(f"Failed to extract metadata from {filename}: {e}")

    # Always seek back to start after reading
    try:
        file.seek(0)
    except Exception:
        pass

    return metadata
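

# Dispatch sketch: the extractor is chosen purely by filename extension, and the
# file position is rewound to 0 afterwards so later hashing/streaming can proceed.
#
#   with open("package-1.0.0.tar.gz", "rb") as f:
#       meta = extract_metadata(f, "package-1.0.0.tar.gz")
#   # meta -> {"format": "tarball", "package_name": "package", "version": "1.0.0"}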


def extract_deb_metadata(file: BinaryIO) -> Dict[str, Any]:
    """
    Extract metadata from a Debian .deb package.
    Deb files are ar archives containing control.tar.gz with package info.
    """
    metadata = {}

    # Read ar archive header
    ar_magic = file.read(8)
    if ar_magic != b"!<arch>\n":
        return metadata

    # Parse ar archive to find control.tar.gz or control.tar.xz
    while True:
        # Read ar entry header (60 bytes)
        header = file.read(60)
        if len(header) < 60:
            break

        name = header[0:16].decode("ascii").strip()
        size_str = header[48:58].decode("ascii").strip()

        try:
            size = int(size_str)
        except ValueError:
            break

        if name.startswith("control.tar"):
            # Read control archive
            control_data = file.read(size)

            # Decompress and read control file
            try:
                if name.endswith(".gz"):
                    control_data = gzip.decompress(control_data)

                # Parse tar archive
                with tarfile.open(fileobj=io.BytesIO(control_data), mode="r:*") as tar:
                    for member in tar.getmembers():
                        if member.name in ("./control", "control"):
                            f = tar.extractfile(member)
                            if f:
                                control_content = f.read().decode("utf-8", errors="replace")
                                metadata = parse_deb_control(control_content)
                            break
            except Exception as e:
                logger.debug(f"Failed to parse deb control: {e}")

            break
        else:
            # Skip to next entry (align to 2 bytes)
            file.seek(size + (size % 2), 1)

    return metadata


def parse_deb_control(content: str) -> Dict[str, Any]:
    """Parse Debian control file format"""
    metadata = {}
    current_key = None
    current_value = []

    for line in content.split("\n"):
        if line.startswith(" ") or line.startswith("\t"):
            # Continuation line
            if current_key:
                current_value.append(line.strip())
        elif ":" in line:
            # Save previous field
            if current_key:
                metadata[current_key] = "\n".join(current_value)

            # Parse new field
            key, value = line.split(":", 1)
            current_key = key.strip().lower()
            current_value = [value.strip()]
        else:
            # Empty line or malformed
            if current_key:
                metadata[current_key] = "\n".join(current_value)
            current_key = None
            current_value = []

    # Don't forget the last field
    if current_key:
        metadata[current_key] = "\n".join(current_value)

    # Extract key fields
    result = {}
    if "package" in metadata:
        result["package_name"] = metadata["package"]
    if "version" in metadata:
        result["version"] = metadata["version"]
    if "architecture" in metadata:
        result["architecture"] = metadata["architecture"]
    if "maintainer" in metadata:
        result["maintainer"] = metadata["maintainer"]
    if "description" in metadata:
        result["description"] = metadata["description"].split("\n")[0]  # First line only
    if "depends" in metadata:
        result["depends"] = metadata["depends"]

    result["format"] = "deb"
    return result
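

# Sketch of what parse_deb_control() yields for a typical control file
# (field values are made up):
#
#   control = (
#       "Package: orchard-cli\n"
#       "Version: 1.4.2\n"
#       "Architecture: amd64\n"
#       "Description: Orchard command line client\n"
#       " Extended description continues here.\n"
#   )
#   parse_deb_control(control)
#   # -> {"package_name": "orchard-cli", "version": "1.4.2",
#   #     "architecture": "amd64", "description": "Orchard command line client",
#   #     "format": "deb"}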


def extract_rpm_metadata(file: BinaryIO) -> Dict[str, Any]:
    """
    Extract metadata from an RPM package.
    RPM files have a lead, signature, and header with metadata.
    """
    metadata = {"format": "rpm"}

    # Read RPM lead (96 bytes)
    lead = file.read(96)
    if len(lead) < 96:
        return metadata

    # Check magic number
    if lead[0:4] != b"\xed\xab\xee\xdb":
        return metadata

    # Read name from lead (offset 10, max 66 bytes)
    name_bytes = lead[10:76]
    null_idx = name_bytes.find(b"\x00")
    if null_idx > 0:
        metadata["package_name"] = name_bytes[:null_idx].decode("ascii", errors="replace")

    # Skip signature header to get to the main header
    # This is complex - simplified version just extracts from lead
    try:
        # Skip to header
        while True:
            header_magic = file.read(8)
            if len(header_magic) < 8:
                break

            if header_magic[0:3] == b"\x8e\xad\xe8":
                # Found header magic
                # Read header index count and data size
                index_count = struct.unpack(">I", header_magic[4:8])[0]
                data_size_bytes = file.read(4)
                if len(data_size_bytes) < 4:
                    break
                data_size = struct.unpack(">I", data_size_bytes)[0]

                # Read header entries
                entries = []
                for _ in range(index_count):
                    entry = file.read(16)
                    if len(entry) < 16:
                        break
                    tag, type_, offset, count = struct.unpack(">IIII", entry)
                    entries.append((tag, type_, offset, count))

                # Read header data
                header_data = file.read(data_size)

                # Extract relevant tags
                # Tag 1000 = Name, Tag 1001 = Version, Tag 1002 = Release
                # Tag 1004 = Summary, Tag 1022 = Arch
                for tag, type_, offset, count in entries:
                    if type_ == 6:  # STRING type
                        end = header_data.find(b"\x00", offset)
                        if end > offset:
                            value = header_data[offset:end].decode("utf-8", errors="replace")
                            if tag == 1000:
                                metadata["package_name"] = value
                            elif tag == 1001:
                                metadata["version"] = value
                            elif tag == 1002:
                                metadata["release"] = value
                            elif tag == 1004:
                                metadata["description"] = value
                            elif tag == 1022:
                                metadata["architecture"] = value

                break
    except Exception as e:
        logger.debug(f"Failed to parse RPM header: {e}")

    return metadata


def extract_tarball_metadata(file: BinaryIO, filename: str) -> Dict[str, Any]:
    """Extract metadata from a tarball (name and version from filename)"""
    metadata = {"format": "tarball"}

    # Try to extract name and version from filename
    # Common patterns: package-1.0.0.tar.gz, package_1.0.0.tar.gz
    basename = filename
    for suffix in [".tar.gz", ".tgz", ".tar.bz2", ".tar.xz"]:
        if basename.lower().endswith(suffix):
            basename = basename[:-len(suffix)]
            break

    # Try to split name and version
    # Handle optional 'v' prefix on version (e.g., package-v1.0.0)
    patterns = [
        r"^(.+)-v?(\d+\.\d+(?:\.\d+)?(?:[-_]\w+)?)$",  # name-version or name-vversion
        r"^(.+)_v?(\d+\.\d+(?:\.\d+)?(?:[-_]\w+)?)$",  # name_version or name_vversion
    ]

    for pattern in patterns:
        match = re.match(pattern, basename)
        if match:
            metadata["package_name"] = match.group(1)
            metadata["version"] = match.group(2)
            break

    return metadata
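

# Filename parsing sketch: only the name is consulted, the stream is ignored.
#
#   extract_tarball_metadata(io.BytesIO(b""), "widget-v1.2.3.tar.gz")
#   # -> {"format": "tarball", "package_name": "widget", "version": "1.2.3"}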


def extract_wheel_metadata(file: BinaryIO) -> Dict[str, Any]:
    """Extract metadata from a Python wheel (.whl) file"""
    import zipfile

    metadata = {"format": "wheel"}

    try:
        with zipfile.ZipFile(file, "r") as zf:
            # Find METADATA file in .dist-info directory
            for name in zf.namelist():
                if name.endswith("/METADATA") and ".dist-info/" in name:
                    with zf.open(name) as f:
                        content = f.read().decode("utf-8", errors="replace")
                        # Parse email-style headers
                        for line in content.split("\n"):
                            if line.startswith("Name:"):
                                metadata["package_name"] = line[5:].strip()
                            elif line.startswith("Version:"):
                                metadata["version"] = line[8:].strip()
                            elif line.startswith("Summary:"):
                                metadata["description"] = line[8:].strip()
                            elif line.startswith("Author:"):
                                metadata["author"] = line[7:].strip()
                            elif line == "":
                                break  # End of headers
                    break
    except Exception as e:
        logger.debug(f"Failed to parse wheel: {e}")

    return metadata
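

# Self-check sketch: build a minimal wheel-like zip in memory and read it back.
#
#   import io, zipfile
#   buf = io.BytesIO()
#   with zipfile.ZipFile(buf, "w") as zf:
#       zf.writestr("demo-0.1.dist-info/METADATA",
#                   "Name: demo\nVersion: 0.1\nSummary: demo package\n")
#   buf.seek(0)
#   extract_wheel_metadata(buf)
#   # -> {"format": "wheel", "package_name": "demo", "version": "0.1",
#   #     "description": "demo package"}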


def extract_jar_metadata(file: BinaryIO) -> Dict[str, Any]:
    """Extract metadata from a Java JAR file"""
    import zipfile

    metadata = {"format": "jar"}

    try:
        with zipfile.ZipFile(file, "r") as zf:
            # Look for MANIFEST.MF
            if "META-INF/MANIFEST.MF" in zf.namelist():
                with zf.open("META-INF/MANIFEST.MF") as f:
                    content = f.read().decode("utf-8", errors="replace")
                    for line in content.split("\n"):
                        line = line.strip()
                        if line.startswith("Implementation-Title:"):
                            metadata["package_name"] = line[21:].strip()
                        elif line.startswith("Implementation-Version:"):
                            metadata["version"] = line[23:].strip()
                        elif line.startswith("Bundle-Name:"):
                            metadata["bundle_name"] = line[12:].strip()
                        elif line.startswith("Bundle-Version:"):
                            metadata["bundle_version"] = line[15:].strip()

            # Also look for pom.properties in Maven JARs
            for name in zf.namelist():
                if name.endswith("/pom.properties"):
                    with zf.open(name) as f:
                        content = f.read().decode("utf-8", errors="replace")
                        for line in content.split("\n"):
                            if line.startswith("artifactId="):
                                metadata["artifact_id"] = line[11:].strip()
                            elif line.startswith("groupId="):
                                metadata["group_id"] = line[8:].strip()
                            elif line.startswith("version="):
                                if "version" not in metadata:
                                    metadata["version"] = line[8:].strip()
                    break
    except Exception as e:
        logger.debug(f"Failed to parse JAR: {e}")

    return metadata


def extract_zip_metadata(file: BinaryIO) -> Dict[str, Any]:
    """Extract basic metadata from a ZIP file"""
    import zipfile

    metadata = {"format": "zip"}

    try:
        with zipfile.ZipFile(file, "r") as zf:
            metadata["file_count"] = len(zf.namelist())

            # Calculate total uncompressed size
            total_size = sum(info.file_size for info in zf.infolist())
            metadata["uncompressed_size"] = total_size
    except Exception as e:
        logger.debug(f"Failed to parse ZIP: {e}")

    return metadata
@@ -1,8 +1,18 @@
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from sqlalchemy import (
|
||||
Column, String, Text, Boolean, Integer, BigInteger,
|
||||
DateTime, ForeignKey, CheckConstraint, Index, JSON
|
||||
Column,
|
||||
String,
|
||||
Text,
|
||||
Boolean,
|
||||
Integer,
|
||||
BigInteger,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
CheckConstraint,
|
||||
Index,
|
||||
JSON,
|
||||
ARRAY,
|
||||
LargeBinary,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import relationship, declarative_base
|
||||
@@ -18,16 +28,27 @@ class Project(Base):
|
||||
name = Column(String(255), unique=True, nullable=False)
|
||||
description = Column(Text)
|
||||
is_public = Column(Boolean, default=True)
|
||||
is_system = Column(Boolean, default=False, nullable=False)
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
|
||||
)
|
||||
created_by = Column(String(255), nullable=False)
|
||||
team_id = Column(UUID(as_uuid=True), ForeignKey("teams.id", ondelete="SET NULL"))
|
||||
|
||||
packages = relationship("Package", back_populates="project", cascade="all, delete-orphan")
|
||||
permissions = relationship("AccessPermission", back_populates="project", cascade="all, delete-orphan")
|
||||
packages = relationship(
|
||||
"Package", back_populates="project", cascade="all, delete-orphan"
|
||||
)
|
||||
permissions = relationship(
|
||||
"AccessPermission", back_populates="project", cascade="all, delete-orphan"
|
||||
)
|
||||
team = relationship("Team", back_populates="projects")
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_projects_name", "name"),
|
||||
Index("idx_projects_created_by", "created_by"),
|
||||
Index("idx_projects_team_id", "team_id"),
|
||||
Index("idx_projects_is_system", "is_system"),
|
||||
)
|
||||
|
||||
|
||||
@@ -35,20 +56,48 @@ class Package(Base):
|
||||
__tablename__ = "packages"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
|
||||
project_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("projects.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
)
|
||||
name = Column(String(255), nullable=False)
|
||||
description = Column(Text)
|
||||
format = Column(String(50), default="generic", nullable=False)
|
||||
platform = Column(String(50), default="any", nullable=False)
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
|
||||
)
|
||||
|
||||
project = relationship("Project", back_populates="packages")
|
||||
tags = relationship("Tag", back_populates="package", cascade="all, delete-orphan")
|
||||
uploads = relationship("Upload", back_populates="package", cascade="all, delete-orphan")
|
||||
consumers = relationship("Consumer", back_populates="package", cascade="all, delete-orphan")
|
||||
uploads = relationship(
|
||||
"Upload", back_populates="package", cascade="all, delete-orphan"
|
||||
)
|
||||
consumers = relationship(
|
||||
"Consumer", back_populates="package", cascade="all, delete-orphan"
|
||||
)
|
||||
versions = relationship(
|
||||
"PackageVersion", back_populates="package", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_packages_project_id", "project_id"),
|
||||
Index("idx_packages_name", "name"),
|
||||
Index("idx_packages_format", "format"),
|
||||
Index("idx_packages_platform", "platform"),
|
||||
Index(
|
||||
"idx_packages_project_name", "project_id", "name", unique=True
|
||||
), # Composite unique index
|
||||
CheckConstraint(
|
||||
"format IN ('generic', 'npm', 'pypi', 'docker', 'deb', 'rpm', 'maven', 'nuget', 'helm')",
|
||||
name="check_package_format",
|
||||
),
|
||||
CheckConstraint(
|
||||
"platform IN ('any', 'linux', 'darwin', 'windows', 'linux-amd64', 'linux-arm64', 'darwin-amd64', 'darwin-arm64', 'windows-amd64')",
|
||||
name="check_package_platform",
|
||||
),
|
||||
{"extend_existing": True},
|
||||
)
|
||||
|
||||
@@ -60,6 +109,12 @@ class Artifact(Base):
|
||||
size = Column(BigInteger, nullable=False)
|
||||
content_type = Column(String(255))
|
||||
original_name = Column(String(1024))
|
||||
checksum_md5 = Column(String(32)) # MD5 hash for additional verification
|
||||
checksum_sha1 = Column(String(40)) # SHA1 hash for compatibility
|
||||
s3_etag = Column(String(64)) # S3 ETag for verification
|
||||
artifact_metadata = Column(
|
||||
"metadata", JSON, default=dict
|
||||
) # Format-specific metadata (column name is 'metadata')
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
created_by = Column(String(255), nullable=False)
|
||||
ref_count = Column(Integer, default=1)
|
||||
@@ -67,10 +122,32 @@ class Artifact(Base):
|
||||
|
||||
tags = relationship("Tag", back_populates="artifact")
|
||||
uploads = relationship("Upload", back_populates="artifact")
|
||||
versions = relationship("PackageVersion", back_populates="artifact")
|
||||
dependencies = relationship(
|
||||
"ArtifactDependency", back_populates="artifact", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
@property
|
||||
def sha256(self) -> str:
|
||||
"""Alias for id - the SHA256 hash of the artifact content"""
|
||||
return self.id
|
||||
|
||||
@property
|
||||
def format_metadata(self):
|
||||
"""Alias for artifact_metadata - backward compatibility"""
|
||||
return self.artifact_metadata
|
||||
|
||||
@format_metadata.setter
|
||||
def format_metadata(self, value):
|
||||
"""Alias setter for artifact_metadata - backward compatibility"""
|
||||
self.artifact_metadata = value
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_artifacts_created_at", "created_at"),
|
||||
Index("idx_artifacts_created_by", "created_by"),
|
||||
Index("idx_artifacts_ref_count", "ref_count"), # For cleanup queries
|
||||
CheckConstraint("ref_count >= 0", name="check_ref_count_non_negative"),
|
||||
CheckConstraint("size > 0", name="check_size_positive"),
|
||||
)
|
||||
|
||||
|
||||
@@ -78,19 +155,34 @@ class Tag(Base):
|
||||
__tablename__ = "tags"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False)
|
||||
package_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("packages.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
)
|
||||
name = Column(String(255), nullable=False)
|
||||
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
|
||||
)
|
||||
created_by = Column(String(255), nullable=False)
|
||||
|
||||
package = relationship("Package", back_populates="tags")
|
||||
artifact = relationship("Artifact", back_populates="tags")
|
||||
history = relationship("TagHistory", back_populates="tag", cascade="all, delete-orphan")
|
||||
history = relationship(
|
||||
"TagHistory", back_populates="tag", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_tags_package_id", "package_id"),
|
||||
Index("idx_tags_artifact_id", "artifact_id"),
|
||||
Index(
|
||||
"idx_tags_package_name", "package_id", "name", unique=True
|
||||
), # Composite unique index
|
||||
Index(
|
||||
"idx_tags_package_created_at", "package_id", "created_at"
|
||||
), # For recent tags queries
|
||||
)
|
||||
|
||||
|
||||
@@ -98,9 +190,12 @@ class TagHistory(Base):
|
||||
__tablename__ = "tag_history"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
tag_id = Column(UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False)
|
||||
tag_id = Column(
|
||||
UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
|
||||
new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
|
||||
change_type = Column(String(20), nullable=False, default="update")
|
||||
changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
changed_by = Column(String(255), nullable=False)
|
||||
|
||||
@@ -108,6 +203,42 @@ class TagHistory(Base):
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_tag_history_tag_id", "tag_id"),
|
||||
Index("idx_tag_history_changed_at", "changed_at"),
|
||||
CheckConstraint(
|
||||
"change_type IN ('create', 'update', 'delete')", name="check_change_type"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class PackageVersion(Base):
|
||||
"""Immutable version record for a package-artifact relationship.
|
||||
|
||||
Separates versions (immutable, set at upload) from tags (mutable labels).
|
||||
Each artifact in a package can have at most one version.
|
||||
"""
|
||||
|
||||
__tablename__ = "package_versions"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
package_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("packages.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
)
|
||||
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
|
||||
version = Column(String(255), nullable=False)
|
||||
version_source = Column(String(50)) # 'explicit', 'filename', 'metadata', 'migrated_from_tag'
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
created_by = Column(String(255), nullable=False)
|
||||
|
||||
package = relationship("Package", back_populates="versions")
|
||||
artifact = relationship("Artifact", back_populates="versions")
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_package_versions_package_id", "package_id"),
|
||||
Index("idx_package_versions_artifact_id", "artifact_id"),
|
||||
Index("idx_package_versions_package_version", "package_id", "version", unique=True),
|
||||
Index("idx_package_versions_package_artifact", "package_id", "artifact_id", unique=True),
|
||||
)
|
||||
|
||||
|
||||
@@ -118,6 +249,16 @@ class Upload(Base):
    artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
    package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False)
    original_name = Column(String(1024))
    tag_name = Column(String(255))  # Tag assigned during upload
    user_agent = Column(String(512))  # Client identification
    duration_ms = Column(Integer)  # Upload timing in milliseconds
    deduplicated = Column(Boolean, default=False)  # Whether artifact was deduplicated
    checksum_verified = Column(Boolean, default=True)  # Whether checksum was verified
    status = Column(
        String(20), default="completed", nullable=False
    )  # pending, completed, failed
    error_message = Column(Text)  # Error details for failed uploads
    client_checksum = Column(String(64))  # Client-provided SHA256 for verification
    uploaded_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    uploaded_by = Column(String(255), nullable=False)
    source_ip = Column(String(45))
@@ -129,6 +270,37 @@ class Upload(Base):
        Index("idx_uploads_artifact_id", "artifact_id"),
        Index("idx_uploads_package_id", "package_id"),
        Index("idx_uploads_uploaded_at", "uploaded_at"),
        Index("idx_uploads_package_uploaded_at", "package_id", "uploaded_at"),
        Index("idx_uploads_uploaded_by_at", "uploaded_by", "uploaded_at"),
        Index("idx_uploads_status", "status"),
        Index("idx_uploads_status_uploaded_at", "status", "uploaded_at"),
        CheckConstraint(
            "status IN ('pending', 'completed', 'failed')", name="check_upload_status"
        ),
    )


class UploadLock(Base):
    """Track in-progress uploads for conflict detection (409 responses)."""

    __tablename__ = "upload_locks"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    sha256_hash = Column(String(64), nullable=False)
    package_id = Column(
        UUID(as_uuid=True),
        ForeignKey("packages.id", ondelete="CASCADE"),
        nullable=False,
    )
    locked_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    locked_by = Column(String(255), nullable=False)
    expires_at = Column(DateTime(timezone=True), nullable=False)

    __table_args__ = (
        Index("idx_upload_locks_expires_at", "expires_at"),
        Index(
            "idx_upload_locks_hash_package", "sha256_hash", "package_id", unique=True
        ),
    )

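A sketch of the locking protocol the docstring implies, assuming the unique (sha256_hash, package_id) index is the arbiter for concurrent uploads (the helper name and TTL are illustrative, not part of the diff):

from datetime import datetime, timedelta
from sqlalchemy.exc import IntegrityError

def try_acquire_upload_lock(db, sha256_hash: str, package_id, user: str) -> bool:
    """Return True if the lock was acquired, False if an upload is already in flight."""
    lock = UploadLock(
        sha256_hash=sha256_hash,
        package_id=package_id,
        locked_by=user,
        expires_at=datetime.utcnow() + timedelta(minutes=15),  # assumed TTL
    )
    db.add(lock)
    try:
        db.flush()  # the unique index on (sha256_hash, package_id) decides the race
        return True
    except IntegrityError:
        db.rollback()
        return False  # the caller would translate this into a 409 response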
@@ -136,7 +308,11 @@ class Consumer(Base):
    __tablename__ = "consumers"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id", ondelete="CASCADE"), nullable=False)
    package_id = Column(
        UUID(as_uuid=True),
        ForeignKey("packages.id", ondelete="CASCADE"),
        nullable=False,
    )
    project_url = Column(String(2048), nullable=False)
    last_access = Column(DateTime(timezone=True), default=datetime.utcnow)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
@@ -153,7 +329,11 @@ class AccessPermission(Base):
    __tablename__ = "access_permissions"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    project_id = Column(UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE"), nullable=False)
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    user_id = Column(String(255), nullable=False)
    level = Column(String(20), nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
@@ -168,20 +348,107 @@ class AccessPermission(Base):
    )


class User(Base):
    """User account for authentication."""

    __tablename__ = "users"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    username = Column(String(255), unique=True, nullable=False)
    password_hash = Column(String(255))  # NULL if OIDC-only user
    email = Column(String(255))
    is_admin = Column(Boolean, default=False)
    is_active = Column(Boolean, default=True)
    must_change_password = Column(Boolean, default=False)
    oidc_subject = Column(String(255))  # OIDC subject claim
    oidc_issuer = Column(String(512))  # OIDC issuer URL
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    updated_at = Column(
        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
    )
    last_login = Column(DateTime(timezone=True))

    # Relationships
    api_keys = relationship(
        "APIKey", back_populates="owner", cascade="all, delete-orphan"
    )
    sessions = relationship(
        "Session", back_populates="user", cascade="all, delete-orphan"
    )
    team_memberships = relationship(
        "TeamMembership", back_populates="user", cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index("idx_users_username", "username"),
        Index("idx_users_email", "email"),
        Index("idx_users_oidc_subject", "oidc_subject"),
    )


class Session(Base):
    """User session for web login."""

    __tablename__ = "sessions"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    token_hash = Column(String(64), unique=True, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    expires_at = Column(DateTime(timezone=True), nullable=False)
    last_accessed = Column(DateTime(timezone=True), default=datetime.utcnow)
    user_agent = Column(String(512))
    ip_address = Column(String(45))

    user = relationship("User", back_populates="sessions")

    __table_args__ = (
        Index("idx_sessions_user_id", "user_id"),
        Index("idx_sessions_token_hash", "token_hash"),
        Index("idx_sessions_expires_at", "expires_at"),
    )

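The 64-character token_hash column suggests sessions are looked up by a SHA-256 digest of the raw cookie token rather than the token itself; a sketch under that assumption (the helper is ours, and `Session` here is the model above, not sqlalchemy.orm.Session):

import hashlib

def find_session(db, raw_token: str):
    """Hash the client-supplied token and look it up; assumes SHA-256 hashing."""
    token_hash = hashlib.sha256(raw_token.encode("utf-8")).hexdigest()
    return (
        db.query(Session)
        .filter(Session.token_hash == token_hash)
        .first()
    )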
class AuthSettings(Base):
    """Authentication settings for OIDC configuration."""

    __tablename__ = "auth_settings"

    key = Column(String(255), primary_key=True)
    value = Column(Text, nullable=False)
    updated_at = Column(DateTime(timezone=True), default=datetime.utcnow)


class APIKey(Base):
    __tablename__ = "api_keys"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    key_hash = Column(String(64), unique=True, nullable=False)
    name = Column(String(255), nullable=False)
    user_id = Column(String(255), nullable=False)
    user_id = Column(
        String(255), nullable=False
    )  # Legacy field, kept for compatibility
    owner_id = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=True,  # Nullable for migration compatibility
    )
    description = Column(Text)
    scopes = Column(ARRAY(String), default=["read", "write"])
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    expires_at = Column(DateTime(timezone=True))
    last_used = Column(DateTime(timezone=True))

    owner = relationship("User", back_populates="api_keys")

    __table_args__ = (
        Index("idx_api_keys_user_id", "user_id"),
        Index("idx_api_keys_key_hash", "key_hash"),
        Index("idx_api_keys_owner_id", "owner_id"),
    )


@@ -201,4 +468,338 @@ class AuditLog(Base):
        Index("idx_audit_logs_resource", "resource"),
        Index("idx_audit_logs_user_id", "user_id"),
        Index("idx_audit_logs_timestamp", "timestamp"),
        Index("idx_audit_logs_resource_timestamp", "resource", "timestamp"),
        Index("idx_audit_logs_user_timestamp", "user_id", "timestamp"),
    )


class ProjectHistory(Base):
    """Track changes to project metadata over time."""

    __tablename__ = "project_history"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    project_id = Column(
        UUID(as_uuid=True),
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    field_name = Column(String(100), nullable=False)
    old_value = Column(Text)
    new_value = Column(Text)
    changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    changed_by = Column(String(255), nullable=False)

    __table_args__ = (
        Index("idx_project_history_project_id", "project_id"),
        Index("idx_project_history_changed_at", "changed_at"),
        Index("idx_project_history_project_changed_at", "project_id", "changed_at"),
    )


class PackageHistory(Base):
    """Track changes to package metadata over time."""

    __tablename__ = "package_history"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    package_id = Column(
        UUID(as_uuid=True),
        ForeignKey("packages.id", ondelete="CASCADE"),
        nullable=False,
    )
    field_name = Column(String(100), nullable=False)
    old_value = Column(Text)
    new_value = Column(Text)
    changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    changed_by = Column(String(255), nullable=False)

    __table_args__ = (
        Index("idx_package_history_package_id", "package_id"),
        Index("idx_package_history_changed_at", "changed_at"),
        Index("idx_package_history_package_changed_at", "package_id", "changed_at"),
    )


class ArtifactDependency(Base):
    """Dependency declared by an artifact on another package.

    Each artifact can declare dependencies on other packages, specifying either
    an exact version or a tag. This enables recursive dependency resolution.
    """

    __tablename__ = "artifact_dependencies"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    artifact_id = Column(
        String(64),
        ForeignKey("artifacts.id", ondelete="CASCADE"),
        nullable=False,
    )
    dependency_project = Column(String(255), nullable=False)
    dependency_package = Column(String(255), nullable=False)
    version_constraint = Column(String(255), nullable=True)
    tag_constraint = Column(String(255), nullable=True)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)

    # Relationship to the artifact that declares this dependency
    artifact = relationship("Artifact", back_populates="dependencies")

    __table_args__ = (
        # Exactly one of version_constraint or tag_constraint must be set
        CheckConstraint(
            "(version_constraint IS NOT NULL AND tag_constraint IS NULL) OR "
            "(version_constraint IS NULL AND tag_constraint IS NOT NULL)",
            name="check_constraint_type",
        ),
        # Each artifact can only depend on a specific project/package once
        Index(
            "idx_artifact_dependencies_artifact_id",
            "artifact_id",
        ),
        Index(
            "idx_artifact_dependencies_target",
            "dependency_project",
            "dependency_package",
        ),
        Index(
            "idx_artifact_dependencies_unique",
            "artifact_id",
            "dependency_project",
            "dependency_package",
            unique=True,
        ),
    )


class Team(Base):
    """Team for organizing projects and users."""

    __tablename__ = "teams"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(255), nullable=False)
    slug = Column(String(255), unique=True, nullable=False)
    description = Column(Text)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    updated_at = Column(
        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
    )
    created_by = Column(String(255), nullable=False)
    settings = Column(JSON, default=dict)

    # Relationships
    memberships = relationship(
        "TeamMembership", back_populates="team", cascade="all, delete-orphan"
    )
    projects = relationship("Project", back_populates="team")

    __table_args__ = (
        Index("idx_teams_slug", "slug"),
        Index("idx_teams_created_by", "created_by"),
        Index("idx_teams_created_at", "created_at"),
        CheckConstraint(
            "slug ~ '^[a-z0-9][a-z0-9-]*[a-z0-9]$' OR slug ~ '^[a-z0-9]$'",
            name="check_team_slug_format",
        ),
    )


class TeamMembership(Base):
    """Maps users to teams with their roles."""

    __tablename__ = "team_memberships"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    team_id = Column(
        UUID(as_uuid=True),
        ForeignKey("teams.id", ondelete="CASCADE"),
        nullable=False,
    )
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    role = Column(String(20), nullable=False, default="member")
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    invited_by = Column(String(255))

    # Relationships
    team = relationship("Team", back_populates="memberships")
    user = relationship("User", back_populates="team_memberships")

    __table_args__ = (
        Index("idx_team_memberships_team_id", "team_id"),
        Index("idx_team_memberships_user_id", "user_id"),
        Index("idx_team_memberships_role", "role"),
        Index("idx_team_memberships_team_role", "team_id", "role"),
        Index("idx_team_memberships_unique", "team_id", "user_id", unique=True),
        CheckConstraint(
            "role IN ('owner', 'admin', 'member')",
            name="check_team_role",
        ),
    )


# =============================================================================
# Upstream Caching Models
# =============================================================================

# Valid source types for upstream registries
SOURCE_TYPES = ["npm", "pypi", "maven", "docker", "helm", "nuget", "deb", "rpm", "generic"]

# Valid authentication types
AUTH_TYPES = ["none", "basic", "bearer", "api_key"]


class UpstreamSource(Base):
    """Configuration for an upstream artifact registry.

    Stores connection details and authentication for upstream registries
    like npm, PyPI, Maven Central, or private Artifactory instances.
    """

    __tablename__ = "upstream_sources"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    name = Column(String(255), unique=True, nullable=False)
    source_type = Column(String(50), default="generic", nullable=False)
    url = Column(String(2048), nullable=False)
    enabled = Column(Boolean, default=False, nullable=False)
    auth_type = Column(String(20), default="none", nullable=False)
    username = Column(String(255))
    password_encrypted = Column(LargeBinary)
    headers_encrypted = Column(LargeBinary)
    priority = Column(Integer, default=100, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    updated_at = Column(
        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    cached_urls = relationship("CachedUrl", back_populates="source")

    __table_args__ = (
        Index("idx_upstream_sources_enabled", "enabled"),
        Index("idx_upstream_sources_source_type", "source_type"),
        Index("idx_upstream_sources_priority", "priority"),
        CheckConstraint(
            "source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')",
            name="check_source_type",
        ),
        CheckConstraint(
            "auth_type IN ('none', 'basic', 'bearer', 'api_key')",
            name="check_auth_type",
        ),
        CheckConstraint("priority > 0", name="check_priority_positive"),
    )

    def set_password(self, password: str) -> None:
        """Encrypt and store a password/token."""
        from .encryption import encrypt_value

        if password:
            self.password_encrypted = encrypt_value(password)
        else:
            self.password_encrypted = None

    def get_password(self) -> str | None:
        """Decrypt and return the stored password/token."""
        from .encryption import decrypt_value

        if self.password_encrypted:
            try:
                return decrypt_value(self.password_encrypted)
            except Exception:
                return None
        return None

    def has_password(self) -> bool:
        """Check if a password/token is stored."""
        return self.password_encrypted is not None

    def set_headers(self, headers: dict) -> None:
        """Encrypt and store custom headers as JSON."""
        from .encryption import encrypt_value
        import json

        if headers:
            self.headers_encrypted = encrypt_value(json.dumps(headers))
        else:
            self.headers_encrypted = None

    def get_headers(self) -> dict | None:
        """Decrypt and return custom headers."""
        from .encryption import decrypt_value
        import json

        if self.headers_encrypted:
            try:
                return json.loads(decrypt_value(self.headers_encrypted))
            except Exception:
                return None
        return None

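Typical use of the credential helpers above when saving a source (values illustrative; encrypt_value/decrypt_value live in the sibling encryption module):

source = UpstreamSource(name="pypi-mirror", source_type="pypi", url="https://pypi.org")
source.set_password("s3cret-token")        # stored only as ciphertext
source.set_headers({"X-Api-Key": "abc"})   # custom headers, encrypted as JSON

assert source.has_password()
token = source.get_password()   # decrypts, or returns None if undecryptable
headers = source.get_headers()  # {'X-Api-Key': 'abc'} once decrypted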
class CacheSettings(Base):
    """Global cache settings (singleton table).

    Controls behavior of the upstream caching system.
    """

    __tablename__ = "cache_settings"

    id = Column(Integer, primary_key=True, default=1)
    auto_create_system_projects = Column(Boolean, default=True, nullable=False)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
    updated_at = Column(
        DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
    )

    __table_args__ = (
        CheckConstraint("id = 1", name="check_cache_settings_singleton"),
    )

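A get-or-create helper is the usual companion to a CHECK-constrained singleton row; a minimal sketch (the helper name is ours, not part of the diff):

def get_cache_settings(db) -> CacheSettings:
    """Fetch the single settings row, creating it on first use."""
    settings = db.query(CacheSettings).filter(CacheSettings.id == 1).first()
    if settings is None:
        settings = CacheSettings(id=1)  # CHECK (id = 1) forbids any other row
        db.add(settings)
        db.commit()
    return settings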
class CachedUrl(Base):
    """Tracks URL to artifact mappings for provenance.

    Records which URLs have been cached and maps them to their stored artifacts.
    Enables "is this URL already cached?" lookups and audit trails.
    """

    __tablename__ = "cached_urls"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    url = Column(String(4096), nullable=False)
    url_hash = Column(String(64), unique=True, nullable=False)
    artifact_id = Column(
        String(64), ForeignKey("artifacts.id"), nullable=False
    )
    source_id = Column(
        UUID(as_uuid=True),
        ForeignKey("upstream_sources.id", ondelete="SET NULL"),
    )
    fetched_at = Column(DateTime(timezone=True), default=datetime.utcnow, nullable=False)
    response_headers = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), default=datetime.utcnow)

    # Relationships
    artifact = relationship("Artifact")
    source = relationship("UpstreamSource", back_populates="cached_urls")

    __table_args__ = (
        Index("idx_cached_urls_url_hash", "url_hash"),
        Index("idx_cached_urls_artifact_id", "artifact_id"),
        Index("idx_cached_urls_source_id", "source_id"),
        Index("idx_cached_urls_fetched_at", "fetched_at"),
    )

    @staticmethod
    def compute_url_hash(url: str) -> str:
        """Compute SHA256 hash of a URL for fast lookups."""
        import hashlib
        return hashlib.sha256(url.encode("utf-8")).hexdigest()

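The indexed url_hash column is what makes the "is this URL already cached?" lookup cheap; a sketch with an illustrative URL:

url = "https://example.invalid/pkgs/requests-2.31.0-py3-none-any.whl"  # illustrative
cached = (
    db.query(CachedUrl)
    .filter(CachedUrl.url_hash == CachedUrl.compute_url_hash(url))
    .first()
)
if cached is not None:
    print("cache hit:", cached.artifact_id)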
212  backend/app/purge_seed_data.py  Normal file
@@ -0,0 +1,212 @@
"""
Purge seed/demo data from the database.

This is used when transitioning an environment from dev/test to a
production-like state. Triggered by setting the ORCHARD_PURGE_SEED_DATA=true
environment variable.
"""
import logging
import os
from sqlalchemy.orm import Session

from .models import (
    Project,
    Package,
    Artifact,
    Tag,
    Upload,
    PackageVersion,
    ArtifactDependency,
    Team,
    TeamMembership,
    User,
    AccessPermission,
)
from .storage import get_storage

logger = logging.getLogger(__name__)

# Seed data identifiers (from seed.py)
SEED_PROJECT_NAMES = [
    "frontend-libs",
    "backend-services",
    "mobile-apps",
    "internal-tools",
]

SEED_TEAM_SLUG = "demo-team"

SEED_USERNAMES = [
    "alice",
    "bob",
    "charlie",
    "diana",
    "eve",
    "frank",
]


def should_purge_seed_data() -> bool:
    """Check if seed data should be purged based on environment variable."""
    return os.environ.get("ORCHARD_PURGE_SEED_DATA", "").lower() == "true"


def purge_seed_data(db: Session) -> dict:
    """
    Purge all seed/demo data from the database.

    Returns a dict with counts of deleted items.
    """
    logger.warning("PURGING SEED DATA - This will delete demo projects, users, and teams")

    results = {
        "dependencies_deleted": 0,
        "tags_deleted": 0,
        "versions_deleted": 0,
        "uploads_deleted": 0,
        "artifacts_deleted": 0,
        "packages_deleted": 0,
        "projects_deleted": 0,
        "permissions_deleted": 0,
        "team_memberships_deleted": 0,
        "users_deleted": 0,
        "teams_deleted": 0,
        "s3_objects_deleted": 0,
    }

    storage = get_storage()

    # Find seed projects
    seed_projects = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).all()
    seed_project_ids = [p.id for p in seed_projects]

    if not seed_projects:
        logger.info("No seed projects found, nothing to purge")
        return results

    logger.info(f"Found {len(seed_projects)} seed projects to purge")

    # Find packages in seed projects
    seed_packages = db.query(Package).filter(Package.project_id.in_(seed_project_ids)).all()
    seed_package_ids = [p.id for p in seed_packages]

    # Find artifacts in seed packages (via uploads)
    seed_uploads = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).all()
    seed_artifact_ids = list(set(u.artifact_id for u in seed_uploads))

    # Delete in order (respecting foreign keys)

    # 1. Delete artifact dependencies
    if seed_artifact_ids:
        count = db.query(ArtifactDependency).filter(
            ArtifactDependency.artifact_id.in_(seed_artifact_ids)
        ).delete(synchronize_session=False)
        results["dependencies_deleted"] = count
        logger.info(f"Deleted {count} artifact dependencies")

    # 2. Delete tags
    if seed_package_ids:
        count = db.query(Tag).filter(Tag.package_id.in_(seed_package_ids)).delete(
            synchronize_session=False
        )
        results["tags_deleted"] = count
        logger.info(f"Deleted {count} tags")

    # 3. Delete package versions
    if seed_package_ids:
        count = db.query(PackageVersion).filter(
            PackageVersion.package_id.in_(seed_package_ids)
        ).delete(synchronize_session=False)
        results["versions_deleted"] = count
        logger.info(f"Deleted {count} package versions")

    # 4. Delete uploads
    if seed_package_ids:
        count = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).delete(
            synchronize_session=False
        )
        results["uploads_deleted"] = count
        logger.info(f"Deleted {count} uploads")

    # 5. Delete S3 objects for seed artifacts
    if seed_artifact_ids:
        seed_artifacts = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).all()
        for artifact in seed_artifacts:
            if artifact.s3_key:
                try:
                    storage.client.delete_object(Bucket=storage.bucket, Key=artifact.s3_key)
                    results["s3_objects_deleted"] += 1
                except Exception as e:
                    logger.warning(f"Failed to delete S3 object {artifact.s3_key}: {e}")
        logger.info(f"Deleted {results['s3_objects_deleted']} S3 objects")

    # 6. Delete artifacts (only those with ref_count that would be 0 after our deletions)
    # Since we deleted all tags/versions pointing to these artifacts, we can delete them
    if seed_artifact_ids:
        count = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).delete(
            synchronize_session=False
        )
        results["artifacts_deleted"] = count
        logger.info(f"Deleted {count} artifacts")

    # 7. Delete packages
    if seed_package_ids:
        count = db.query(Package).filter(Package.id.in_(seed_package_ids)).delete(
            synchronize_session=False
        )
        results["packages_deleted"] = count
        logger.info(f"Deleted {count} packages")

    # 8. Delete access permissions for seed projects
    if seed_project_ids:
        count = db.query(AccessPermission).filter(
            AccessPermission.project_id.in_(seed_project_ids)
        ).delete(synchronize_session=False)
        results["permissions_deleted"] = count
        logger.info(f"Deleted {count} access permissions")

    # 9. Delete seed projects
    count = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).delete(
        synchronize_session=False
    )
    results["projects_deleted"] = count
    logger.info(f"Deleted {count} projects")

    # 10. Find and delete seed team
    seed_team = db.query(Team).filter(Team.slug == SEED_TEAM_SLUG).first()
    if seed_team:
        # Delete team memberships first
        count = db.query(TeamMembership).filter(
            TeamMembership.team_id == seed_team.id
        ).delete(synchronize_session=False)
        results["team_memberships_deleted"] = count
        logger.info(f"Deleted {count} team memberships")

        # Delete the team
        db.delete(seed_team)
        results["teams_deleted"] = 1
        logger.info(f"Deleted team: {SEED_TEAM_SLUG}")

    # 11. Delete seed users (but NOT admin)
    seed_users = db.query(User).filter(User.username.in_(SEED_USERNAMES)).all()
    for user in seed_users:
        # Delete any remaining team memberships for this user
        db.query(TeamMembership).filter(TeamMembership.user_id == user.id).delete(
            synchronize_session=False
        )
        # Delete any access permissions for this user
        # Note: AccessPermission.user_id is VARCHAR (username), not UUID
        db.query(AccessPermission).filter(AccessPermission.user_id == user.username).delete(
            synchronize_session=False
        )
        db.delete(user)
        results["users_deleted"] += 1

    if results["users_deleted"] > 0:
        logger.info(f"Deleted {results['users_deleted']} seed users")

    db.commit()

    logger.warning("SEED DATA PURGE COMPLETE")
    logger.info(f"Purge results: {results}")

    return results
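A sketch of how this would be wired in at application startup; SessionLocal and the hook name are assumptions, not shown in this commit:

# Hypothetical startup hook: only should_purge_seed_data / purge_seed_data
# come from this commit; the rest is illustrative.
from .database import SessionLocal  # assumed session factory
from .purge_seed_data import should_purge_seed_data, purge_seed_data

def run_startup_tasks() -> None:
    if should_purge_seed_data():  # ORCHARD_PURGE_SEED_DATA=true
        db = SessionLocal()
        try:
            purge_seed_data(db)
        finally:
            db.close()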
534  backend/app/pypi_proxy.py  Normal file
@@ -0,0 +1,534 @@
"""
Transparent PyPI proxy implementing PEP 503 (Simple API).

Provides endpoints that allow pip to use Orchard as a PyPI index URL.
Artifacts are cached on first access through configured upstream sources.
"""

import hashlib
import logging
import re
from typing import Optional
from urllib.parse import urljoin, urlparse, quote, unquote

import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, HTMLResponse
from sqlalchemy.orm import Session

from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, Tag
from .storage import S3Storage, get_storage
from .upstream import (
    UpstreamClient,
    UpstreamClientConfig,
    UpstreamHTTPError,
    UpstreamConnectionError,
    UpstreamTimeoutError,
)
from .config import get_env_upstream_sources

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/pypi", tags=["pypi-proxy"])

# Timeout configuration for proxy requests
PROXY_CONNECT_TIMEOUT = 30.0
PROXY_READ_TIMEOUT = 60.0


def _get_pypi_upstream_sources(db: Session) -> list[UpstreamSource]:
    """Get all enabled upstream sources configured for PyPI."""
    # Get database sources
    db_sources = (
        db.query(UpstreamSource)
        .filter(
            UpstreamSource.source_type == "pypi",
            UpstreamSource.enabled == True,
        )
        .order_by(UpstreamSource.priority)
        .all()
    )

    # Get env sources
    env_sources = [
        s for s in get_env_upstream_sources()
        if s.source_type == "pypi" and s.enabled
    ]

    # Combine and sort by priority
    all_sources = list(db_sources) + list(env_sources)
    return sorted(all_sources, key=lambda s: s.priority)


def _build_auth_headers(source) -> dict:
    """Build authentication headers for an upstream source."""
    headers = {}

    if hasattr(source, 'auth_type'):
        if source.auth_type == "bearer":
            password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None)
            if password:
                headers["Authorization"] = f"Bearer {password}"
        elif source.auth_type == "api_key":
            custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {}
            if custom_headers:
                headers.update(custom_headers)

    return headers


def _get_basic_auth(source) -> Optional[tuple[str, str]]:
    """Get basic auth credentials if applicable."""
    if hasattr(source, 'auth_type') and source.auth_type == "basic":
        username = getattr(source, 'username', None)
        if username:
            password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '')
            return (username, password or '')
    return None


def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
    """
    Rewrite download links in a PyPI simple page to go through our proxy.

    Args:
        html: The HTML content from upstream
        base_url: Our server's base URL
        package_name: The package name for the URL path

    Returns:
        HTML with rewritten download links
    """
    # Pattern to match href attributes in anchor tags
    # PyPI simple pages have links like:
    # <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>

    def replace_href(match):
        original_url = match.group(1)
        # Extract the filename from the URL
        parsed = urlparse(original_url)
        path_parts = parsed.path.split('/')
        filename = path_parts[-1] if path_parts else ''

        # Keep the hash fragment if present
        fragment = f"#{parsed.fragment}" if parsed.fragment else ""

        # Encode the original URL for safe transmission
        encoded_url = quote(original_url.split('#')[0], safe='')

        # Build new URL pointing to our proxy
        new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"

        return f'href="{new_url}"'

    # Match href="..." patterns
    rewritten = re.sub(r'href="([^"]+)"', replace_href, html)

    return rewritten

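To make the rewrite concrete, a worked example (host names illustrative):

# Illustrative input/output for _rewrite_package_links:
before = '<a href="https://files.pythonhosted.org/pkg/foo-1.0.tar.gz#sha256=abc">foo-1.0.tar.gz</a>'
after = _rewrite_package_links(before, "https://orchard.example", "foo")
# after now points at the proxy, carrying the original URL as a query parameter:
# <a href="https://orchard.example/pypi/simple/foo/foo-1.0.tar.gz?upstream=https%3A%2F%2Ffiles.pythonhosted.org%2Fpkg%2Ffoo-1.0.tar.gz#sha256=abc">foo-1.0.tar.gz</a>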
@router.get("/simple/")
async def pypi_simple_index(
    request: Request,
    db: Session = Depends(get_db),
):
    """
    PyPI Simple API index - lists all packages.

    Proxies to the first available upstream PyPI source.
    """
    sources = _get_pypi_upstream_sources(db)

    if not sources:
        raise HTTPException(
            status_code=503,
            detail="No PyPI upstream sources configured"
        )

    # Try each source in priority order
    last_error = None
    for source in sources:
        try:
            headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
            headers.update(_build_auth_headers(source))
            auth = _get_basic_auth(source)

            simple_url = source.url.rstrip('/') + '/simple/'

            timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)

            with httpx.Client(timeout=timeout, follow_redirects=False) as client:
                response = client.get(
                    simple_url,
                    headers=headers,
                    auth=auth,
                )

                # Handle redirects manually to avoid loops
                if response.status_code in (301, 302, 303, 307, 308):
                    redirect_url = response.headers.get('location')
                    if redirect_url:
                        # Follow the redirect once
                        response = client.get(
                            redirect_url,
                            headers=headers,
                            auth=auth,
                            follow_redirects=False,
                        )

                if response.status_code == 200:
                    content = response.text

                    # Index links point at package pages; rewrite them to go
                    # through our proxy
                    base_url = str(request.base_url).rstrip('/')
                    content = re.sub(
                        r'href="([^"]+)/"',
                        lambda m: f'href="{base_url}/pypi/simple/{m.group(1)}/"',
                        content
                    )

                    return HTMLResponse(content=content)

                last_error = f"HTTP {response.status_code}"

        except httpx.ConnectError as e:
            last_error = f"Connection failed: {e}"
            logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
        except httpx.TimeoutException as e:
            last_error = f"Timeout: {e}"
            logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
        except Exception as e:
            last_error = str(e)
            logger.warning(f"PyPI proxy: error fetching from {source.url}: {e}")

    raise HTTPException(
        status_code=502,
        detail=f"Failed to fetch package index from upstream: {last_error}"
    )


@router.get("/simple/{package_name}/")
async def pypi_package_versions(
    request: Request,
    package_name: str,
    db: Session = Depends(get_db),
):
    """
    PyPI Simple API package page - lists all versions/files for a package.

    Proxies to upstream and rewrites download links to go through our cache.
    """
    sources = _get_pypi_upstream_sources(db)

    if not sources:
        raise HTTPException(
            status_code=503,
            detail="No PyPI upstream sources configured"
        )

    base_url = str(request.base_url).rstrip('/')

    # Normalize package name (PEP 503)
    normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()

    # Try each source in priority order
    last_error = None
    for source in sources:
        try:
            headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
            headers.update(_build_auth_headers(source))
            auth = _get_basic_auth(source)

            package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/'

            timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)

            with httpx.Client(timeout=timeout, follow_redirects=False) as client:
                response = client.get(
                    package_url,
                    headers=headers,
                    auth=auth,
                )

                # Handle redirects manually
                redirect_count = 0
                while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
                    redirect_url = response.headers.get('location')
                    if not redirect_url:
                        break

                    # Make redirect URL absolute if needed
                    if not redirect_url.startswith('http'):
                        redirect_url = urljoin(package_url, redirect_url)

                    response = client.get(
                        redirect_url,
                        headers=headers,
                        auth=auth,
                        follow_redirects=False,
                    )
                    redirect_count += 1

                if response.status_code == 200:
                    content = response.text

                    # Rewrite download links to go through our proxy
                    content = _rewrite_package_links(content, base_url, normalized_name)

                    return HTMLResponse(content=content)

                if response.status_code == 404:
                    # Package not found in this source, try next
                    last_error = f"Package not found in {source.name}"
                    continue

                last_error = f"HTTP {response.status_code}"

        except httpx.ConnectError as e:
            last_error = f"Connection failed: {e}"
            logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
        except httpx.TimeoutException as e:
            last_error = f"Timeout: {e}"
            logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
        except Exception as e:
            last_error = str(e)
            logger.warning(f"PyPI proxy: error fetching {package_name} from {source.url}: {e}")

    raise HTTPException(
        status_code=404,
        detail=f"Package '{package_name}' not found: {last_error}"
    )


@router.get("/simple/{package_name}/{filename}")
async def pypi_download_file(
    request: Request,
    package_name: str,
    filename: str,
    upstream: Optional[str] = None,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Download a package file, caching it in Orchard.

    Args:
        package_name: The package name
        filename: The filename to download
        upstream: URL-encoded upstream URL to fetch from
    """
    if not upstream:
        raise HTTPException(
            status_code=400,
            detail="Missing 'upstream' query parameter with source URL"
        )

    # Decode the upstream URL
    upstream_url = unquote(upstream)

    # Check if we already have this URL cached
    url_hash = hashlib.sha256(upstream_url.encode()).hexdigest()
    cached_url = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()

    if cached_url:
        # Serve from cache
        artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
        if artifact:
            logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")

            # Stream from S3
            try:
                content_stream = storage.get_artifact_stream(artifact.id)

                return StreamingResponse(
                    content_stream,
                    media_type=artifact.content_type or "application/octet-stream",
                    headers={
                        "Content-Disposition": f'attachment; filename="{filename}"',
                        "Content-Length": str(artifact.size),
                        "X-Checksum-SHA256": artifact.id,
                        "X-Cache": "HIT",
                    }
                )
            except Exception as e:
                logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
                # Fall through to fetch from upstream

    # Not cached - fetch from upstream
    sources = _get_pypi_upstream_sources(db)

    # Find a source that matches the upstream URL
    matched_source = None
    for source in sources:
        source_url = getattr(source, 'url', '')
        # Check if the upstream URL could come from this source
        # (This is a loose check - the URL might be from files.pythonhosted.org)
        if urlparse(upstream_url).netloc in source_url or True:  # Allow any source for now
            matched_source = source
            break

    if not matched_source and sources:
        matched_source = sources[0]  # Use first source for auth if available

    try:
        headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
        if matched_source:
            headers.update(_build_auth_headers(matched_source))
        auth = _get_basic_auth(matched_source) if matched_source else None

        timeout = httpx.Timeout(300.0, connect=PROXY_CONNECT_TIMEOUT)  # 5 minutes for large files

        # Fetch the file
        logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")

        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
            response = client.get(
                upstream_url,
                headers=headers,
                auth=auth,
            )

            # Handle redirects manually
            redirect_count = 0
            while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
                redirect_url = response.headers.get('location')
                if not redirect_url:
                    break

                if not redirect_url.startswith('http'):
                    redirect_url = urljoin(upstream_url, redirect_url)

                logger.info(f"PyPI proxy: following redirect to {redirect_url}")

                # Don't send auth to different hosts
                redirect_headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
                redirect_auth = None
                if urlparse(redirect_url).netloc == urlparse(upstream_url).netloc:
                    redirect_headers.update(headers)
                    redirect_auth = auth

                response = client.get(
                    redirect_url,
                    headers=redirect_headers,
                    auth=redirect_auth,
                    follow_redirects=False,
                )
                redirect_count += 1

        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Upstream returned {response.status_code}"
            )

        content = response.content
        content_type = response.headers.get('content-type', 'application/octet-stream')

        # Compute hash
        sha256 = hashlib.sha256(content).hexdigest()
        size = len(content)

        logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")

        # Store in S3
        from io import BytesIO
        artifact = storage.store_artifact(
            file_obj=BytesIO(content),
            filename=filename,
            content_type=content_type,
        )

        # Check if artifact already exists
        existing = db.query(Artifact).filter(Artifact.id == sha256).first()
        if existing:
            # Increment ref count
            existing.ref_count += 1
            db.flush()
        else:
            # Create artifact record
            new_artifact = Artifact(
                id=sha256,
                filename=filename,
                content_type=content_type,
                size=size,
                ref_count=1,
            )
            db.add(new_artifact)
            db.flush()

        # Create/get system project and package
        system_project = db.query(Project).filter(Project.name == "_pypi").first()
        if not system_project:
            system_project = Project(
                name="_pypi",
                description="System project for cached PyPI packages",
                visibility="private",
            )
            db.add(system_project)
            db.flush()

        # Normalize package name
        normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()

        package = db.query(Package).filter(
            Package.project_id == system_project.id,
            Package.name == normalized_name,
        ).first()
        if not package:
            package = Package(
                project_id=system_project.id,
                name=normalized_name,
                description=f"PyPI package: {normalized_name}",
            )
            db.add(package)
            db.flush()

        # Create tag with filename
        existing_tag = db.query(Tag).filter(
            Tag.package_id == package.id,
            Tag.name == filename,
        ).first()
        if not existing_tag:
            tag = Tag(
                package_id=package.id,
                name=filename,
                artifact_id=sha256,
            )
            db.add(tag)

        # Cache the URL mapping
        existing_cached = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
        if not existing_cached:
            cached_url_record = CachedUrl(
                url_hash=url_hash,
                url=upstream_url,
                artifact_id=sha256,
            )
            db.add(cached_url_record)

        db.commit()

        # Return the file
        return Response(
            content=content,
            media_type=content_type,
            headers={
                "Content-Disposition": f'attachment; filename="{filename}"',
                "Content-Length": str(size),
                "X-Checksum-SHA256": sha256,
                "X-Cache": "MISS",
            }
        )

    except httpx.ConnectError as e:
        raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
    except httpx.TimeoutException as e:
        raise HTTPException(status_code=504, detail=f"Timeout: {e}")
    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"PyPI proxy: error downloading {filename}")
        raise HTTPException(status_code=500, detail=str(e))
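With the three endpoints above, pip can use Orchard as its index, e.g. `pip install --index-url https://orchard.example/pypi/simple/ requests` (host illustrative). The first install of a file takes the MISS path, populating the `_pypi` system project; later installs of the same file are served from S3 with `X-Cache: HIT`.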
16  backend/app/rate_limit.py  Normal file
@@ -0,0 +1,16 @@
"""Rate limiting configuration for Orchard API.

Uses slowapi for rate limiting with IP-based keys.
"""

import os
from slowapi import Limiter
from slowapi.util import get_remote_address

# Rate limiter - uses IP address as key
limiter = Limiter(key_func=get_remote_address)

# Rate limit strings - configurable via environment for testing
# Default: 5 login attempts per minute per IP
# In tests: set ORCHARD_LOGIN_RATE_LIMIT to a high value like "1000/minute"
LOGIN_RATE_LIMIT = os.environ.get("ORCHARD_LOGIN_RATE_LIMIT", "5/minute")
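A sketch of how LOGIN_RATE_LIMIT would be applied to a route (the route itself is not part of this file; slowapi also needs the limiter attached to the app and its RateLimitExceeded handler registered, wiring that presumably lives in the app factory):

# Hypothetical login route using the limiter above.
from fastapi import APIRouter, Request
from .rate_limit import limiter, LOGIN_RATE_LIMIT

router = APIRouter()

@router.post("/auth/login")
@limiter.limit(LOGIN_RATE_LIMIT)  # slowapi keys the bucket by client IP via key_func
async def login(request: Request):  # slowapi requires the Request parameter
    ...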
22  backend/app/repositories/__init__.py  Normal file
@@ -0,0 +1,22 @@
"""
Repository pattern implementation for data access layer.

Repositories abstract database operations from business logic,
providing clean interfaces for CRUD operations on each entity.
"""

from .base import BaseRepository
from .project import ProjectRepository
from .package import PackageRepository
from .artifact import ArtifactRepository
from .tag import TagRepository
from .upload import UploadRepository

__all__ = [
    "BaseRepository",
    "ProjectRepository",
    "PackageRepository",
    "ArtifactRepository",
    "TagRepository",
    "UploadRepository",
]
157  backend/app/repositories/artifact.py  Normal file
@@ -0,0 +1,157 @@
"""
Artifact repository for data access operations.
"""

from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import func, or_
from uuid import UUID

from .base import BaseRepository
from ..models import Artifact, Tag, Upload, Package, Project


class ArtifactRepository(BaseRepository[Artifact]):
    """Repository for Artifact entity operations."""

    model = Artifact

    def get_by_sha256(self, sha256: str) -> Optional[Artifact]:
        """Get artifact by SHA256 hash (primary key)."""
        return self.db.query(Artifact).filter(Artifact.id == sha256).first()

    def exists_by_sha256(self, sha256: str) -> bool:
        """Check if artifact with SHA256 exists."""
        return self.db.query(
            self.db.query(Artifact).filter(Artifact.id == sha256).exists()
        ).scalar()

    def create_artifact(
        self,
        sha256: str,
        size: int,
        s3_key: str,
        created_by: str,
        content_type: Optional[str] = None,
        original_name: Optional[str] = None,
        format_metadata: Optional[dict] = None,
    ) -> Artifact:
        """Create a new artifact."""
        artifact = Artifact(
            id=sha256,
            size=size,
            s3_key=s3_key,
            created_by=created_by,
            content_type=content_type,
            original_name=original_name,
            format_metadata=format_metadata or {},
            ref_count=1,
        )
        self.db.add(artifact)
        self.db.flush()
        return artifact

    def increment_ref_count(self, artifact: Artifact) -> Artifact:
        """Increment artifact reference count."""
        artifact.ref_count += 1
        self.db.flush()
        return artifact

    def decrement_ref_count(self, artifact: Artifact) -> Artifact:
        """
        Decrement artifact reference count.
        Returns the artifact with updated count.
        Does not delete the artifact even if ref_count reaches 0.
        """
        if artifact.ref_count > 0:
            artifact.ref_count -= 1
        self.db.flush()
        return artifact

    def get_orphaned_artifacts(self, limit: int = 100) -> List[Artifact]:
        """Get artifacts with ref_count = 0 (candidates for cleanup)."""
        return (
            self.db.query(Artifact)
            .filter(Artifact.ref_count == 0)
            .limit(limit)
            .all()
        )

    def get_artifacts_without_tags(self, limit: int = 100) -> List[Artifact]:
        """Get artifacts that have no tags pointing to them."""
        # Subquery to find artifact IDs that have tags
        tagged_artifacts = self.db.query(Tag.artifact_id).distinct().subquery()

        return (
            self.db.query(Artifact)
            .filter(~Artifact.id.in_(tagged_artifacts))
            .limit(limit)
            .all()
        )

    def find_by_package(
        self,
        package_id: UUID,
        page: int = 1,
        limit: int = 20,
        content_type: Optional[str] = None,
    ) -> Tuple[List[Artifact], int]:
        """Find artifacts uploaded to a package."""
        # Get distinct artifact IDs from uploads
        artifact_ids_subquery = (
            self.db.query(func.distinct(Upload.artifact_id))
            .filter(Upload.package_id == package_id)
            .subquery()
        )

        query = self.db.query(Artifact).filter(Artifact.id.in_(artifact_ids_subquery))

        if content_type:
            query = query.filter(Artifact.content_type == content_type)

        total = query.count()
        offset = (page - 1) * limit
        artifacts = query.order_by(Artifact.created_at.desc()).offset(offset).limit(limit).all()

        return artifacts, total

    def get_referencing_tags(self, artifact_id: str) -> List[Tuple[Tag, Package, Project]]:
        """Get all tags referencing this artifact with package and project info."""
        return (
            self.db.query(Tag, Package, Project)
            .join(Package, Tag.package_id == Package.id)
            .join(Project, Package.project_id == Project.id)
            .filter(Tag.artifact_id == artifact_id)
            .all()
        )

    def search(self, query_str: str, limit: int = 10) -> List[Tuple[Tag, Artifact, str, str]]:
        """
        Search artifacts by tag name or original filename.
        Returns (tag, artifact, package_name, project_name) tuples.
        """
        search_lower = query_str.lower()
        return (
            self.db.query(Tag, Artifact, Package.name, Project.name)
            .join(Artifact, Tag.artifact_id == Artifact.id)
            .join(Package, Tag.package_id == Package.id)
            .join(Project, Package.project_id == Project.id)
            .filter(
                or_(
                    func.lower(Tag.name).contains(search_lower),
                    func.lower(Artifact.original_name).contains(search_lower)
                )
            )
            .order_by(Tag.name)
            .limit(limit)
            .all()
        )

    def update_metadata(self, artifact: Artifact, metadata: dict) -> Artifact:
        """Update or merge format metadata."""
        if artifact.format_metadata:
            artifact.format_metadata = {**artifact.format_metadata, **metadata}
        else:
            artifact.format_metadata = metadata
        self.db.flush()
        return artifact
96  backend/app/repositories/base.py  Normal file
@@ -0,0 +1,96 @@
"""
Base repository class with common CRUD operations.
"""

from typing import TypeVar, Generic, Type, Optional, List, Any, Dict
from sqlalchemy.orm import Session
from sqlalchemy import func, asc, desc
from uuid import UUID

from ..models import Base

T = TypeVar("T", bound=Base)


class BaseRepository(Generic[T]):
    """
    Base repository providing common CRUD operations.

    Subclasses should set the `model` class attribute to the SQLAlchemy model.
    """

    model: Type[T]

    def __init__(self, db: Session):
        self.db = db

    def get_by_id(self, id: Any) -> Optional[T]:
        """Get entity by primary key."""
        return self.db.query(self.model).filter(self.model.id == id).first()

    def get_all(
        self,
        skip: int = 0,
        limit: int = 100,
        order_by: str = None,
        order_desc: bool = False,
    ) -> List[T]:
        """Get all entities with pagination and optional ordering."""
        query = self.db.query(self.model)

        if order_by and hasattr(self.model, order_by):
            column = getattr(self.model, order_by)
            query = query.order_by(desc(column) if order_desc else asc(column))

        return query.offset(skip).limit(limit).all()

    def count(self) -> int:
        """Count total entities."""
        return self.db.query(func.count(self.model.id)).scalar() or 0

    def create(self, **kwargs) -> T:
        """Create a new entity."""
        entity = self.model(**kwargs)
        self.db.add(entity)
        self.db.flush()  # Flush to get ID without committing
        return entity

    def update(self, entity: T, **kwargs) -> T:
        """Update an existing entity."""
        for key, value in kwargs.items():
            if hasattr(entity, key):
                setattr(entity, key, value)
        self.db.flush()
        return entity

    def delete(self, entity: T) -> None:
        """Delete an entity."""
        self.db.delete(entity)
        self.db.flush()

    def delete_by_id(self, id: Any) -> bool:
        """Delete entity by ID. Returns True if deleted, False if not found."""
        entity = self.get_by_id(id)
        if entity:
            self.delete(entity)
            return True
        return False

    def exists(self, id: Any) -> bool:
        """Check if entity exists by ID."""
        return self.db.query(
            self.db.query(self.model).filter(self.model.id == id).exists()
        ).scalar()

    def commit(self) -> None:
        """Commit the current transaction."""
        self.db.commit()

    def rollback(self) -> None:
        """Rollback the current transaction."""
        self.db.rollback()

    def refresh(self, entity: T) -> T:
        """Refresh entity from database."""
        self.db.refresh(entity)
        return entity
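A sketch of how a caller composes these repositories (values illustrative); note that repository methods only flush, so the caller owns the commit/rollback boundary:

from sqlalchemy.orm import Session
from .artifact import ArtifactRepository

def ingest_example(db: Session) -> None:
    repo = ArtifactRepository(db)
    sha256 = "ab" * 32  # illustrative digest
    if repo.exists_by_sha256(sha256):
        repo.increment_ref_count(repo.get_by_sha256(sha256))
    else:
        repo.create_artifact(
            sha256=sha256,
            size=1024,
            s3_key=f"artifacts/{sha256}",  # illustrative key layout
            created_by="alice",
        )
    db.commit()  # repositories flush; the caller commits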
177  backend/app/repositories/package.py  Normal file
@@ -0,0 +1,177 @@
"""
Package repository for data access operations.
"""

from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import func, or_, asc, desc
from uuid import UUID

from .base import BaseRepository
from ..models import Package, Project, Tag, Upload, Artifact


class PackageRepository(BaseRepository[Package]):
    """Repository for Package entity operations."""

    model = Package

    def get_by_name(self, project_id: UUID, name: str) -> Optional[Package]:
        """Get package by name within a project."""
        return (
            self.db.query(Package)
            .filter(Package.project_id == project_id, Package.name == name)
            .first()
        )

    def get_by_project_and_name(self, project_name: str, package_name: str) -> Optional[Package]:
        """Get package by project name and package name."""
        return (
            self.db.query(Package)
            .join(Project, Package.project_id == Project.id)
            .filter(Project.name == project_name, Package.name == package_name)
            .first()
        )

    def exists_by_name(self, project_id: UUID, name: str) -> bool:
        """Check if package with name exists in project."""
        return self.db.query(
            self.db.query(Package)
            .filter(Package.project_id == project_id, Package.name == name)
            .exists()
        ).scalar()

    def list_by_project(
        self,
        project_id: UUID,
        page: int = 1,
        limit: int = 20,
        search: Optional[str] = None,
        format: Optional[str] = None,
        platform: Optional[str] = None,
        sort: str = "name",
        order: str = "asc",
    ) -> Tuple[List[Package], int]:
        """
        List packages in a project with filtering and pagination.

        Returns tuple of (packages, total_count).
        """
        query = self.db.query(Package).filter(Package.project_id == project_id)

        # Apply search filter
        if search:
            search_lower = search.lower()
            query = query.filter(
                or_(
                    func.lower(Package.name).contains(search_lower),
                    func.lower(Package.description).contains(search_lower)
                )
            )

        # Apply format filter
        if format:
            query = query.filter(Package.format == format)

        # Apply platform filter
        if platform:
            query = query.filter(Package.platform == platform)

        # Get total count
        total = query.count()

        # Apply sorting
        sort_columns = {
            "name": Package.name,
            "created_at": Package.created_at,
            "updated_at": Package.updated_at,
        }
        sort_column = sort_columns.get(sort, Package.name)
        if order == "desc":
            query = query.order_by(desc(sort_column))
        else:
            query = query.order_by(asc(sort_column))

        # Apply pagination
        offset = (page - 1) * limit
        packages = query.offset(offset).limit(limit).all()

        return packages, total

    def create_package(
        self,
        project_id: UUID,
        name: str,
        description: Optional[str] = None,
        format: str = "generic",
        platform: str = "any",
    ) -> Package:
        """Create a new package."""
        return self.create(
            project_id=project_id,
            name=name,
            description=description,
            format=format,
            platform=platform,
        )

    def update_package(
        self,
        package: Package,
        name: Optional[str] = None,
        description: Optional[str] = None,
        format: Optional[str] = None,
        platform: Optional[str] = None,
    ) -> Package:
        """Update package fields."""
        updates = {}
        if name is not None:
            updates["name"] = name
        if description is not None:
            updates["description"] = description
        if format is not None:
            updates["format"] = format
        if platform is not None:
            updates["platform"] = platform
        return self.update(package, **updates)

    def get_stats(self, package_id: UUID) -> dict:
        """Get package statistics (tag count, artifact count, total size)."""
        tag_count = (
            self.db.query(func.count(Tag.id))
            .filter(Tag.package_id == package_id)
            .scalar() or 0
        )

        artifact_stats = (
            self.db.query(
                func.count(func.distinct(Upload.artifact_id)),
                func.coalesce(func.sum(Artifact.size), 0)
            )
            .join(Artifact, Upload.artifact_id == Artifact.id)
            .filter(Upload.package_id == package_id)
            .first()
        )

        return {
            "tag_count": tag_count,
            "artifact_count": artifact_stats[0] if artifact_stats else 0,
            "total_size": artifact_stats[1] if artifact_stats else 0,
        }

    def search(self, query_str: str, limit: int = 10) -> List[Tuple[Package, str]]:
        """Search packages by name or description. Returns (package, project_name) tuples."""
        search_lower = query_str.lower()
        return (
            self.db.query(Package, Project.name)
            .join(Project, Package.project_id == Project.id)
            .filter(
                or_(
                    func.lower(Package.name).contains(search_lower),
                    func.lower(Package.description).contains(search_lower)
                )
            )
            .order_by(Package.name)
            .limit(limit)
            .all()
        )
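A hedged sketch of the pagination contract of list_by_project above (the db session and project object are placeholders):

repo = PackageRepository(db)
packages, total = repo.list_by_project(
    project_id=project.id,
    page=2,
    limit=20,
    search="ui",
    sort="updated_at",
    order="desc",
)
page_count = (total + 20 - 1) // 20  # ceiling division over the same limit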
132 backend/app/repositories/project.py Normal file
@@ -0,0 +1,132 @@
"""
Project repository for data access operations.
"""

from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import func, or_, asc, desc
from uuid import UUID

from .base import BaseRepository
from ..models import Project


class ProjectRepository(BaseRepository[Project]):
    """Repository for Project entity operations."""

    model = Project

    def get_by_name(self, name: str) -> Optional[Project]:
        """Get project by unique name."""
        return self.db.query(Project).filter(Project.name == name).first()

    def exists_by_name(self, name: str) -> bool:
        """Check if project with name exists."""
        return self.db.query(
            self.db.query(Project).filter(Project.name == name).exists()
        ).scalar()

    def list_accessible(
        self,
        user_id: str,
        page: int = 1,
        limit: int = 20,
        search: Optional[str] = None,
        visibility: Optional[str] = None,
        sort: str = "name",
        order: str = "asc",
    ) -> Tuple[List[Project], int]:
        """
        List projects accessible to user with filtering and pagination.

        Returns tuple of (projects, total_count).
        """
        # Base query - filter by access
        query = self.db.query(Project).filter(
            or_(Project.is_public == True, Project.created_by == user_id)
        )

        # Apply visibility filter
        if visibility == "public":
            query = query.filter(Project.is_public == True)
        elif visibility == "private":
            query = query.filter(Project.is_public == False, Project.created_by == user_id)

        # Apply search filter
        if search:
            search_lower = search.lower()
            query = query.filter(
                or_(
                    func.lower(Project.name).contains(search_lower),
                    func.lower(Project.description).contains(search_lower)
                )
            )

        # Get total count before pagination
        total = query.count()

        # Apply sorting
        sort_columns = {
            "name": Project.name,
            "created_at": Project.created_at,
            "updated_at": Project.updated_at,
        }
        sort_column = sort_columns.get(sort, Project.name)
        if order == "desc":
            query = query.order_by(desc(sort_column))
        else:
            query = query.order_by(asc(sort_column))

        # Apply pagination
        offset = (page - 1) * limit
        projects = query.offset(offset).limit(limit).all()

        return projects, total

    def create_project(
        self,
        name: str,
        created_by: str,
        description: Optional[str] = None,
        is_public: bool = True,
    ) -> Project:
        """Create a new project."""
        return self.create(
            name=name,
            description=description,
            is_public=is_public,
            created_by=created_by,
        )

    def update_project(
        self,
        project: Project,
        name: Optional[str] = None,
        description: Optional[str] = None,
        is_public: Optional[bool] = None,
    ) -> Project:
        """Update project fields."""
        updates = {}
        if name is not None:
            updates["name"] = name
        if description is not None:
            updates["description"] = description
        if is_public is not None:
            updates["is_public"] = is_public
        return self.update(project, **updates)

    def search(self, query_str: str, limit: int = 10) -> List[Project]:
        """Search projects by name or description."""
        search_lower = query_str.lower()
        return (
            self.db.query(Project)
            .filter(
                or_(
                    func.lower(Project.name).contains(search_lower),
                    func.lower(Project.description).contains(search_lower)
                )
            )
            .order_by(Project.name)
            .limit(limit)
            .all()
        )
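A short sketch of the access rule in list_accessible above: the base filter already restricts results to public projects or the caller's own, so visibility="private" narrows to the caller's private projects only (identifiers are illustrative):

repo = ProjectRepository(db)
mine, total = repo.list_accessible(user_id="alice", visibility="private")
everything, _ = repo.list_accessible(user_id="alice")  # public + own private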
168 backend/app/repositories/tag.py Normal file
@@ -0,0 +1,168 @@
"""
Tag repository for data access operations.
"""

from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import func, or_, asc, desc
from uuid import UUID

from .base import BaseRepository
from ..models import Tag, TagHistory, Artifact, Package, Project


class TagRepository(BaseRepository[Tag]):
    """Repository for Tag entity operations."""

    model = Tag

    def get_by_name(self, package_id: UUID, name: str) -> Optional[Tag]:
        """Get tag by name within a package."""
        return (
            self.db.query(Tag)
            .filter(Tag.package_id == package_id, Tag.name == name)
            .first()
        )

    def get_with_artifact(self, package_id: UUID, name: str) -> Optional[Tuple[Tag, Artifact]]:
        """Get tag with its artifact."""
        return (
            self.db.query(Tag, Artifact)
            .join(Artifact, Tag.artifact_id == Artifact.id)
            .filter(Tag.package_id == package_id, Tag.name == name)
            .first()
        )

    def exists_by_name(self, package_id: UUID, name: str) -> bool:
        """Check if tag with name exists in package."""
        return self.db.query(
            self.db.query(Tag)
            .filter(Tag.package_id == package_id, Tag.name == name)
            .exists()
        ).scalar()

    def list_by_package(
        self,
        package_id: UUID,
        page: int = 1,
        limit: int = 20,
        search: Optional[str] = None,
        sort: str = "name",
        order: str = "asc",
    ) -> Tuple[List[Tuple[Tag, Artifact]], int]:
        """
        List tags in a package with artifact metadata.

        Returns tuple of ((tag, artifact) tuples, total_count).
        """
        query = (
            self.db.query(Tag, Artifact)
            .join(Artifact, Tag.artifact_id == Artifact.id)
            .filter(Tag.package_id == package_id)
        )

        # Apply search filter (tag name or artifact original filename)
        if search:
            search_lower = search.lower()
            query = query.filter(
                or_(
                    func.lower(Tag.name).contains(search_lower),
                    func.lower(Artifact.original_name).contains(search_lower)
                )
            )

        # Get total count
        total = query.count()

        # Apply sorting
        sort_columns = {
            "name": Tag.name,
            "created_at": Tag.created_at,
        }
        sort_column = sort_columns.get(sort, Tag.name)
        if order == "desc":
            query = query.order_by(desc(sort_column))
        else:
            query = query.order_by(asc(sort_column))

        # Apply pagination
        offset = (page - 1) * limit
        results = query.offset(offset).limit(limit).all()

        return results, total

    def create_tag(
        self,
        package_id: UUID,
        name: str,
        artifact_id: str,
        created_by: str,
    ) -> Tag:
        """Create a new tag."""
        return self.create(
            package_id=package_id,
            name=name,
            artifact_id=artifact_id,
            created_by=created_by,
        )

    def update_artifact(
        self,
        tag: Tag,
        new_artifact_id: str,
        changed_by: str,
        record_history: bool = True,
    ) -> Tag:
        """
        Update tag to point to a different artifact.
        Optionally records change in tag history.
        """
        old_artifact_id = tag.artifact_id

        if record_history and old_artifact_id != new_artifact_id:
            history = TagHistory(
                tag_id=tag.id,
                old_artifact_id=old_artifact_id,
                new_artifact_id=new_artifact_id,
                changed_by=changed_by,
            )
            self.db.add(history)

        tag.artifact_id = new_artifact_id
        tag.created_by = changed_by
        self.db.flush()
        return tag

    def get_history(self, tag_id: UUID) -> List[TagHistory]:
        """Get tag change history."""
        return (
            self.db.query(TagHistory)
            .filter(TagHistory.tag_id == tag_id)
            .order_by(TagHistory.changed_at.desc())
            .all()
        )

    def get_latest_in_package(self, package_id: UUID) -> Optional[Tag]:
        """Get the most recently created/updated tag in a package."""
        return (
            self.db.query(Tag)
            .filter(Tag.package_id == package_id)
            .order_by(Tag.created_at.desc())
            .first()
        )

    def get_by_artifact(self, artifact_id: str) -> List[Tag]:
        """Get all tags pointing to an artifact."""
        return (
            self.db.query(Tag)
            .filter(Tag.artifact_id == artifact_id)
            .all()
        )

    def count_by_artifact(self, artifact_id: str) -> int:
        """Count tags pointing to an artifact."""
        return (
            self.db.query(func.count(Tag.id))
            .filter(Tag.artifact_id == artifact_id)
            .scalar() or 0
        )
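A sketch of retargeting a tag with history intact, using the methods above (the package object and hashes are placeholders):

tag_repo = TagRepository(db)
tag = tag_repo.get_by_name(package.id, "latest")
if tag is not None:
    tag_repo.update_artifact(tag, new_artifact_id=new_sha256, changed_by="alice")
    tag_repo.commit()
    for entry in tag_repo.get_history(tag.id):
        print(entry.old_artifact_id, "->", entry.new_artifact_id)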
136 backend/app/repositories/upload.py Normal file
@@ -0,0 +1,136 @@
"""
Upload repository for data access operations.
"""

from typing import Optional, List, Tuple
from datetime import datetime
from sqlalchemy.orm import Session
from sqlalchemy import func, desc
from uuid import UUID

from .base import BaseRepository
from ..models import Upload, Artifact, Package, Project


class UploadRepository(BaseRepository[Upload]):
    """Repository for Upload entity operations."""

    model = Upload

    def create_upload(
        self,
        artifact_id: str,
        package_id: UUID,
        uploaded_by: str,
        original_name: Optional[str] = None,
        source_ip: Optional[str] = None,
    ) -> Upload:
        """Record a new upload event."""
        return self.create(
            artifact_id=artifact_id,
            package_id=package_id,
            original_name=original_name,
            uploaded_by=uploaded_by,
            source_ip=source_ip,
        )

    def list_by_package(
        self,
        package_id: UUID,
        page: int = 1,
        limit: int = 20,
    ) -> Tuple[List[Upload], int]:
        """List uploads for a package with pagination."""
        query = self.db.query(Upload).filter(Upload.package_id == package_id)

        total = query.count()
        offset = (page - 1) * limit
        uploads = query.order_by(Upload.uploaded_at.desc()).offset(offset).limit(limit).all()

        return uploads, total

    def list_by_artifact(self, artifact_id: str) -> List[Upload]:
        """List all uploads of a specific artifact."""
        return (
            self.db.query(Upload)
            .filter(Upload.artifact_id == artifact_id)
            .order_by(Upload.uploaded_at.desc())
            .all()
        )

    def get_latest_for_package(self, package_id: UUID) -> Optional[Upload]:
        """Get the most recent upload for a package."""
        return (
            self.db.query(Upload)
            .filter(Upload.package_id == package_id)
            .order_by(Upload.uploaded_at.desc())
            .first()
        )

    def get_latest_timestamp(self, package_id: UUID) -> Optional[datetime]:
        """Get timestamp of most recent upload for a package."""
        result = (
            self.db.query(func.max(Upload.uploaded_at))
            .filter(Upload.package_id == package_id)
            .scalar()
        )
        return result

    def count_by_artifact(self, artifact_id: str) -> int:
        """Count uploads of a specific artifact."""
        return (
            self.db.query(func.count(Upload.id))
            .filter(Upload.artifact_id == artifact_id)
            .scalar() or 0
        )

    def count_by_package(self, package_id: UUID) -> int:
        """Count total uploads for a package."""
        return (
            self.db.query(func.count(Upload.id))
            .filter(Upload.package_id == package_id)
            .scalar() or 0
        )

    def get_distinct_artifacts_count(self, package_id: UUID) -> int:
        """Count distinct artifacts uploaded to a package."""
        return (
            self.db.query(func.count(func.distinct(Upload.artifact_id)))
            .filter(Upload.package_id == package_id)
            .scalar() or 0
        )

    def get_uploads_by_user(
        self,
        user_id: str,
        page: int = 1,
        limit: int = 20,
    ) -> Tuple[List[Upload], int]:
        """List uploads by a specific user."""
        query = self.db.query(Upload).filter(Upload.uploaded_by == user_id)

        total = query.count()
        offset = (page - 1) * limit
        uploads = query.order_by(Upload.uploaded_at.desc()).offset(offset).limit(limit).all()

        return uploads, total

    def get_upload_stats(self, package_id: UUID) -> dict:
        """Get upload statistics for a package."""
        stats = (
            self.db.query(
                func.count(Upload.id),
                func.count(func.distinct(Upload.artifact_id)),
                func.min(Upload.uploaded_at),
                func.max(Upload.uploaded_at),
            )
            .filter(Upload.package_id == package_id)
            .first()
        )

        return {
            "total_uploads": stats[0] if stats else 0,
            "unique_artifacts": stats[1] if stats else 0,
            "first_upload": stats[2] if stats else None,
            "last_upload": stats[3] if stats else None,
        }
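A brief sketch of consuming get_upload_stats above (the package object is a placeholder):

upload_repo = UploadRepository(db)
stats = upload_repo.get_upload_stats(package.id)
if stats["total_uploads"]:
    dedup = stats["unique_artifacts"] / stats["total_uploads"]
    print(f"{stats['total_uploads']} uploads, dedup ratio {dedup:.0%}, "
          f"last upload at {stats['last_upload']}")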
File diff suppressed because it is too large
File diff suppressed because it is too large
366 backend/app/seed.py Normal file
@@ -0,0 +1,366 @@
"""
Test data seeding for development environment.
"""
import hashlib
import logging
from sqlalchemy.orm import Session

from .models import Project, Package, Artifact, Tag, Upload, PackageVersion, ArtifactDependency, Team, TeamMembership, User
from .storage import get_storage
from .auth import hash_password

logger = logging.getLogger(__name__)

# Test data definitions
TEST_PROJECTS = [
    {
        "name": "frontend-libs",
        "description": "Shared frontend libraries and components",
        "is_public": True,
        "packages": [
            {
                "name": "ui-components",
                "description": "Reusable UI component library",
            },
            {
                "name": "design-tokens",
                "description": "Design system tokens and variables",
            },
        ],
    },
    {
        "name": "backend-services",
        "description": "Backend microservices and shared utilities",
        "is_public": True,
        "packages": [
            {
                "name": "auth-lib",
                "description": "Authentication and authorization library",
            },
            {
                "name": "common-utils",
                "description": "Common utility functions",
            },
            {
                "name": "api-client",
                "description": "Generated API client library",
            },
        ],
    },
    {
        "name": "mobile-apps",
        "description": "Mobile application builds and assets",
        "is_public": True,
        "packages": [
            {
                "name": "ios-release",
                "description": "iOS release builds",
            },
            {
                "name": "android-release",
                "description": "Android release builds",
            },
        ],
    },
    {
        "name": "internal-tools",
        "description": "Internal development tools (private)",
        "is_public": False,
        "packages": [
            {
                "name": "dev-scripts",
                "description": "Development automation scripts",
            },
        ],
    },
]

# Sample artifacts to create (content, tags, version)
TEST_ARTIFACTS = [
    {
        "project": "frontend-libs",
        "package": "ui-components",
        "content": b"/* UI Components v1.0.0 */\nexport const Button = () => {};\nexport const Input = () => {};\n",
        "filename": "ui-components-1.0.0.js",
        "content_type": "application/javascript",
        "tags": ["v1.0.0", "latest"],
        "version": "1.0.0",
    },
    {
        "project": "frontend-libs",
        "package": "ui-components",
        "content": b"/* UI Components v1.1.0 */\nexport const Button = () => {};\nexport const Input = () => {};\nexport const Modal = () => {};\n",
        "filename": "ui-components-1.1.0.js",
        "content_type": "application/javascript",
        "tags": ["v1.1.0"],
        "version": "1.1.0",
    },
    {
        "project": "frontend-libs",
        "package": "design-tokens",
        "content": b'{"colors": {"primary": "#007bff", "secondary": "#6c757d"}, "spacing": {"sm": "8px", "md": "16px"}}',
        "filename": "tokens.json",
        "content_type": "application/json",
        "tags": ["v1.0.0", "latest"],
        "version": "1.0.0",
    },
    {
        "project": "backend-services",
        "package": "common-utils",
        "content": b"# Common Utils\n\ndef format_date(dt):\n    return dt.isoformat()\n\ndef slugify(text):\n    return text.lower().replace(' ', '-')\n",
        "filename": "utils-2.0.0.py",
        "content_type": "text/x-python",
        "tags": ["v2.0.0", "stable", "latest"],
        "version": "2.0.0",
    },
    {
        "project": "backend-services",
        "package": "auth-lib",
        "content": b"package auth\n\nfunc ValidateToken(token string) bool {\n    return len(token) > 0\n}\n",
        "filename": "auth-lib-1.0.0.go",
        "content_type": "text/x-go",
        "tags": ["v1.0.0", "latest"],
        "version": "1.0.0",
    },
]

# Dependencies to create (source artifact -> dependency)
# Format: (source_project, source_package, source_version, dep_project, dep_package, version_constraint, tag_constraint)
TEST_DEPENDENCIES = [
    # ui-components v1.1.0 depends on design-tokens v1.0.0
    ("frontend-libs", "ui-components", "1.1.0", "frontend-libs", "design-tokens", "1.0.0", None),
    # auth-lib v1.0.0 depends on common-utils v2.0.0
    ("backend-services", "auth-lib", "1.0.0", "backend-services", "common-utils", "2.0.0", None),
    # auth-lib v1.0.0 also depends on design-tokens (latest tag)
    ("backend-services", "auth-lib", "1.0.0", "frontend-libs", "design-tokens", None, "latest"),
]


def is_database_empty(db: Session) -> bool:
    """Check if the database has any projects."""
    return db.query(Project).first() is None


def seed_database(db: Session) -> None:
    """Seed the database with test data."""
    if not is_database_empty(db):
        logger.info("Database already has data, skipping seed")
        return

    logger.info("Seeding database with test data...")
    storage = get_storage()

    # Find or use admin user for team ownership
    admin_user = db.query(User).filter(User.username == "admin").first()
    team_owner_username = admin_user.username if admin_user else "seed-user"

    # Create a demo team
    demo_team = Team(
        name="Demo Team",
        slug="demo-team",
        description="A demonstration team with sample projects",
        created_by=team_owner_username,
    )
    db.add(demo_team)
    db.flush()

    # Add admin user as team owner if they exist
    if admin_user:
        membership = TeamMembership(
            team_id=demo_team.id,
            user_id=admin_user.id,
            role="owner",
            invited_by=team_owner_username,
        )
        db.add(membership)
        db.flush()

    logger.info(f"Created team: {demo_team.name} ({demo_team.slug})")

    # Create test users with various roles
    test_users = [
        {"username": "alice", "email": "alice@example.com", "role": "admin"},
        {"username": "bob", "email": "bob@example.com", "role": "admin"},
        {"username": "charlie", "email": "charlie@example.com", "role": "member"},
        {"username": "diana", "email": "diana@example.com", "role": "member"},
        {"username": "eve", "email": "eve@example.com", "role": "member"},
        {"username": "frank", "email": None, "role": "member"},
    ]

    for user_data in test_users:
        # Check if user already exists
        existing_user = db.query(User).filter(User.username == user_data["username"]).first()
        if existing_user:
            test_user = existing_user
        else:
            # Create the user with password same as username
            test_user = User(
                username=user_data["username"],
                email=user_data["email"],
                password_hash=hash_password(user_data["username"]),
                is_admin=False,
                is_active=True,
                must_change_password=False,
            )
            db.add(test_user)
            db.flush()
            logger.info(f"Created test user: {user_data['username']}")

        # Add to demo team with specified role
        existing_membership = db.query(TeamMembership).filter(
            TeamMembership.team_id == demo_team.id,
            TeamMembership.user_id == test_user.id,
        ).first()

        if not existing_membership:
            membership = TeamMembership(
                team_id=demo_team.id,
                user_id=test_user.id,
                role=user_data["role"],
                invited_by=team_owner_username,
            )
            db.add(membership)
            logger.info(f"Added {user_data['username']} to {demo_team.slug} as {user_data['role']}")

    db.flush()

    # Create projects and packages
    project_map = {}
    package_map = {}

    for project_data in TEST_PROJECTS:
        project = Project(
            name=project_data["name"],
            description=project_data["description"],
            is_public=project_data["is_public"],
            created_by=team_owner_username,
            team_id=demo_team.id,  # Assign to demo team
        )
        db.add(project)
        db.flush()  # Get the ID
        project_map[project_data["name"]] = project

        for package_data in project_data["packages"]:
            package = Package(
                project_id=project.id,
                name=package_data["name"],
                description=package_data["description"],
            )
            db.add(package)
            db.flush()
            package_map[(project_data["name"], package_data["name"])] = package

    logger.info(f"Created {len(project_map)} projects and {len(package_map)} packages (assigned to {demo_team.slug})")

    # Create artifacts, tags, and versions
    artifact_count = 0
    tag_count = 0
    version_count = 0

    for artifact_data in TEST_ARTIFACTS:
        project = project_map[artifact_data["project"]]
        package = package_map[(artifact_data["project"], artifact_data["package"])]

        content = artifact_data["content"]
        sha256_hash = hashlib.sha256(content).hexdigest()
        size = len(content)
        s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"

        # Store in S3
        try:
            storage.client.put_object(
                Bucket=storage.bucket,
                Key=s3_key,
                Body=content,
            )
        except Exception as e:
            logger.warning(f"Failed to store artifact in S3: {e}")
            continue

        # Calculate ref_count: tags + version (if present)
        ref_count = len(artifact_data["tags"])
        if artifact_data.get("version"):
            ref_count += 1

        # Create artifact record
        artifact = Artifact(
            id=sha256_hash,
            size=size,
            content_type=artifact_data["content_type"],
            original_name=artifact_data["filename"],
            created_by=team_owner_username,
            s3_key=s3_key,
            ref_count=ref_count,
        )
        db.add(artifact)

        # Create upload record
        upload = Upload(
            artifact_id=sha256_hash,
            package_id=package.id,
            original_name=artifact_data["filename"],
            uploaded_by="seed-user",
        )
        db.add(upload)
        artifact_count += 1

        # Create version record if specified
        if artifact_data.get("version"):
            version = PackageVersion(
                package_id=package.id,
                artifact_id=sha256_hash,
                version=artifact_data["version"],
                version_source="explicit",
                created_by=team_owner_username,
            )
            db.add(version)
            version_count += 1

        # Create tags
        for tag_name in artifact_data["tags"]:
            tag = Tag(
                package_id=package.id,
                name=tag_name,
                artifact_id=sha256_hash,
                created_by=team_owner_username,
            )
            db.add(tag)
            tag_count += 1

    db.flush()

    # Create dependencies
    dependency_count = 0
    for dep_data in TEST_DEPENDENCIES:
        src_project, src_package, src_version, dep_project, dep_package, version_constraint, tag_constraint = dep_data

        # Find the source artifact by looking up its version
        src_pkg = package_map.get((src_project, src_package))
        if not src_pkg:
            logger.warning(f"Source package not found: {src_project}/{src_package}")
            continue

        # Find the artifact for this version
        src_version_record = db.query(PackageVersion).filter(
            PackageVersion.package_id == src_pkg.id,
            PackageVersion.version == src_version,
        ).first()

        if not src_version_record:
            logger.warning(f"Source version not found: {src_project}/{src_package}@{src_version}")
            continue

        # Create the dependency
        dependency = ArtifactDependency(
            artifact_id=src_version_record.artifact_id,
            dependency_project=dep_project,
            dependency_package=dep_package,
            version_constraint=version_constraint,
            tag_constraint=tag_constraint,
        )
        db.add(dependency)
        dependency_count += 1

    db.commit()
    logger.info(f"Created {artifact_count} artifacts, {tag_count} tags, {version_count} versions, and {dependency_count} dependencies")
    logger.info("Database seeding complete")
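A hedged sketch of invoking the seeder at startup. The SessionLocal factory and its import path are assumptions; this hunk does not show the actual call site:

from app.database import SessionLocal  # hypothetical import path
from app.seed import seed_database

db = SessionLocal()
try:
    seed_database(db)  # no-op unless the database has no projects yet
finally:
    db.close()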
9 backend/app/services/__init__.py Normal file
@@ -0,0 +1,9 @@
"""
Service layer for business logic.
"""

from .artifact_cleanup import ArtifactCleanupService

__all__ = [
    "ArtifactCleanupService",
]
199 backend/app/services/artifact_cleanup.py Normal file
@@ -0,0 +1,199 @@
"""
Service for artifact reference counting and cleanup.
"""

from typing import List, Optional, Tuple
from sqlalchemy.orm import Session
import logging

from ..models import Artifact, Tag
from ..repositories.artifact import ArtifactRepository
from ..repositories.tag import TagRepository
from ..storage import S3Storage

logger = logging.getLogger(__name__)


class ArtifactCleanupService:
    """
    Service for managing artifact reference counts and cleaning up orphaned artifacts.

    Reference counting rules:
    - ref_count starts at 1 when artifact is first uploaded
    - ref_count increments when the same artifact is uploaded again (deduplication)
    - ref_count decrements when a tag is deleted or updated to point elsewhere
    - ref_count decrements when a package is deleted (for each tag pointing to artifact)
    - When ref_count reaches 0, artifact is a candidate for deletion from S3
    """

    def __init__(self, db: Session, storage: Optional[S3Storage] = None):
        self.db = db
        self.storage = storage
        self.artifact_repo = ArtifactRepository(db)
        self.tag_repo = TagRepository(db)

    def on_tag_deleted(self, artifact_id: str) -> Optional[Artifact]:
        """
        Called when a tag is deleted.
        Decrements ref_count for the artifact the tag was pointing to.
        Returns None if the artifact does not exist.
        """
        artifact = self.artifact_repo.get_by_sha256(artifact_id)
        if artifact:
            artifact = self.artifact_repo.decrement_ref_count(artifact)
            logger.info(
                f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}"
            )
        return artifact

    def on_tag_updated(
        self, old_artifact_id: str, new_artifact_id: str
    ) -> Tuple[Optional[Artifact], Optional[Artifact]]:
        """
        Called when a tag is updated to point to a different artifact.
        Decrements ref_count for old artifact, increments for new (if different).

        Returns (old_artifact, new_artifact) tuple.
        """
        old_artifact = None
        new_artifact = None

        if old_artifact_id != new_artifact_id:
            # Decrement old artifact ref_count
            old_artifact = self.artifact_repo.get_by_sha256(old_artifact_id)
            if old_artifact:
                old_artifact = self.artifact_repo.decrement_ref_count(old_artifact)
                logger.info(
                    f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}"
                )

            # Increment new artifact ref_count
            new_artifact = self.artifact_repo.get_by_sha256(new_artifact_id)
            if new_artifact:
                new_artifact = self.artifact_repo.increment_ref_count(new_artifact)
                logger.info(
                    f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}"
                )

        return old_artifact, new_artifact

    def on_package_deleted(self, package_id) -> List[str]:
        """
        Called when a package is deleted.
        Decrements ref_count for all artifacts that had tags in the package.

        Returns list of artifact IDs that were affected.
        """
        # Get all tags in the package before deletion
        tags = self.db.query(Tag).filter(Tag.package_id == package_id).all()

        affected_artifacts = []
        for tag in tags:
            artifact = self.artifact_repo.get_by_sha256(tag.artifact_id)
            if artifact:
                self.artifact_repo.decrement_ref_count(artifact)
                affected_artifacts.append(tag.artifact_id)
                logger.info(
                    f"Decremented ref_count for artifact {tag.artifact_id} (package delete)"
                )

        return affected_artifacts

    def cleanup_orphaned_artifacts(
        self, batch_size: int = 100, dry_run: bool = False
    ) -> List[str]:
        """
        Find and delete artifacts with ref_count = 0.

        Args:
            batch_size: Maximum number of artifacts to process
            dry_run: If True, only report what would be deleted without actually deleting

        Returns:
            List of artifact IDs that were (or would be) deleted
        """
        orphaned = self.artifact_repo.get_orphaned_artifacts(limit=batch_size)

        deleted_ids = []
        for artifact in orphaned:
            if dry_run:
                logger.info(f"[DRY RUN] Would delete orphaned artifact: {artifact.id}")
                deleted_ids.append(artifact.id)
            else:
                try:
                    # Delete from S3 first
                    if self.storage:
                        self.storage.delete(artifact.s3_key)
                        logger.info(f"Deleted artifact from S3: {artifact.s3_key}")

                    # Then delete from database
                    self.artifact_repo.delete(artifact)
                    deleted_ids.append(artifact.id)
                    logger.info(
                        f"Deleted orphaned artifact from database: {artifact.id}"
                    )
                except Exception as e:
                    logger.error(f"Failed to delete artifact {artifact.id}: {e}")

        if not dry_run and deleted_ids:
            self.db.commit()

        return deleted_ids

    def get_orphaned_count(self) -> int:
        """Get count of artifacts with ref_count = 0."""
        from sqlalchemy import func

        return (
            self.db.query(func.count(Artifact.id))
            .filter(Artifact.ref_count == 0)
            .scalar()
            or 0
        )

    def verify_ref_counts(self, fix: bool = False) -> List[dict]:
        """
        Verify that ref_counts match actual tag references.

        Args:
            fix: If True, fix any mismatched ref_counts

        Returns:
            List of artifacts with mismatched ref_counts
        """
        from sqlalchemy import func

        # Get actual tag counts per artifact
        tag_counts = (
            self.db.query(Tag.artifact_id, func.count(Tag.id).label("tag_count"))
            .group_by(Tag.artifact_id)
            .all()
        )
        tag_count_map = {artifact_id: count for artifact_id, count in tag_counts}

        # Check all artifacts
        artifacts = self.db.query(Artifact).all()
        mismatches = []

        for artifact in artifacts:
            actual_count = tag_count_map.get(artifact.id, 0)
            # ref_count should be at least 1 (initial upload) + additional uploads
            # But tags are the primary reference, so we check against tag count

            if artifact.ref_count < actual_count:
                mismatch = {
                    "artifact_id": artifact.id,
                    "stored_ref_count": artifact.ref_count,
                    "actual_tag_count": actual_count,
                }
                mismatches.append(mismatch)

                if fix:
                    artifact.ref_count = max(actual_count, 1)
                    logger.warning(
                        f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}"
                    )

        if fix and mismatches:
            self.db.commit()

        return mismatches
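A short sketch of a maintenance pass over orphaned artifacts using the service above (the session and storage wiring are placeholders):

svc = ArtifactCleanupService(db, storage=get_storage())
pending = svc.cleanup_orphaned_artifacts(batch_size=50, dry_run=True)
logger.info(f"{len(pending)} orphaned artifacts would be deleted")
if pending:
    svc.cleanup_orphaned_artifacts(batch_size=50)  # deletes from S3, then DB, then commits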
File diff suppressed because it is too large
565 backend/app/upstream.py Normal file
@@ -0,0 +1,565 @@
"""
HTTP client for fetching artifacts from upstream sources.

Provides streaming downloads with SHA256 computation, authentication support,
and automatic source matching based on URL prefixes.
"""

from __future__ import annotations

import hashlib
import logging
import tempfile
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import BinaryIO, Optional, TYPE_CHECKING
from urllib.parse import urlparse

import httpx

if TYPE_CHECKING:
    from .models import CacheSettings, UpstreamSource

logger = logging.getLogger(__name__)


class UpstreamError(Exception):
    """Base exception for upstream client errors."""

    pass


class UpstreamConnectionError(UpstreamError):
    """Connection to upstream failed (network error, DNS, etc.)."""

    pass


class UpstreamTimeoutError(UpstreamError):
    """Request to upstream timed out."""

    pass


class UpstreamHTTPError(UpstreamError):
    """Upstream returned an HTTP error response."""

    def __init__(self, message: str, status_code: int, response_headers: dict = None):
        super().__init__(message)
        self.status_code = status_code
        self.response_headers = response_headers or {}


class UpstreamSSLError(UpstreamError):
    """SSL/TLS error when connecting to upstream."""

    pass


class FileSizeExceededError(UpstreamError):
    """File size exceeds the maximum allowed."""

    def __init__(self, message: str, content_length: int, max_size: int):
        super().__init__(message)
        self.content_length = content_length
        self.max_size = max_size


class SourceNotFoundError(UpstreamError):
    """No matching upstream source found for URL."""

    pass


class SourceDisabledError(UpstreamError):
    """The matching upstream source is disabled."""

    pass


@dataclass
class FetchResult:
    """Result of fetching an artifact from upstream."""

    content: BinaryIO                  # File-like object with content
    sha256: str                        # SHA256 hash of content
    size: int                          # Size in bytes
    content_type: Optional[str]        # Content-Type header
    response_headers: dict             # All response headers for provenance
    source_name: Optional[str] = None  # Name of matched upstream source
    temp_path: Optional[Path] = None   # Path to temp file (for cleanup)

    def close(self):
        """Close and clean up resources."""
        if self.content:
            try:
                self.content.close()
            except Exception:
                pass
        if self.temp_path and self.temp_path.exists():
            try:
                self.temp_path.unlink()
            except Exception:
                pass


@dataclass
class UpstreamClientConfig:
    """Configuration for the upstream client."""

    connect_timeout: float = 30.0        # Connection timeout in seconds
    read_timeout: float = 300.0          # Read timeout in seconds (5 minutes for large files)
    max_retries: int = 3                 # Maximum number of retry attempts
    retry_backoff_base: float = 1.0      # Base delay for exponential backoff
    retry_backoff_max: float = 30.0      # Maximum delay between retries
    follow_redirects: bool = True        # Whether to follow redirects
    max_redirects: int = 5               # Maximum number of redirects to follow
    max_file_size: Optional[int] = None  # Maximum file size (None = unlimited)
    verify_ssl: bool = True              # Verify SSL certificates
    user_agent: str = "Orchard-UpstreamClient/1.0"


class UpstreamClient:
    """
    HTTP client for fetching artifacts from upstream sources.

    Supports streaming downloads, multiple authentication methods,
    automatic source matching, and air-gap mode enforcement.
    """

    def __init__(
        self,
        sources: list[UpstreamSource] = None,
        cache_settings: CacheSettings = None,
        config: UpstreamClientConfig = None,
    ):
        """
        Initialize the upstream client.

        Args:
            sources: List of upstream sources for URL matching and auth.
                Should be sorted by priority (lowest first).
            cache_settings: Global cache settings including air-gap mode.
            config: Client configuration options.
        """
        self.sources = sources or []
        self.cache_settings = cache_settings
        self.config = config or UpstreamClientConfig()

        # Sort sources by priority (lower = higher priority)
        self.sources = sorted(self.sources, key=lambda s: s.priority)

    def _match_source(self, url: str) -> Optional[UpstreamSource]:
        """
        Find the upstream source that matches the given URL.

        Matches by URL prefix, returns the highest priority match.

        Args:
            url: The URL to match.

        Returns:
            The matching UpstreamSource or None if no match.
        """
        for source in self.sources:
            # Check if URL starts with source URL (prefix match)
            if url.startswith(source.url.rstrip("/")):
                return source

        return None

    def _build_auth_headers(self, source: UpstreamSource) -> dict:
        """
        Build authentication headers for the given source.

        Args:
            source: The upstream source with auth configuration.

        Returns:
            Dictionary of headers to add to the request.
        """
        headers = {}

        if source.auth_type == "none":
            pass
        elif source.auth_type == "basic":
            # httpx handles basic auth via auth parameter, but we can also
            # do it manually if needed. We'll use the auth parameter instead.
            pass
        elif source.auth_type == "bearer":
            password = source.get_password()
            if password:
                headers["Authorization"] = f"Bearer {password}"
        elif source.auth_type == "api_key":
            # API key auth uses custom headers
            custom_headers = source.get_headers()
            if custom_headers:
                headers.update(custom_headers)

        return headers

    def _get_basic_auth(self, source: UpstreamSource) -> Optional[tuple[str, str]]:
        """
        Get basic auth credentials if applicable.

        Args:
            source: The upstream source.

        Returns:
            Tuple of (username, password) or None.
        """
        if source.auth_type == "basic" and source.username:
            password = source.get_password() or ""
            return (source.username, password)
        return None

    def _should_retry(self, error: Exception, attempt: int) -> bool:
        """
        Determine if a request should be retried.

        Args:
            error: The exception that occurred.
            attempt: Current attempt number (0-indexed).

        Returns:
            True if the request should be retried.
        """
        if attempt >= self.config.max_retries - 1:
            return False

        # Retry on connection errors and timeouts
        if isinstance(error, (httpx.ConnectError, httpx.ConnectTimeout)):
            return True

        # Retry on read timeouts
        if isinstance(error, httpx.ReadTimeout):
            return True

        # Retry on certain HTTP errors (502, 503, 504)
        if isinstance(error, httpx.HTTPStatusError):
            return error.response.status_code in (502, 503, 504)

        return False

    def _calculate_backoff(self, attempt: int) -> float:
        """
        Calculate backoff delay for retry.

        Uses exponential backoff with jitter.

        Args:
            attempt: Current attempt number (0-indexed).

        Returns:
            Delay in seconds.
        """
        import random

        delay = self.config.retry_backoff_base * (2**attempt)
        # Add jitter (±25%)
        delay *= 0.75 + random.random() * 0.5
        return min(delay, self.config.retry_backoff_max)

    def fetch(self, url: str, expected_hash: Optional[str] = None) -> FetchResult:
        """
        Fetch an artifact from the given URL.

        Streams the response to a temp file while computing the SHA256 hash.
        Handles authentication, retries, and error cases.

        Args:
            url: The URL to fetch.
            expected_hash: Optional expected SHA256 hash for verification.

        Returns:
            FetchResult with content, hash, size, and headers.

        Raises:
            SourceDisabledError: If the matching source is disabled.
            UpstreamConnectionError: On connection failures.
            UpstreamTimeoutError: On timeout.
            UpstreamHTTPError: On HTTP error responses.
            UpstreamSSLError: On SSL/TLS errors.
            FileSizeExceededError: If Content-Length exceeds max_file_size.
        """
        start_time = time.time()

        # Match URL to source
        source = self._match_source(url)

        # Check if source is enabled (if we have a match)
        if source is not None and not source.enabled:
            raise SourceDisabledError(
                f"Upstream source '{source.name}' is disabled"
            )

        source_name = source.name if source else None
        logger.info(
            f"Fetching URL: {url} (source: {source_name or 'none'})"
        )

        # Build request parameters
        headers = {"User-Agent": self.config.user_agent}
        auth = None

        if source:
            headers.update(self._build_auth_headers(source))
            auth = self._get_basic_auth(source)

        timeout = httpx.Timeout(
            connect=self.config.connect_timeout,
            read=self.config.read_timeout,
            write=30.0,
            pool=10.0,
        )

        # Attempt fetch with retries
        last_error = None
        for attempt in range(self.config.max_retries):
            try:
                return self._do_fetch(
                    url=url,
                    headers=headers,
                    auth=auth,
                    timeout=timeout,
                    source_name=source_name,
                    start_time=start_time,
                    expected_hash=expected_hash,
                )
            except (
                httpx.ConnectError,
                httpx.ConnectTimeout,
                httpx.ReadTimeout,
                httpx.HTTPStatusError,
            ) as e:
                last_error = e
                if self._should_retry(e, attempt):
                    delay = self._calculate_backoff(attempt)
                    logger.warning(
                        f"Fetch failed (attempt {attempt + 1}/{self.config.max_retries}), "
                        f"retrying in {delay:.1f}s: {e}"
                    )
                    time.sleep(delay)
                else:
                    break

        # Convert final error to our exception types
        self._raise_upstream_error(last_error, url)

    def _do_fetch(
        self,
        url: str,
        headers: dict,
        auth: Optional[tuple[str, str]],
        timeout: httpx.Timeout,
        source_name: Optional[str],
        start_time: float,
        expected_hash: Optional[str] = None,
    ) -> FetchResult:
        """
        Perform the actual fetch operation.

        Args:
            url: URL to fetch.
            headers: Request headers.
            auth: Basic auth credentials or None.
            timeout: Request timeout configuration.
            source_name: Name of matched source for logging.
            start_time: Request start time for timing.
            expected_hash: Optional expected hash for verification.

        Returns:
            FetchResult with content and metadata.
        """
        with httpx.Client(
            timeout=timeout,
            follow_redirects=self.config.follow_redirects,
            max_redirects=self.config.max_redirects,
            verify=self.config.verify_ssl,
        ) as client:
            with client.stream("GET", url, headers=headers, auth=auth) as response:
                # Check for HTTP errors
                response.raise_for_status()

                # Check Content-Length against max size
                content_length = response.headers.get("content-length")
                if content_length:
                    content_length = int(content_length)
                    if (
                        self.config.max_file_size
                        and content_length > self.config.max_file_size
                    ):
                        raise FileSizeExceededError(
                            f"File size {content_length} exceeds maximum {self.config.max_file_size}",
                            content_length,
                            self.config.max_file_size,
                        )

                # Stream to temp file while computing hash
                hasher = hashlib.sha256()
                size = 0

                # Create temp file
                temp_file = tempfile.NamedTemporaryFile(
                    delete=False, prefix="orchard_upstream_"
                )
                temp_path = Path(temp_file.name)

                try:
                    for chunk in response.iter_bytes(chunk_size=65536):
                        temp_file.write(chunk)
                        hasher.update(chunk)
                        size += len(chunk)

                        # Check size while streaming if max_file_size is set
                        if self.config.max_file_size and size > self.config.max_file_size:
                            temp_file.close()
                            temp_path.unlink()
                            raise FileSizeExceededError(
                                f"Downloaded size {size} exceeds maximum {self.config.max_file_size}",
                                size,
                                self.config.max_file_size,
                            )

                    temp_file.close()

                    sha256 = hasher.hexdigest()

                    # Verify hash if expected
                    if expected_hash and sha256 != expected_hash.lower():
                        temp_path.unlink()
                        raise UpstreamError(
                            f"Hash mismatch: expected {expected_hash}, got {sha256}"
                        )

                    # Capture response headers
                    response_headers = dict(response.headers)

                    # Get content type
                    content_type = response.headers.get("content-type")

                    elapsed = time.time() - start_time
                    logger.info(
                        f"Fetched {url}: {size} bytes, sha256={sha256[:12]}..., "
                        f"source={source_name}, time={elapsed:.2f}s"
                    )

                    # Return file handle positioned at start
                    content = open(temp_path, "rb")

                    return FetchResult(
                        content=content,
                        sha256=sha256,
                        size=size,
                        content_type=content_type,
                        response_headers=response_headers,
                        source_name=source_name,
                        temp_path=temp_path,
                    )

                except Exception:
                    # Clean up on error
                    try:
                        temp_file.close()
                    except Exception:
                        pass
                    if temp_path.exists():
                        temp_path.unlink()
                    raise

    def _raise_upstream_error(self, error: Exception, url: str):
        """
        Convert httpx exception to appropriate UpstreamError.

        Args:
            error: The httpx exception.
            url: The URL that was being fetched.

        Raises:
            Appropriate UpstreamError subclass.
        """
        if error is None:
            raise UpstreamError(f"Unknown error fetching {url}")

        if isinstance(error, httpx.ConnectError):
            raise UpstreamConnectionError(
                f"Failed to connect to upstream: {error}"
            ) from error

        if isinstance(error, (httpx.ConnectTimeout, httpx.ReadTimeout)):
            raise UpstreamTimeoutError(
                f"Request timed out: {error}"
            ) from error

        if isinstance(error, httpx.HTTPStatusError):
            raise UpstreamHTTPError(
                f"HTTP {error.response.status_code}: {error}",
                error.response.status_code,
                dict(error.response.headers),
            ) from error

        # Check for SSL errors in the error chain
        if "ssl" in str(error).lower() or "certificate" in str(error).lower():
            raise UpstreamSSLError(f"SSL/TLS error: {error}") from error

        raise UpstreamError(f"Error fetching {url}: {error}") from error

    def test_connection(self, source: UpstreamSource) -> tuple[bool, Optional[str], Optional[int]]:
        """
        Test connectivity to an upstream source.

        Performs a HEAD request to the source URL to verify connectivity
        and authentication. Does not follow redirects - a 3xx response
        is considered successful since it proves the server is reachable.

        Args:
            source: The upstream source to test.

        Returns:
            Tuple of (success, error_message, status_code).
        """
        headers = {"User-Agent": self.config.user_agent}
        headers.update(self._build_auth_headers(source))
        auth = self._get_basic_auth(source)

        timeout = httpx.Timeout(
            connect=self.config.connect_timeout,
            read=30.0,
            write=30.0,
            pool=10.0,
        )

        try:
            with httpx.Client(
                timeout=timeout,
                verify=self.config.verify_ssl,
            ) as client:
                response = client.head(
                    source.url,
                    headers=headers,
                    auth=auth,
                    follow_redirects=False,
                )
                # Consider 2xx and 3xx as success, also 405 (Method Not Allowed)
                # since some servers don't support HEAD
                if response.status_code < 400 or response.status_code == 405:
                    return (True, None, response.status_code)
                else:
                    return (
                        False,
                        f"HTTP {response.status_code}",
                        response.status_code,
                    )
        except httpx.ConnectError as e:
            return (False, f"Connection failed: {e}", None)
        except httpx.ConnectTimeout as e:
            return (False, f"Connection timed out: {e}", None)
        except httpx.ReadTimeout as e:
            return (False, f"Read timed out: {e}", None)
        except httpx.TooManyRedirects as e:
            return (False, f"Too many redirects: {e}", None)
        except Exception as e:
            return (False, f"Error: {e}", None)
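A hedged usage sketch of UpstreamClient.fetch above; the URL is illustrative and the sources list would normally come from the database:

client = UpstreamClient(config=UpstreamClientConfig(max_file_size=50 * 1024 * 1024))
result = client.fetch("https://example.com/artifacts/tool-1.0.tgz")
try:
    data = result.content.read()  # streamed temp file, opened at offset 0
    print(result.sha256, result.size, result.content_type)
finally:
    result.close()  # closes the handle and unlinks the temp file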
31 backend/pytest.ini Normal file
@@ -0,0 +1,31 @@
[pytest]
testpaths = tests
python_files = test_*.py
python_functions = test_*
python_classes = Test*
asyncio_mode = auto
addopts = -v --tb=short --cov=app --cov-report=term-missing --cov-report=html:coverage_html --cov-fail-under=0
filterwarnings =
    ignore::DeprecationWarning
    ignore::UserWarning
markers =
    unit: Unit tests (no external dependencies)
    integration: Integration tests (require database/storage)
    slow: Slow tests (skip with -m "not slow")
    large: Large file tests (100MB+, skip with -m "not large")
    concurrent: Concurrent operation tests

# Coverage configuration
[coverage:run]
source = app
omit =
    */tests/*
    */__pycache__/*

[coverage:report]
exclude_lines =
    pragma: no cover
    def __repr__
    raise NotImplementedError
    if __name__ == .__main__.:
    pass
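A brief sketch of applying the markers registered above in a test module (the test body is illustrative):

import pytest

@pytest.mark.integration
@pytest.mark.slow
def test_large_upload_roundtrip():
    ...  # deselected by -m "not slow" or -m "not integration"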
@@ -9,3 +9,12 @@ pydantic==2.5.3
pydantic-settings==2.1.0
python-jose[cryptography]==3.3.0
passlib[bcrypt]==1.7.4
bcrypt==4.0.1
slowapi==0.1.9
httpx>=0.25.0

# Test dependencies
pytest>=7.4.0
pytest-asyncio>=0.21.0
pytest-cov>=4.1.0
moto[s3]>=4.2.0
1 backend/tests/__init__.py Normal file
@@ -0,0 +1 @@
# Test package
399 backend/tests/conftest.py Normal file
@@ -0,0 +1,399 @@
"""
Test configuration and fixtures for Orchard backend tests.

This module provides:
- Database fixtures with test isolation
- Mock S3 storage using moto
- Shared pytest fixtures
"""

import os
import pytest


# =============================================================================
# Pytest Markers
# =============================================================================


def pytest_configure(config):
    """Register custom pytest markers."""
    config.addinivalue_line(
        "markers",
        "auth_intensive: marks tests that make many login requests (excluded from CI integration tests due to rate limiting)",
    )
    config.addinivalue_line(
        "markers",
        "integration: marks tests as integration tests",
    )
    config.addinivalue_line(
        "markers",
        "large: marks tests that handle large files (slow)",
    )
    config.addinivalue_line(
        "markers",
        "slow: marks tests as slow running",
    )
    config.addinivalue_line(
        "markers",
        "requires_direct_s3: marks tests that require direct S3/MinIO access (skipped in CI where S3 is not directly accessible)",
    )


import io
from typing import Generator
from unittest.mock import MagicMock

# Set test environment defaults before importing app modules
# Use setdefault to NOT override existing env vars (from docker-compose)
os.environ.setdefault("ORCHARD_DATABASE_HOST", "localhost")
os.environ.setdefault("ORCHARD_DATABASE_PORT", "5432")
os.environ.setdefault("ORCHARD_DATABASE_USER", "test")
os.environ.setdefault("ORCHARD_DATABASE_PASSWORD", "test")
os.environ.setdefault("ORCHARD_DATABASE_DBNAME", "orchard_test")
os.environ.setdefault("ORCHARD_S3_ENDPOINT", "http://localhost:9000")
os.environ.setdefault("ORCHARD_S3_BUCKET", "test-bucket")
os.environ.setdefault("ORCHARD_S3_ACCESS_KEY_ID", "test")
os.environ.setdefault("ORCHARD_S3_SECRET_ACCESS_KEY", "test")
# =============================================================================
# Admin Credentials Helper
# =============================================================================


def get_admin_password() -> str:
    """Get the admin password for test authentication.

    Returns the password from the ORCHARD_TEST_PASSWORD environment variable,
    or 'changeme123' as the default for local development.
    """
    return os.environ.get("ORCHARD_TEST_PASSWORD", "changeme123")


def get_admin_username() -> str:
    """Get the admin username for test authentication."""
    return os.environ.get("ORCHARD_TEST_USERNAME", "admin")


# Re-export factory functions for backward compatibility
from tests.factories import (
    create_test_file,
    compute_sha256,
    compute_md5,
    compute_sha1,
    upload_test_file,
    generate_content,
    generate_content_with_hash,
    TEST_CONTENT_HELLO,
    TEST_HASH_HELLO,
    TEST_MD5_HELLO,
    TEST_SHA1_HELLO,
    TEST_CONTENT_EMPTY,
    TEST_CONTENT_BINARY,
    TEST_HASH_BINARY,
    get_s3_client,
    get_s3_bucket,
    list_s3_objects_by_hash,
    count_s3_objects_by_prefix,
    s3_object_exists,
    delete_s3_object_by_hash,
)
# =============================================================================
# Mock Storage Fixtures
# =============================================================================


class MockS3Client:
    """Mock S3 client for unit testing without actual S3/MinIO."""

    def __init__(self):
        self.objects = {}  # key -> content
        self.bucket = "test-bucket"

    def put_object(self, Bucket: str, Key: str, Body: bytes) -> dict:
        self.objects[Key] = Body
        return {"ETag": f'"{compute_md5(Body)}"'}

    def get_object(self, Bucket: str, Key: str, **kwargs) -> dict:
        if Key not in self.objects:
            raise Exception("NoSuchKey")
        content = self.objects[Key]
        return {
            "Body": io.BytesIO(content),
            "ContentLength": len(content),
        }

    def head_object(self, Bucket: str, Key: str) -> dict:
        if Key not in self.objects:
            from botocore.exceptions import ClientError

            error_response = {"Error": {"Code": "404", "Message": "Not Found"}}
            raise ClientError(error_response, "HeadObject")
        content = self.objects[Key]
        return {
            "ContentLength": len(content),
            "ETag": f'"{compute_md5(content)}"',
        }

    def delete_object(self, Bucket: str, Key: str) -> dict:
        if Key in self.objects:
            del self.objects[Key]
        return {}

    def head_bucket(self, Bucket: str) -> dict:
        return {}

    def create_multipart_upload(self, Bucket: str, Key: str) -> dict:
        return {"UploadId": "test-upload-id"}

    def upload_part(
        self, Bucket: str, Key: str, UploadId: str, PartNumber: int, Body: bytes
    ) -> dict:
        return {"ETag": f'"{compute_md5(Body)}"'}

    def complete_multipart_upload(
        self, Bucket: str, Key: str, UploadId: str, MultipartUpload: dict
    ) -> dict:
        return {"ETag": '"test-etag"'}

    def abort_multipart_upload(self, Bucket: str, Key: str, UploadId: str) -> dict:
        return {}

    def generate_presigned_url(
        self, ClientMethod: str, Params: dict, ExpiresIn: int
    ) -> str:
        return f"https://test-bucket.s3.amazonaws.com/{Params['Key']}?presigned=true"
@pytest.fixture
def mock_s3_client() -> MockS3Client:
    """Provide a mock S3 client for unit tests."""
    return MockS3Client()


@pytest.fixture
def mock_storage(mock_s3_client):
    """
    Provide a mock storage instance for unit tests.

    Uses the MockS3Client to avoid actual S3/MinIO calls.
    """
    from app.storage import S3Storage

    storage = S3Storage.__new__(S3Storage)
    storage.client = mock_s3_client
    storage.bucket = "test-bucket"
    storage._active_uploads = {}

    return storage
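
A short sketch of a unit test against the mock, using only the MockS3Client methods defined above (the test name and key are illustrative):

    @pytest.mark.unit
    def test_mock_s3_roundtrip(mock_s3_client):
        # put_object stores the bytes in the in-memory dict
        mock_s3_client.put_object(Bucket="test-bucket", Key="demo/key", Body=b"payload")
        # get_object returns a file-like Body plus ContentLength
        obj = mock_s3_client.get_object(Bucket="test-bucket", Key="demo/key")
        assert obj["Body"].read() == b"payload"
        assert obj["ContentLength"] == len(b"payload")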
# =============================================================================
# Database Fixtures (for integration tests)
# =============================================================================


@pytest.fixture(scope="session")
def test_db_url():
    """Get the test database URL."""
    return (
        f"postgresql://{os.environ['ORCHARD_DATABASE_USER']}:"
        f"{os.environ['ORCHARD_DATABASE_PASSWORD']}@"
        f"{os.environ['ORCHARD_DATABASE_HOST']}:"
        f"{os.environ['ORCHARD_DATABASE_PORT']}/"
        f"{os.environ['ORCHARD_DATABASE_DBNAME']}"
    )


# =============================================================================
# HTTP Client Fixtures (for API tests)
# =============================================================================


@pytest.fixture
def test_app():
    """
    Create a test FastAPI application.

    Note: This requires the database to be available for integration tests.
    For unit tests, use the mock_storage fixture instead.
    """
    from fastapi.testclient import TestClient
    from app.main import app

    return TestClient(app)
# =============================================================================
# Integration Test Fixtures
# =============================================================================


@pytest.fixture(scope="session")
def integration_client():
    """
    Create an authenticated test client for integration tests.

    Uses the real database and MinIO from docker-compose.local.yml or a deployed
    environment. Authenticates as admin for write operations. Session-scoped to
    reuse the login across tests.

    Environment variables:
        ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
        ORCHARD_TEST_USERNAME: Admin username for authentication (default: admin)
        ORCHARD_TEST_PASSWORD: Admin password for authentication (default: changeme123)
    """
    import httpx

    # Connect to the running orchard-server container or deployed environment
    base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
    username = os.environ.get("ORCHARD_TEST_USERNAME", "admin")
    password = os.environ.get("ORCHARD_TEST_PASSWORD", "changeme123")

    with httpx.Client(base_url=base_url, timeout=30.0) as client:
        # Login as admin to enable write operations
        login_response = client.post(
            "/api/v1/auth/login",
            json={"username": username, "password": password},
        )
        if login_response.status_code != 200:
            pytest.fail(
                f"Authentication failed against {base_url}: {login_response.status_code} - {login_response.text}. "
                f"Set ORCHARD_TEST_USERNAME and ORCHARD_TEST_PASSWORD environment variables if using non-default credentials."
            )

        # Verify cookie was set
        if not client.cookies:
            pytest.fail(
                f"Login succeeded but no session cookie was set. Response headers: {login_response.headers}"
            )

        yield client
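
For reference, the minimal shape of a test built on this fixture, using the /api/v1/auth/me endpoint that appears elsewhere in this diff (the test name is illustrative):

    @pytest.mark.integration
    def test_session_is_authenticated(integration_client):
        # The session cookie from the fixture's login is reused here
        response = integration_client.get("/api/v1/auth/me")
        assert response.status_code == 200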
@pytest.fixture
def auth_client():
    """
    Create a function-scoped test client for authentication tests.

    Unlike integration_client (session-scoped), this creates a fresh client
    for each test. Use this for tests that manipulate authentication state
    (login, logout, cookie clearing) to avoid polluting other tests.

    Environment variables:
        ORCHARD_TEST_URL: Base URL of the Orchard server (default: http://localhost:8080)
    """
    import httpx

    base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

    with httpx.Client(base_url=base_url, timeout=30.0) as client:
        yield client
@pytest.fixture
def unique_test_id():
    """Generate a unique ID for test isolation."""
    import uuid

    return f"test-{uuid.uuid4().hex[:8]}"


@pytest.fixture
def test_project(integration_client, unique_test_id):
    """
    Create a test project and clean it up after the test.

    Yields the project name.
    """
    project_name = f"test-project-{unique_test_id}"

    # Create project
    response = integration_client.post(
        "/api/v1/projects",
        json={"name": project_name, "description": "Test project", "is_public": True},
    )
    assert response.status_code == 200, f"Failed to create project: {response.text}"

    yield project_name

    # Cleanup: delete project
    try:
        integration_client.delete(f"/api/v1/projects/{project_name}")
    except Exception:
        pass  # Ignore cleanup errors


@pytest.fixture
def test_package(integration_client, test_project, unique_test_id):
    """
    Create a test package within a test project.

    Yields (project_name, package_name) tuple.
    """
    package_name = f"test-package-{unique_test_id}"

    # Create package
    response = integration_client.post(
        f"/api/v1/project/{test_project}/packages",
        json={"name": package_name, "description": "Test package"},
    )
    assert response.status_code == 200, f"Failed to create package: {response.text}"

    yield (test_project, package_name)

    # Cleanup handled by test_project fixture (cascade delete)
@pytest.fixture
def test_content():
    """
    Generate unique test content for each test.

    Returns (content_bytes, expected_sha256) tuple.
    """
    import uuid

    content = f"test-content-{uuid.uuid4().hex}".encode()
    sha256 = compute_sha256(content)
    return (content, sha256)


@pytest.fixture
def sized_content():
    """
    Factory fixture for generating content of specific sizes.

    Usage:
        def test_example(sized_content):
            content, hash = sized_content(1024)  # 1KB
            content, hash = sized_content(1024 * 1024)  # 1MB
    """
    def _generate(size: int, seed: int = None):
        return generate_content_with_hash(size, seed)
    return _generate
# =============================================================================
# Size Constants for Tests
# =============================================================================

# Common file sizes for boundary testing
SIZE_1B = 1
SIZE_1KB = 1024
SIZE_10KB = 10 * 1024
SIZE_100KB = 100 * 1024
SIZE_1MB = 1024 * 1024
SIZE_5MB = 5 * 1024 * 1024
SIZE_10MB = 10 * 1024 * 1024
SIZE_50MB = 50 * 1024 * 1024
SIZE_100MB = 100 * 1024 * 1024
SIZE_250MB = 250 * 1024 * 1024
SIZE_500MB = 500 * 1024 * 1024
SIZE_1GB = 1024 * 1024 * 1024

# Chunk size boundaries (based on typical S3 multipart chunk sizes)
CHUNK_SIZE = 64 * 1024  # 64KB typical chunk
MULTIPART_THRESHOLD = 100 * 1024 * 1024  # 100MB multipart threshold
327
backend/tests/factories.py
Normal file
@@ -0,0 +1,327 @@
"""
Test data factories for Orchard backend tests.

This module provides factory functions for creating test data,
including test files, pre-computed hashes, and helper utilities.
"""

import hashlib
import io
import os
import uuid
from typing import Optional


# =============================================================================
# Hash Computation Utilities
# =============================================================================


def compute_sha256(content: bytes) -> str:
    """Compute SHA256 hash of content as lowercase hex string."""
    return hashlib.sha256(content).hexdigest()


def compute_md5(content: bytes) -> str:
    """Compute MD5 hash of content as lowercase hex string."""
    return hashlib.md5(content).hexdigest()


def compute_sha1(content: bytes) -> str:
    """Compute SHA1 hash of content as lowercase hex string."""
    return hashlib.sha1(content).hexdigest()
# =============================================================================
# Test File Factories
# =============================================================================


def create_test_file(content: Optional[bytes] = None, size: int = 1024) -> io.BytesIO:
    """
    Create a test file with known content.

    Args:
        content: Specific content to use, or None to generate random-ish content
        size: Size of generated content if content is None

    Returns:
        BytesIO object with the content
    """
    if content is None:
        content = os.urandom(size)
    return io.BytesIO(content)


def create_unique_content(prefix: str = "test-content") -> tuple[bytes, str]:
    """
    Create unique test content with its SHA256 hash.

    Args:
        prefix: Prefix for the content string

    Returns:
        Tuple of (content_bytes, sha256_hash)
    """
    content = f"{prefix}-{uuid.uuid4().hex}".encode()
    sha256 = compute_sha256(content)
    return content, sha256
# =============================================================================
# Known Test Data (Pre-computed hashes for deterministic tests)
# =============================================================================


TEST_CONTENT_HELLO = b"Hello, World!"
TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4"
TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01"

TEST_CONTENT_EMPTY = b""
# Note: Empty content should be rejected by the storage layer

TEST_CONTENT_BINARY = bytes(range(256))
TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY)
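
These constants can be sanity-checked against the hash helpers above; the digests of b"Hello, World!" are well-known values:

    assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO
    assert compute_md5(TEST_CONTENT_HELLO) == TEST_MD5_HELLO
    assert compute_sha1(TEST_CONTENT_HELLO) == TEST_SHA1_HELLO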
# =============================================================================
# API Test Helpers
# =============================================================================


def upload_test_file(
    client,
    project: str,
    package: str,
    content: bytes,
    filename: str = "test.bin",
    tag: Optional[str] = None,
    version: Optional[str] = None,
) -> dict:
    """
    Helper function to upload a test file via the API.

    Args:
        client: HTTP client (httpx or TestClient)
        project: Project name
        package: Package name
        content: File content as bytes
        filename: Original filename
        tag: Optional tag to assign
        version: Optional version to assign

    Returns:
        The upload response as a dict
    """
    files = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
    data = {}
    if tag:
        data["tag"] = tag
    if version:
        data["version"] = version

    response = client.post(
        f"/api/v1/project/{project}/{package}/upload",
        files=files,
        data=data if data else None,
    )
    assert response.status_code == 200, f"Upload failed: {response.text}"
    return response.json()


def generate_content(size: int, seed: Optional[int] = None) -> bytes:
    """
    Generate deterministic or random content of a specified size.

    Args:
        size: Size of content in bytes
        seed: Optional seed for reproducible content (None for random)

    Returns:
        Bytes of the specified size
    """
    if size == 0:
        return b""
    if seed is not None:
        import random

        rng = random.Random(seed)
        return bytes(rng.randint(0, 255) for _ in range(size))
    return os.urandom(size)


def generate_content_with_hash(size: int, seed: Optional[int] = None) -> tuple[bytes, str]:
    """
    Generate content of specified size and compute its SHA256 hash.

    Args:
        size: Size of content in bytes
        seed: Optional seed for reproducible content

    Returns:
        Tuple of (content_bytes, sha256_hash)
    """
    content = generate_content(size, seed)
    return content, compute_sha256(content)
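
A brief note on the seeded path: random.Random(seed) yields the same byte sequence on every call, so seeded content is reproducible across test runs, while seed=None falls back to os.urandom. For example:

    a, hash_a = generate_content_with_hash(1024, seed=42)
    b, hash_b = generate_content_with_hash(1024, seed=42)
    assert a == b and hash_a == hash_b  # deterministic for equal seeds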
# =============================================================================
# Project/Package Factories
# =============================================================================


def create_test_project(client, unique_id: Optional[str] = None) -> str:
    """
    Create a test project via the API.

    Args:
        client: HTTP client
        unique_id: Unique identifier for the project name

    Returns:
        Project name
    """
    if unique_id is None:
        unique_id = uuid.uuid4().hex[:8]

    project_name = f"test-project-{unique_id}"
    response = client.post(
        "/api/v1/projects",
        json={"name": project_name, "description": "Test project", "is_public": True},
    )
    assert response.status_code == 200, f"Failed to create project: {response.text}"
    return project_name


def create_test_package(client, project: str, unique_id: Optional[str] = None) -> str:
    """
    Create a test package via the API.

    Args:
        client: HTTP client
        project: Project name
        unique_id: Unique identifier for the package name

    Returns:
        Package name
    """
    if unique_id is None:
        unique_id = uuid.uuid4().hex[:8]

    package_name = f"test-package-{unique_id}"
    response = client.post(
        f"/api/v1/project/{project}/packages",
        json={"name": package_name, "description": "Test package"},
    )
    assert response.status_code == 200, f"Failed to create package: {response.text}"
    return package_name


def delete_test_project(client, project: str) -> None:
    """
    Delete a test project (cleanup helper).

    Args:
        client: HTTP client
        project: Project name to delete
    """
    try:
        client.delete(f"/api/v1/projects/{project}")
    except Exception:
        pass  # Ignore cleanup errors
# =============================================================================
# S3 Test Helpers
# =============================================================================


def get_s3_client():
    """
    Create a boto3 S3 client for direct S3 access in integration tests.

    Uses environment variables for configuration (same as the app).
    Note: When running in a container, the S3 endpoint should be 'minio:9000', not 'localhost:9000'.
    """
    import boto3
    from botocore.config import Config

    config = Config(s3={"addressing_style": "path"})

    # Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally)
    endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000")

    return boto3.client(
        "s3",
        endpoint_url=endpoint,
        region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"),
        aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"),
        aws_secret_access_key=os.environ.get(
            "ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin"
        ),
        config=config,
    )


def get_s3_bucket() -> str:
    """Get the S3 bucket name from environment."""
    return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts")


def list_s3_objects_by_hash(sha256_hash: str) -> list:
    """
    List S3 objects that match a specific SHA256 hash.

    Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern.
    Returns a list of matching object keys.
    """
    client = get_s3_client()
    bucket = get_s3_bucket()
    prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"

    response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)

    if "Contents" not in response:
        return []

    return [obj["Key"] for obj in response["Contents"]]


def count_s3_objects_by_prefix(prefix: str) -> int:
    """
    Count S3 objects with a given prefix.

    Useful for checking if duplicate uploads created multiple objects.
    """
    client = get_s3_client()
    bucket = get_s3_bucket()

    response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)

    if "Contents" not in response:
        return 0

    return len(response["Contents"])


def s3_object_exists(sha256_hash: str) -> bool:
    """Check if an S3 object exists for a given SHA256 hash."""
    objects = list_s3_objects_by_hash(sha256_hash)
    return len(objects) > 0


def delete_s3_object_by_hash(sha256_hash: str) -> bool:
    """Delete an S3 object by its SHA256 hash (for test cleanup)."""
    client = get_s3_client()
    bucket = get_s3_bucket()
    s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}"

    try:
        client.delete_object(Bucket=bucket, Key=s3_key)
        return True
    except Exception:
        return False
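
The fruits/{hash[:2]}/{hash[2:4]}/{hash} layout used by these helpers fans objects out across two levels of prefixes; a worked example with the TEST_HASH_HELLO digest from this module:

    sha = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
    key = f"fruits/{sha[:2]}/{sha[2:4]}/{sha}"
    # key == "fruits/df/fd/dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"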
0
backend/tests/integration/__init__.py
Normal file

638
backend/tests/integration/test_artifacts_api.py
Normal file
@@ -0,0 +1,638 @@
"""
Integration tests for artifact API endpoints.

Tests cover:
- Artifact retrieval by ID
- Artifact stats endpoint
- Artifact provenance/history
- Artifact uploads listing
- Garbage collection endpoints
- Orphaned artifacts management
"""

import pytest

from tests.factories import compute_sha256, upload_test_file
class TestArtifactRetrieval:
    """Tests for artifact retrieval endpoints."""

    @pytest.mark.integration
    def test_get_artifact_by_id(self, integration_client, test_package):
        """Test retrieving an artifact by its SHA256 ID."""
        project_name, package_name = test_package
        content = b"artifact retrieval test"
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project_name, package_name, content, tag="v1"
        )

        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

        data = response.json()
        assert data["id"] == expected_hash
        assert data["sha256"] == expected_hash
        assert data["size"] == len(content)
        assert "ref_count" in data
        assert "created_at" in data

    @pytest.mark.integration
    def test_get_nonexistent_artifact(self, integration_client):
        """Test getting a non-existent artifact returns 404."""
        fake_hash = "a" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_includes_tags(self, integration_client, test_package):
        """Test artifact response includes tags pointing to it."""
        project_name, package_name = test_package
        content = b"artifact with tags test"
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project_name, package_name, content, tag="tagged-v1"
        )

        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

        data = response.json()
        assert "tags" in data
        assert len(data["tags"]) >= 1

        tag = data["tags"][0]
        assert "name" in tag
        assert "package_name" in tag
        assert "project_name" in tag


class TestArtifactStats:
    """Tests for artifact statistics endpoint."""

    @pytest.mark.integration
    def test_artifact_stats_returns_valid_response(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact stats returns expected fields."""
        project, package = test_package
        content = f"artifact stats test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project, package, content, tag=f"art-{unique_test_id}"
        )

        response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
        assert response.status_code == 200

        data = response.json()
        assert "artifact_id" in data
        assert "sha256" in data
        assert "size" in data
        assert "ref_count" in data
        assert "storage_savings" in data
        assert "tags" in data
        assert "projects" in data
        assert "packages" in data

    @pytest.mark.integration
    def test_artifact_stats_not_found(self, integration_client):
        """Test artifact stats returns 404 for non-existent artifact."""
        fake_hash = "0" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/stats")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_stats_shows_correct_projects(
        self, integration_client, unique_test_id
    ):
        """Test artifact stats shows all projects using the artifact."""
        content = f"multi-project artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        proj1 = f"art-stats-a-{unique_test_id}"
        proj2 = f"art-stats-b-{unique_test_id}"

        try:
            # Create projects and packages
            integration_client.post(
                "/api/v1/projects",
                json={"name": proj1, "description": "Test", "is_public": True},
            )
            integration_client.post(
                "/api/v1/projects",
                json={"name": proj2, "description": "Test", "is_public": True},
            )
            integration_client.post(
                f"/api/v1/project/{proj1}/packages",
                json={"name": "pkg", "description": "Test"},
            )
            integration_client.post(
                f"/api/v1/project/{proj2}/packages",
                json={"name": "pkg", "description": "Test"},
            )

            # Upload same content to both projects
            upload_test_file(integration_client, proj1, "pkg", content, tag="v1")
            upload_test_file(integration_client, proj2, "pkg", content, tag="v1")

            # Check artifact stats
            response = integration_client.get(f"/api/v1/artifact/{expected_hash}/stats")
            assert response.status_code == 200

            data = response.json()
            assert len(data["projects"]) == 2
            assert proj1 in data["projects"]
            assert proj2 in data["projects"]

        finally:
            integration_client.delete(f"/api/v1/projects/{proj1}")
            integration_client.delete(f"/api/v1/projects/{proj2}")


class TestArtifactProvenance:
    """Tests for artifact provenance/history endpoint."""

    @pytest.mark.integration
    def test_artifact_history_returns_200(self, integration_client, test_package):
        """Test artifact history endpoint returns 200."""
        project_name, package_name = test_package

        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"provenance test content",
            "prov.txt",
        )
        artifact_id = upload_result["artifact_id"]

        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
        assert response.status_code == 200

    @pytest.mark.integration
    def test_artifact_history_has_required_fields(
        self, integration_client, test_package
    ):
        """Test artifact history has all required fields."""
        project_name, package_name = test_package

        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"provenance fields test",
            "fields.txt",
        )
        artifact_id = upload_result["artifact_id"]

        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
        assert response.status_code == 200

        data = response.json()
        assert "artifact_id" in data
        assert "sha256" in data
        assert "size" in data
        assert "created_at" in data
        assert "created_by" in data
        assert "ref_count" in data
        assert "first_uploaded_at" in data
        assert "first_uploaded_by" in data
        assert "upload_count" in data
        assert "packages" in data
        assert "tags" in data
        assert "uploads" in data

    @pytest.mark.integration
    def test_artifact_history_not_found(self, integration_client):
        """Test non-existent artifact returns 404."""
        fake_hash = "b" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/history")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_artifact_history_with_tag(self, integration_client, test_package):
        """Test artifact history includes tag information when tagged."""
        project_name, package_name = test_package

        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tagged provenance test",
            "tagged.txt",
            tag="v1.0.0",
        )
        artifact_id = upload_result["artifact_id"]

        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/history")
        assert response.status_code == 200

        data = response.json()
        assert len(data["tags"]) >= 1

        tag = data["tags"][0]
        assert "project_name" in tag
        assert "package_name" in tag
        assert "tag_name" in tag
class TestArtifactUploads:
    """Tests for artifact uploads listing endpoint."""

    @pytest.mark.integration
    def test_artifact_uploads_returns_200(self, integration_client, test_package):
        """Test artifact uploads endpoint returns 200."""
        project_name, package_name = test_package

        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact upload test",
            "artifact.txt",
        )
        artifact_id = upload_result["artifact_id"]

        response = integration_client.get(f"/api/v1/artifact/{artifact_id}/uploads")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data
        assert len(data["items"]) >= 1

    @pytest.mark.integration
    def test_artifact_uploads_not_found(self, integration_client):
        """Test non-existent artifact returns 404."""
        fake_hash = "a" * 64
        response = integration_client.get(f"/api/v1/artifact/{fake_hash}/uploads")
        assert response.status_code == 404


class TestOrphanedArtifacts:
    """Tests for orphaned artifacts management."""

    @pytest.mark.integration
    def test_list_orphaned_artifacts_returns_list(self, integration_client):
        """Test orphaned artifacts endpoint returns a list."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts")
        assert response.status_code == 200
        assert isinstance(response.json(), list)

    @pytest.mark.integration
    def test_orphaned_artifact_has_required_fields(self, integration_client):
        """Test orphaned artifact response has required fields."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1")
        assert response.status_code == 200

        data = response.json()
        if len(data) > 0:
            artifact = data[0]
            assert "id" in artifact
            assert "size" in artifact
            assert "created_at" in artifact
            assert "created_by" in artifact
            assert "original_name" in artifact

    @pytest.mark.integration
    def test_orphaned_artifacts_respects_limit(self, integration_client):
        """Test orphaned artifacts endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=5")
        assert response.status_code == 200
        assert len(response.json()) <= 5

    @pytest.mark.integration
    def test_artifact_becomes_orphaned_when_tag_deleted(
        self, integration_client, test_package, unique_test_id
    ):
        """Test artifact appears in orphaned list after tag is deleted."""
        project, package = test_package
        content = f"orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag
        upload_test_file(integration_client, project, package, content, tag="temp-tag")

        # Verify not in orphaned list
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        orphaned_ids = [a["id"] for a in response.json()]
        assert expected_hash not in orphaned_ids

        # Delete the tag
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/temp-tag")

        # Verify now in orphaned list
        response = integration_client.get("/api/v1/admin/orphaned-artifacts?limit=1000")
        orphaned_ids = [a["id"] for a in response.json()]
        assert expected_hash in orphaned_ids


class TestGarbageCollection:
    """Tests for garbage collection endpoint."""

    @pytest.mark.integration
    def test_garbage_collect_dry_run_returns_response(self, integration_client):
        """Test garbage collection dry run returns valid response."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200

        data = response.json()
        assert "artifacts_deleted" in data
        assert "bytes_freed" in data
        assert "artifact_ids" in data
        assert "dry_run" in data
        assert data["dry_run"] is True

    @pytest.mark.integration
    def test_garbage_collect_dry_run_doesnt_delete(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection dry run doesn't actually delete artifacts."""
        project, package = test_package
        content = f"dry run test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload and delete tag to create orphan
        upload_test_file(integration_client, project, package, content, tag="dry-run")
        integration_client.delete(f"/api/v1/project/{project}/{package}/tags/dry-run")

        # Verify artifact exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

        # Run garbage collection in dry-run mode
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200
        assert expected_hash in gc_response.json()["artifact_ids"]

        # Verify artifact STILL exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200

    @pytest.mark.integration
    def test_garbage_collect_preserves_referenced_artifacts(
        self, integration_client, test_package, unique_test_id
    ):
        """Test garbage collection doesn't delete artifacts with ref_count > 0."""
        project, package = test_package
        content = f"preserve test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag (ref_count=1)
        upload_test_file(integration_client, project, package, content, tag="keep-this")

        # Verify artifact exists with ref_count=1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == 1

        # Run garbage collection (dry_run to not affect other tests)
        gc_response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=1000"
        )
        assert gc_response.status_code == 200

        # Verify artifact was NOT in delete list
        assert expected_hash not in gc_response.json()["artifact_ids"]

        # Verify artifact still exists
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_garbage_collect_respects_limit(self, integration_client):
        """Test garbage collection respects limit parameter."""
        response = integration_client.post(
            "/api/v1/admin/garbage-collect?dry_run=true&limit=5"
        )
        assert response.status_code == 200
        assert response.json()["artifacts_deleted"] <= 5

    @pytest.mark.integration
    def test_garbage_collect_returns_bytes_freed(self, integration_client):
        """Test garbage collection returns accurate bytes_freed."""
        response = integration_client.post("/api/v1/admin/garbage-collect?dry_run=true")
        assert response.status_code == 200

        data = response.json()
        assert data["bytes_freed"] >= 0
        assert isinstance(data["bytes_freed"], int)
class TestGlobalUploads:
    """Tests for global uploads endpoint."""

    @pytest.mark.integration
    def test_global_uploads_returns_200(self, integration_client):
        """Test global uploads endpoint returns 200."""
        response = integration_client.get("/api/v1/uploads")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_uploads_pagination(self, integration_client):
        """Test global uploads endpoint respects pagination."""
        response = integration_client.get("/api/v1/uploads?limit=5&page=1")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5
        assert data["pagination"]["page"] == 1

    @pytest.mark.integration
    def test_global_uploads_filter_by_project(self, integration_client, test_package):
        """Test filtering global uploads by project name."""
        project_name, package_name = test_package

        # Upload a file
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"global filter test",
            "global.txt",
        )

        response = integration_client.get(f"/api/v1/uploads?project={project_name}")
        assert response.status_code == 200

        data = response.json()
        for item in data["items"]:
            assert item["project_name"] == project_name

    @pytest.mark.integration
    def test_global_uploads_has_more_field(self, integration_client):
        """Test pagination includes has_more field."""
        response = integration_client.get("/api/v1/uploads?limit=1")
        assert response.status_code == 200

        data = response.json()
        assert "has_more" in data["pagination"]
        assert isinstance(data["pagination"]["has_more"], bool)


class TestGlobalArtifacts:
    """Tests for global artifacts endpoint."""

    @pytest.mark.integration
    def test_global_artifacts_returns_200(self, integration_client):
        """Test global artifacts endpoint returns 200."""
        response = integration_client.get("/api/v1/artifacts")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_artifacts_pagination(self, integration_client):
        """Test global artifacts endpoint respects pagination."""
        response = integration_client.get("/api/v1/artifacts?limit=5&page=1")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_artifacts_filter_by_size(self, integration_client):
        """Test filtering global artifacts by size range."""
        response = integration_client.get(
            "/api/v1/artifacts?min_size=1&max_size=1000000"
        )
        assert response.status_code == 200

        data = response.json()
        for item in data["items"]:
            assert 1 <= item["size"] <= 1000000

    @pytest.mark.integration
    def test_global_artifacts_sort_by_size(self, integration_client):
        """Test sorting global artifacts by size."""
        response = integration_client.get("/api/v1/artifacts?sort=size&order=desc")
        assert response.status_code == 200
        data = response.json()
        if len(data["items"]) > 1:
            sizes = [item["size"] for item in data["items"]]
            assert sizes == sorted(sizes, reverse=True)

    @pytest.mark.integration
    def test_global_artifacts_invalid_sort_returns_400(self, integration_client):
        """Test invalid sort field returns 400."""
        response = integration_client.get("/api/v1/artifacts?sort=invalid_field")
        assert response.status_code == 400


class TestGlobalTags:
    """Tests for global tags endpoint."""

    @pytest.mark.integration
    def test_global_tags_returns_200(self, integration_client):
        """Test global tags endpoint returns 200."""
        response = integration_client.get("/api/v1/tags")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_global_tags_pagination(self, integration_client):
        """Test global tags endpoint respects pagination."""
        response = integration_client.get("/api/v1/tags?limit=5&page=1")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_global_tags_has_project_context(self, integration_client):
        """Test global tags response includes project/package context."""
        response = integration_client.get("/api/v1/tags?limit=1")
        assert response.status_code == 200

        data = response.json()
        if len(data["items"]) > 0:
            item = data["items"][0]
            assert "project_name" in item
            assert "package_name" in item
            assert "artifact_id" in item

    @pytest.mark.integration
    def test_global_tags_search_with_wildcard(self, integration_client):
        """Test global tags search supports wildcards."""
        response = integration_client.get("/api/v1/tags?search=v*")
        assert response.status_code == 200
        # Just verify it doesn't error; results may vary


class TestAuditLogs:
    """Tests for global audit logs endpoint."""

    @pytest.mark.integration
    def test_list_audit_logs_returns_valid_response(self, integration_client):
        """Test audit logs endpoint returns valid paginated response."""
        response = integration_client.get("/api/v1/audit-logs")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data
        assert isinstance(data["items"], list)

        pagination = data["pagination"]
        assert "page" in pagination
        assert "limit" in pagination
        assert "total" in pagination
        assert "total_pages" in pagination

    @pytest.mark.integration
    def test_audit_logs_respects_pagination(self, integration_client):
        """Test audit logs endpoint respects limit parameter."""
        response = integration_client.get("/api/v1/audit-logs?limit=5")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_audit_logs_filter_by_action(self, integration_client, test_package):
        """Test filtering audit logs by action type."""
        project_name, package_name = test_package

        response = integration_client.get("/api/v1/audit-logs?action=project.create")
        assert response.status_code == 200

        data = response.json()
        for item in data["items"]:
            assert item["action"] == "project.create"

    @pytest.mark.integration
    def test_audit_log_entry_has_required_fields(
        self, integration_client, test_project
    ):
        """Test audit log entries have all required fields."""
        response = integration_client.get("/api/v1/audit-logs?limit=10")
        assert response.status_code == 200

        data = response.json()
        if data["items"]:
            item = data["items"][0]
            assert "id" in item
            assert "action" in item
            assert "resource" in item
            assert "user_id" in item
            assert "timestamp" in item
617
backend/tests/integration/test_auth_api.py
Normal file
@@ -0,0 +1,617 @@
"""Integration tests for authentication API endpoints.

Note: These tests are marked as auth_intensive because they make many login
requests. Dev/stage deployments have relaxed rate limits (1000/minute) to
allow these tests to run. Production uses strict rate limits (5/minute).
"""

import pytest
from uuid import uuid4

from tests.conftest import get_admin_password, get_admin_username


# Mark all tests in this module as auth_intensive (informational, not excluded from CI)
pytestmark = pytest.mark.auth_intensive
class TestAuthLogin:
    """Tests for login endpoint."""

    @pytest.mark.integration
    def test_login_success(self, auth_client):
        """Test successful login with default admin credentials."""
        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        assert response.status_code == 200
        data = response.json()
        assert data["username"] == get_admin_username()
        assert data["is_admin"] is True
        assert "orchard_session" in response.cookies

    @pytest.mark.integration
    def test_login_invalid_password(self, auth_client):
        """Test login with wrong password."""
        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": "wrongpassword"},
        )
        assert response.status_code == 401
        assert "Invalid username or password" in response.json()["detail"]

    @pytest.mark.integration
    def test_login_nonexistent_user(self, auth_client):
        """Test login with non-existent user."""
        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": "nonexistent", "password": "password"},
        )
        assert response.status_code == 401


class TestAuthLogout:
    """Tests for logout endpoint."""

    @pytest.mark.integration
    def test_logout_success(self, auth_client):
        """Test successful logout."""
        # First login
        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        assert login_response.status_code == 200

        # Then logout
        logout_response = auth_client.post("/api/v1/auth/logout")
        assert logout_response.status_code == 200
        assert "Logged out successfully" in logout_response.json()["message"]

    @pytest.mark.integration
    def test_logout_without_session(self, auth_client):
        """Test logout without being logged in."""
        response = auth_client.post("/api/v1/auth/logout")
        # Should succeed even without session
        assert response.status_code == 200


class TestAuthMe:
    """Tests for get current user endpoint."""

    @pytest.mark.integration
    def test_get_me_authenticated(self, auth_client):
        """Test getting current user when authenticated."""
        # Login first
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        response = auth_client.get("/api/v1/auth/me")
        assert response.status_code == 200
        data = response.json()
        assert data["username"] == get_admin_username()
        assert data["is_admin"] is True
        assert "id" in data
        assert "created_at" in data

    @pytest.mark.integration
    def test_get_me_unauthenticated(self, auth_client):
        """Test getting current user without authentication."""
        # Clear any existing cookies
        auth_client.cookies.clear()

        response = auth_client.get("/api/v1/auth/me")
        assert response.status_code == 401
        assert "Not authenticated" in response.json()["detail"]
class TestAuthChangePassword:
    """Tests for change password endpoint.

    Note: These tests use dedicated test users instead of admin to avoid
    invalidating the integration_client session (which uses admin).
    """

    @pytest.mark.integration
    def test_change_password_success(self, auth_client):
        """Test successful password change."""
        # Login as admin to create a test user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        test_username = f"pwchange_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "oldpassword123"},
        )

        # Login as test user
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "oldpassword123"},
        )

        # Change password
        response = auth_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "oldpassword123", "new_password": "newpassword123"},
        )
        assert response.status_code == 200

        # Verify old password no longer works
        auth_client.cookies.clear()
        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "oldpassword123"},
        )
        assert response.status_code == 401

        # Verify new password works
        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "newpassword123"},
        )
        assert response.status_code == 200

    @pytest.mark.integration
    def test_change_password_wrong_current(self, auth_client):
        """Test password change with wrong current password."""
        # Login as admin to create a test user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        test_username = f"pwwrong_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )

        # Login as test user
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )

        response = auth_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "wrongpassword", "new_password": "newpassword"},
        )
        assert response.status_code == 400
        assert "Current password is incorrect" in response.json()["detail"]


class TestAPIKeys:
    """Tests for API key management endpoints."""

    @pytest.mark.integration
    def test_create_and_list_api_key(self, auth_client):
        """Test creating and listing API keys."""
        # Login first
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        # Create API key
        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "test-key", "description": "Test API key"},
        )
        assert create_response.status_code == 200
        data = create_response.json()
        assert data["name"] == "test-key"
        assert data["description"] == "Test API key"
        assert "key" in data
        assert data["key"].startswith("orch_")
        key_id = data["id"]
        api_key = data["key"]

        # List API keys
        list_response = auth_client.get("/api/v1/auth/keys")
        assert list_response.status_code == 200
        keys = list_response.json()
        assert any(k["id"] == key_id for k in keys)

        # Clean up - delete the key
        auth_client.delete(f"/api/v1/auth/keys/{key_id}")

    @pytest.mark.integration
    def test_use_api_key_for_auth(self, auth_client):
        """Test using API key for authentication."""
        # Login and create API key
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "auth-test-key"},
        )
        api_key = create_response.json()["key"]
        key_id = create_response.json()["id"]

        # Clear cookies and use API key
        auth_client.cookies.clear()
        response = auth_client.get(
            "/api/v1/auth/me",
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert response.status_code == 200
        assert response.json()["username"] == get_admin_username()

        # Clean up
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        auth_client.delete(f"/api/v1/auth/keys/{key_id}")

    @pytest.mark.integration
    def test_delete_api_key(self, auth_client):
        """Test revoking an API key."""
        # Login and create API key
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "delete-test-key"},
        )
        key_id = create_response.json()["id"]
        api_key = create_response.json()["key"]

        # Delete the key
        delete_response = auth_client.delete(f"/api/v1/auth/keys/{key_id}")
        assert delete_response.status_code == 200

        # Verify key no longer works
        auth_client.cookies.clear()
        response = auth_client.get(
            "/api/v1/auth/me",
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert response.status_code == 401
class TestAdminUserManagement:
    """Tests for admin user management endpoints."""

    @pytest.mark.integration
    def test_list_users(self, auth_client):
        """Test listing users as admin."""
        # Login as admin
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        response = auth_client.get("/api/v1/admin/users")
        assert response.status_code == 200
        users = response.json()
        assert len(users) >= 1
        assert any(u["username"] == get_admin_username() for u in users)

    @pytest.mark.integration
    def test_create_user(self, auth_client):
        """Test creating a new user as admin."""
        # Login as admin
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        # Create new user
        test_username = f"testuser_{uuid4().hex[:8]}"
        response = auth_client.post(
            "/api/v1/admin/users",
            json={
                "username": test_username,
                "password": "testpassword",
                "email": "test@example.com",
            },
        )
        assert response.status_code == 200
        data = response.json()
        assert data["username"] == test_username
        assert data["email"] == "test@example.com"
        assert data["is_admin"] is False

        # Verify new user can login
        auth_client.cookies.clear()
        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "testpassword"},
        )
        assert login_response.status_code == 200

    @pytest.mark.integration
    def test_update_user(self, auth_client):
        """Test updating a user as admin."""
        # Login as admin
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        # Create a test user
        test_username = f"updateuser_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password"},
        )

        # Update the user
        response = auth_client.put(
            f"/api/v1/admin/users/{test_username}",
            json={"email": "updated@example.com", "is_admin": True},
        )
        assert response.status_code == 200
        data = response.json()
        assert data["email"] == "updated@example.com"
        assert data["is_admin"] is True

    @pytest.mark.integration
    def test_reset_user_password(self, auth_client):
        """Test resetting a user's password as admin."""
        # Login as admin
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        # Create a test user
        test_username = f"resetuser_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "oldpassword"},
        )

        # Reset password
        response = auth_client.post(
            f"/api/v1/admin/users/{test_username}/reset-password",
            json={"new_password": "newpassword"},
        )
        assert response.status_code == 200

        # Verify new password works
        auth_client.cookies.clear()
        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "newpassword"},
        )
        assert login_response.status_code == 200

    @pytest.mark.integration
    def test_non_admin_cannot_access_admin_endpoints(self, auth_client):
        """Test that non-admin users cannot access admin endpoints."""
        # Login as admin and create non-admin user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        test_username = f"nonadmin_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password", "is_admin": False},
        )

        # Login as non-admin
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password"},
        )

        # Try to access admin endpoints
        response = auth_client.get("/api/v1/admin/users")
        assert response.status_code == 403
        assert "Admin privileges required" in response.json()["detail"]


class TestSecurityEdgeCases:
    """Tests for security edge cases and validation."""

    @pytest.mark.integration
    def test_login_inactive_user(self, auth_client):
        """Test that inactive users cannot login."""
        # Login as admin and create a user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        test_username = f"inactive_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )

        # Deactivate the user
        auth_client.put(
            f"/api/v1/admin/users/{test_username}",
            json={"is_active": False},
        )

        # Try to login as inactive user
        auth_client.cookies.clear()
        response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        assert response.status_code == 401
        assert "Invalid username or password" in response.json()["detail"]

    @pytest.mark.integration
    def test_password_too_short_on_create(self, auth_client):
        """Test that short passwords are rejected when creating users."""
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        response = auth_client.post(
            "/api/v1/admin/users",
            json={"username": f"shortpw_{uuid4().hex[:8]}", "password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]

    @pytest.mark.integration
    def test_password_too_short_on_change(self, auth_client):
        """Test that short passwords are rejected when changing password."""
        # Create test user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        test_username = f"shortchange_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )

        # Login as test user
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )

        response = auth_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "password123", "new_password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]

    @pytest.mark.integration
    def test_password_too_short_on_reset(self, auth_client):
        """Test that short passwords are rejected when resetting password."""
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        # Create a test user first
        test_username = f"resetshort_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )

        response = auth_client.post(
            f"/api/v1/admin/users/{test_username}/reset-password",
            json={"new_password": "short"},
        )
        assert response.status_code == 400
        assert "at least 8 characters" in response.json()["detail"]

    @pytest.mark.integration
    def test_duplicate_username_rejected(self, auth_client):
        """Test that duplicate usernames are rejected."""
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )

        test_username = f"duplicate_{uuid4().hex[:8]}"
        # Create user first time
        response1 = auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )
        assert response1.status_code == 200

        # Try to create same username again
        response2 = auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password456"},
        )
        assert response2.status_code == 409
        assert "already exists" in response2.json()["detail"]

    @pytest.mark.integration
    def test_cannot_delete_other_users_api_key(self, auth_client):
        """Test that users cannot delete API keys owned by other users."""
        # Login as admin and create an API key
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        create_response = auth_client.post(
            "/api/v1/auth/keys",
            json={"name": "admin-key"},
        )
        admin_key_id = create_response.json()["id"]

        # Create a non-admin user
        test_username = f"nonadmin_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )

        # Login as non-admin
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )

        # Try to delete admin's API key
        response = auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")
        assert response.status_code == 403
        assert "Cannot delete another user's API key" in response.json()["detail"]

        # Cleanup: login as admin and delete the key
        auth_client.cookies.clear()
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        auth_client.delete(f"/api/v1/auth/keys/{admin_key_id}")

    @pytest.mark.integration
    def test_sessions_invalidated_on_password_change(self, auth_client):
        """Test that all sessions are invalidated when password is changed."""
        # Create a test user
        auth_client.post(
            "/api/v1/auth/login",
            json={"username": get_admin_username(), "password": get_admin_password()},
        )
        test_username = f"sessiontest_{uuid4().hex[:8]}"
        auth_client.post(
            "/api/v1/admin/users",
            json={"username": test_username, "password": "password123"},
        )

        # Login as test user
        auth_client.cookies.clear()
        login_response = auth_client.post(
            "/api/v1/auth/login",
            json={"username": test_username, "password": "password123"},
        )
        assert login_response.status_code == 200

        # Verify session works
        me_response = auth_client.get("/api/v1/auth/me")
        assert me_response.status_code == 200

        # Change password
        auth_client.post(
            "/api/v1/auth/change-password",
            json={"current_password": "password123", "new_password": "newpassword123"},
        )

        # Old session should be invalidated - try to access /me
        # (note: the change-password call itself may have cleared the session cookie)
        me_response2 = auth_client.get("/api/v1/auth/me")
        # This should fail because all sessions were invalidated
        assert me_response2.status_code == 401

backend/tests/integration/test_concurrent_operations.py (new file, 737 lines)
@@ -0,0 +1,737 @@
"""
|
||||
Integration tests for concurrent upload and download operations.
|
||||
|
||||
Tests cover:
|
||||
- Concurrent uploads of different files
|
||||
- Concurrent uploads of same file (deduplication race)
|
||||
- Concurrent downloads of same artifact
|
||||
- Concurrent downloads of different artifacts
|
||||
- Mixed concurrent uploads and downloads
|
||||
- Data corruption prevention under concurrency
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import io
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from tests.factories import (
|
||||
compute_sha256,
|
||||
upload_test_file,
|
||||
generate_content_with_hash,
|
||||
)
|
||||
|
||||
|
||||
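

# NOTE: tests/factories.py is not part of this diff. As a rough sketch of the
# two helpers leaned on most heavily below (signatures inferred from their call
# sites in this file, not from the actual module, so treat this as an
# assumption):
#
#     import hashlib, random
#
#     def compute_sha256(content: bytes) -> str:
#         return hashlib.sha256(content).hexdigest()
#
#     def generate_content_with_hash(size: int, seed: int = 0) -> tuple[bytes, str]:
#         rng = random.Random(seed)
#         content = bytes(rng.getrandbits(8) for _ in range(size))
#         return content, compute_sha256(content)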
def get_api_key(integration_client):
    """Create an API key for concurrent test workers."""
    import uuid

    response = integration_client.post(
        "/api/v1/auth/keys",
        json={"name": f"concurrent-test-{uuid.uuid4().hex[:8]}"},
    )
    if response.status_code == 200:
        return response.json()["key"]
    return None
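

# The worker threads below each open their own httpx.Client, so they cannot
# share the session cookie held by the integration_client fixture. The bearer
# API key created up front is what authenticates those per-thread clients.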
class TestConcurrentUploads:
    """Tests for concurrent upload operations."""

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_2_concurrent_uploads_different_files(self, integration_client, test_package):
        """Test 2 concurrent uploads of different files."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        files_data = [generate_content_with_hash(1024, seed=i) for i in range(2)]

        results = []
        errors = []

        def upload_worker(idx, content, expected_hash):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        result = response.json()
                        results.append((idx, result, expected_hash))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}: {response.text}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = [
                executor.submit(upload_worker, i, content, content_hash)
                for i, (content, content_hash) in enumerate(files_data)
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == 2

        # Verify each upload returned correct artifact_id
        for idx, result, expected_hash in results:
            assert result["artifact_id"] == expected_hash

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_5_concurrent_uploads_different_files(self, integration_client, test_package):
        """Test 5 concurrent uploads of different files."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        num_files = 5
        files_data = [generate_content_with_hash(2048, seed=100 + i) for i in range(num_files)]

        results = []
        errors = []

        def upload_worker(idx, content, expected_hash):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent5-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        result = response.json()
                        results.append((idx, result, expected_hash))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_files) as executor:
            futures = [
                executor.submit(upload_worker, i, content, content_hash)
                for i, (content, content_hash) in enumerate(files_data)
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files

        # Verify all uploads have unique artifact_ids
        artifact_ids = {r[1]["artifact_id"] for r in results}
        assert len(artifact_ids) == num_files

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_10_concurrent_uploads_different_files(self, integration_client, test_package):
        """Test 10 concurrent uploads of different files."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        num_files = 10
        files_data = [generate_content_with_hash(1024, seed=200 + i) for i in range(num_files)]

        results = []
        errors = []

        def upload_worker(idx, content, expected_hash):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent10-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        result = response.json()
                        results.append((idx, result, expected_hash))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_files) as executor:
            futures = [
                executor.submit(upload_worker, i, content, content_hash)
                for i, (content, content_hash) in enumerate(files_data)
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_concurrent_uploads_same_file_deduplication(self, integration_client, test_package):
        """Test concurrent uploads of same file handle deduplication correctly."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        content, expected_hash = generate_content_with_hash(4096, seed=999)
        num_concurrent = 5

        results = []
        errors = []

        def upload_worker(idx):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": (f"same-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"dedup-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results.append(response.json())
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
            futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_concurrent

        # All should have same artifact_id
        artifact_ids = {r["artifact_id"] for r in results}
        assert len(artifact_ids) == 1
        assert expected_hash in artifact_ids

        # Verify final ref_count equals number of uploads
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == num_concurrent
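
    # The ref_count assertion above only holds if the server increments the
    # reference count atomically when several uploads race on one artifact.
    # One plausible way a backend might do that (an illustrative assumption,
    # not taken from this repository) is a single upsert of the form:
    #
    #     INSERT INTO artifacts (id, ref_count) VALUES (:sha256, 1)
    #     ON CONFLICT (id) DO UPDATE SET ref_count = artifacts.ref_count + 1;
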
    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_concurrent_uploads_to_different_packages(self, integration_client, test_project, unique_test_id):
        """Test concurrent uploads to different packages."""
        project = test_project
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        num_packages = 3
        package_names = []

        # Create multiple packages
        for i in range(num_packages):
            pkg_name = f"pkg-{unique_test_id}-{i}"
            response = integration_client.post(
                f"/api/v1/project/{project}/packages",
                json={"name": pkg_name, "description": f"Package {i}"},
            )
            assert response.status_code == 200
            package_names.append(pkg_name)

        files_data = [generate_content_with_hash(1024, seed=300 + i) for i in range(num_packages)]

        results = []
        errors = []

        def upload_worker(idx, package, content, expected_hash):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": (f"file-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": "latest"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        result = response.json()
                        results.append((package, result, expected_hash))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_packages) as executor:
            futures = [
                executor.submit(upload_worker, i, package_names[i], content, content_hash)
                for i, (content, content_hash) in enumerate(files_data)
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_packages


class TestConcurrentDownloads:
    """Tests for concurrent download operations."""

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_2_concurrent_downloads_same_artifact(self, integration_client, test_package):
        """Test 2 concurrent downloads of same artifact."""
        project, package = test_package
        content, expected_hash = generate_content_with_hash(2048, seed=400)

        # Upload first
        upload_test_file(integration_client, project, package, content, tag="download-test")

        results = []
        errors = []

        def download_worker(idx):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/download-test",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        results.append((idx, response.content))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = [executor.submit(download_worker, i) for i in range(2)]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == 2

        # All downloads should match original
        for idx, downloaded in results:
            assert downloaded == content

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_5_concurrent_downloads_same_artifact(self, integration_client, test_package):
        """Test 5 concurrent downloads of same artifact."""
        project, package = test_package
        content, expected_hash = generate_content_with_hash(4096, seed=500)

        upload_test_file(integration_client, project, package, content, tag="download5-test")

        num_downloads = 5
        results = []
        errors = []

        def download_worker(idx):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/download5-test",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        results.append((idx, response.content))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_downloads) as executor:
            futures = [executor.submit(download_worker, i) for i in range(num_downloads)]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_downloads

        for idx, downloaded in results:
            assert downloaded == content

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_10_concurrent_downloads_same_artifact(self, integration_client, test_package):
        """Test 10 concurrent downloads of same artifact."""
        project, package = test_package
        content, expected_hash = generate_content_with_hash(8192, seed=600)

        upload_test_file(integration_client, project, package, content, tag="download10-test")

        num_downloads = 10
        results = []
        errors = []

        def download_worker(idx):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/download10-test",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        results.append((idx, response.content))
                    else:
                        errors.append(f"Worker {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_downloads) as executor:
            futures = [executor.submit(download_worker, i) for i in range(num_downloads)]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_downloads

        for idx, downloaded in results:
            assert downloaded == content

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_concurrent_downloads_different_artifacts(self, integration_client, test_package):
        """Test concurrent downloads of different artifacts."""
        project, package = test_package

        # Upload multiple files
        num_files = 5
        uploads = []
        for i in range(num_files):
            content, expected_hash = generate_content_with_hash(1024, seed=700 + i)
            upload_test_file(
                integration_client, project, package, content,
                tag=f"multi-download-{i}"
            )
            uploads.append((f"multi-download-{i}", content))

        results = []
        errors = []

        def download_worker(tag, expected_content):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/{tag}",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        results.append((tag, response.content, expected_content))
                    else:
                        errors.append(f"Tag {tag}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Tag {tag}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_files) as executor:
            futures = [
                executor.submit(download_worker, tag, content)
                for tag, content in uploads
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files

        for tag, downloaded, expected in results:
            assert downloaded == expected, f"Content mismatch for {tag}"


class TestMixedConcurrentOperations:
    """Tests for mixed concurrent upload and download operations."""

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_upload_while_download_in_progress(self, integration_client, test_package):
        """Test uploading while a download is in progress."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        # Upload initial content
        content1, hash1 = generate_content_with_hash(10240, seed=800)  # 10KB
        upload_test_file(integration_client, project, package, content1, tag="initial")

        # New content for upload during download
        content2, hash2 = generate_content_with_hash(10240, seed=801)

        results = {"downloads": [], "uploads": []}
        errors = []

        def download_worker():
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/initial",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        results["downloads"].append(response.content)
                    else:
                        errors.append(f"Download: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Download: {str(e)}")

        def upload_worker():
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": ("new.bin", io.BytesIO(content2), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": "during-download"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results["uploads"].append(response.json())
                    else:
                        errors.append(f"Upload: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Upload: {str(e)}")

        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = [
                executor.submit(download_worker),
                executor.submit(upload_worker),
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results["downloads"]) == 1
        assert len(results["uploads"]) == 1

        # Verify download got correct content
        assert results["downloads"][0] == content1

        # Verify upload succeeded
        assert results["uploads"][0]["artifact_id"] == hash2

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_multiple_uploads_and_downloads_simultaneously(self, integration_client, test_package):
        """Test multiple uploads and downloads running simultaneously."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        # Pre-upload some files for downloading
        existing_files = []
        for i in range(3):
            content, content_hash = generate_content_with_hash(2048, seed=900 + i)
            upload_test_file(integration_client, project, package, content, tag=f"existing-{i}")
            existing_files.append((f"existing-{i}", content))

        # New files for uploading
        new_files = [generate_content_with_hash(2048, seed=910 + i) for i in range(3)]

        results = {"downloads": [], "uploads": []}
        errors = []

        def download_worker(tag, expected):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    response = client.get(
                        f"/api/v1/project/{project}/{package}/+/{tag}",
                        params={"mode": "proxy"},
                    )
                    if response.status_code == 200:
                        results["downloads"].append((tag, response.content, expected))
                    else:
                        errors.append(f"Download {tag}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Download {tag}: {str(e)}")

        def upload_worker(idx, content, expected_hash):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    files = {
                        "file": (f"new-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"new-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results["uploads"].append((idx, response.json(), expected_hash))
                    else:
                        errors.append(f"Upload {idx}: Status {response.status_code}")
            except Exception as e:
                errors.append(f"Upload {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=6) as executor:
            futures = []

            # Submit downloads
            for tag, content in existing_files:
                futures.append(executor.submit(download_worker, tag, content))

            # Submit uploads
            for i, (content, content_hash) in enumerate(new_files):
                futures.append(executor.submit(upload_worker, i, content, content_hash))

            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results["downloads"]) == 3
        assert len(results["uploads"]) == 3

        # Verify downloads
        for tag, downloaded, expected in results["downloads"]:
            assert downloaded == expected, f"Download mismatch for {tag}"

        # Verify uploads
        for idx, result, expected_hash in results["uploads"]:
            assert result["artifact_id"] == expected_hash

    @pytest.mark.integration
    @pytest.mark.concurrent
    def test_no_data_corruption_under_concurrency(self, integration_client, test_package):
        """Test that no data corruption occurs under concurrent operations."""
        project, package = test_package
        api_key = get_api_key(integration_client)
        assert api_key, "Failed to create API key"

        # Create content with recognizable patterns
        num_files = 5
        files_data = []
        for i in range(num_files):
            # Each file has unique repeating pattern for easy corruption detection
            pattern = bytes([i] * 256)
            content = pattern * 40  # 10KB each
            content_hash = compute_sha256(content)
            files_data.append((content, content_hash))

        results = []
        errors = []

        def upload_and_verify(idx, content, expected_hash):
            try:
                from httpx import Client
                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")

                with Client(base_url=base_url, timeout=60.0) as client:
                    # Upload
                    files = {
                        "file": (f"pattern-{idx}.bin", io.BytesIO(content), "application/octet-stream")
                    }
                    upload_resp = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"pattern-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if upload_resp.status_code != 200:
                        errors.append(f"Upload {idx}: Status {upload_resp.status_code}")
                        return

                    upload_result = upload_resp.json()
                    if upload_result["artifact_id"] != expected_hash:
                        errors.append(f"Upload {idx}: Hash mismatch")
                        return

                    # Immediately download and verify
                    download_resp = client.get(
                        f"/api/v1/project/{project}/{package}/+/pattern-{idx}",
                        params={"mode": "proxy"},
                    )
                    if download_resp.status_code != 200:
                        errors.append(f"Download {idx}: Status {download_resp.status_code}")
                        return

                    if download_resp.content != content:
                        errors.append(f"Worker {idx}: DATA CORRUPTION DETECTED")
                        return

                    # Verify the downloaded content hash
                    downloaded_hash = compute_sha256(download_resp.content)
                    if downloaded_hash != expected_hash:
                        errors.append(f"Worker {idx}: Hash verification failed")
                        return

                    results.append(idx)

            except Exception as e:
                errors.append(f"Worker {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_files) as executor:
            futures = [
                executor.submit(upload_and_verify, i, content, content_hash)
                for i, (content, content_hash) in enumerate(files_data)
            ]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors: {errors}"
        assert len(results) == num_files

backend/tests/integration/test_error_handling.py (new file, 322 lines)
@@ -0,0 +1,322 @@
"""
|
||||
Integration tests for error handling in upload and download operations.
|
||||
|
||||
Tests cover:
|
||||
- Timeout handling
|
||||
- Invalid request handling
|
||||
- Resource cleanup on failures
|
||||
- Graceful error responses
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import io
|
||||
import time
|
||||
from tests.factories import (
|
||||
compute_sha256,
|
||||
upload_test_file,
|
||||
generate_content_with_hash,
|
||||
)
|
||||
|
||||
|
||||
class TestUploadErrorHandling:
|
||||
"""Tests for upload error handling."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_to_nonexistent_project_returns_404(
|
||||
self, integration_client, unique_test_id
|
||||
):
|
||||
"""Test upload to nonexistent project returns 404."""
|
||||
content = b"test content for nonexistent project"
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload",
|
||||
files=files,
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_to_nonexistent_package_returns_404(
|
||||
self, integration_client, test_project, unique_test_id
|
||||
):
|
||||
"""Test upload to nonexistent package returns 404."""
|
||||
content = b"test content for nonexistent package"
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
|
||||
files=files,
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_empty_file_rejected(self, integration_client, test_package):
|
||||
"""Test empty file upload is rejected."""
|
||||
project, package = test_package
|
||||
|
||||
files = {"file": ("empty.bin", io.BytesIO(b""), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
)
|
||||
assert response.status_code in [400, 422]
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_missing_file_returns_422(self, integration_client, test_package):
|
||||
"""Test upload without file field returns 422."""
|
||||
project, package = test_package
|
||||
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
data={"tag": "no-file-provided"},
|
||||
)
|
||||
assert response.status_code == 422
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_invalid_checksum_format_returns_400(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test upload with invalid checksum format returns 400."""
|
||||
project, package = test_package
|
||||
content = b"checksum format test"
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
headers={"X-Checksum-SHA256": "invalid-hash-format"},
|
||||
)
|
||||
assert response.status_code == 400
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_checksum_mismatch_returns_422(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test upload with mismatched checksum returns 422."""
|
||||
project, package = test_package
|
||||
content = b"checksum mismatch test"
|
||||
wrong_hash = "0" * 64 # Valid format but wrong hash
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
headers={"X-Checksum-SHA256": wrong_hash},
|
||||
)
|
||||
assert response.status_code == 422
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_with_correct_checksum_succeeds(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test upload with correct checksum succeeds."""
|
||||
project, package = test_package
|
||||
content = b"correct checksum test"
|
||||
correct_hash = compute_sha256(content)
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
headers={"X-Checksum-SHA256": correct_hash},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["artifact_id"] == correct_hash
|
||||
|
||||
|
||||
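

# Contract exercised by the checksum tests above: a malformed X-Checksum-SHA256
# header fails fast with 400, while a well-formed but non-matching checksum is
# rejected with 422 once the uploaded content has actually been hashed.

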
class TestDownloadErrorHandling:
    """Tests for download error handling."""

    @pytest.mark.integration
    def test_download_nonexistent_tag_returns_404(
        self, integration_client, test_package
    ):
        """Test download of nonexistent tag returns 404."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/nonexistent-tag-xyz"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_nonexistent_artifact_returns_404(
        self, integration_client, test_package
    ):
        """Test download of nonexistent artifact ID returns 404."""
        project, package = test_package
        fake_hash = "a" * 64

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/artifact:{fake_hash}"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_invalid_artifact_id_format(
        self, integration_client, test_package
    ):
        """Test download with invalid artifact ID format."""
        project, package = test_package

        # Too short
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/artifact:abc123"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_from_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Test download from nonexistent project returns 404."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/+/tag"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_from_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Test download from nonexistent package returns 404."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/+/tag"
        )
        assert response.status_code == 404


class TestTimeoutBehavior:
    """Tests for timeout behavior (integration level)."""

    @pytest.mark.integration
    @pytest.mark.slow
    def test_large_upload_completes_within_reasonable_time(
        self, integration_client, test_package, sized_content
    ):
        """Test that a 10MB upload completes within reasonable time."""
        project, package = test_package
        content, expected_hash = sized_content(10 * 1024 * 1024, seed=999)  # 10MB

        start_time = time.time()
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-test"
        )
        elapsed = time.time() - start_time

        assert result["artifact_id"] == expected_hash
        # Should complete within 60 seconds for 10MB on local docker
        assert elapsed < 60, f"Upload took too long: {elapsed:.2f}s"

    @pytest.mark.integration
    @pytest.mark.slow
    def test_large_download_completes_within_reasonable_time(
        self, integration_client, test_package, sized_content
    ):
        """Test that a 10MB download completes within reasonable time."""
        project, package = test_package
        content, expected_hash = sized_content(10 * 1024 * 1024, seed=998)  # 10MB

        # First upload
        upload_test_file(
            integration_client, project, package, content, tag="download-timeout-test"
        )

        # Then download and time it
        start_time = time.time()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/download-timeout-test",
            params={"mode": "proxy"},
        )
        elapsed = time.time() - start_time

        assert response.status_code == 200
        assert len(response.content) == len(content)
        # Should complete within 60 seconds for 10MB on local docker
        assert elapsed < 60, f"Download took too long: {elapsed:.2f}s"


class TestResourceCleanup:
    """Tests for proper resource cleanup on failures.

    Note: More comprehensive cleanup tests are in test_upload_download_api.py
    (TestUploadFailureCleanup class) including S3 object cleanup verification.
    """

    @pytest.mark.integration
    def test_checksum_mismatch_no_orphaned_artifact(
        self, integration_client, test_package, unique_test_id
    ):
        """Test checksum mismatch doesn't leave orphaned artifact."""
        project, package = test_package
        # Use unique content to ensure artifact doesn't exist from prior tests
        content = f"checksum mismatch orphan test {unique_test_id}".encode()
        wrong_hash = "0" * 64
        actual_hash = compute_sha256(content)

        # Verify artifact doesn't exist before test
        pre_check = integration_client.get(f"/api/v1/artifact/{actual_hash}")
        assert pre_check.status_code == 404, "Artifact should not exist before test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )
        assert response.status_code == 422

        # Verify no artifact was created with either hash
        response1 = integration_client.get(f"/api/v1/artifact/{wrong_hash}")
        response2 = integration_client.get(f"/api/v1/artifact/{actual_hash}")
        assert response1.status_code == 404
        assert response2.status_code == 404


class TestGracefulErrorResponses:
    """Tests for graceful and informative error responses."""

    @pytest.mark.integration
    def test_404_response_has_detail_message(
        self, integration_client, test_package
    ):
        """Test 404 responses include a detail message."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
        )
        assert response.status_code == 404
        data = response.json()
        assert "detail" in data
        assert len(data["detail"]) > 0

    @pytest.mark.integration
    def test_422_response_has_detail_message(self, integration_client, test_package):
        """Test 422 responses include a detail message."""
        project, package = test_package

        # Upload with mismatched checksum
        content = b"detail message test"
        wrong_hash = "0" * 64

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )
        assert response.status_code == 422
        data = response.json()
        assert "detail" in data

    @pytest.mark.integration
    def test_error_response_is_json(self, integration_client, unique_test_id):
        """Test error responses are valid JSON."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/+/tag"
        )
        assert response.status_code == 404
        # Should not raise exception - valid JSON
        data = response.json()
        assert isinstance(data, dict)

backend/tests/integration/test_integrity_verification.py (new file, 768 lines)
@@ -0,0 +1,768 @@
"""
|
||||
Integration tests for artifact integrity verification.
|
||||
|
||||
Tests cover:
|
||||
- Round-trip verification (upload -> download -> verify hash)
|
||||
- Consistency check endpoint
|
||||
- Header-based verification
|
||||
- Integrity verification across file sizes
|
||||
- Client-side verification workflow
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import io
|
||||
import hashlib
|
||||
from tests.factories import (
|
||||
compute_sha256,
|
||||
upload_test_file,
|
||||
generate_content_with_hash,
|
||||
s3_object_exists,
|
||||
get_s3_client,
|
||||
get_s3_bucket,
|
||||
)
|
||||
from tests.conftest import (
|
||||
SIZE_1KB,
|
||||
SIZE_10KB,
|
||||
SIZE_100KB,
|
||||
SIZE_1MB,
|
||||
SIZE_10MB,
|
||||
)
|
||||
|
||||
|
||||
class TestRoundTripVerification:
|
||||
"""Tests for complete round-trip integrity verification."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_download_hash_matches(self, integration_client, test_package):
|
||||
"""Test that upload -> download round trip preserves content integrity."""
|
||||
project, package = test_package
|
||||
content = b"Round trip integrity test content"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# Upload and capture returned hash
|
||||
result = upload_test_file(
|
||||
integration_client, project, package, content, tag="roundtrip"
|
||||
)
|
||||
uploaded_hash = result["artifact_id"]
|
||||
|
||||
# Verify upload returned correct hash
|
||||
assert uploaded_hash == expected_hash
|
||||
|
||||
# Download artifact
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/roundtrip",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Compute hash of downloaded content
|
||||
downloaded_hash = compute_sha256(response.content)
|
||||
|
||||
# All three hashes should match
|
||||
assert downloaded_hash == expected_hash
|
||||
assert downloaded_hash == uploaded_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_upload_response_contains_hash(self, integration_client, test_package):
|
||||
"""Test upload response contains artifact_id which is the SHA256 hash."""
|
||||
project, package = test_package
|
||||
content = b"Upload response hash test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
result = upload_test_file(integration_client, project, package, content)
|
||||
|
||||
assert "artifact_id" in result
|
||||
assert result["artifact_id"] == expected_hash
|
||||
assert len(result["artifact_id"]) == 64
|
||||
assert all(c in "0123456789abcdef" for c in result["artifact_id"])
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_download_header_matches_artifact_id(self, integration_client, test_package):
|
||||
"""Test X-Checksum-SHA256 header matches artifact ID."""
|
||||
project, package = test_package
|
||||
content = b"Header verification test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
upload_test_file(
|
||||
integration_client, project, package, content, tag="header-check"
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/header-check",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.headers.get("X-Checksum-SHA256") == expected_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_etag_matches_artifact_id(self, integration_client, test_package):
|
||||
"""Test ETag header matches artifact ID."""
|
||||
project, package = test_package
|
||||
content = b"ETag verification test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
upload_test_file(
|
||||
integration_client, project, package, content, tag="etag-check"
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/etag-check",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
etag = response.headers.get("ETag", "").strip('"')
|
||||
assert etag == expected_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_artifact_endpoint_returns_correct_hash(self, integration_client, test_package):
|
||||
"""Test artifact endpoint returns correct hash/ID."""
|
||||
project, package = test_package
|
||||
content = b"Artifact endpoint hash test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
upload_test_file(integration_client, project, package, content)
|
||||
|
||||
# Query artifact directly
|
||||
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["id"] == expected_hash
|
||||
assert data.get("sha256") == expected_hash
|
||||
|
||||
|
||||
class TestClientSideVerificationWorkflow:
|
||||
"""Tests for client-side verification workflow."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_client_can_verify_before_upload(self, integration_client, test_package):
|
||||
"""Test client can compute hash before upload and verify response matches."""
|
||||
project, package = test_package
|
||||
content = b"Client pre-upload verification test"
|
||||
|
||||
# Client computes hash locally before upload
|
||||
client_hash = compute_sha256(content)
|
||||
|
||||
# Upload
|
||||
result = upload_test_file(integration_client, project, package, content)
|
||||
|
||||
# Client verifies server returned the same hash
|
||||
assert result["artifact_id"] == client_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_client_can_provide_checksum_header(self, integration_client, test_package):
|
||||
"""Test client can provide X-Checksum-SHA256 header for verification."""
|
||||
project, package = test_package
|
||||
content = b"Client checksum header test"
|
||||
client_hash = compute_sha256(content)
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
headers={"X-Checksum-SHA256": client_hash},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["artifact_id"] == client_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_checksum_mismatch_rejected(self, integration_client, test_package):
|
||||
"""Test upload with wrong client checksum is rejected."""
|
||||
project, package = test_package
|
||||
content = b"Checksum mismatch test"
|
||||
wrong_hash = "0" * 64
|
||||
|
||||
files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project}/{package}/upload",
|
||||
files=files,
|
||||
headers={"X-Checksum-SHA256": wrong_hash},
|
||||
)
|
||||
assert response.status_code == 422
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_client_can_verify_after_download(self, integration_client, test_package):
|
||||
"""Test client can verify downloaded content matches header hash."""
|
||||
project, package = test_package
|
||||
content = b"Client post-download verification"
|
||||
|
||||
upload_test_file(
|
||||
integration_client, project, package, content, tag="verify-after"
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/verify-after",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Client gets hash from header
|
||||
header_hash = response.headers.get("X-Checksum-SHA256")
|
||||
|
||||
# Client computes hash of downloaded content
|
||||
downloaded_hash = compute_sha256(response.content)
|
||||
|
||||
# Client verifies they match
|
||||
assert downloaded_hash == header_hash
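
# A minimal client-side helper distilled from the workflow tested above.
# Illustrative sketch only, not a shipped client API: the URL shape, proxy
# mode, and X-Checksum-SHA256 header are exactly those exercised by the tests.
def _download_verified(client, project: str, package: str, tag: str) -> bytes:
    """Download a tagged artifact via proxy mode and verify its SHA-256."""
    response = client.get(
        f"/api/v1/project/{project}/{package}/+/{tag}",
        params={"mode": "proxy"},
    )
    assert response.status_code == 200
    expected = response.headers.get("X-Checksum-SHA256")
    actual = hashlib.sha256(response.content).hexdigest()
    if expected and actual != expected:
        raise ValueError(f"integrity check failed: {actual} != {expected}")
    return response.content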


class TestIntegritySizeVariants:
    """Tests for integrity verification across different file sizes."""

    @pytest.mark.integration
    def test_integrity_1kb(self, integration_client, test_package, sized_content):
        """Test integrity verification for 1KB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1KB, seed=100)

        result = upload_test_file(
            integration_client, project, package, content, tag="int-1kb"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/int-1kb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert compute_sha256(response.content) == expected_hash
        assert response.headers.get("X-Checksum-SHA256") == expected_hash

    @pytest.mark.integration
    def test_integrity_100kb(self, integration_client, test_package, sized_content):
        """Test integrity verification for 100KB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=101)

        result = upload_test_file(
            integration_client, project, package, content, tag="int-100kb"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/int-100kb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert compute_sha256(response.content) == expected_hash
        assert response.headers.get("X-Checksum-SHA256") == expected_hash

    @pytest.mark.integration
    def test_integrity_1mb(self, integration_client, test_package, sized_content):
        """Test integrity verification for 1MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1MB, seed=102)

        result = upload_test_file(
            integration_client, project, package, content, tag="int-1mb"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/int-1mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert compute_sha256(response.content) == expected_hash
        assert response.headers.get("X-Checksum-SHA256") == expected_hash

    @pytest.mark.integration
    @pytest.mark.slow
    def test_integrity_10mb(self, integration_client, test_package, sized_content):
        """Test integrity verification for 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=103)

        result = upload_test_file(
            integration_client, project, package, content, tag="int-10mb"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/int-10mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert compute_sha256(response.content) == expected_hash
        assert response.headers.get("X-Checksum-SHA256") == expected_hash


class TestConsistencyCheck:
    """Tests for the admin consistency check endpoint."""

    @pytest.mark.integration
    def test_consistency_check_returns_200(self, integration_client):
        """Test consistency check endpoint returns 200."""
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200

    @pytest.mark.integration
    def test_consistency_check_response_format(self, integration_client):
        """Test consistency check returns expected response format."""
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200
        data = response.json()

        # Check expected fields
        assert "total_artifacts_checked" in data
        assert "orphaned_s3_objects" in data
        assert "missing_s3_objects" in data
        assert "size_mismatches" in data
        assert "healthy" in data
        assert "orphaned_s3_keys" in data
        assert "missing_s3_keys" in data
        assert "size_mismatch_artifacts" in data
        # Verify types
        assert isinstance(data["total_artifacts_checked"], int)
        assert isinstance(data["orphaned_s3_objects"], int)
        assert isinstance(data["missing_s3_objects"], int)
        assert isinstance(data["size_mismatches"], int)
        assert isinstance(data["healthy"], bool)
        assert isinstance(data["orphaned_s3_keys"], list)
        assert isinstance(data["missing_s3_keys"], list)
        assert isinstance(data["size_mismatch_artifacts"], list)

    @pytest.mark.integration
    def test_consistency_check_after_upload(self, integration_client, test_package):
        """Test consistency check passes after valid upload."""
        project, package = test_package
        content = b"Consistency check test content"

        # Upload artifact
        upload_test_file(integration_client, project, package, content)

        # Run consistency check
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200
        data = response.json()

        # Verify check ran and no issues
        assert data["total_artifacts_checked"] >= 1
        assert data["healthy"] is True

    @pytest.mark.integration
    def test_consistency_check_limit_parameter(self, integration_client):
        """Test consistency check respects limit parameter."""
        response = integration_client.get(
            "/api/v1/admin/consistency-check",
            params={"limit": 10},
        )
        assert response.status_code == 200
        data = response.json()

        # Lists should not exceed limit
        assert len(data["orphaned_s3_keys"]) <= 10
        assert len(data["missing_s3_keys"]) <= 10
        assert len(data["size_mismatch_artifacts"]) <= 10


class TestDigestHeader:
    """Tests for RFC 3230 Digest header."""

    @pytest.mark.integration
    def test_download_includes_digest_header(self, integration_client, test_package):
        """Test download includes Digest header in RFC 3230 format."""
        project, package = test_package
        content = b"Digest header test"
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project, package, content, tag="digest-test"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/digest-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "Digest" in response.headers

        # Verify Digest format (sha-256=base64hash)
        digest = response.headers["Digest"]
        assert digest.startswith("sha-256=")

    @pytest.mark.integration
    def test_digest_header_base64_valid(self, integration_client, test_package):
        """Test Digest header contains valid base64 encoding."""
        import base64

        project, package = test_package
        content = b"Digest base64 test"
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project, package, content, tag="digest-b64"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/digest-b64",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        digest = response.headers["Digest"]
        base64_part = digest.split("=", 1)[1]

        # Should be valid base64
        try:
            decoded = base64.b64decode(base64_part)
            assert len(decoded) == 32  # SHA256 is 32 bytes
        except Exception as e:
            pytest.fail(f"Invalid base64 in Digest header: {e}")
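
# The two checksum representations used above are interconvertible: the Digest
# header (RFC 3230) carries base64 of the raw 32-byte digest, while
# X-Checksum-SHA256 and the artifact ID carry the hex form. A sketch of the
# Digest value expected for a given hex hash (standard library only; no
# server-side API assumed):
def _expected_digest_header(hex_hash: str) -> str:
    import base64

    return "sha-256=" + base64.b64encode(bytes.fromhex(hex_hash)).decode("ascii")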


class TestVerificationModes:
    """Tests for download verification modes."""

    @pytest.mark.integration
    def test_pre_verification_mode(self, integration_client, test_package):
        """Test pre-verification mode verifies before streaming."""
        project, package = test_package
        content = b"Pre-verification mode test"

        upload_test_file(
            integration_client, project, package, content, tag="pre-verify"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/pre-verify",
            params={"mode": "proxy", "verify": "true", "verify_mode": "pre"},
        )
        assert response.status_code == 200
        assert response.content == content

        # X-Verified header should be true
        assert response.headers.get("X-Verified") == "true"

    @pytest.mark.integration
    def test_stream_verification_mode(self, integration_client, test_package):
        """Test streaming verification mode."""
        project, package = test_package
        content = b"Stream verification mode test"

        upload_test_file(
            integration_client, project, package, content, tag="stream-verify"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/stream-verify",
            params={"mode": "proxy", "verify": "true", "verify_mode": "stream"},
        )
        assert response.status_code == 200
        assert response.content == content


class TestArtifactIntegrityEndpoint:
    """Tests for artifact-specific integrity operations."""

    @pytest.mark.integration
    def test_artifact_size_matches(self, integration_client, test_package):
        """Test artifact endpoint returns correct size."""
        project, package = test_package
        content = b"Artifact size test content"
        expected_size = len(content)

        result = upload_test_file(integration_client, project, package, content)
        artifact_id = result["artifact_id"]

        response = integration_client.get(f"/api/v1/artifact/{artifact_id}")
        assert response.status_code == 200
        data = response.json()
        assert data["size"] == expected_size

    @pytest.mark.integration
    def test_content_length_header_matches_size(self, integration_client, test_package):
        """Test Content-Length header matches artifact size."""
        project, package = test_package
        content = b"Content-Length header test"
        expected_size = len(content)

        upload_test_file(
            integration_client, project, package, content, tag="content-len"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/content-len",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert int(response.headers.get("Content-Length", 0)) == expected_size
        assert len(response.content) == expected_size


@pytest.mark.requires_direct_s3
class TestCorruptionDetection:
    """Tests for detecting corrupted S3 objects.

    These tests directly manipulate S3 objects to simulate corruption
    and verify that the system can detect hash mismatches.

    Note: These tests require direct S3/MinIO access and are skipped in CI
    where S3 is not directly accessible from the test runner.
    """

    @pytest.mark.integration
    def test_detection_of_corrupted_content(self, integration_client, test_package):
        """Test that corrupted S3 content is detected via hash mismatch.

        Uploads content, then directly modifies the S3 object, then
        verifies that the downloaded content hash doesn't match.
        """
        project, package = test_package
        content = b"Original content for corruption test"
        expected_hash = compute_sha256(content)

        # Upload original content
        result = upload_test_file(
            integration_client, project, package, content, tag="corrupt-test"
        )
        assert result["artifact_id"] == expected_hash

        # Get the S3 object and corrupt it
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"

        # Replace with corrupted content
        corrupted_content = b"Corrupted content - different from original!"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)

        # Download via proxy (bypasses hash verification)
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/corrupt-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        # Verify the downloaded content doesn't match the original hash
        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Corruption was not detected - hashes match"
        assert response.content == corrupted_content

        # The X-Checksum-SHA256 header should still show the original hash (from DB)
        # but the actual content hash is different
        header_hash = response.headers.get("X-Checksum-SHA256")
        assert header_hash == expected_hash  # Header shows expected hash
        assert downloaded_hash != header_hash  # But content is corrupted

        # Restore original content for cleanup
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

    @pytest.mark.integration
    def test_detection_of_single_bit_flip(self, integration_client, test_package):
        """Test detection of a single bit flip in S3 object content."""
        project, package = test_package
        content = b"Content for single bit flip detection test"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="bitflip-test"
        )
        assert result["artifact_id"] == expected_hash

        # Get S3 object and flip a single bit
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"

        # Flip the lowest bit of the first byte
        corrupted_content = bytearray(content)
        corrupted_content[0] ^= 0x01
        corrupted_content = bytes(corrupted_content)

        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted_content)

        # Download and verify hash mismatch
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/bitflip-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Single bit flip not detected"

        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

    @pytest.mark.integration
    def test_detection_of_truncated_content(self, integration_client, test_package):
        """Test detection of truncated S3 object."""
        project, package = test_package
        content = b"This is content that will be truncated for testing purposes"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="truncate-test"
        )
        assert result["artifact_id"] == expected_hash

        # Get S3 object and truncate it
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"

        # Truncate to half the original size
        truncated_content = content[: len(content) // 2]
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=truncated_content)

        # Download and verify hash mismatch
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/truncate-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Truncation not detected"
        assert len(response.content) < len(content), "Content was not truncated"

        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

    @pytest.mark.integration
    def test_detection_of_appended_content(self, integration_client, test_package):
        """Test detection of content with extra bytes appended."""
        project, package = test_package
        content = b"Original content"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="append-test"
        )
        assert result["artifact_id"] == expected_hash

        # Get S3 object and append extra bytes
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"

        appended_content = content + b" - extra bytes appended"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=appended_content)

        # Download and verify hash mismatch
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/append-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        downloaded_hash = compute_sha256(response.content)
        assert downloaded_hash != expected_hash, "Appended content not detected"
        assert len(response.content) > len(content), "Content was not extended"

        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

    @pytest.mark.integration
    def test_client_detects_hash_mismatch_post_download(
        self, integration_client, test_package
    ):
        """Test that a client can detect a hash mismatch after downloading corrupted content.

        This simulates the full client verification workflow:
        1. Download content
        2. Get expected hash from header
        3. Compute actual hash of content
        4. Verify they match (or detect corruption)
        """
        project, package = test_package
        content = b"Content for client-side corruption detection"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="client-detect"
        )

        # Corrupt the S3 object
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        corrupted = b"This is completely different content"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=corrupted)

        # Simulate client download and verification
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/client-detect",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200

        # Client gets expected hash from header
        header_hash = response.headers.get("X-Checksum-SHA256")

        # Client computes hash of downloaded content
        actual_hash = compute_sha256(response.content)

        # Client detects the mismatch
        corruption_detected = actual_hash != header_hash
        assert corruption_detected, "Client should detect hash mismatch"

        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

    @pytest.mark.integration
    def test_consistency_check_detects_size_mismatch(
        self, integration_client, test_package, unique_test_id
    ):
        """Test that consistency check detects size mismatches.

        Uploads content, modifies S3 object size, then runs consistency check.
        """
        project, package = test_package
        content = b"Content for size mismatch consistency check test " + unique_test_id.encode()
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="size-mismatch"
        )

        # Modify S3 object to have a different size
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        different_size_content = content + b"extra extra extra"
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=different_size_content)

        # Run consistency check
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200
        data = response.json()

        # Should detect the size mismatch
        assert data["size_mismatches"] >= 1 or len(data["size_mismatch_artifacts"]) >= 1

        # Restore original
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)

    @pytest.mark.integration
    def test_consistency_check_detects_missing_s3_object(
        self, integration_client, test_package, unique_test_id
    ):
        """Test that consistency check detects missing S3 objects.

        Uploads content, deletes S3 object, then runs consistency check.
        """
        project, package = test_package
        content = b"Content for missing S3 object test " + unique_test_id.encode()
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="missing-s3"
        )

        # Delete the S3 object
        s3_client = get_s3_client()
        bucket = get_s3_bucket()
        s3_key = f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        s3_client.delete_object(Bucket=bucket, Key=s3_key)

        # Run consistency check
        response = integration_client.get("/api/v1/admin/consistency-check")
        assert response.status_code == 200
        data = response.json()

        # Should detect the missing S3 object
        assert data["missing_s3_objects"] >= 1 or len(data["missing_s3_keys"]) >= 1

        # Restore the object for cleanup
        s3_client.put_object(Bucket=bucket, Key=s3_key, Body=content)
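
# Every corruption test above rebuilds the object key by hand; the layout they
# assume is content-addressed fan-out under the "fruits/" prefix (two
# two-character shards taken from the hash, then the full hash). Captured here
# for reference as a sketch inferred from the tests, not imported from the
# application code:
def _s3_key_for(sha256_hex: str) -> str:
    return f"fruits/{sha256_hex[:2]}/{sha256_hex[2:4]}/{sha256_hex}"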

552	backend/tests/integration/test_large_uploads.py	Normal file
@@ -0,0 +1,552 @@
"""
Integration tests for large file upload functionality.

Tests cover:
- Large file uploads (100MB, 1GB)
- Multipart upload behavior
- Upload metrics (duration, throughput)
- Memory efficiency during uploads
- Upload progress tracking

Note: Large tests are marked with @pytest.mark.slow and will be skipped
by default. Run with `pytest --run-slow` to include them.
"""

import os
import pytest
import io
import time
from tests.factories import (
    compute_sha256,
    upload_test_file,
    s3_object_exists,
)
from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_10MB,
    SIZE_100MB,
    SIZE_1GB,
)


class TestUploadMetrics:
    """Tests for upload duration and throughput metrics."""

    @pytest.mark.integration
    def test_upload_response_includes_duration_ms(self, integration_client, test_package):
        """Test upload response includes duration_ms field."""
        project, package = test_package
        content = b"duration test content"

        result = upload_test_file(
            integration_client, project, package, content, tag="duration-test"
        )

        assert "duration_ms" in result
        assert result["duration_ms"] is not None
        assert result["duration_ms"] >= 0

    @pytest.mark.integration
    def test_upload_response_includes_throughput(self, integration_client, test_package):
        """Test upload response includes throughput_mbps field."""
        project, package = test_package
        content = b"throughput test content"

        result = upload_test_file(
            integration_client, project, package, content, tag="throughput-test"
        )

        assert "throughput_mbps" in result
        # For small files throughput may be very high or None
        # Just verify the field exists

    @pytest.mark.integration
    def test_upload_duration_reasonable(
        self, integration_client, test_package, sized_content
    ):
        """Test upload duration is reasonable for file size."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=100)

        start = time.time()
        result = upload_test_file(
            integration_client, project, package, content, tag="duration-check"
        )
        actual_duration = (time.time() - start) * 1000  # ms

        # Reported duration should be close to actual
        assert result["duration_ms"] is not None
        # Allow some variance (network overhead)
        assert result["duration_ms"] <= actual_duration + 1000  # Within 1s


class TestLargeFileUploads:
    """Tests for large file uploads using multipart."""

    @pytest.mark.integration
    def test_upload_10mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=200)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-10mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB
        assert result["duration_ms"] is not None
        assert result["throughput_mbps"] is not None

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.requires_direct_s3
    def test_upload_100mb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 100MB file (triggers multipart upload)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=300)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-100mb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100MB
        # Verify S3 object exists
        assert s3_object_exists(expected_hash)

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_1gb_file(self, integration_client, test_package, sized_content):
        """Test uploading a 1GB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=400)

        result = upload_test_file(
            integration_client, project, package, content, tag="large-1gb"
        )

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1GB
        # Should have measurable throughput
        assert result["throughput_mbps"] is not None
        assert result["throughput_mbps"] > 0

    @pytest.mark.integration
    def test_large_file_deduplication(
        self, integration_client, test_package, sized_content, unique_test_id
    ):
        """Test deduplication works for large files."""
        project, package = test_package
        # Use unique_test_id to ensure unique content per test run
        seed = hash(unique_test_id) % 10000
        content, expected_hash = sized_content(SIZE_10MB, seed=seed)

        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-1"
        )
        # Note: may be True if a previous test uploaded the same content
        first_dedupe = result1["deduplicated"]

        # Second upload of same content
        result2 = upload_test_file(
            integration_client, project, package, content, tag=f"dedup-{unique_test_id}-2"
        )
        assert result2["artifact_id"] == expected_hash
        # Second upload MUST be deduplicated
        assert result2["deduplicated"] is True


class TestUploadProgress:
    """Tests for upload progress tracking endpoint."""

    @pytest.mark.integration
    def test_progress_endpoint_returns_not_found_for_invalid_id(
        self, integration_client, test_package
    ):
        """Test progress endpoint returns not_found status for invalid upload ID."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/upload/invalid-upload-id/progress"
        )

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "not_found"
        assert data["upload_id"] == "invalid-upload-id"

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_project(
        self, integration_client, unique_test_id
    ):
        """Test progress endpoint validates project exists."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload/upload-id/progress"
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_progress_endpoint_requires_valid_package(
        self, integration_client, test_project, unique_test_id
    ):
        """Test progress endpoint validates package exists."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload/upload-id/progress"
        )

        assert response.status_code == 404


class TestResumableUploadProgress:
    """Tests for progress tracking during resumable uploads."""

    @pytest.mark.integration
    def test_resumable_upload_init_and_progress(
        self, integration_client, test_package, sized_content
    ):
        """Test initializing resumable upload and checking progress."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=600)

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "progress-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "progress-test.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        if upload_id:
            # Check initial progress
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert progress_response.status_code == 200
            progress = progress_response.json()
            assert progress["status"] == "in_progress"
            assert progress["bytes_uploaded"] == 0
            assert progress["bytes_total"] == SIZE_100KB

            # Abort to clean up
            integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers={"Authorization": f"Bearer {api_key}"},
            )
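
# Condensed shape of the resumable-upload session exercised above. The
# endpoint paths and payload fields are exactly the ones these tests hit; how
# individual parts are uploaded is out of scope here and not assumed:
#
#   init = client.post(
#       f"/api/v1/project/{project}/{package}/upload/init",
#       json={"expected_hash": sha256_hex, "filename": "big.bin", "size": size},
#       headers={"Authorization": f"Bearer {api_key}"},
#   )
#   upload_id = init.json()["upload_id"]
#   progress = client.get(
#       f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
#       headers={"Authorization": f"Bearer {api_key}"},
#   ).json()  # -> {"status": "in_progress", "bytes_uploaded": 0, "bytes_total": size}
#   client.delete(  # abort; progress afterwards reports status == "not_found"
#       f"/api/v1/project/{project}/{package}/upload/{upload_id}",
#       headers={"Authorization": f"Bearer {api_key}"},
#   )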


class TestUploadSizeLimits:
    """Tests for upload size limit enforcement."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """Test empty files are rejected."""
        project, package = test_package

        files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )

        assert response.status_code in [400, 422]

    @pytest.mark.integration
    def test_minimum_size_accepted(self, integration_client, test_package):
        """Test 1-byte file is accepted."""
        project, package = test_package
        content = b"X"

        result = upload_test_file(
            integration_client, project, package, content, tag="min-size"
        )

        assert result["size"] == 1

    @pytest.mark.integration
    def test_content_length_header_used_in_response(self, integration_client, test_package):
        """Test that upload response size matches Content-Length."""
        project, package = test_package
        content = b"content length verification test"

        result = upload_test_file(
            integration_client, project, package, content, tag="content-length-test"
        )

        # Size in response should match actual content length
        assert result["size"] == len(content)


class TestUploadErrorHandling:
    """Tests for upload error handling."""

    @pytest.mark.integration
    def test_upload_to_nonexistent_project_returns_404(
        self, integration_client, unique_test_id
    ):
        """Test upload to nonexistent project returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/nonexistent-{unique_test_id}/pkg/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_to_nonexistent_package_returns_404(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to nonexistent package returns 404."""
        content = b"test content"
        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}

        response = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-{unique_test_id}/upload",
            files=files,
        )

        assert response.status_code == 404

    @pytest.mark.integration
    def test_upload_without_file_returns_422(self, integration_client, test_package):
        """Test upload without file field returns 422."""
        project, package = test_package

        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data={"tag": "no-file"},
        )

        assert response.status_code == 422

    @pytest.mark.integration
    def test_upload_with_invalid_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with invalid checksum header format is rejected."""
        project, package = test_package
        content = b"checksum test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": "invalid-checksum"},
        )

        assert response.status_code == 400

    @pytest.mark.integration
    def test_upload_with_mismatched_checksum_rejected(
        self, integration_client, test_package
    ):
        """Test upload with wrong checksum is rejected."""
        project, package = test_package
        content = b"mismatch test"
        wrong_hash = "0" * 64

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )

        assert response.status_code == 422
        assert "verification failed" in response.json().get("detail", "").lower()


class TestResumableUploadCancellation:
    """Tests for resumable upload cancellation."""

    @pytest.mark.integration
    def test_abort_resumable_upload(self, integration_client, test_package, sized_content):
        """Test aborting a resumable upload cleans up properly."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=700)

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-test-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        # Initialize resumable upload
        init_response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload/init",
            json={
                "expected_hash": expected_hash,
                "filename": "abort-test.bin",
                "size": SIZE_100KB,
            },
            headers={"Authorization": f"Bearer {api_key}"},
        )
        assert init_response.status_code == 200
        upload_id = init_response.json().get("upload_id")

        if upload_id:
            # Abort the upload (without uploading any parts)
            abort_response = integration_client.delete(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert abort_response.status_code in [200, 204]

            # Verify progress shows not_found after abort
            progress_response = integration_client.get(
                f"/api/v1/project/{project}/{package}/upload/{upload_id}/progress",
                headers={"Authorization": f"Bearer {api_key}"},
            )
            assert progress_response.status_code == 200
            assert progress_response.json()["status"] == "not_found"

    @pytest.mark.integration
    def test_abort_nonexistent_upload(self, integration_client, test_package):
        """Test aborting nonexistent upload returns appropriate error."""
        project, package = test_package

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "abort-nonexistent-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/upload/nonexistent-upload-id",
            headers={"Authorization": f"Bearer {api_key}"},
        )

        # Should return 404 or 200 (idempotent delete)
        assert response.status_code in [200, 204, 404]


class TestUploadTimeout:
    """Tests for upload timeout handling."""

    @pytest.mark.integration
    def test_upload_with_short_timeout_succeeds_for_small_file(
        self, integration_client, test_package
    ):
        """Test small file upload succeeds with reasonable timeout."""
        project, package = test_package
        content = b"small timeout test"

        # The httpx client should handle this quickly
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-small"
        )

        assert result["artifact_id"] is not None

    @pytest.mark.integration
    def test_upload_response_duration_under_timeout(
        self, integration_client, test_package, sized_content
    ):
        """Test upload completes within reasonable time."""
        project, package = test_package
        content, _ = sized_content(SIZE_1MB, seed=800)

        start = time.time()
        result = upload_test_file(
            integration_client, project, package, content, tag="timeout-check"
        )
        duration = time.time() - start

        # 1MB should upload in well under 60 seconds locally
        assert duration < 60
        assert result["artifact_id"] is not None


class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_different_files(
        self, integration_client, test_package, sized_content
    ):
        """Test concurrent uploads of different files succeed."""
        from concurrent.futures import ThreadPoolExecutor, as_completed

        project, package = test_package

        # Get API key for auth
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "concurrent-diff-key"},
        )
        assert api_key_response.status_code == 200
        api_key = api_key_response.json()["key"]

        num_uploads = 3
        results = []
        errors = []

        def upload_unique_file(idx):
            try:
                from httpx import Client

                content, expected_hash = sized_content(SIZE_100KB, seed=900 + idx)

                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
                with Client(base_url=base_url, timeout=30.0) as client:
                    files = {
                        "file": (
                            f"concurrent-{idx}.bin",
                            io.BytesIO(content),
                            "application/octet-stream",
                        )
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent-diff-{idx}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results.append((idx, response.json(), expected_hash))
                    else:
                        errors.append(f"Upload {idx}: {response.status_code} - {response.text}")
            except Exception as e:
                errors.append(f"Upload {idx}: {str(e)}")

        with ThreadPoolExecutor(max_workers=num_uploads) as executor:
            futures = [executor.submit(upload_unique_file, i) for i in range(num_uploads)]
            for future in as_completed(futures):
                future.result()  # workers trap their own errors; this just waits for completion

        assert len(errors) == 0, f"Concurrent upload errors: {errors}"
        assert len(results) == num_uploads

        # Each upload should have a unique artifact ID
        artifact_ids = set(r[1]["artifact_id"] for r in results)
        assert len(artifact_ids) == num_uploads

        # Each should match its expected hash
        for idx, result, expected_hash in results:
            assert result["artifact_id"] == expected_hash
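
# The sized_content fixture used throughout returns deterministic bytes plus
# their SHA-256 for a (size, seed) pair. A stand-in with the same contract
# (sketch only; the real fixture lives in conftest.py and may generate its
# data differently):
def _sized_content_sketch(size: int, seed: int) -> tuple[bytes, str]:
    import hashlib
    import random

    rng = random.Random(seed)
    content = rng.randbytes(size)  # deterministic for a given seed
    return content, hashlib.sha256(content).hexdigest()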

345	backend/tests/integration/test_packages_api.py	Normal file
@@ -0,0 +1,345 @@
"""
Integration tests for package API endpoints.

Tests cover:
- Package CRUD operations
- Package listing with pagination, search, filtering
- Package stats endpoint
- Package-level audit logs
- Cascade delete behavior
"""

import pytest
from tests.factories import compute_sha256, upload_test_file
|
||||
|
||||
|
||||
class TestPackageCRUD:
|
||||
"""Tests for package create, read, update, delete operations."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_create_package(self, integration_client, test_project, unique_test_id):
|
||||
"""Test creating a new package."""
|
||||
package_name = f"test-create-pkg-{unique_test_id}"
|
||||
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{test_project}/packages",
|
||||
json={
|
||||
"name": package_name,
|
||||
"description": "Test package",
|
||||
"format": "npm",
|
||||
"platform": "linux",
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert data["name"] == package_name
|
||||
assert data["description"] == "Test package"
|
||||
assert data["format"] == "npm"
|
||||
assert data["platform"] == "linux"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_get_package(self, integration_client, test_package):
|
||||
"""Test getting a package by name."""
|
||||
project_name, package_name = test_package
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project_name}/packages/{package_name}"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert data["name"] == package_name
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_get_nonexistent_package(self, integration_client, test_project):
|
||||
"""Test getting a non-existent package returns 404."""
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/packages/nonexistent-pkg"
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_list_packages(self, integration_client, test_package):
|
||||
"""Test listing packages includes created package."""
|
||||
project_name, package_name = test_package
|
||||
|
||||
response = integration_client.get(f"/api/v1/project/{project_name}/packages")
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert "items" in data
|
||||
assert "pagination" in data
|
||||
|
||||
package_names = [p["name"] for p in data["items"]]
|
||||
assert package_name in package_names
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_delete_package(self, integration_client, test_project, unique_test_id):
|
||||
"""Test deleting a package."""
|
||||
package_name = f"test-delete-pkg-{unique_test_id}"
|
||||
|
||||
# Create package
|
||||
integration_client.post(
|
||||
f"/api/v1/project/{test_project}/packages",
|
||||
json={"name": package_name, "description": "To be deleted"},
|
||||
)
|
||||
|
||||
# Delete package
|
||||
response = integration_client.delete(
|
||||
f"/api/v1/project/{test_project}/packages/{package_name}"
|
||||
)
|
||||
assert response.status_code == 204
|
||||
|
||||
# Verify deleted
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/packages/{package_name}"
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
|
||||
class TestPackageListingFilters:
|
||||
"""Tests for package listing with filters and pagination."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_packages_pagination(self, integration_client, test_project):
|
||||
"""Test package listing respects pagination parameters."""
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/packages?page=1&limit=5"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert len(data["items"]) <= 5
|
||||
assert data["pagination"]["limit"] == 5
|
||||
assert data["pagination"]["page"] == 1
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_packages_filter_by_format(
|
||||
self, integration_client, test_project, unique_test_id
|
||||
):
|
||||
"""Test package filtering by format."""
|
||||
# Create a package with specific format
|
||||
package_name = f"npm-pkg-{unique_test_id}"
|
||||
integration_client.post(
|
||||
f"/api/v1/project/{test_project}/packages",
|
||||
json={"name": package_name, "format": "npm"},
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/packages?format=npm"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
for pkg in data["items"]:
|
||||
assert pkg["format"] == "npm"
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_packages_filter_by_platform(
|
||||
self, integration_client, test_project, unique_test_id
|
||||
):
|
||||
"""Test package filtering by platform."""
|
||||
# Create a package with specific platform
|
||||
package_name = f"linux-pkg-{unique_test_id}"
|
||||
integration_client.post(
|
||||
f"/api/v1/project/{test_project}/packages",
|
||||
json={"name": package_name, "platform": "linux"},
|
||||
)
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/packages?platform=linux"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
for pkg in data["items"]:
|
||||
assert pkg["platform"] == "linux"
|
||||
|
||||
|
||||
class TestPackageStats:
|
||||
"""Tests for package statistics endpoint."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_package_stats_returns_valid_response(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test package stats endpoint returns expected fields."""
|
||||
project, package = test_package
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/packages/{package}/stats"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert "package_id" in data
|
||||
assert "package_name" in data
|
||||
assert "project_name" in data
|
||||
assert "tag_count" in data
|
||||
assert "artifact_count" in data
|
||||
assert "total_size_bytes" in data
|
||||
assert "upload_count" in data
|
||||
assert "deduplicated_uploads" in data
|
||||
assert "storage_saved_bytes" in data
|
||||
assert "deduplication_ratio" in data
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_package_stats_not_found(self, integration_client, test_project):
|
||||
"""Test package stats returns 404 for non-existent package."""
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/packages/nonexistent-package/stats"
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
|
||||
class TestPackageAuditLogs:
|
||||
"""Tests for package-level audit logs endpoint."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_package_audit_logs_returns_200(self, integration_client, test_package):
|
||||
"""Test package audit logs endpoint returns 200."""
|
||||
project_name, package_name = test_package
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project_name}/{package_name}/audit-logs"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert "items" in data
|
||||
assert "pagination" in data
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_package_audit_logs_project_not_found(self, integration_client):
|
||||
"""Test non-existent project returns 404."""
|
||||
response = integration_client.get(
|
||||
"/api/v1/project/nonexistent/nonexistent/audit-logs"
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_package_audit_logs_package_not_found(
|
||||
self, integration_client, test_project
|
||||
):
|
||||
"""Test non-existent package returns 404."""
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{test_project}/nonexistent-package/audit-logs"
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
|
||||
class TestPackageCascadeDelete:
|
||||
"""Tests for cascade delete behavior when deleting packages."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_ref_count_decrements_on_package_delete(
|
||||
self, integration_client, unique_test_id
|
||||
):
|
||||
"""Test ref_count decrements for all tags when package is deleted."""
|
||||
project_name = f"cascade-pkg-{unique_test_id}"
|
||||
package_name = f"test-pkg-{unique_test_id}"
|
||||
|
||||
# Create project
|
||||
response = integration_client.post(
|
||||
"/api/v1/projects",
|
||||
json={
|
||||
"name": project_name,
|
||||
"description": "Test project",
|
||||
"is_public": True,
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Create package
|
||||
response = integration_client.post(
|
||||
f"/api/v1/project/{project_name}/packages",
|
||||
json={"name": package_name, "description": "Test package"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
# Upload content with multiple tags
|
||||
content = f"cascade delete test {unique_test_id}".encode()
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
upload_test_file(
|
||||
integration_client, project_name, package_name, content, tag="v1"
|
||||
)
|
||||
upload_test_file(
|
||||
integration_client, project_name, package_name, content, tag="v2"
|
||||
)
|
||||
upload_test_file(
|
||||
integration_client, project_name, package_name, content, tag="v3"
|
||||
)
|
||||
|
||||
# Verify ref_count is 3
|
||||
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert response.json()["ref_count"] == 3
|
||||
|
||||
# Delete the package
|
||||
delete_response = integration_client.delete(
|
||||
f"/api/v1/project/{project_name}/packages/{package_name}"
|
||||
)
|
||||
assert delete_response.status_code == 204
|
||||
|
||||
# Verify ref_count is 0
|
||||
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert response.json()["ref_count"] == 0
|
||||
|
||||
# Cleanup
|
||||
integration_client.delete(f"/api/v1/projects/{project_name}")
|
||||
|
||||
|
||||
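
The ref_count arithmetic above falls out of content-addressed storage: identical bytes hash to one artifact, each tag holds one reference, and deleting a package releases every reference its tags held. A toy model of that bookkeeping (illustration only, not Orchard's actual implementation):

import hashlib
from collections import defaultdict

ref_count: dict[str, int] = defaultdict(int)

def add_tag(content: bytes) -> str:
    # Tagging content adds one reference to its (deduplicated) artifact.
    artifact_id = hashlib.sha256(content).hexdigest()
    ref_count[artifact_id] += 1
    return artifact_id

def drop_tags(artifact_ids: list[str]) -> None:
    # Deleting a package releases the reference held by each of its tags.
    for artifact_id in artifact_ids:
        ref_count[artifact_id] -= 1
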
class TestPackageUploads:
    """Tests for package-level uploads endpoint."""

    @pytest.mark.integration
    def test_package_uploads_returns_200(self, integration_client, test_package):
        """Test package uploads endpoint returns 200."""
        project_name, package_name = test_package
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/uploads"
        )
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_package_uploads_after_upload(self, integration_client, test_package):
        """Test uploads are recorded after file upload."""
        project_name, package_name = test_package

        # Upload a file
        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"test upload content",
            "test.txt",
        )
        assert upload_result["artifact_id"]

        # Check uploads endpoint
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/uploads"
        )
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) >= 1

        # Verify upload record fields
        upload = data["items"][0]
        assert "artifact_id" in upload
        assert "package_name" in upload
        assert "project_name" in upload
        assert "uploaded_at" in upload
        assert "uploaded_by" in upload

    @pytest.mark.integration
    def test_package_uploads_project_not_found(self, integration_client):
        """Test non-existent project returns 404."""
        response = integration_client.get(
            "/api/v1/project/nonexistent/nonexistent/uploads"
        )
        assert response.status_code == 404
325
backend/tests/integration/test_projects_api.py
Normal file
@@ -0,0 +1,325 @@
"""
Integration tests for project API endpoints.

Tests cover:
- Project CRUD operations
- Project listing with pagination, search, and sorting
- Project stats endpoint
- Project-level audit logs
- Cascade delete behavior
"""

import pytest
from tests.factories import compute_sha256, upload_test_file
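
tests.factories itself is outside this diff; for orientation, the two helpers every test below leans on plausibly look like this (sketch only -- the upload endpoint path and response shape are assumptions inferred from how the tests use the return value):

import hashlib
import io

def compute_sha256(content: bytes) -> str:
    # Hex digest doubling as the content-addressed artifact ID.
    return hashlib.sha256(content).hexdigest()

def upload_test_file(client, project, package, content,
                     filename="test.bin", tag=None):
    # Endpoint path assumed; not shown in this diff.
    response = client.post(
        f"/api/v1/project/{project}/{package}/upload",
        files={"file": (filename, io.BytesIO(content))},
        params={"tag": tag} if tag else {},
    )
    assert response.status_code == 200
    return response.json()  # includes artifact_id, size, original_name, ...
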
class TestProjectCRUD:
    """Tests for project create, read, update, delete operations."""

    @pytest.mark.integration
    def test_create_project(self, integration_client, unique_test_id):
        """Test creating a new project."""
        project_name = f"test-create-{unique_test_id}"

        try:
            response = integration_client.post(
                "/api/v1/projects",
                json={
                    "name": project_name,
                    "description": "Test project",
                    "is_public": True,
                },
            )
            assert response.status_code == 200

            data = response.json()
            assert data["name"] == project_name
            assert data["description"] == "Test project"
            assert data["is_public"] is True
            assert "id" in data
            assert "created_at" in data
        finally:
            integration_client.delete(f"/api/v1/projects/{project_name}")

    @pytest.mark.integration
    def test_get_project(self, integration_client, test_project):
        """Test getting a project by name."""
        response = integration_client.get(f"/api/v1/projects/{test_project}")
        assert response.status_code == 200

        data = response.json()
        assert data["name"] == test_project

    @pytest.mark.integration
    def test_get_nonexistent_project(self, integration_client):
        """Test getting a non-existent project returns 404."""
        response = integration_client.get("/api/v1/projects/nonexistent-project-xyz")
        assert response.status_code == 404

    @pytest.mark.integration
    def test_list_projects(self, integration_client, test_project):
        """Test listing projects includes created project."""
        # Search specifically for our test project to avoid pagination issues
        response = integration_client.get(f"/api/v1/projects?search={test_project}")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

        project_names = [p["name"] for p in data["items"]]
        assert test_project in project_names

    @pytest.mark.integration
    def test_delete_project(self, integration_client, unique_test_id):
        """Test deleting a project."""
        project_name = f"test-delete-{unique_test_id}"

        # Create project
        integration_client.post(
            "/api/v1/projects",
            json={"name": project_name, "description": "To be deleted"},
        )

        # Delete project
        response = integration_client.delete(f"/api/v1/projects/{project_name}")
        assert response.status_code == 204

        # Verify deleted
        response = integration_client.get(f"/api/v1/projects/{project_name}")
        assert response.status_code == 404


class TestProjectListingFilters:
    """Tests for project listing with filters and pagination."""

    @pytest.mark.integration
    def test_projects_pagination(self, integration_client):
        """Test project listing respects pagination parameters."""
        response = integration_client.get("/api/v1/projects?page=1&limit=5")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5
        assert data["pagination"]["page"] == 1
        assert "has_more" in data["pagination"]

    @pytest.mark.integration
    def test_projects_search(self, integration_client, test_project):
        """Test project search by name."""
        # Search using the unique portion of our test project name
        # test_project format is "test-project-test-{uuid[:8]}"
        unique_part = test_project.split("-")[-1]  # Get the UUID portion
        response = integration_client.get(
            f"/api/v1/projects?search={unique_part}"
        )
        assert response.status_code == 200

        data = response.json()
        # Our project should be in results
        project_names = [p["name"] for p in data["items"]]
        assert test_project in project_names

    @pytest.mark.integration
    def test_projects_sort_by_name(self, integration_client):
        """Test project sorting by name."""
        response = integration_client.get("/api/v1/projects?sort=name&order=asc")
        assert response.status_code == 200

        data = response.json()
        names = [p["name"] for p in data["items"]]
        assert names == sorted(names)


class TestProjectStats:
    """Tests for project statistics endpoint."""

    @pytest.mark.integration
    def test_project_stats_returns_valid_response(
        self, integration_client, test_project
    ):
        """Test project stats endpoint returns expected fields."""
        response = integration_client.get(f"/api/v1/projects/{test_project}/stats")
        assert response.status_code == 200

        data = response.json()
        assert "project_id" in data
        assert "project_name" in data
        assert "package_count" in data
        assert "tag_count" in data
        assert "artifact_count" in data
        assert "total_size_bytes" in data
        assert "upload_count" in data
        assert "deduplicated_uploads" in data
        assert "storage_saved_bytes" in data
        assert "deduplication_ratio" in data

    @pytest.mark.integration
    def test_project_stats_not_found(self, integration_client):
        """Test project stats returns 404 for non-existent project."""
        response = integration_client.get("/api/v1/projects/nonexistent-project/stats")
        assert response.status_code == 404


class TestProjectAuditLogs:
    """Tests for project-level audit logs endpoint."""

    @pytest.mark.integration
    def test_project_audit_logs_returns_200(self, integration_client, test_project):
        """Test project audit logs endpoint returns 200."""
        response = integration_client.get(f"/api/v1/projects/{test_project}/audit-logs")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_project_audit_logs_not_found(self, integration_client):
        """Test non-existent project returns 404."""
        response = integration_client.get(
            "/api/v1/projects/nonexistent-project/audit-logs"
        )
        assert response.status_code == 404


class TestProjectCascadeDelete:
    """Tests for cascade delete behavior when deleting projects."""

    @pytest.mark.integration
    def test_project_delete_cascades_to_packages(
        self, integration_client, unique_test_id
    ):
        """Test deleting project cascades to packages."""
        project_name = f"cascade-proj-{unique_test_id}"
        package_name = f"cascade-pkg-{unique_test_id}"

        try:
            # Create project and package
            integration_client.post(
                "/api/v1/projects",
                json={"name": project_name, "description": "Test", "is_public": True},
            )
            integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": package_name, "description": "Test package"},
            )

            # Verify package exists
            response = integration_client.get(
                f"/api/v1/project/{project_name}/packages/{package_name}"
            )
            assert response.status_code == 200

            # Delete project
            integration_client.delete(f"/api/v1/projects/{project_name}")

            # Verify project is deleted (and package with it)
            response = integration_client.get(f"/api/v1/projects/{project_name}")
            assert response.status_code == 404
        except Exception:
            # Cleanup if test fails
            integration_client.delete(f"/api/v1/projects/{project_name}")
            raise

    @pytest.mark.integration
    def test_ref_count_decrements_on_project_delete(
        self, integration_client, unique_test_id
    ):
        """Test ref_count decrements for all tags when project is deleted."""
        project_name = f"cascade-proj-{unique_test_id}"
        package1_name = f"pkg1-{unique_test_id}"
        package2_name = f"pkg2-{unique_test_id}"

        # Create project
        response = integration_client.post(
            "/api/v1/projects",
            json={
                "name": project_name,
                "description": "Test project",
                "is_public": True,
            },
        )
        assert response.status_code == 200

        # Create two packages
        for pkg_name in [package1_name, package2_name]:
            response = integration_client.post(
                f"/api/v1/project/{project_name}/packages",
                json={"name": pkg_name, "description": "Test package"},
            )
            assert response.status_code == 200

        # Upload same content with tags in both packages
        content = f"project cascade test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project_name, package1_name, content, tag="v1"
        )
        upload_test_file(
            integration_client, project_name, package1_name, content, tag="v2"
        )
        upload_test_file(
            integration_client, project_name, package2_name, content, tag="latest"
        )
        upload_test_file(
            integration_client, project_name, package2_name, content, tag="stable"
        )

        # Verify ref_count is 4 (2 tags in each of 2 packages)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 4

        # Delete the project
        delete_response = integration_client.delete(f"/api/v1/projects/{project_name}")
        assert delete_response.status_code == 204

        # Verify ref_count is 0
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0


class TestProjectUploads:
    """Tests for project-level uploads endpoint."""

    @pytest.mark.integration
    def test_project_uploads_returns_200(self, integration_client, test_project):
        """Test project uploads endpoint returns 200."""
        response = integration_client.get(f"/api/v1/project/{test_project}/uploads")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

    @pytest.mark.integration
    def test_project_uploads_after_upload(self, integration_client, test_package):
        """Test uploads are recorded in project uploads."""
        project_name, package_name = test_package

        # Upload a file
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"project uploads test",
            "project.txt",
        )

        response = integration_client.get(f"/api/v1/project/{project_name}/uploads")
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) >= 1

        # Verify project name matches
        for item in data["items"]:
            assert item["project_name"] == project_name

    @pytest.mark.integration
    def test_project_uploads_not_found(self, integration_client):
        """Test non-existent project returns 404."""
        response = integration_client.get("/api/v1/project/nonexistent/uploads")
        assert response.status_code == 404
93
backend/tests/integration/test_pypi_proxy.py
Normal file
@@ -0,0 +1,93 @@
"""Integration tests for PyPI transparent proxy."""

import os
import pytest
import httpx


def get_base_url():
    """Get the base URL for the Orchard server from environment."""
    return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")


class TestPyPIProxyEndpoints:
    """Tests for PyPI proxy endpoints.

    These endpoints are public (no auth required) since pip needs to use them.
    """

    @pytest.mark.integration
    def test_pypi_simple_index_no_sources(self):
        """Test that /pypi/simple/ returns 503 when no sources configured."""
        with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
            response = client.get("/pypi/simple/")
            # Should return 503 when no PyPI upstream sources are configured
            assert response.status_code == 503
            assert "No PyPI upstream sources configured" in response.json()["detail"]

    @pytest.mark.integration
    def test_pypi_package_no_sources(self):
        """Test that /pypi/simple/{package}/ returns 503 when no sources configured."""
        with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
            response = client.get("/pypi/simple/requests/")
            assert response.status_code == 503
            assert "No PyPI upstream sources configured" in response.json()["detail"]

    @pytest.mark.integration
    def test_pypi_download_missing_upstream_param(self):
        """Test that /pypi/simple/{package}/{filename} requires upstream param."""
        with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
            response = client.get("/pypi/simple/requests/requests-2.31.0.tar.gz")
            assert response.status_code == 400
            assert "upstream" in response.json()["detail"].lower()
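
Since pip is the intended client for these routes, a configured proxy would be exercised with something like pip install --index-url http://localhost:8080/pypi/simple/ requests (base URL per the ORCHARD_TEST_URL default above); the tests here only cover the unconfigured 503/400 paths.
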
class TestPyPILinkRewriting:
    """Tests for URL rewriting in PyPI proxy responses."""

    def test_rewrite_package_links(self):
        """Test that download links are rewritten to go through proxy."""
        from app.pypi_proxy import _rewrite_package_links

        html = '''
        <html>
        <body>
        <a href="https://files.pythonhosted.org/packages/ab/cd/requests-2.31.0.tar.gz#sha256=abc123">requests-2.31.0.tar.gz</a>
        <a href="https://files.pythonhosted.org/packages/ef/gh/requests-2.31.0-py3-none-any.whl#sha256=def456">requests-2.31.0-py3-none-any.whl</a>
        </body>
        </html>
        '''

        result = _rewrite_package_links(html, "http://localhost:8080", "requests")

        # Links should be rewritten to go through our proxy
        assert "/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=" in result
        assert "/pypi/simple/requests/requests-2.31.0-py3-none-any.whl?upstream=" in result
        # Original URLs should be encoded in upstream param
        assert "files.pythonhosted.org" in result
        # Hash fragments should be preserved
        assert "#sha256=abc123" in result
        assert "#sha256=def456" in result
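
The assertions above pin down the rewriting contract without showing the implementation. A sketch of a transformation that satisfies them (the real _rewrite_package_links in app.pypi_proxy may differ in details):

import re
from urllib.parse import quote, urlsplit

def rewrite_package_links(html: str, base_url: str, package: str) -> str:
    # Repoint each file link at the proxy, carrying the upstream URL as a
    # query parameter and keeping the #sha256=... fragment intact.
    def _rewrite(match: re.Match) -> str:
        upstream = match.group(1)
        path, _, fragment = upstream.partition("#")
        filename = urlsplit(path).path.rsplit("/", 1)[-1]
        proxied = (
            f"{base_url}/pypi/simple/{package}/{filename}"
            f"?upstream={quote(path, safe='')}"
        )
        return f'href="{proxied}#{fragment}"' if fragment else f'href="{proxied}"'

    return re.sub(r'href="([^"]+)"', _rewrite, html)
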
class TestPyPIPackageNormalization:
    """Tests for PyPI package name normalization."""

    @pytest.mark.integration
    def test_package_name_normalized(self):
        """Test that package names are normalized per PEP 503."""
        # These should all be treated the same:
        # requests, Requests, requests_, requests-
        # The endpoint normalizes to lowercase with hyphens

        with httpx.Client(base_url=get_base_url(), timeout=30.0) as client:
            # Without upstream sources, we get 503, but the normalization
            # happens before the source lookup
            response = client.get("/pypi/simple/Requests/")
            assert response.status_code == 503  # No sources, but path was valid

            response = client.get("/pypi/simple/some_package/")
            assert response.status_code == 503

            response = client.get("/pypi/simple/some-package/")
            assert response.status_code == 503
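
The normalization these requests rely on is the standard PEP 503 rule -- runs of hyphens, underscores, and dots collapse to a single hyphen, lowercased:

import re

def normalize(name: str) -> str:
    # Canonical PEP 503 normalization.
    return re.sub(r"[-_.]+", "-", name).lower()

assert normalize("Requests") == "requests"
assert normalize("some_package") == normalize("some-package")
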
583
backend/tests/integration/test_size_boundary.py
Normal file
@@ -0,0 +1,583 @@
"""
Integration tests for upload/download with various file sizes.

Tests cover:
- Small files (0B - 100KB)
- Medium files (1MB - 50MB)
- Large files (100MB - 1GB) - marked as slow/large
- Exact chunk boundaries
- Data integrity verification across all sizes
"""

import pytest
import io
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.factories import (
    compute_sha256,
    upload_test_file,
    generate_content,
    generate_content_with_hash,
)
from tests.conftest import (
    SIZE_1B,
    SIZE_1KB,
    SIZE_10KB,
    SIZE_100KB,
    SIZE_1MB,
    SIZE_5MB,
    SIZE_10MB,
    SIZE_50MB,
    SIZE_100MB,
    SIZE_250MB,
    SIZE_500MB,
    SIZE_1GB,
    CHUNK_SIZE,
    MULTIPART_THRESHOLD,
)
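
conftest.py is outside this hunk; the sized_content fixture used throughout presumably pairs deterministic pseudo-random bytes with their digest, along the lines of (sketch only; assumes Python 3.9+ for random.Random.randbytes):

import hashlib
import random

import pytest

@pytest.fixture
def sized_content():
    def _make(size: int, seed: int = 0) -> tuple[bytes, str]:
        content = random.Random(seed).randbytes(size)  # reproducible per seed
        return content, hashlib.sha256(content).hexdigest()
    return _make
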
class TestSmallFileSizes:
    """Tests for small file uploads/downloads (0B - 100KB)."""

    @pytest.mark.integration
    def test_upload_download_1_byte(self, integration_client, test_package, sized_content):
        """Test upload/download of 1 byte file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1B, seed=1)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="1byte.bin", tag="1byte"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1B

        # Download and verify
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/1byte",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
        assert len(response.content) == SIZE_1B

    @pytest.mark.integration
    def test_upload_download_1kb(self, integration_client, test_package, sized_content):
        """Test upload/download of 1KB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1KB, seed=2)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="1kb.bin", tag="1kb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1KB

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/1kb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_upload_download_10kb(self, integration_client, test_package, sized_content):
        """Test upload/download of 10KB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10KB, seed=3)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="10kb.bin", tag="10kb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10KB

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/10kb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_upload_download_100kb(self, integration_client, test_package, sized_content):
        """Test upload/download of 100KB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=4)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="100kb.bin", tag="100kb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100KB

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/100kb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content


class TestMediumFileSizes:
    """Tests for medium file uploads/downloads (1MB - 50MB)."""

    @pytest.mark.integration
    def test_upload_download_1mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 1MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1MB, seed=10)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="1mb.bin", tag="1mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1MB

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/1mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_1MB
        assert compute_sha256(response.content) == expected_hash

    @pytest.mark.integration
    def test_upload_download_5mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 5MB file (near the multipart threshold)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_5MB, seed=11)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="5mb.bin", tag="5mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_5MB

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/5mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_5MB
        assert compute_sha256(response.content) == expected_hash

    @pytest.mark.integration
    @pytest.mark.slow
    def test_upload_download_10mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 10MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_10MB, seed=12)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="10mb.bin", tag="10mb"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_10MB

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/10mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_10MB
        assert compute_sha256(response.content) == expected_hash

    @pytest.mark.integration
    @pytest.mark.slow
    def test_upload_download_50mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 50MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_50MB, seed=13)

        start_time = time.time()
        result = upload_test_file(
            integration_client, project, package, content,
            filename="50mb.bin", tag="50mb"
        )
        upload_time = time.time() - start_time

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_50MB

        start_time = time.time()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/50mb",
            params={"mode": "proxy"},
        )
        download_time = time.time() - start_time

        assert response.status_code == 200
        assert len(response.content) == SIZE_50MB
        assert compute_sha256(response.content) == expected_hash

        # Log timing for performance tracking
        print(f"\n50MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")


class TestLargeFileSizes:
    """Tests for large file uploads/downloads (100MB - 1GB).

    These tests are marked as slow and large, skipped by default.
    Run with: pytest -m "large" to include these tests.
    """

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_100mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 100MB file (multipart threshold)."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100MB, seed=100)

        start_time = time.time()
        result = upload_test_file(
            integration_client, project, package, content,
            filename="100mb.bin", tag="100mb"
        )
        upload_time = time.time() - start_time

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_100MB

        start_time = time.time()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/100mb",
            params={"mode": "proxy"},
        )
        download_time = time.time() - start_time

        assert response.status_code == 200
        assert len(response.content) == SIZE_100MB
        assert compute_sha256(response.content) == expected_hash

        print(f"\n100MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_250mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 250MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_250MB, seed=250)

        start_time = time.time()
        result = upload_test_file(
            integration_client, project, package, content,
            filename="250mb.bin", tag="250mb"
        )
        upload_time = time.time() - start_time

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_250MB

        start_time = time.time()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/250mb",
            params={"mode": "proxy"},
        )
        download_time = time.time() - start_time

        assert response.status_code == 200
        assert len(response.content) == SIZE_250MB
        assert compute_sha256(response.content) == expected_hash

        print(f"\n250MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_500mb(self, integration_client, test_package, sized_content):
        """Test upload/download of 500MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_500MB, seed=500)

        start_time = time.time()
        result = upload_test_file(
            integration_client, project, package, content,
            filename="500mb.bin", tag="500mb"
        )
        upload_time = time.time() - start_time

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_500MB

        start_time = time.time()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/500mb",
            params={"mode": "proxy"},
        )
        download_time = time.time() - start_time

        assert response.status_code == 200
        assert len(response.content) == SIZE_500MB
        assert compute_sha256(response.content) == expected_hash

        print(f"\n500MB upload: {upload_time:.2f}s, download: {download_time:.2f}s")

    @pytest.mark.integration
    @pytest.mark.slow
    @pytest.mark.large
    def test_upload_download_1gb(self, integration_client, test_package, sized_content):
        """Test upload/download of 1GB file.

        This test may take several minutes depending on network/disk speed.
        """
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1GB, seed=1024)

        start_time = time.time()
        result = upload_test_file(
            integration_client, project, package, content,
            filename="1gb.bin", tag="1gb"
        )
        upload_time = time.time() - start_time

        assert result["artifact_id"] == expected_hash
        assert result["size"] == SIZE_1GB

        start_time = time.time()
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/1gb",
            params={"mode": "proxy"},
        )
        download_time = time.time() - start_time

        assert response.status_code == 200
        assert len(response.content) == SIZE_1GB
        assert compute_sha256(response.content) == expected_hash

        print(f"\n1GB upload: {upload_time:.2f}s, download: {download_time:.2f}s")


class TestChunkBoundaries:
    """Tests for exact chunk size boundaries."""

    @pytest.mark.integration
    def test_upload_download_at_chunk_size(self, integration_client, test_package, sized_content):
        """Test upload/download at exact chunk size (64KB)."""
        project, package = test_package
        content, expected_hash = sized_content(CHUNK_SIZE, seed=64)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="chunk.bin", tag="chunk-exact"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == CHUNK_SIZE

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/chunk-exact",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_upload_download_chunk_size_plus_1(self, integration_client, test_package, sized_content):
        """Test upload/download at chunk size + 1 byte."""
        project, package = test_package
        size = CHUNK_SIZE + 1
        content, expected_hash = sized_content(size, seed=65)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="chunk_plus.bin", tag="chunk-plus"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == size

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/chunk-plus",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_upload_download_chunk_size_minus_1(self, integration_client, test_package, sized_content):
        """Test upload/download at chunk size - 1 byte."""
        project, package = test_package
        size = CHUNK_SIZE - 1
        content, expected_hash = sized_content(size, seed=63)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="chunk_minus.bin", tag="chunk-minus"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == size

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/chunk-minus",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_upload_download_multiple_chunks(self, integration_client, test_package, sized_content):
        """Test upload/download spanning multiple chunks."""
        project, package = test_package
        size = CHUNK_SIZE * 3 + 1000  # 3 full chunks + partial
        content, expected_hash = sized_content(size, seed=300)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="multi_chunk.bin", tag="multi-chunk"
        )
        assert result["artifact_id"] == expected_hash
        assert result["size"] == size

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/multi-chunk",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
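
The off-by-one cases above matter because server-side hashing and storage walk the payload in fixed-size chunks; a size of CHUNK_SIZE plus or minus one byte exercises the final short (or empty) read. The streaming pattern being probed, in miniature:

import hashlib

CHUNK_SIZE = 64 * 1024  # the 64KB chunk size these tests assume

def sha256_stream(fileobj) -> str:
    # Hash without materializing the whole payload; the last read is
    # shorter than CHUNK_SIZE unless the size is an exact multiple.
    digest = hashlib.sha256()
    while chunk := fileobj.read(CHUNK_SIZE):
        digest.update(chunk)
    return digest.hexdigest()
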
class TestDataIntegrity:
    """Tests for data integrity with various content types."""

    @pytest.mark.integration
    def test_binary_content_integrity(self, integration_client, test_package):
        """Test binary content (all byte values 0-255) integrity."""
        project, package = test_package
        # Content with all 256 possible byte values
        content = bytes(range(256)) * 100  # 25.6KB
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="binary.bin", tag="binary"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/binary",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_text_content_integrity(self, integration_client, test_package):
        """Test UTF-8 text content integrity."""
        project, package = test_package
        content = "Hello, World! 你好世界 🌍 مرحبا العالم".encode("utf-8")
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="text.txt", tag="text"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/text",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
        assert response.content.decode("utf-8") == "Hello, World! 你好世界 🌍 مرحبا العالم"

    @pytest.mark.integration
    def test_null_bytes_content_integrity(self, integration_client, test_package):
        """Test content with null bytes."""
        project, package = test_package
        content = b"before\x00null\x00bytes\x00after"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="nulls.bin", tag="nulls"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/nulls",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
        assert b"\x00" in response.content

    @pytest.mark.integration
    def test_unicode_filename_integrity(self, integration_client, test_package):
        """Test file with unicode filename."""
        project, package = test_package
        content = b"unicode filename test"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="文件名.txt", tag="unicode-name"
        )
        assert result["artifact_id"] == expected_hash
        assert result["original_name"] == "文件名.txt"

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/unicode-name",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_compressed_content_integrity(self, integration_client, test_package):
        """Test gzip-compressed content integrity."""
        import gzip

        project, package = test_package
        original = b"This is some text that will be compressed " * 100
        content = gzip.compress(original)
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project, package, content,
            filename="data.gz", tag="compressed"
        )
        assert result["artifact_id"] == expected_hash

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/compressed",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content
        # Verify we can decompress
        assert gzip.decompress(response.content) == original

    @pytest.mark.integration
    def test_hash_verification_matches(self, integration_client, test_package, sized_content):
        """Test that computed hash matches artifact_id for various sizes."""
        project, package = test_package

        sizes = [SIZE_1B, SIZE_1KB, SIZE_10KB, SIZE_100KB, SIZE_1MB]

        for i, size in enumerate(sizes):
            content, expected_hash = sized_content(size, seed=1000 + i)

            result = upload_test_file(
                integration_client, project, package, content,
                filename=f"hash_test_{size}.bin", tag=f"hash-{size}"
            )

            # Verify artifact_id matches expected hash
            assert result["artifact_id"] == expected_hash

            # Download and verify hash of downloaded content
            response = integration_client.get(
                f"/api/v1/project/{project}/{package}/+/hash-{size}",
                params={"mode": "proxy"},
            )
            downloaded_hash = compute_sha256(response.content)
            assert downloaded_hash == expected_hash
535
backend/tests/integration/test_streaming_download.py
Normal file
@@ -0,0 +1,535 @@
"""
Integration tests for streaming download functionality.

Tests cover:
- HTTP Range requests (partial downloads, resume)
- Conditional requests (If-None-Match, If-Modified-Since)
- Caching headers (Cache-Control, Last-Modified, Accept-Ranges)
- Large file streaming
- Download modes (proxy, redirect, presigned)
"""

import pytest
import io
import time
from email.utils import formatdate
from tests.factories import (
    compute_sha256,
    upload_test_file,
)
from tests.conftest import (
    SIZE_1KB,
    SIZE_100KB,
    SIZE_1MB,
)


class TestRangeRequests:
    """Tests for HTTP Range request support (partial downloads)."""

    @pytest.mark.integration
    def test_range_request_first_bytes(self, integration_client, test_package):
        """Test range request for first N bytes."""
        project, package = test_package
        content = b"0123456789" * 100  # 1000 bytes
        upload_test_file(integration_client, project, package, content, tag="range-test")

        # Request first 10 bytes
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/range-test",
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-9"},
        )
        assert response.status_code == 206  # Partial Content
        assert response.content == b"0123456789"
        assert "Content-Range" in response.headers
        assert response.headers["Content-Range"].startswith("bytes 0-9/")

    @pytest.mark.integration
    def test_range_request_middle_bytes(self, integration_client, test_package):
        """Test range request for bytes in the middle."""
        project, package = test_package
        content = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        upload_test_file(integration_client, project, package, content, tag="range-mid")

        # Request bytes 10-19 (KLMNOPQRST)
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/range-mid",
            params={"mode": "proxy"},
            headers={"Range": "bytes=10-19"},
        )
        assert response.status_code == 206
        assert response.content == b"KLMNOPQRST"

    @pytest.mark.integration
    def test_range_request_suffix_bytes(self, integration_client, test_package):
        """Test range request for last N bytes (suffix range)."""
        project, package = test_package
        content = b"0123456789ABCDEF"  # 16 bytes
        upload_test_file(integration_client, project, package, content, tag="range-suffix")

        # Request last 4 bytes
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/range-suffix",
            params={"mode": "proxy"},
            headers={"Range": "bytes=-4"},
        )
        assert response.status_code == 206
        assert response.content == b"CDEF"

    @pytest.mark.integration
    def test_range_request_open_ended(self, integration_client, test_package):
        """Test range request from offset to end."""
        project, package = test_package
        content = b"0123456789"
        upload_test_file(integration_client, project, package, content, tag="range-open")

        # Request from byte 5 to end
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/range-open",
            params={"mode": "proxy"},
            headers={"Range": "bytes=5-"},
        )
        assert response.status_code == 206
        assert response.content == b"56789"

    @pytest.mark.integration
    def test_range_request_includes_accept_ranges_header(
        self, integration_client, test_package
    ):
        """Test that range requests include Accept-Ranges header."""
        project, package = test_package
        content = b"test content"
        upload_test_file(integration_client, project, package, content, tag="accept-ranges")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/accept-ranges",
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-4"},
        )
        assert response.status_code == 206
        assert response.headers.get("Accept-Ranges") == "bytes"

    @pytest.mark.integration
    def test_full_download_advertises_accept_ranges(
        self, integration_client, test_package
    ):
        """Test that full downloads advertise range support."""
        project, package = test_package
        content = b"test content"
        upload_test_file(integration_client, project, package, content, tag="full-accept")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/full-accept",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.headers.get("Accept-Ranges") == "bytes"
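
For reference, resolving the three Range forms the tests send (bytes=0-9, bytes=-4, bytes=5-) against a known size works like this (a single-range sketch; a production server also handles multi-range requests and answers 416 when nothing is satisfiable):

def resolve_range(header: str, size: int):
    # Returns an inclusive (start, end) pair, or None if unusable.
    if not header.startswith("bytes="):
        return None
    first, _, last = header[len("bytes="):].partition("-")
    if first == "":                      # suffix form: last N bytes
        n = int(last)
        return (max(size - n, 0), size - 1) if n else None
    start = int(first)
    end = min(int(last), size - 1) if last else size - 1  # open-ended form
    return (start, end) if start <= end else None

assert resolve_range("bytes=0-9", 1000) == (0, 9)
assert resolve_range("bytes=-4", 16) == (12, 15)
assert resolve_range("bytes=5-", 10) == (5, 9)
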
class TestConditionalRequests:
    """Tests for conditional request handling (304 Not Modified)."""

    @pytest.mark.integration
    def test_if_none_match_returns_304(self, integration_client, test_package):
        """Test If-None-Match with matching ETag returns 304."""
        project, package = test_package
        content = b"conditional request test content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="cond-etag")

        # Request with matching ETag
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-etag",
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert response.content == b""  # No body for 304

    @pytest.mark.integration
    def test_if_none_match_without_quotes(self, integration_client, test_package):
        """Test If-None-Match works with or without quotes."""
        project, package = test_package
        content = b"etag no quotes test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="cond-noquote")

        # Request with ETag without quotes
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-noquote",
            params={"mode": "proxy"},
            headers={"If-None-Match": expected_hash},
        )
        assert response.status_code == 304

    @pytest.mark.integration
    def test_if_none_match_mismatch_returns_200(self, integration_client, test_package):
        """Test If-None-Match with non-matching ETag returns 200."""
        project, package = test_package
        content = b"etag mismatch test"
        upload_test_file(integration_client, project, package, content, tag="cond-mismatch")

        # Request with different ETag
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-mismatch",
            params={"mode": "proxy"},
            headers={"If-None-Match": '"different-etag-value"'},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_if_modified_since_returns_304(self, integration_client, test_package):
        """Test If-Modified-Since with future date returns 304."""
        project, package = test_package
        content = b"modified since test"
        upload_test_file(integration_client, project, package, content, tag="cond-modified")

        # Request with future date (artifact was definitely created before this)
        future_date = formatdate(time.time() + 86400, usegmt=True)  # Tomorrow
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-modified",
            params={"mode": "proxy"},
            headers={"If-Modified-Since": future_date},
        )
        assert response.status_code == 304

    @pytest.mark.integration
    def test_if_modified_since_old_date_returns_200(
        self, integration_client, test_package
    ):
        """Test If-Modified-Since with old date returns 200."""
        project, package = test_package
        content = b"old date test"
        upload_test_file(integration_client, project, package, content, tag="cond-old")

        # Request with old date (2020-01-01)
        old_date = "Wed, 01 Jan 2020 00:00:00 GMT"
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cond-old",
            params={"mode": "proxy"},
            headers={"If-Modified-Since": old_date},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_304_includes_etag(self, integration_client, test_package):
        """Test 304 response includes ETag header."""
        project, package = test_package
        content = b"304 etag test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="304-etag")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/304-etag",
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert response.headers.get("ETag") == f'"{expected_hash}"'

    @pytest.mark.integration
    def test_304_includes_cache_control(self, integration_client, test_package):
        """Test 304 response includes Cache-Control header."""
        project, package = test_package
        content = b"304 cache test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="304-cache")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/304-cache",
            params={"mode": "proxy"},
            headers={"If-None-Match": f'"{expected_hash}"'},
        )
        assert response.status_code == 304
        assert "immutable" in response.headers.get("Cache-Control", "")
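
Because artifacts are content-addressed, the SHA-256 makes a natural strong ETag, and the quote-tolerant matching tested above reduces to string comparison after trimming. A sketch of the server-side check these tests imply:

def not_modified(if_none_match, artifact_sha256: str) -> bool:
    # True when the client's cached copy is still current; header values
    # may arrive quoted or bare, and "*" matches any representation.
    if if_none_match is None:
        return False
    candidates = [v.strip().strip('"') for v in if_none_match.split(",")]
    return "*" in candidates or artifact_sha256 in candidates
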
class TestCachingHeaders:
    """Tests for caching headers on download responses."""

    @pytest.mark.integration
    def test_download_includes_cache_control(self, integration_client, test_package):
        """Test download response includes Cache-Control header."""
        project, package = test_package
        content = b"cache control test"
        upload_test_file(integration_client, project, package, content, tag="cache-ctl")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/cache-ctl",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        cache_control = response.headers.get("Cache-Control", "")
        assert "public" in cache_control
        assert "immutable" in cache_control
        assert "max-age" in cache_control

    @pytest.mark.integration
    def test_download_includes_last_modified(self, integration_client, test_package):
        """Test download response includes Last-Modified header."""
        project, package = test_package
        content = b"last modified test"
        upload_test_file(integration_client, project, package, content, tag="last-mod")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/last-mod",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "Last-Modified" in response.headers
        # Should be in RFC 7231 format
        last_modified = response.headers["Last-Modified"]
        assert "GMT" in last_modified

    @pytest.mark.integration
    def test_download_includes_etag(self, integration_client, test_package):
        """Test download response includes ETag header."""
        project, package = test_package
        content = b"etag header test"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="etag-hdr")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/etag-hdr",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.headers.get("ETag") == f'"{expected_hash}"'
class TestDownloadResume:
    """Tests for download resume functionality using range requests."""

    @pytest.mark.integration
    def test_resume_download_after_partial(self, integration_client, test_package):
        """Test resuming download from where it left off."""
        project, package = test_package
        content = b"ABCDEFGHIJ" * 100  # 1000 bytes
        upload_test_file(integration_client, project, package, content, tag="resume-test")

        # Simulate partial download (first 500 bytes)
        response1 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-test",
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-499"},
        )
        assert response1.status_code == 206
        first_half = response1.content
        assert len(first_half) == 500

        # Resume from byte 500
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-test",
            params={"mode": "proxy"},
            headers={"Range": "bytes=500-"},
        )
        assert response2.status_code == 206
        second_half = response2.content
        assert len(second_half) == 500

        # Combine and verify
        combined = first_half + second_half
        assert combined == content

    @pytest.mark.integration
    def test_resume_with_etag_verification(self, integration_client, test_package):
        """Test that resumed download can verify content hasn't changed."""
        project, package = test_package
        content = b"resume etag verification test content"
        expected_hash = compute_sha256(content)
        upload_test_file(integration_client, project, package, content, tag="resume-etag")

        # Get ETag from first request
        response1 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-etag",
            params={"mode": "proxy"},
            headers={"Range": "bytes=0-9"},
        )
        assert response1.status_code == 206
        etag = response1.headers.get("ETag")
        assert etag == f'"{expected_hash}"'

        # Resume with If-Match to ensure content hasn't changed
        # (Note: If-Match would fail and return 412 if content changed)
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/resume-etag",
            params={"mode": "proxy"},
            headers={"Range": "bytes=10-"},
        )
        assert response2.status_code == 206
        # ETag should be the same
        assert response2.headers.get("ETag") == etag
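
On the client side, the same two requests become a resume loop: start from the bytes already on disk and append whatever comes back with a 206. A minimal sketch with httpx (a robust client would also send If-Range with the saved ETag so a changed artifact restarts from scratch):

import os
import httpx

def resume_download(url: str, dest: str) -> None:
    offset = os.path.getsize(dest) if os.path.exists(dest) else 0
    headers = {"Range": f"bytes={offset}-"} if offset else {}
    with httpx.stream("GET", url, headers=headers) as resp:
        resp.raise_for_status()
        mode = "ab" if resp.status_code == 206 else "wb"  # append on partial
        with open(dest, mode) as f:
            for chunk in resp.iter_bytes():
                f.write(chunk)
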
class TestLargeFileStreaming:
    """Tests for streaming large files."""

    @pytest.mark.integration
    def test_stream_1mb_file(self, integration_client, test_package, sized_content):
        """Test streaming a 1MB file."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_1MB, seed=500)

        upload_test_file(integration_client, project, package, content, tag="stream-1mb")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/stream-1mb",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert len(response.content) == SIZE_1MB
        assert compute_sha256(response.content) == expected_hash

    @pytest.mark.integration
    def test_stream_large_file_has_correct_headers(
        self, integration_client, test_package, sized_content
    ):
        """Test that large file streaming has correct headers."""
        project, package = test_package
        content, expected_hash = sized_content(SIZE_100KB, seed=501)

        upload_test_file(integration_client, project, package, content, tag="stream-hdr")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/stream-hdr",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert int(response.headers.get("Content-Length", 0)) == SIZE_100KB
        assert response.headers.get("X-Checksum-SHA256") == expected_hash
        assert response.headers.get("Accept-Ranges") == "bytes"

    @pytest.mark.integration
    def test_range_request_on_large_file(
        self, integration_client, test_package, sized_content
    ):
        """Test range request on a larger file."""
        project, package = test_package
        content, _ = sized_content(SIZE_100KB, seed=502)

        upload_test_file(integration_client, project, package, content, tag="range-large")

        # Request a slice from the middle
        start = 50000
        end = 50999
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/range-large",
            params={"mode": "proxy"},
            headers={"Range": f"bytes={start}-{end}"},
        )
        assert response.status_code == 206
        assert len(response.content) == 1000
        assert response.content == content[start : end + 1]


class TestDownloadModes:
    """Tests for different download modes."""

    @pytest.mark.integration
    def test_proxy_mode_streams_content(self, integration_client, test_package):
        """Test proxy mode streams content through backend."""
        project, package = test_package
        content = b"proxy mode test content"
        upload_test_file(integration_client, project, package, content, tag="mode-proxy")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/mode-proxy",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_presigned_mode_returns_url(self, integration_client, test_package):
        """Test presigned mode returns JSON with URL."""
        project, package = test_package
        content = b"presigned mode test"
        upload_test_file(integration_client, project, package, content, tag="mode-presign")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/mode-presign",
            params={"mode": "presigned"},
        )
        assert response.status_code == 200
        data = response.json()
        assert "url" in data
        assert "expires_at" in data
        assert data["url"].startswith("http")

    @pytest.mark.integration
    def test_redirect_mode_returns_302(self, integration_client, test_package):
        """Test redirect mode returns 302 to presigned URL."""
        project, package = test_package
        content = b"redirect mode test"
        upload_test_file(integration_client, project, package, content, tag="mode-redir")

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/mode-redir",
            params={"mode": "redirect"},
            follow_redirects=False,
        )
        assert response.status_code == 302
        assert "Location" in response.headers
class TestIntegrityDuringStreaming:
|
||||
"""Tests for data integrity during streaming downloads."""
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_checksum_header_matches_content(self, integration_client, test_package):
|
||||
"""Test X-Checksum-SHA256 header matches actual downloaded content."""
|
||||
project, package = test_package
|
||||
content = b"integrity check content"
|
||||
expected_hash = compute_sha256(content)
|
||||
upload_test_file(integration_client, project, package, content, tag="integrity")
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/integrity",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
header_hash = response.headers.get("X-Checksum-SHA256")
|
||||
actual_hash = compute_sha256(response.content)
|
||||
|
||||
assert header_hash == expected_hash
|
||||
assert actual_hash == expected_hash
|
||||
assert header_hash == actual_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_etag_matches_content_hash(self, integration_client, test_package):
|
||||
"""Test ETag header matches content hash."""
|
||||
project, package = test_package
|
||||
content = b"etag integrity test"
|
||||
expected_hash = compute_sha256(content)
|
||||
upload_test_file(integration_client, project, package, content, tag="etag-int")
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/etag-int",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
etag = response.headers.get("ETag", "").strip('"')
|
||||
actual_hash = compute_sha256(response.content)
|
||||
|
||||
assert etag == expected_hash
|
||||
assert actual_hash == expected_hash
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_digest_header_present(self, integration_client, test_package):
|
||||
"""Test Digest header is present in RFC 3230 format."""
|
||||
project, package = test_package
|
||||
content = b"digest header test"
|
||||
upload_test_file(integration_client, project, package, content, tag="digest")
|
||||
|
||||
response = integration_client.get(
|
||||
f"/api/v1/project/{project}/{package}/+/digest",
|
||||
params={"mode": "proxy"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
assert "Digest" in response.headers
|
||||
assert response.headers["Digest"].startswith("sha-256=")
|
||||
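The streaming and integrity tests above go through two helpers from `tests.factories` that this compare view doesn't include. As a rough sketch of the contract the tests assume (hypothetical reconstruction; names and response shape are inferred from the assertions and from the multipart calls made directly in the upload tests further down, not from the real factories module):

    # Hypothetical sketch of the tests.factories helpers; not part of this diff.
    import hashlib
    import io


    def compute_sha256(content: bytes) -> str:
        # Hex SHA256 digest; doubles as the artifact ID in these tests.
        return hashlib.sha256(content).hexdigest()


    def upload_test_file(client, project, package, content,
                         filename="test.bin", tag=None):
        # Multipart upload mirroring the raw calls in the upload tests below;
        # returns the parsed JSON body on success.
        files = {"file": (filename, io.BytesIO(content), "application/octet-stream")}
        data = {"tag": tag} if tag is not None else {}
        response = client.post(
            f"/api/v1/project/{project}/{package}/upload", files=files, data=data
        )
        assert response.status_code == 200, response.text
        return response.json()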
403
backend/tests/integration/test_tags_api.py
Normal file
@@ -0,0 +1,403 @@
"""
Integration tests for tag API endpoints.

Tests cover:
- Tag CRUD operations
- Tag listing with pagination and search
- Tag history tracking
- ref_count behavior with tag operations
"""

import pytest
from tests.factories import compute_sha256, upload_test_file


class TestTagCRUD:
    """Tests for tag create, read, delete operations."""

    @pytest.mark.integration
    def test_create_tag_via_upload(self, integration_client, test_package):
        """Test creating a tag via upload endpoint."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tag create test",
            tag="v1.0.0",
        )

        assert result["tag"] == "v1.0.0"
        assert result["artifact_id"]

    @pytest.mark.integration
    def test_create_tag_via_post(
        self, integration_client, test_package, unique_test_id
    ):
        """Test creating a tag via POST /tags endpoint."""
        project_name, package_name = test_package

        # First upload an artifact
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"artifact for tag",
        )
        artifact_id = result["artifact_id"]

        # Create tag via POST
        tag_name = f"post-tag-{unique_test_id}"
        response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json={"name": tag_name, "artifact_id": artifact_id},
        )
        assert response.status_code == 200

        data = response.json()
        assert data["name"] == tag_name
        assert data["artifact_id"] == artifact_id

    @pytest.mark.integration
    def test_get_tag(self, integration_client, test_package):
        """Test getting a tag by name."""
        project_name, package_name = test_package

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"get tag test",
            tag="get-tag",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/get-tag"
        )
        assert response.status_code == 200

        data = response.json()
        assert data["name"] == "get-tag"
        assert "artifact_id" in data
        assert "artifact_size" in data
        assert "artifact_content_type" in data

    @pytest.mark.integration
    def test_list_tags(self, integration_client, test_package):
        """Test listing tags for a package."""
        project_name, package_name = test_package

        # Create some tags
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"list tags test",
            tag="list-v1",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags"
        )
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

        tag_names = [t["name"] for t in data["items"]]
        assert "list-v1" in tag_names

    @pytest.mark.integration
    def test_delete_tag(self, integration_client, test_package):
        """Test deleting a tag."""
        project_name, package_name = test_package

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"delete tag test",
            tag="to-delete",
        )

        # Delete tag
        response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"
        )
        assert response.status_code == 204

        # Verify deleted
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/to-delete"
        )
        assert response.status_code == 404


class TestTagListingFilters:
    """Tests for tag listing with filters and search."""

    @pytest.mark.integration
    def test_tags_pagination(self, integration_client, test_package):
        """Test tag listing respects pagination."""
        project_name, package_name = test_package

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags?limit=5"
        )
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) <= 5
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_tags_search(self, integration_client, test_package, unique_test_id):
        """Test tag search by name."""
        project_name, package_name = test_package

        tag_name = f"searchable-{unique_test_id}"
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"search test",
            tag=tag_name,
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags?search=searchable"
        )
        assert response.status_code == 200

        data = response.json()
        tag_names = [t["name"] for t in data["items"]]
        assert tag_name in tag_names


class TestTagHistory:
    """Tests for tag history tracking."""

    @pytest.mark.integration
    def test_tag_history_on_create(self, integration_client, test_package):
        """Test tag history is created when tag is created."""
        project_name, package_name = test_package

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"history create test",
            tag="history-create",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/history-create/history"
        )
        assert response.status_code == 200

        data = response.json()
        assert len(data) >= 1

    @pytest.mark.integration
    def test_tag_history_on_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Test tag history is created when tag is updated."""
        project_name, package_name = test_package

        tag_name = f"history-update-{unique_test_id}"

        # Create tag with first artifact
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"first content",
            tag=tag_name,
        )

        # Update tag with second artifact
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"second content",
            tag=tag_name,
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/{tag_name}/history"
        )
        assert response.status_code == 200

        data = response.json()
        # Should have at least 2 history entries (create + update)
        assert len(data) >= 2


class TestTagRefCount:
    """Tests for ref_count behavior with tag operations."""

    @pytest.mark.integration
    def test_ref_count_decrements_on_tag_delete(self, integration_client, test_package):
        """Test ref_count decrements when a tag is deleted."""
        project_name, package_name = test_package
        content = b"ref count delete test"
        expected_hash = compute_sha256(content)

        # Upload with two tags
        upload_test_file(
            integration_client, project_name, package_name, content, tag="rc-v1"
        )
        upload_test_file(
            integration_client, project_name, package_name, content, tag="rc-v2"
        )

        # Verify ref_count is 2
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2

        # Delete one tag
        delete_response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/rc-v1"
        )
        assert delete_response.status_code == 204

        # Verify ref_count is now 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_ref_count_zero_after_all_tags_deleted(
        self, integration_client, test_package
    ):
        """Test ref_count goes to 0 when all tags are deleted."""
        project_name, package_name = test_package
        content = b"orphan test content"
        expected_hash = compute_sha256(content)

        # Upload with one tag
        upload_test_file(
            integration_client, project_name, package_name, content, tag="only-tag"
        )

        # Delete the tag
        integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/tags/only-tag"
        )

        # Verify ref_count is 0
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0

    @pytest.mark.integration
    def test_ref_count_adjusts_on_tag_update(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count adjusts when a tag is updated to point to different artifact."""
        project_name, package_name = test_package

        # Upload two different artifacts
        content1 = f"artifact one {unique_test_id}".encode()
        content2 = f"artifact two {unique_test_id}".encode()
        hash1 = compute_sha256(content1)
        hash2 = compute_sha256(content2)

        # Upload first artifact with tag "latest"
        upload_test_file(
            integration_client, project_name, package_name, content1, tag="latest"
        )

        # Verify first artifact has ref_count 1
        response = integration_client.get(f"/api/v1/artifact/{hash1}")
        assert response.json()["ref_count"] == 1

        # Upload second artifact with different tag
        upload_test_file(
            integration_client, project_name, package_name, content2, tag="stable"
        )

        # Now update "latest" tag to point to second artifact
        upload_test_file(
            integration_client, project_name, package_name, content2, tag="latest"
        )

        # Verify first artifact ref_count decreased to 0
        response = integration_client.get(f"/api/v1/artifact/{hash1}")
        assert response.json()["ref_count"] == 0

        # Verify second artifact ref_count increased to 2
        response = integration_client.get(f"/api/v1/artifact/{hash2}")
        assert response.json()["ref_count"] == 2

    @pytest.mark.integration
    def test_ref_count_unchanged_when_tag_same_artifact(
        self, integration_client, test_package, unique_test_id
    ):
        """Test ref_count doesn't change when tag is 'updated' to same artifact."""
        project_name, package_name = test_package

        content = f"same artifact {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload with tag
        upload_test_file(
            integration_client, project_name, package_name, content, tag="same-v1"
        )

        # Verify ref_count is 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1

        # Upload same content with same tag (no-op)
        upload_test_file(
            integration_client, project_name, package_name, content, tag="same-v1"
        )

        # Verify ref_count is still 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1

    @pytest.mark.integration
    def test_tag_via_post_endpoint_increments_ref_count(
        self, integration_client, test_package, unique_test_id
    ):
        """Test creating tag via POST /tags endpoint increments ref_count."""
        project_name, package_name = test_package

        content = f"tag endpoint test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload artifact without tag
        result = upload_test_file(
            integration_client, project_name, package_name, content, filename="test.bin"
        )
        artifact_id = result["artifact_id"]

        # Verify ref_count is 0 (no tags yet)
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 0

        # Create tag via POST endpoint
        tag_response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json={"name": "post-v1", "artifact_id": artifact_id},
        )
        assert tag_response.status_code == 200

        # Verify ref_count is now 1
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 1

        # Create another tag via POST endpoint
        tag_response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/tags",
            json={"name": "post-latest", "artifact_id": artifact_id},
        )
        assert tag_response.status_code == 200

        # Verify ref_count is now 2
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.json()["ref_count"] == 2
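Taken together, the TestTagRefCount cases pin down one invariant: an artifact's ref_count equals the number of tags currently pointing at it. A hypothetical sketch of the upsert logic they imply (the backend implementation is not part of this diff, and `db` with its methods is a placeholder, not a real API):

    def upsert_tag(db, package_id, name, new_artifact_id):
        # Invariant under test: ref_count == number of tags referencing the artifact.
        existing = db.get_tag(package_id, name)
        if existing is None:
            db.create_tag(package_id, name, new_artifact_id)
            db.adjust_ref_count(new_artifact_id, +1)
        elif existing.artifact_id != new_artifact_id:
            db.repoint_tag(package_id, name, new_artifact_id)
            db.adjust_ref_count(existing.artifact_id, -1)
            db.adjust_ref_count(new_artifact_id, +1)
        # else: same artifact -> deliberate no-op, matching
        # test_ref_count_unchanged_when_tag_same_artifact.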
316
backend/tests/integration/test_teams_api.py
Normal file
@@ -0,0 +1,316 @@
"""
Integration tests for Teams API endpoints.
"""

import pytest


@pytest.mark.integration
class TestTeamsCRUD:
    """Tests for team creation, listing, updating, and deletion."""

    def test_create_team(self, integration_client, unique_test_id):
        """Test creating a new team."""
        team_name = f"Test Team {unique_test_id}"
        team_slug = f"test-team-{unique_test_id}"

        response = integration_client.post(
            "/api/v1/teams",
            json={
                "name": team_name,
                "slug": team_slug,
                "description": "A test team",
            },
        )
        assert response.status_code == 201, f"Failed to create team: {response.text}"

        data = response.json()
        assert data["name"] == team_name
        assert data["slug"] == team_slug
        assert data["description"] == "A test team"
        assert data["user_role"] == "owner"
        assert data["member_count"] == 1
        assert data["project_count"] == 0

        # Cleanup
        integration_client.delete(f"/api/v1/teams/{team_slug}")

    def test_create_team_duplicate_slug(self, integration_client, unique_test_id):
        """Test that duplicate team slugs are rejected."""
        team_slug = f"dup-team-{unique_test_id}"

        # Create first team
        response = integration_client.post(
            "/api/v1/teams",
            json={"name": "First Team", "slug": team_slug},
        )
        assert response.status_code == 201

        # Try to create second team with same slug
        response = integration_client.post(
            "/api/v1/teams",
            json={"name": "Second Team", "slug": team_slug},
        )
        assert response.status_code == 400
        assert "already exists" in response.json()["detail"].lower()

        # Cleanup
        integration_client.delete(f"/api/v1/teams/{team_slug}")

    def test_create_team_invalid_slug(self, integration_client):
        """Test that invalid team slugs are rejected."""
        invalid_slugs = [
            "UPPERCASE",
            "with spaces",
            "-starts-with-hyphen",
            "ends-with-hyphen-",
            "has--double--hyphen",
        ]

        for invalid_slug in invalid_slugs:
            response = integration_client.post(
                "/api/v1/teams",
                json={"name": "Test", "slug": invalid_slug},
            )
            assert response.status_code == 422, f"Slug '{invalid_slug}' should be invalid"

    def test_list_teams(self, integration_client, unique_test_id):
        """Test listing teams the user belongs to."""
        # Create a team
        team_slug = f"list-team-{unique_test_id}"
        integration_client.post(
            "/api/v1/teams",
            json={"name": "List Test Team", "slug": team_slug},
        )

        # List teams
        response = integration_client.get("/api/v1/teams")
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

        # Find our team
        team = next((t for t in data["items"] if t["slug"] == team_slug), None)
        assert team is not None
        assert team["name"] == "List Test Team"

        # Cleanup
        integration_client.delete(f"/api/v1/teams/{team_slug}")

    def test_get_team(self, integration_client, unique_test_id):
        """Test getting team details."""
        team_slug = f"get-team-{unique_test_id}"
        integration_client.post(
            "/api/v1/teams",
            json={"name": "Get Test Team", "slug": team_slug, "description": "Test"},
        )

        response = integration_client.get(f"/api/v1/teams/{team_slug}")
        assert response.status_code == 200

        data = response.json()
        assert data["slug"] == team_slug
        assert data["name"] == "Get Test Team"
        assert data["user_role"] == "owner"

        # Cleanup
        integration_client.delete(f"/api/v1/teams/{team_slug}")

    def test_get_nonexistent_team(self, integration_client):
        """Test getting a team that doesn't exist."""
        response = integration_client.get("/api/v1/teams/nonexistent-team-12345")
        assert response.status_code == 404

    def test_update_team(self, integration_client, unique_test_id):
        """Test updating team details."""
        team_slug = f"update-team-{unique_test_id}"
        integration_client.post(
            "/api/v1/teams",
            json={"name": "Original Name", "slug": team_slug},
        )

        response = integration_client.put(
            f"/api/v1/teams/{team_slug}",
            json={"name": "Updated Name", "description": "New description"},
        )
        assert response.status_code == 200

        data = response.json()
        assert data["name"] == "Updated Name"
        assert data["description"] == "New description"
        assert data["slug"] == team_slug  # Slug should not change

        # Cleanup
        integration_client.delete(f"/api/v1/teams/{team_slug}")

    def test_delete_team(self, integration_client, unique_test_id):
        """Test deleting a team."""
        team_slug = f"delete-team-{unique_test_id}"
        integration_client.post(
            "/api/v1/teams",
            json={"name": "Delete Test Team", "slug": team_slug},
        )

        response = integration_client.delete(f"/api/v1/teams/{team_slug}")
        assert response.status_code == 204

        # Verify team is gone
        response = integration_client.get(f"/api/v1/teams/{team_slug}")
        assert response.status_code == 404


@pytest.mark.integration
class TestTeamMembers:
    """Tests for team membership management."""

    @pytest.fixture
    def test_team(self, integration_client, unique_test_id):
        """Create a test team for member tests."""
        team_slug = f"member-team-{unique_test_id}"
        response = integration_client.post(
            "/api/v1/teams",
            json={"name": "Member Test Team", "slug": team_slug},
        )
        assert response.status_code == 201

        yield team_slug

        # Cleanup
        try:
            integration_client.delete(f"/api/v1/teams/{team_slug}")
        except Exception:
            pass

    def test_list_members(self, integration_client, test_team):
        """Test listing team members."""
        response = integration_client.get(f"/api/v1/teams/{test_team}/members")
        assert response.status_code == 200

        members = response.json()
        assert len(members) == 1
        assert members[0]["role"] == "owner"

    def test_owner_is_first_member(self, integration_client, test_team):
        """Test that the team creator is automatically the owner."""
        response = integration_client.get(f"/api/v1/teams/{test_team}/members")
        members = response.json()

        assert len(members) >= 1
        owner = next((m for m in members if m["role"] == "owner"), None)
        assert owner is not None


@pytest.mark.integration
class TestTeamProjects:
    """Tests for team project management."""

    @pytest.fixture
    def test_team(self, integration_client, unique_test_id):
        """Create a test team for project tests."""
        team_slug = f"proj-team-{unique_test_id}"
        response = integration_client.post(
            "/api/v1/teams",
            json={"name": "Project Test Team", "slug": team_slug},
        )
        assert response.status_code == 201

        data = response.json()
        yield {"slug": team_slug, "id": data["id"]}

        # Cleanup
        try:
            integration_client.delete(f"/api/v1/teams/{team_slug}")
        except Exception:
            pass

    def test_list_team_projects_empty(self, integration_client, test_team):
        """Test listing projects in an empty team."""
        response = integration_client.get(f"/api/v1/teams/{test_team['slug']}/projects")
        assert response.status_code == 200

        data = response.json()
        assert data["items"] == []
        assert data["pagination"]["total"] == 0

    def test_create_project_in_team(self, integration_client, test_team, unique_test_id):
        """Test creating a project within a team."""
        project_name = f"team-project-{unique_test_id}"

        response = integration_client.post(
            "/api/v1/projects",
            json={
                "name": project_name,
                "description": "A team project",
                "team_id": test_team["id"],
            },
        )
        assert response.status_code == 200, f"Failed to create project: {response.text}"

        data = response.json()
        assert data["team_id"] == test_team["id"]
        assert data["team_slug"] == test_team["slug"]

        # Verify project appears in team projects list
        response = integration_client.get(f"/api/v1/teams/{test_team['slug']}/projects")
        assert response.status_code == 200
        projects = response.json()["items"]
        assert any(p["name"] == project_name for p in projects)

        # Cleanup
        integration_client.delete(f"/api/v1/projects/{project_name}")

    def test_project_team_info_in_response(self, integration_client, test_team, unique_test_id):
        """Test that project responses include team info."""
        project_name = f"team-info-project-{unique_test_id}"

        # Create project in team
        integration_client.post(
            "/api/v1/projects",
            json={"name": project_name, "team_id": test_team["id"]},
        )

        # Get project and verify team info
        response = integration_client.get(f"/api/v1/projects/{project_name}")
        assert response.status_code == 200

        data = response.json()
        assert data["team_id"] == test_team["id"]
        assert data["team_slug"] == test_team["slug"]
        assert data["team_name"] == "Project Test Team"

        # Cleanup
        integration_client.delete(f"/api/v1/projects/{project_name}")


@pytest.mark.integration
class TestTeamAuthorization:
    """Tests for team-based authorization."""

    def test_cannot_delete_team_with_projects(self, integration_client, unique_test_id):
        """Test that teams with projects cannot be deleted."""
        team_slug = f"nodelete-team-{unique_test_id}"
        project_name = f"nodelete-project-{unique_test_id}"

        # Create team
        response = integration_client.post(
            "/api/v1/teams",
            json={"name": "No Delete Team", "slug": team_slug},
        )
        team_id = response.json()["id"]

        # Create project in team
        integration_client.post(
            "/api/v1/projects",
            json={"name": project_name, "team_id": team_id},
        )

        # Try to delete team - should fail
        response = integration_client.delete(f"/api/v1/teams/{team_slug}")
        assert response.status_code == 400
        assert "project" in response.json()["detail"].lower()

        # Cleanup - delete project first, then team
        integration_client.delete(f"/api/v1/projects/{project_name}")
        integration_client.delete(f"/api/v1/teams/{team_slug}")
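The rejected inputs in test_create_team_invalid_slug above are all consistent with one simple rule: lowercase alphanumeric runs joined by single hyphens, with no leading or trailing hyphen. A hypothetical validator matching exactly those cases (the backend's actual rule is not shown in this diff):

    import re

    # Lowercase/digit runs separated by single hyphens; no leading or trailing
    # hyphen, no doubled hyphen. Hypothetical, inferred from the test cases.
    SLUG_RE = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")


    def is_valid_slug(slug: str) -> bool:
        return SLUG_RE.fullmatch(slug) is not None

    # Sanity checks against the test's fixtures:
    assert not is_valid_slug("UPPERCASE")
    assert not is_valid_slug("with spaces")
    assert not is_valid_slug("-starts-with-hyphen")
    assert not is_valid_slug("ends-with-hyphen-")
    assert not is_valid_slug("has--double--hyphen")
    assert is_valid_slug("test-team-123")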
962
backend/tests/integration/test_upload_download_api.py
Normal file
@@ -0,0 +1,962 @@
"""
Integration tests for upload and download API endpoints.

Tests cover:
- Upload functionality and deduplication
- Download by tag and artifact ID
- Concurrent upload handling
- File size validation
- Upload failure cleanup
- S3 storage verification
"""

import os
import pytest
import io
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from tests.factories import (
    compute_sha256,
    upload_test_file,
    list_s3_objects_by_hash,
    s3_object_exists,
)


class TestUploadBasics:
    """Tests for basic upload functionality."""

    @pytest.mark.integration
    def test_upload_returns_200(self, integration_client, test_package):
        """Test upload with valid file returns 200."""
        project, package = test_package
        content = b"valid file upload test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )
        assert response.status_code == 200

    @pytest.mark.integration
    def test_upload_returns_artifact_id(self, integration_client, test_package):
        """Test upload returns the artifact ID (SHA256 hash)."""
        project_name, package_name = test_package
        content = b"basic upload test"
        expected_hash = compute_sha256(content)

        result = upload_test_file(
            integration_client, project_name, package_name, content, tag="v1"
        )

        assert result["artifact_id"] == expected_hash

    @pytest.mark.integration
    def test_upload_response_has_upload_id(self, integration_client, test_package):
        """Test upload response includes upload_id."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"upload id test",
            "uploadid.txt",
        )

        assert "upload_id" in result
        assert result["upload_id"] is not None

    @pytest.mark.integration
    def test_upload_response_has_content_type(self, integration_client, test_package):
        """Test upload response includes content_type."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"content type test",
            "content.txt",
        )

        assert "content_type" in result

    @pytest.mark.integration
    def test_upload_response_has_original_name(self, integration_client, test_package):
        """Test upload response includes original_name."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"original name test",
            "originalname.txt",
        )

        assert "original_name" in result
        assert result["original_name"] == "originalname.txt"

    @pytest.mark.integration
    def test_upload_response_has_created_at(self, integration_client, test_package):
        """Test upload response includes created_at."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"created at test",
            "createdat.txt",
        )

        assert "created_at" in result
        assert result["created_at"] is not None

    @pytest.mark.integration
    def test_upload_without_tag_succeeds(self, integration_client, test_package):
        """Test upload without tag succeeds (no tag created)."""
        project, package = test_package
        content = b"upload without tag test"
        expected_hash = compute_sha256(content)

        files = {"file": ("no_tag.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            # No tag parameter
        )
        assert response.status_code == 200
        result = response.json()
        assert result["artifact_id"] == expected_hash

        # Verify no tag was created - list tags and check
        tags_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags"
        )
        assert tags_response.status_code == 200
        tags = tags_response.json()
        # Filter for tags pointing to this artifact
        artifact_tags = [t for t in tags.get("items", tags) if t.get("artifact_id") == expected_hash]
        assert len(artifact_tags) == 0, "Tag should not be created when not specified"

    @pytest.mark.integration
    def test_upload_creates_artifact_in_database(self, integration_client, test_package):
        """Test upload creates artifact record in database."""
        project, package = test_package
        content = b"database artifact test"
        expected_hash = compute_sha256(content)

        upload_test_file(integration_client, project, package, content)

        # Verify artifact exists via API
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        artifact = response.json()
        assert artifact["id"] == expected_hash
        assert artifact["size"] == len(content)

    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_upload_creates_object_in_s3(self, integration_client, test_package):
        """Test upload creates object in S3 storage."""
        project, package = test_package
        content = b"s3 object creation test"
        expected_hash = compute_sha256(content)

        upload_test_file(integration_client, project, package, content)

        # Verify S3 object exists
        assert s3_object_exists(expected_hash), "S3 object should exist after upload"

    @pytest.mark.integration
    def test_upload_with_tag_creates_tag_record(self, integration_client, test_package):
        """Test upload with tag creates tag record."""
        project, package = test_package
        content = b"tag creation test"
        expected_hash = compute_sha256(content)
        tag_name = "my-tag-v1"

        upload_test_file(
            integration_client, project, package, content, tag=tag_name
        )

        # Verify tag exists
        tags_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags"
        )
        assert tags_response.status_code == 200
        tags = tags_response.json()
        tag_names = [t["name"] for t in tags.get("items", tags)]
        assert tag_name in tag_names


class TestDuplicateUploads:
    """Tests for duplicate upload deduplication behavior."""

    @pytest.mark.integration
    def test_same_file_twice_returns_same_artifact_id(
        self, integration_client, test_package
    ):
        """Test uploading same file twice returns same artifact_id."""
        project, package = test_package
        content = b"content uploaded twice for same artifact test"
        expected_hash = compute_sha256(content)

        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag="first"
        )
        assert result1["artifact_id"] == expected_hash

        # Second upload
        result2 = upload_test_file(
            integration_client, project, package, content, tag="second"
        )
        assert result2["artifact_id"] == expected_hash
        assert result1["artifact_id"] == result2["artifact_id"]

    @pytest.mark.integration
    def test_same_file_twice_increments_ref_count(
        self, integration_client, test_package
    ):
        """Test uploading same file twice increments ref_count to 2."""
        project, package = test_package
        content = b"content for ref count increment test"

        # First upload
        result1 = upload_test_file(
            integration_client, project, package, content, tag="v1"
        )
        assert result1["ref_count"] == 1

        # Second upload
        result2 = upload_test_file(
            integration_client, project, package, content, tag="v2"
        )
        assert result2["ref_count"] == 2

    @pytest.mark.integration
    def test_same_file_different_packages_shares_artifact(
        self, integration_client, test_project, unique_test_id
    ):
        """Test uploading same file to different packages shares artifact."""
        project = test_project
        content = f"content shared across packages {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Create two packages
        pkg1 = f"package-a-{unique_test_id}"
        pkg2 = f"package-b-{unique_test_id}"

        integration_client.post(
            f"/api/v1/project/{project}/packages",
            json={"name": pkg1, "description": "Package A"},
        )
        integration_client.post(
            f"/api/v1/project/{project}/packages",
            json={"name": pkg2, "description": "Package B"},
        )

        # Upload to first package
        result1 = upload_test_file(integration_client, project, pkg1, content, tag="v1")
        assert result1["artifact_id"] == expected_hash
        assert result1["deduplicated"] is False

        # Upload to second package
        result2 = upload_test_file(integration_client, project, pkg2, content, tag="v1")
        assert result2["artifact_id"] == expected_hash
        assert result2["deduplicated"] is True

    @pytest.mark.integration
    def test_same_file_different_filenames_shares_artifact(
        self, integration_client, test_package
    ):
        """Test uploading same file with different filenames shares artifact."""
        project, package = test_package
        content = b"content with different filenames"
        expected_hash = compute_sha256(content)

        # Upload with filename1
        result1 = upload_test_file(
            integration_client,
            project,
            package,
            content,
            filename="file1.bin",
            tag="v1",
        )
        assert result1["artifact_id"] == expected_hash

        # Upload with filename2
        result2 = upload_test_file(
            integration_client,
            project,
            package,
            content,
            filename="file2.bin",
            tag="v2",
        )
        assert result2["artifact_id"] == expected_hash
        assert result2["deduplicated"] is True


class TestDownload:
    """Tests for download functionality."""

    @pytest.mark.integration
    def test_download_by_tag(self, integration_client, test_package):
        """Test downloading artifact by tag name."""
        project, package = test_package
        original_content = b"download by tag test"

        upload_test_file(
            integration_client, project, package, original_content, tag="download-tag"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/download-tag",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == original_content

    @pytest.mark.integration
    def test_download_by_artifact_id(self, integration_client, test_package):
        """Test downloading artifact by artifact ID."""
        project, package = test_package
        original_content = b"download by id test"
        expected_hash = compute_sha256(original_content)

        upload_test_file(integration_client, project, package, original_content)

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/artifact:{expected_hash}",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == original_content

    @pytest.mark.integration
    def test_download_by_tag_prefix(self, integration_client, test_package):
        """Test downloading artifact using tag: prefix."""
        project, package = test_package
        original_content = b"download by tag prefix test"

        upload_test_file(
            integration_client, project, package, original_content, tag="prefix-tag"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/tag:prefix-tag",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == original_content

    @pytest.mark.integration
    def test_download_nonexistent_tag(self, integration_client, test_package):
        """Test downloading nonexistent tag returns 404."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/nonexistent-tag"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_nonexistent_artifact(self, integration_client, test_package):
        """Test downloading nonexistent artifact ID returns 404."""
        project, package = test_package
        fake_hash = "0" * 64

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/artifact:{fake_hash}"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_from_nonexistent_project(self, integration_client, unique_test_id):
        """Test downloading from nonexistent project returns 404."""
        response = integration_client.get(
            f"/api/v1/project/nonexistent-project-{unique_test_id}/somepackage/+/sometag"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_download_from_nonexistent_package(self, integration_client, test_project, unique_test_id):
        """Test downloading from nonexistent package returns 404."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/+/sometag"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_content_matches_original(self, integration_client, test_package):
        """Test downloaded content matches original exactly."""
        project, package = test_package
        original_content = b"exact content verification test data 12345"

        upload_test_file(
            integration_client, project, package, original_content, tag="verify"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/verify", params={"mode": "proxy"}
        )
        assert response.status_code == 200
        assert response.content == original_content

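# The positive download tests above exercise a small reference grammar after
# the "/+/" path segment: a bare name resolves as a tag, while "tag:" and
# "artifact:" prefixes force the interpretation. A hypothetical resolver
# consistent with those tests (not the backend's actual code):
#
#     def parse_ref(ref: str) -> tuple[str, str]:
#         if ref.startswith("artifact:"):
#             return ("artifact", ref[len("artifact:"):])
#         if ref.startswith("tag:"):
#             return ("tag", ref[len("tag:"):])
#         return ("tag", ref)
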
class TestDownloadHeaders:
    """Tests for download response headers."""

    @pytest.mark.integration
    def test_download_content_type_header(self, integration_client, test_package):
        """Test download returns correct Content-Type header."""
        project, package = test_package
        content = b"content type header test"

        upload_test_file(
            integration_client, project, package, content,
            filename="test.txt", tag="content-type-test"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/content-type-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # Content-Type should be set (either text/plain or application/octet-stream)
        assert "content-type" in response.headers

    @pytest.mark.integration
    def test_download_content_length_header(self, integration_client, test_package):
        """Test download returns correct Content-Length header."""
        project, package = test_package
content = b"content length header test - exactly 41 bytes!"
|
||||
        expected_length = len(content)

        upload_test_file(
            integration_client, project, package, content, tag="content-length-test"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/content-length-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "content-length" in response.headers
        assert int(response.headers["content-length"]) == expected_length

    @pytest.mark.integration
    def test_download_content_disposition_header(self, integration_client, test_package):
        """Test download returns correct Content-Disposition header."""
        project, package = test_package
        content = b"content disposition test"
        filename = "my-test-file.bin"

        upload_test_file(
            integration_client, project, package, content,
            filename=filename, tag="disposition-test"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/disposition-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "content-disposition" in response.headers
        disposition = response.headers["content-disposition"]
        assert "attachment" in disposition
        assert filename in disposition

    @pytest.mark.integration
    def test_download_checksum_headers(self, integration_client, test_package):
        """Test download returns checksum headers."""
        project, package = test_package
        content = b"checksum header test content"
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project, package, content, tag="checksum-headers"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/checksum-headers",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        # Check for checksum headers
        assert "x-checksum-sha256" in response.headers
        assert response.headers["x-checksum-sha256"] == expected_hash

    @pytest.mark.integration
    def test_download_etag_header(self, integration_client, test_package):
        """Test download returns ETag header (artifact ID)."""
        project, package = test_package
        content = b"etag header test"
        expected_hash = compute_sha256(content)

        upload_test_file(
            integration_client, project, package, content, tag="etag-test"
        )

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/etag-test",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert "etag" in response.headers
        # ETag should contain the artifact ID (hash)
        etag = response.headers["etag"].strip('"')
        assert etag == expected_hash


class TestConcurrentUploads:
    """Tests for concurrent upload handling."""

    @pytest.mark.integration
    def test_concurrent_uploads_same_file(self, integration_client, test_package):
        """Test concurrent uploads of same file handle deduplication correctly."""
        project, package = test_package
        content = b"content for concurrent upload test"
        expected_hash = compute_sha256(content)
        num_concurrent = 5

        # Create an API key for worker threads
        api_key_response = integration_client.post(
            "/api/v1/auth/keys",
            json={"name": "concurrent-test-key"},
        )
        assert api_key_response.status_code == 200, f"Failed to create API key: {api_key_response.text}"
        api_key = api_key_response.json()["key"]

        results = []
        errors = []

        def upload_worker(tag_suffix):
            try:
                from httpx import Client

                base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080")
                with Client(base_url=base_url, timeout=30.0) as client:
                    files = {
                        "file": (
                            f"concurrent-{tag_suffix}.bin",
                            io.BytesIO(content),
                            "application/octet-stream",
                        )
                    }
                    response = client.post(
                        f"/api/v1/project/{project}/{package}/upload",
                        files=files,
                        data={"tag": f"concurrent-{tag_suffix}"},
                        headers={"Authorization": f"Bearer {api_key}"},
                    )
                    if response.status_code == 200:
                        results.append(response.json())
                    else:
                        errors.append(f"Status {response.status_code}: {response.text}")
            except Exception as e:
                errors.append(str(e))

        with ThreadPoolExecutor(max_workers=num_concurrent) as executor:
            futures = [executor.submit(upload_worker, i) for i in range(num_concurrent)]
            for future in as_completed(futures):
                pass

        assert len(errors) == 0, f"Errors during concurrent uploads: {errors}"
        assert len(results) == num_concurrent

        # All should have same artifact_id
        artifact_ids = set(r["artifact_id"] for r in results)
        assert len(artifact_ids) == 1
        assert expected_hash in artifact_ids

        # Verify final ref_count
        response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert response.status_code == 200
        assert response.json()["ref_count"] == num_concurrent

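# Design note on TestConcurrentUploads: each worker opens its own httpx.Client
# against ORCHARD_TEST_URL and authenticates with a freshly created API key
# instead of reusing the integration_client fixture -- presumably so the five
# uploads really run in parallel over independent connections rather than
# through one shared client. (Interpretation, not documented behavior.)
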
class TestFileSizeValidation:
    """Tests for file size limits and empty file rejection."""

    @pytest.mark.integration
    def test_empty_file_rejected(self, integration_client, test_package):
        """Test empty files are rejected with appropriate error."""
        project, package = test_package

        files = {"file": ("empty.txt", io.BytesIO(b""), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )

        assert response.status_code in [422, 400]

    @pytest.mark.integration
    def test_small_valid_file_accepted(self, integration_client, test_package):
        """Test small (1 byte) files are accepted."""
        project, package = test_package
        content = b"X"

        result = upload_test_file(
            integration_client, project, package, content, tag="tiny"
        )

        assert result["artifact_id"] is not None
        assert result["size"] == 1

    @pytest.mark.integration
    def test_file_size_reported_correctly(
        self, integration_client, test_package, unique_test_id
    ):
        """Test file size is correctly reported in response."""
        project, package = test_package
        content = f"Test content for size check {unique_test_id}".encode()
        expected_size = len(content)

        result = upload_test_file(
            integration_client, project, package, content, tag="size-test"
        )

        assert result["size"] == expected_size

        # Also verify via artifact endpoint
        artifact_response = integration_client.get(
            f"/api/v1/artifact/{result['artifact_id']}"
        )
        assert artifact_response.json()["size"] == expected_size


class TestUploadFailureCleanup:
    """Tests for cleanup when uploads fail."""

    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_upload_failure_invalid_project_no_orphaned_s3(
        self, integration_client, unique_test_id
    ):
        """Test upload to non-existent project doesn't leave orphaned S3 objects."""
        content = f"content for orphan s3 test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/nonexistent-project-{unique_test_id}/nonexistent-pkg/upload",
            files=files,
            data={"tag": "test"},
        )

        assert response.status_code == 404

        # Verify no S3 object was created
        assert not s3_object_exists(expected_hash), (
            "Orphaned S3 object found after failed upload"
        )

    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_upload_failure_invalid_package_no_orphaned_s3(
        self, integration_client, test_project, unique_test_id
    ):
        """Test upload to non-existent package doesn't leave orphaned S3 objects."""
        content = f"content for orphan s3 test pkg {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
            files=files,
            data={"tag": "test"},
        )

        assert response.status_code == 404

        assert not s3_object_exists(expected_hash), (
            "Orphaned S3 object found after failed upload"
        )

    @pytest.mark.integration
    def test_upload_failure_no_orphaned_database_records(
        self, integration_client, test_project, unique_test_id
    ):
        """Test failed upload doesn't leave orphaned database records."""
        content = f"content for db orphan test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{test_project}/nonexistent-package-{unique_test_id}/upload",
            files=files,
            data={"tag": "test"},
        )

        assert response.status_code == 404

        artifact_response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
        assert artifact_response.status_code == 404, (
            "Orphaned artifact record found after failed upload"
        )


class TestS3StorageVerification:
    """Tests to verify S3 storage behavior."""

    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_s3_single_object_after_duplicates(
        self, integration_client, test_package, unique_test_id
    ):
        """Test S3 bucket contains only one object after duplicate uploads."""
        project, package = test_package
        content = f"content for s3 object count test {unique_test_id}".encode()
        expected_hash = compute_sha256(content)

        # Upload same content multiple times
        for tag in ["s3test1", "s3test2", "s3test3"]:
            upload_test_file(integration_client, project, package, content, tag=tag)

        # Verify only one S3 object exists
        s3_objects = list_s3_objects_by_hash(expected_hash)
        assert len(s3_objects) == 1, (
            f"Expected 1 S3 object, found {len(s3_objects)}: {s3_objects}"
        )

        # Verify object key follows expected pattern
        expected_key = (
            f"fruits/{expected_hash[:2]}/{expected_hash[2:4]}/{expected_hash}"
        )
        assert s3_objects[0] == expected_key

@pytest.mark.integration
|
||||
def test_artifact_table_single_row_after_duplicates(
|
||||
self, integration_client, test_package
|
||||
):
|
||||
"""Test artifact table contains only one row after duplicate uploads."""
|
||||
project, package = test_package
|
||||
content = b"content for single row test"
|
||||
expected_hash = compute_sha256(content)
|
||||
|
||||
# Upload same content multiple times
|
||||
for tag in ["v1", "v2", "v3"]:
|
||||
upload_test_file(integration_client, project, package, content, tag=tag)
|
||||
|
||||
# Query artifact
|
||||
response = integration_client.get(f"/api/v1/artifact/{expected_hash}")
|
||||
assert response.status_code == 200
|
||||
artifact = response.json()
|
||||
assert artifact["id"] == expected_hash
|
||||
assert artifact["ref_count"] == 3
|
||||
|
||||
|
||||
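

# A minimal sketch of the content-addressed key layout asserted above. The
# helper name `artifact_s3_key` is hypothetical; Orchard's actual storage
# code is not part of this diff. Sharding on the first two hex-pair
# directories keeps any single S3 prefix from accumulating every object.
def artifact_s3_key(sha256_hex: str) -> str:
    """Map a SHA256 hex digest to its sharded S3 object key."""
    return f"fruits/{sha256_hex[:2]}/{sha256_hex[2:4]}/{sha256_hex}"
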
class TestSecurityPathTraversal:
    """Tests for path traversal attack prevention.

    Note: Orchard uses content-addressable storage where files are stored by
    SHA256 hash, not filename. Filenames are metadata only and never used in
    file path construction, so path traversal in filenames is not a security
    vulnerability. These tests verify the system handles unusual inputs safely.
    """

    @pytest.mark.integration
    @pytest.mark.requires_direct_s3
    def test_path_traversal_in_filename_stored_safely(
        self, integration_client, test_package
    ):
        """Test filenames with path traversal are stored safely (as metadata only)."""
        project, package = test_package
        content = b"path traversal test content"
        expected_hash = compute_sha256(content)

        files = {
            "file": (
                "../../../etc/passwd",
                io.BytesIO(content),
                "application/octet-stream",
            )
        }
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"tag": "traversal-test"},
        )
        assert response.status_code == 200
        result = response.json()
        assert result["artifact_id"] == expected_hash
        s3_objects = list_s3_objects_by_hash(expected_hash)
        assert len(s3_objects) == 1
        assert ".." not in s3_objects[0]

    @pytest.mark.integration
    def test_path_traversal_in_package_name(self, integration_client, test_project):
        """Test package names with path traversal sequences are rejected."""
        response = integration_client.get(
            f"/api/v1/project/{test_project}/packages/../../../etc/passwd"
        )
        assert response.status_code in [400, 404, 422]

    @pytest.mark.integration
    def test_path_traversal_in_tag_name(self, integration_client, test_package):
        """Test tag names with path traversal are rejected or handled safely."""
        project, package = test_package
        content = b"tag traversal test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"tag": "../../../etc/passwd"},
        )
        assert response.status_code in [200, 400, 422]

    @pytest.mark.integration
    def test_download_path_traversal_in_ref(self, integration_client, test_package):
        """Test download ref with path traversal is rejected."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/../../../etc/passwd"
        )
        assert response.status_code in [400, 404, 422]


class TestSecurityMalformedRequests:
    """Tests for malformed request handling."""

    @pytest.mark.integration
    def test_upload_missing_file_field(self, integration_client, test_package):
        """Test upload without file field returns appropriate error."""
        project, package = test_package

        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            data={"tag": "no-file"},
        )
        assert response.status_code == 422

    @pytest.mark.integration
    def test_upload_null_bytes_in_filename(self, integration_client, test_package):
        """Test filename with null bytes is handled safely."""
        project, package = test_package
        content = b"null byte test"

        files = {
            "file": ("test\x00.bin", io.BytesIO(content), "application/octet-stream")
        }
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )
        assert response.status_code in [200, 400, 422]

    @pytest.mark.integration
    def test_upload_very_long_filename(self, integration_client, test_package):
        """Test very long filename is handled (truncated or rejected)."""
        project, package = test_package
        content = b"long filename test"
        long_filename = "a" * 1000 + ".bin"

        files = {
            "file": (long_filename, io.BytesIO(content), "application/octet-stream")
        }
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )
        assert response.status_code in [200, 400, 413, 422]

    @pytest.mark.integration
    def test_upload_special_characters_in_filename(
        self, integration_client, test_package
    ):
        """Test filenames with special characters are handled safely."""
        project, package = test_package
        content = b"special char test"

        special_filenames = [
            "test<script>.bin",
            'test"quote.bin',
            "test'apostrophe.bin",
            "test;semicolon.bin",
            "test|pipe.bin",
        ]

        for filename in special_filenames:
            files = {
                "file": (filename, io.BytesIO(content), "application/octet-stream")
            }
            response = integration_client.post(
                f"/api/v1/project/{project}/{package}/upload",
                files=files,
            )
            assert response.status_code in [200, 400, 422], (
                f"Unexpected status {response.status_code} for filename: {filename}"
            )

    @pytest.mark.integration
    def test_invalid_checksum_header_format(self, integration_client, test_package):
        """Test invalid X-Checksum-SHA256 header format is rejected."""
        project, package = test_package
        content = b"checksum test"

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": "not-a-valid-hash"},
        )
        assert response.status_code == 400
        assert "Invalid" in response.json().get("detail", "")

    @pytest.mark.integration
    def test_checksum_mismatch_rejected(self, integration_client, test_package):
        """Test upload with wrong checksum is rejected."""
        project, package = test_package
        content = b"checksum mismatch test"
        wrong_hash = "0" * 64

        files = {"file": ("test.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            headers={"X-Checksum-SHA256": wrong_hash},
        )
        assert response.status_code == 422
        assert "verification failed" in response.json().get("detail", "").lower()
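

# For reference: a client that wants the upload-time integrity check these
# tests exercise computes the digest itself and sends it in the
# X-Checksum-SHA256 header. A hedged sketch using `requests` (the host and
# project/package names are placeholders, not part of this diff):
#
#     import hashlib
#     import requests
#
#     content = open("app.tar.gz", "rb").read()
#     digest = hashlib.sha256(content).hexdigest()  # 64 lowercase hex chars
#     resp = requests.post(
#         "http://localhost:8080/api/v1/project/myproj/mypkg/upload",
#         files={"file": ("app.tar.gz", content)},
#         headers={"X-Checksum-SHA256": digest},
#     )
#     resp.raise_for_status()  # 400 for malformed digests, 422 on mismatch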

347  backend/tests/integration/test_version_api.py  Normal file
@@ -0,0 +1,347 @@
"""
Integration tests for package version API endpoints.

Tests cover:
- Version creation via upload
- Version auto-detection from filename
- Version listing and retrieval
- Download by version prefix
- Version deletion
"""

import pytest
import io
from tests.factories import compute_sha256


class TestVersionCreation:
    """Tests for creating versions via upload."""

    @pytest.mark.integration
    def test_upload_with_explicit_version(self, integration_client, test_package):
        """Test upload with explicit version parameter creates version record."""
        project, package = test_package
        content = b"version creation test"
        expected_hash = compute_sha256(content)

        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "1.0.0"},
        )
        assert response.status_code == 200
        result = response.json()
        assert result["artifact_id"] == expected_hash
        assert result.get("version") == "1.0.0"
        assert result.get("version_source") == "explicit"

    @pytest.mark.integration
    def test_upload_with_version_and_tag(self, integration_client, test_package):
        """Test upload with both version and tag creates both records."""
        project, package = test_package
        content = b"version and tag test"

        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "2.0.0", "tag": "latest"},
        )
        assert response.status_code == 200
        result = response.json()
        assert result.get("version") == "2.0.0"

        # Verify tag was also created
        tags_response = integration_client.get(
            f"/api/v1/project/{project}/{package}/tags"
        )
        assert tags_response.status_code == 200
        tags = tags_response.json()
        tag_names = [t["name"] for t in tags.get("items", tags)]
        assert "latest" in tag_names

    @pytest.mark.integration
    def test_duplicate_version_same_content_succeeds(self, integration_client, test_package):
        """Test uploading same version with same content succeeds (deduplication)."""
        project, package = test_package
        content = b"version dedup test"

        # First upload with version
        files1 = {"file": ("app1.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response1 = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files1,
            data={"version": "3.0.0"},
        )
        assert response1.status_code == 200

        # Second upload with same version and same content succeeds
        files2 = {"file": ("app2.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response2 = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files2,
            data={"version": "3.0.0"},
        )
        # This succeeds because it's the same artifact (deduplication)
        assert response2.status_code == 200


class TestVersionAutoDetection:
    """Tests for automatic version detection from filename."""

    @pytest.mark.integration
    def test_version_detected_from_filename_tarball(self, integration_client, test_package):
        """Test version is auto-detected from tarball filename or metadata."""
        project, package = test_package
        content = b"auto detect version tarball"

        files = {"file": ("myapp-1.2.3.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )
        assert response.status_code == 200
        result = response.json()
        assert result.get("version") == "1.2.3"
        # Version source can be 'filename' or 'metadata' depending on detection order
        assert result.get("version_source") in ["filename", "metadata"]

    @pytest.mark.integration
    def test_version_detected_from_filename_zip(self, integration_client, test_package):
        """Test version is auto-detected from zip filename."""
        project, package = test_package
        content = b"auto detect version zip"

        files = {"file": ("package-2.0.0.zip", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )
        assert response.status_code == 200
        result = response.json()
        assert result.get("version") == "2.0.0"
        assert result.get("version_source") == "filename"

    @pytest.mark.integration
    def test_explicit_version_overrides_filename(self, integration_client, test_package):
        """Test explicit version parameter overrides filename detection."""
        project, package = test_package
        content = b"explicit override test"

        files = {"file": ("myapp-1.0.0.tar.gz", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "9.9.9"},
        )
        assert response.status_code == 200
        result = response.json()
        assert result.get("version") == "9.9.9"
        assert result.get("version_source") == "explicit"

    @pytest.mark.integration
    def test_no_version_detected_from_plain_filename(self, integration_client, test_package):
        """Test no version is created for filenames without version pattern."""
        project, package = test_package
        content = b"no version in filename"

        files = {"file": ("plain-file.bin", io.BytesIO(content), "application/octet-stream")}
        response = integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
        )
        assert response.status_code == 200
        result = response.json()
        # Version should be None or not present
        assert result.get("version") is None
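

# The tests above imply a filename pattern of the form NAME-X.Y(.Z).EXT with
# an optional "v" prefix (see the v3.0.0 case in the next file). A minimal
# sketch of such an extractor; the regex and helper name are assumptions for
# illustration, not Orchard's actual detection code (which may also consult
# archive metadata):
import re  # used only by the illustrative sketch below

_VERSION_RE = re.compile(r"-v?(\d+(?:\.\d+)+)\.(?:tar\.gz|tgz|zip)$")


def detect_version_from_filename(filename: str) -> str | None:
    """Return "1.2.3" for "myapp-1.2.3.tar.gz", None for "plain-file.bin"."""
    match = _VERSION_RE.search(filename)
    return match.group(1) if match else None
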
class TestVersionListing:
    """Tests for listing and retrieving versions."""

    @pytest.mark.integration
    def test_list_versions(self, integration_client, test_package):
        """Test listing all versions for a package."""
        project, package = test_package

        # Create multiple versions
        for ver in ["1.0.0", "1.1.0", "2.0.0"]:
            content = f"version {ver} content".encode()
            files = {"file": (f"app-{ver}.tar.gz", io.BytesIO(content), "application/octet-stream")}
            response = integration_client.post(
                f"/api/v1/project/{project}/{package}/upload",
                files=files,
                data={"version": ver},
            )
            assert response.status_code == 200

        # List versions
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions"
        )
        assert response.status_code == 200
        data = response.json()
        versions = [v["version"] for v in data.get("items", data)]
        assert "1.0.0" in versions
        assert "1.1.0" in versions
        assert "2.0.0" in versions

    @pytest.mark.integration
    def test_get_specific_version(self, integration_client, test_package):
        """Test getting details for a specific version."""
        project, package = test_package
        content = b"specific version test"
        expected_hash = compute_sha256(content)

        # Create version
        files = {"file": ("app-4.0.0.tar.gz", io.BytesIO(content), "application/octet-stream")}
        integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "4.0.0"},
        )

        # Get version details
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions/4.0.0"
        )
        assert response.status_code == 200
        data = response.json()
        assert data["version"] == "4.0.0"
        assert data["artifact_id"] == expected_hash

    @pytest.mark.integration
    def test_get_nonexistent_version_returns_404(self, integration_client, test_package):
        """Test getting nonexistent version returns 404."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions/99.99.99"
        )
        assert response.status_code == 404


class TestDownloadByVersion:
    """Tests for downloading artifacts by version."""

    @pytest.mark.integration
    def test_download_by_version_prefix(self, integration_client, test_package):
        """Test downloading artifact using version: prefix."""
        project, package = test_package
        content = b"download by version test"
        expected_hash = compute_sha256(content)

        # Upload with version
        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "5.0.0"},
        )

        # Download by version prefix
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/version:5.0.0",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == content

    @pytest.mark.integration
    def test_download_nonexistent_version_returns_404(self, integration_client, test_package):
        """Test downloading nonexistent version returns 404."""
        project, package = test_package

        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/version:99.0.0"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_version_resolution_priority(self, integration_client, test_package):
        """Test that version: prefix explicitly resolves to version, not tag."""
        project, package = test_package
        version_content = b"this is the version content"
        tag_content = b"this is the tag content"

        # Create a version 6.0.0
        files1 = {"file": ("app-v.tar.gz", io.BytesIO(version_content), "application/octet-stream")}
        integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files1,
            data={"version": "6.0.0"},
        )

        # Create a tag named "6.0.0" pointing to different content
        files2 = {"file": ("app-t.tar.gz", io.BytesIO(tag_content), "application/octet-stream")}
        integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files2,
            data={"tag": "6.0.0"},
        )

        # Download with version: prefix should get version content
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/version:6.0.0",
            params={"mode": "proxy"},
        )
        assert response.status_code == 200
        assert response.content == version_content

        # Download with tag: prefix should get tag content
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/+/tag:6.0.0",
            params={"mode": "proxy"},
        )
        assert response2.status_code == 200
        assert response2.content == tag_content


class TestVersionDeletion:
    """Tests for deleting versions."""

    @pytest.mark.integration
    def test_delete_version(self, integration_client, test_package):
        """Test deleting a version."""
        project, package = test_package
        content = b"delete version test"

        # Create version
        files = {"file": ("app.tar.gz", io.BytesIO(content), "application/octet-stream")}
        integration_client.post(
            f"/api/v1/project/{project}/{package}/upload",
            files=files,
            data={"version": "7.0.0"},
        )

        # Verify version exists
        response = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions/7.0.0"
        )
        assert response.status_code == 200

        # Delete version - returns 204 No Content on success
        delete_response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/versions/7.0.0"
        )
        assert delete_response.status_code == 204

        # Verify version no longer exists
        response2 = integration_client.get(
            f"/api/v1/project/{project}/{package}/versions/7.0.0"
        )
        assert response2.status_code == 404

    @pytest.mark.integration
    def test_delete_nonexistent_version_returns_404(self, integration_client, test_package):
        """Test deleting nonexistent version returns 404."""
        project, package = test_package

        response = integration_client.delete(
            f"/api/v1/project/{project}/{package}/versions/99.0.0"
        )
        assert response.status_code == 404

412  backend/tests/integration/test_versions_api.py  Normal file
@@ -0,0 +1,412 @@
"""
Integration tests for version API endpoints.

Tests cover:
- Version creation via upload
- Version auto-detection from filename
- Version listing with pagination
- Version deletion
- Download by version ref
- ref_count behavior with version operations
"""

import pytest
from tests.factories import upload_test_file


class TestVersionCreation:
    """Tests for version creation during upload."""

    @pytest.mark.integration
    def test_upload_with_explicit_version(self, integration_client, test_package):
        """Test creating a version via explicit version parameter."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"version create test",
            tag="latest",
            version="1.0.0",
        )

        assert result["tag"] == "latest"
        assert result["version"] == "1.0.0"
        assert result["version_source"] == "explicit"
        assert result["artifact_id"]

    @pytest.mark.integration
    def test_upload_with_version_auto_detect_from_tarball(
        self, integration_client, test_package
    ):
        """Test version auto-detection from tarball filename pattern."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"auto version test",
            filename="myapp-2.1.0.tar.gz",
        )

        assert result["version"] == "2.1.0"
        # Tarball metadata extractor parses version from filename
        assert result["version_source"] == "metadata"

    @pytest.mark.integration
    def test_upload_with_version_auto_detect_v_prefix(
        self, integration_client, test_package
    ):
        """Test version auto-detection strips 'v' prefix from tarball filename."""
        project_name, package_name = test_package

        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"v prefix test",
            filename="package-v3.0.0.tar.gz",
        )

        assert result["version"] == "3.0.0"
        # Tarball metadata extractor parses version from filename
        assert result["version_source"] == "metadata"

    @pytest.mark.integration
    def test_upload_duplicate_version_warning(self, integration_client, test_package):
        """Test that re-uploading an existing version succeeds without error."""
        project_name, package_name = test_package

        # Upload with version 1.0.0
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"first upload",
            version="1.0.0",
        )

        # Upload different content with same version - should succeed but no new version
        result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"second upload different content",
            version="1.0.0",
        )

        # Upload succeeds but version may not be set (duplicate)
        assert result["artifact_id"]


class TestVersionCRUD:
    """Tests for version list, get, delete operations."""

    @pytest.mark.integration
    def test_list_versions(self, integration_client, test_package):
        """Test listing versions for a package."""
        project_name, package_name = test_package

        # Create some versions
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"v1 content",
            version="1.0.0",
        )
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"v2 content",
            version="2.0.0",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions"
        )
        assert response.status_code == 200

        data = response.json()
        assert "items" in data
        assert "pagination" in data

        versions = [v["version"] for v in data["items"]]
        assert "1.0.0" in versions
        assert "2.0.0" in versions

    @pytest.mark.integration
    def test_list_versions_with_artifact_info(self, integration_client, test_package):
        """Test that version list includes artifact metadata."""
        project_name, package_name = test_package

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"version with info",
            version="1.0.0",
            tag="release",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions"
        )
        assert response.status_code == 200

        data = response.json()
        assert len(data["items"]) >= 1

        version_item = next(
            (v for v in data["items"] if v["version"] == "1.0.0"), None
        )
        assert version_item is not None
        assert "size" in version_item
        assert "artifact_id" in version_item
        assert "tags" in version_item
        assert "release" in version_item["tags"]

    @pytest.mark.integration
    def test_get_version(self, integration_client, test_package):
        """Test getting a specific version."""
        project_name, package_name = test_package

        upload_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"get version test",
            version="3.0.0",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions/3.0.0"
        )
        assert response.status_code == 200

        data = response.json()
        assert data["version"] == "3.0.0"
        assert data["artifact_id"] == upload_result["artifact_id"]
        assert data["version_source"] == "explicit"

    @pytest.mark.integration
    def test_get_version_not_found(self, integration_client, test_package):
        """Test getting a non-existent version returns 404."""
        project_name, package_name = test_package

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions/99.99.99"
        )
        assert response.status_code == 404

    @pytest.mark.integration
    def test_delete_version(self, integration_client, test_package):
        """Test deleting a version."""
        project_name, package_name = test_package

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"delete version test",
            version="4.0.0",
        )

        # Delete version
        response = integration_client.delete(
            f"/api/v1/project/{project_name}/{package_name}/versions/4.0.0"
        )
        assert response.status_code == 204

        # Verify deleted
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions/4.0.0"
        )
        assert response.status_code == 404


class TestVersionDownload:
    """Tests for downloading artifacts by version reference."""

    @pytest.mark.integration
    def test_download_by_version_prefix(self, integration_client, test_package):
        """Test downloading an artifact using version: prefix."""
        project_name, package_name = test_package
        content = b"download by version test"

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            content,
            version="5.0.0",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/version:5.0.0",
            follow_redirects=False,
        )

        # Should either redirect or return content
        assert response.status_code in [200, 302, 307]

    @pytest.mark.integration
    def test_download_by_implicit_version(self, integration_client, test_package):
        """Test downloading an artifact using version number directly (no prefix)."""
        project_name, package_name = test_package
        content = b"implicit version download test"

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            content,
            version="6.0.0",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/6.0.0",
            follow_redirects=False,
        )

        # Should resolve version first (before tag)
        assert response.status_code in [200, 302, 307]

    @pytest.mark.integration
    def test_version_takes_precedence_over_tag(self, integration_client, test_package):
        """Test that version is checked before tag when resolving refs."""
        project_name, package_name = test_package

        # Upload with version "1.0"
        version_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"version content",
            version="1.0",
        )

        # Create a tag with the same name "1.0" pointing to a different artifact
        tag_result = upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"tag content different",
            tag="1.0",
        )

        # Download by "1.0" should resolve to the version, not the tag,
        # since the version:1.0 artifact was uploaded first
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/1.0",
            follow_redirects=False,
        )

        assert response.status_code in [200, 302, 307]
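

# A minimal sketch of the precedence the download tests above assert: an
# explicit "version:" or "tag:" prefix is honored, and a bare ref is tried
# as a version before a tag. `find_version` / `find_tag` are hypothetical
# stand-ins for the real lookups; this is not Orchard's actual resolver.
def resolve_ref(ref, find_version, find_tag):
    if ref.startswith("version:"):
        return find_version(ref.removeprefix("version:"))
    if ref.startswith("tag:"):
        return find_tag(ref.removeprefix("tag:"))
    # Bare refs: a version wins over a tag with the same name.
    return find_version(ref) or find_tag(ref)
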
class TestTagVersionEnrichment:
    """Tests for tag responses including version information."""

    @pytest.mark.integration
    def test_tag_response_includes_version(self, integration_client, test_package):
        """Test that tag responses include the version of the artifact."""
        project_name, package_name = test_package

        # Upload with both version and tag
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"enriched tag test",
            version="7.0.0",
            tag="stable",
        )

        # Get tag and check version field
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags/stable"
        )
        assert response.status_code == 200

        data = response.json()
        assert data["name"] == "stable"
        assert data["version"] == "7.0.0"

    @pytest.mark.integration
    def test_tag_list_includes_versions(self, integration_client, test_package):
        """Test that tag list responses include the version for each tag."""
        project_name, package_name = test_package

        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"list version test",
            version="8.0.0",
            tag="latest",
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/tags"
        )
        assert response.status_code == 200

        data = response.json()
        tag_item = next((t for t in data["items"] if t["name"] == "latest"), None)
        assert tag_item is not None
        assert tag_item.get("version") == "8.0.0"


class TestVersionPagination:
    """Tests for version listing pagination and sorting."""

    @pytest.mark.integration
    def test_versions_pagination(self, integration_client, test_package):
        """Test version listing respects pagination."""
        project_name, package_name = test_package

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions?limit=5"
        )
        assert response.status_code == 200

        data = response.json()
        assert "pagination" in data
        assert data["pagination"]["limit"] == 5

    @pytest.mark.integration
    def test_versions_sorting(self, integration_client, test_package):
        """Test version listing can be sorted."""
        project_name, package_name = test_package

        # Create versions with different timestamps
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"sort test 1",
            version="1.0.0",
        )
        upload_test_file(
            integration_client,
            project_name,
            package_name,
            b"sort test 2",
            version="2.0.0",
        )

        # Test ascending sort
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/versions?sort=version&order=asc"
        )
        assert response.status_code == 200

        data = response.json()
        versions = [v["version"] for v in data["items"]]
        # 1.0.0 should come before 2.0.0 when sorted ascending
        if len(versions) >= 2:
            assert versions.index("1.0.0") < versions.index("2.0.0")
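

# For reference, the pagination assertions above imply a list envelope shaped
# roughly like this. Field values are illustrative only; these tests assert
# just the "items" / "pagination" structure and the echoed "limit".
EXAMPLE_VERSIONS_RESPONSE = {
    "items": [
        {"version": "1.0.0", "artifact_id": "<sha256>", "size": 11, "tags": []},
        {"version": "2.0.0", "artifact_id": "<sha256>", "size": 11, "tags": []},
    ],
    "pagination": {"limit": 5},
}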

675  backend/tests/test_checksum_verification.py  Normal file
@@ -0,0 +1,675 @@
"""
Tests for checksum calculation, verification, and download verification.

This module tests:
- SHA256 hash computation (bytes and streams)
- HashingStreamWrapper incremental hashing
- VerifyingStreamWrapper with verification
- ChecksumMismatchError exception handling
- Download verification API endpoints
"""

import pytest
import hashlib
import io
from typing import Generator

from app.checksum import (
    compute_sha256,
    compute_sha256_stream,
    verify_checksum,
    verify_checksum_strict,
    is_valid_sha256,
    sha256_to_base64,
    HashingStreamWrapper,
    VerifyingStreamWrapper,
    ChecksumMismatchError,
    ChecksumError,
    InvalidHashFormatError,
    DEFAULT_CHUNK_SIZE,
)


# =============================================================================
# Test Data
# =============================================================================

# Known test vectors
TEST_CONTENT_HELLO = b"Hello, World!"
TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"

TEST_CONTENT_EMPTY = b""
TEST_HASH_EMPTY = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

TEST_CONTENT_BINARY = bytes(range(256))
TEST_HASH_BINARY = hashlib.sha256(TEST_CONTENT_BINARY).hexdigest()

# Invalid hashes for testing
INVALID_HASH_TOO_SHORT = "abcd1234"
INVALID_HASH_TOO_LONG = "a" * 65
INVALID_HASH_NON_HEX = "zzzz" + "a" * 60
INVALID_HASH_EMPTY = ""


# =============================================================================
# Unit Tests - SHA256 Computation
# =============================================================================


class TestComputeSHA256:
    """Tests for compute_sha256 function."""

    def test_known_content_matches_expected_hash(self):
        """Test SHA256 of known content matches pre-computed hash."""
        result = compute_sha256(TEST_CONTENT_HELLO)
        assert result == TEST_HASH_HELLO

    def test_returns_64_character_hex_string(self):
        """Test result is exactly 64 hex characters."""
        result = compute_sha256(TEST_CONTENT_HELLO)
        assert len(result) == 64
        assert all(c in "0123456789abcdef" for c in result)

    def test_returns_lowercase_hex(self):
        """Test result is lowercase."""
        result = compute_sha256(TEST_CONTENT_HELLO)
        assert result == result.lower()

    def test_empty_content_returns_empty_hash(self):
        """Test empty bytes returns SHA256 of empty content."""
        result = compute_sha256(TEST_CONTENT_EMPTY)
        assert result == TEST_HASH_EMPTY

    def test_deterministic_same_input_same_output(self):
        """Test same input always produces same output."""
        content = b"test content for determinism"
        result1 = compute_sha256(content)
        result2 = compute_sha256(content)
        assert result1 == result2

    def test_different_content_different_hash(self):
        """Test different content produces different hash."""
        hash1 = compute_sha256(b"content A")
        hash2 = compute_sha256(b"content B")
        assert hash1 != hash2

    def test_single_bit_change_different_hash(self):
        """Test single bit change produces completely different hash."""
        content1 = b"\x00" * 100
        content2 = b"\x00" * 99 + b"\x01"
        hash1 = compute_sha256(content1)
        hash2 = compute_sha256(content2)
        assert hash1 != hash2

    def test_binary_content(self):
        """Test hashing binary content with all byte values."""
        result = compute_sha256(TEST_CONTENT_BINARY)
        assert result == TEST_HASH_BINARY
        assert len(result) == 64

    def test_large_content(self):
        """Test hashing larger content (1MB)."""
        large_content = b"x" * (1024 * 1024)
        result = compute_sha256(large_content)
        expected = hashlib.sha256(large_content).hexdigest()
        assert result == expected

    def test_none_content_raises_error(self):
        """Test None content raises ChecksumError."""
        with pytest.raises(ChecksumError, match="Cannot compute hash of None"):
            compute_sha256(None)


class TestComputeSHA256Stream:
    """Tests for compute_sha256_stream function."""

    def test_file_like_object(self):
        """Test hashing from file-like object."""
        file_obj = io.BytesIO(TEST_CONTENT_HELLO)
        result = compute_sha256_stream(file_obj)
        assert result == TEST_HASH_HELLO

    def test_iterator(self):
        """Test hashing from iterator of chunks."""

        def chunk_iterator():
            yield b"Hello, "
            yield b"World!"

        result = compute_sha256_stream(chunk_iterator())
        assert result == TEST_HASH_HELLO

    def test_various_chunk_sizes_same_result(self):
        """Test different chunk sizes produce same hash."""
        content = b"x" * 10000
        expected = hashlib.sha256(content).hexdigest()

        for chunk_size in [1, 10, 100, 1000, 8192]:
            file_obj = io.BytesIO(content)
            result = compute_sha256_stream(file_obj, chunk_size=chunk_size)
            assert result == expected, f"Failed for chunk_size={chunk_size}"

    def test_single_byte_chunks(self):
        """Test with 1-byte chunks (edge case)."""
        content = b"ABC"
        file_obj = io.BytesIO(content)
        result = compute_sha256_stream(file_obj, chunk_size=1)
        expected = hashlib.sha256(content).hexdigest()
        assert result == expected

    def test_empty_stream(self):
        """Test empty stream returns empty content hash."""
        file_obj = io.BytesIO(b"")
        result = compute_sha256_stream(file_obj)
        assert result == TEST_HASH_EMPTY


# =============================================================================
# Unit Tests - Hash Validation
# =============================================================================


class TestIsValidSHA256:
    """Tests for is_valid_sha256 function."""

    def test_valid_hash_lowercase(self):
        """Test valid lowercase hash."""
        assert is_valid_sha256(TEST_HASH_HELLO) is True

    def test_valid_hash_uppercase(self):
        """Test valid uppercase hash."""
        assert is_valid_sha256(TEST_HASH_HELLO.upper()) is True

    def test_valid_hash_mixed_case(self):
        """Test valid mixed case hash."""
        mixed = TEST_HASH_HELLO[:32].upper() + TEST_HASH_HELLO[32:].lower()
        assert is_valid_sha256(mixed) is True

    def test_invalid_too_short(self):
        """Test hash that's too short."""
        assert is_valid_sha256(INVALID_HASH_TOO_SHORT) is False

    def test_invalid_too_long(self):
        """Test hash that's too long."""
        assert is_valid_sha256(INVALID_HASH_TOO_LONG) is False

    def test_invalid_non_hex(self):
        """Test hash with non-hex characters."""
        assert is_valid_sha256(INVALID_HASH_NON_HEX) is False

    def test_invalid_empty(self):
        """Test empty string."""
        assert is_valid_sha256(INVALID_HASH_EMPTY) is False

    def test_invalid_none(self):
        """Test None value."""
        assert is_valid_sha256(None) is False


class TestSHA256ToBase64:
    """Tests for sha256_to_base64 function."""

    def test_converts_to_base64(self):
        """Test conversion to base64."""
        result = sha256_to_base64(TEST_HASH_HELLO)
        # Verify it's valid base64
        import base64

        decoded = base64.b64decode(result)
        assert len(decoded) == 32  # SHA256 is 32 bytes

    def test_invalid_hash_raises_error(self):
        """Test invalid hash raises InvalidHashFormatError."""
        with pytest.raises(InvalidHashFormatError):
            sha256_to_base64(INVALID_HASH_TOO_SHORT)


# =============================================================================
# Unit Tests - Verification Functions
# =============================================================================


class TestVerifyChecksum:
    """Tests for verify_checksum function."""

    def test_matching_checksum_returns_true(self):
        """Test matching checksum returns True."""
        result = verify_checksum(TEST_CONTENT_HELLO, TEST_HASH_HELLO)
        assert result is True

    def test_mismatched_checksum_returns_false(self):
        """Test mismatched checksum returns False."""
        wrong_hash = "a" * 64
        result = verify_checksum(TEST_CONTENT_HELLO, wrong_hash)
        assert result is False

    def test_case_insensitive_comparison(self):
        """Test comparison is case-insensitive."""
        result = verify_checksum(TEST_CONTENT_HELLO, TEST_HASH_HELLO.upper())
        assert result is True

    def test_invalid_hash_format_raises_error(self):
        """Test invalid hash format raises error."""
        with pytest.raises(InvalidHashFormatError):
            verify_checksum(TEST_CONTENT_HELLO, INVALID_HASH_TOO_SHORT)


class TestVerifyChecksumStrict:
    """Tests for verify_checksum_strict function."""

    def test_matching_checksum_returns_none(self):
        """Test matching checksum doesn't raise."""
        # Should not raise
        verify_checksum_strict(TEST_CONTENT_HELLO, TEST_HASH_HELLO)

    def test_mismatched_checksum_raises_error(self):
        """Test mismatched checksum raises ChecksumMismatchError."""
        wrong_hash = "a" * 64
        with pytest.raises(ChecksumMismatchError) as exc_info:
            verify_checksum_strict(TEST_CONTENT_HELLO, wrong_hash)

        error = exc_info.value
        assert error.expected == wrong_hash.lower()
        assert error.actual == TEST_HASH_HELLO
        assert error.size == len(TEST_CONTENT_HELLO)

    def test_error_includes_context(self):
        """Test error includes artifact_id and s3_key context."""
        wrong_hash = "a" * 64
        with pytest.raises(ChecksumMismatchError) as exc_info:
            verify_checksum_strict(
                TEST_CONTENT_HELLO,
                wrong_hash,
                artifact_id="test-artifact-123",
                s3_key="fruits/ab/cd/abcd1234...",
            )

        error = exc_info.value
        assert error.artifact_id == "test-artifact-123"
        assert error.s3_key == "fruits/ab/cd/abcd1234..."


# =============================================================================
# Unit Tests - HashingStreamWrapper
# =============================================================================


class TestHashingStreamWrapper:
    """Tests for HashingStreamWrapper class."""

    def test_computes_correct_hash(self):
        """Test wrapper computes correct hash."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        # Consume the stream
        chunks = list(wrapper)

        # Verify hash
        assert wrapper.get_hash() == TEST_HASH_HELLO

    def test_yields_correct_chunks(self):
        """Test wrapper yields all content."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        chunks = list(wrapper)
        content = b"".join(chunks)

        assert content == TEST_CONTENT_HELLO

    def test_tracks_bytes_read(self):
        """Test bytes_read property tracks correctly."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        assert wrapper.bytes_read == 0
        list(wrapper)  # Consume
        assert wrapper.bytes_read == len(TEST_CONTENT_HELLO)

    def test_get_hash_before_iteration_consumes_stream(self):
        """Test get_hash() consumes stream if not already done."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        # Call get_hash without iterating
        hash_result = wrapper.get_hash()

        assert hash_result == TEST_HASH_HELLO
        assert wrapper.bytes_read == len(TEST_CONTENT_HELLO)

    def test_get_hash_if_complete_before_iteration_returns_none(self):
        """Test get_hash_if_complete returns None before iteration."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        assert wrapper.get_hash_if_complete() is None

    def test_get_hash_if_complete_after_iteration_returns_hash(self):
        """Test get_hash_if_complete returns hash after iteration."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        list(wrapper)  # Consume
        assert wrapper.get_hash_if_complete() == TEST_HASH_HELLO

    def test_custom_chunk_size(self):
        """Test custom chunk size is respected."""
        content = b"x" * 1000
        stream = io.BytesIO(content)
        wrapper = HashingStreamWrapper(stream, chunk_size=100)

        chunks = list(wrapper)

        # Each chunk except possibly the last should be exactly 100 bytes
        for chunk in chunks[:-1]:
            assert len(chunk) == 100

        # Total content should match
        assert b"".join(chunks) == content

    def test_iterator_interface(self):
        """Test wrapper supports the iterator interface."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = HashingStreamWrapper(stream)

        # Should be usable in a for loop
        result = b""
        for chunk in wrapper:
            result += chunk

        assert result == TEST_CONTENT_HELLO
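

# Taken together, these tests pin down a small contract: iterating yields the
# underlying chunks, the hash is updated as bytes flow through, bytes_read
# tracks progress, and get_hash() drains any unread remainder. A minimal
# sketch consistent with that contract -- not app.checksum's implementation:
class _SketchHashingStreamWrapper:
    def __init__(self, stream, chunk_size=8192):
        self._stream = stream
        self._chunk_size = chunk_size
        self._hasher = hashlib.sha256()
        self._done = False
        self.bytes_read = 0

    def __iter__(self):
        while True:
            chunk = self._stream.read(self._chunk_size)
            if not chunk:
                self._done = True
                return
            self._hasher.update(chunk)
            self.bytes_read += len(chunk)
            yield chunk

    def get_hash(self):
        for _ in self:  # drain whatever has not been read yet
            pass
        return self._hasher.hexdigest()

    def get_hash_if_complete(self):
        return self._hasher.hexdigest() if self._done else None
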
# =============================================================================
# Unit Tests - VerifyingStreamWrapper
# =============================================================================


class TestVerifyingStreamWrapper:
    """Tests for VerifyingStreamWrapper class."""

    def test_verify_success(self):
        """Test verification succeeds for matching content."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = VerifyingStreamWrapper(stream, TEST_HASH_HELLO)

        # Consume stream
        list(wrapper)

        # Verify should succeed
        result = wrapper.verify()
        assert result is True
        assert wrapper.is_verified is True

    def test_verify_failure_raises_error(self):
        """Test verification failure raises ChecksumMismatchError."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrong_hash = "a" * 64
        wrapper = VerifyingStreamWrapper(stream, wrong_hash)

        # Consume stream
        list(wrapper)

        # Verify should fail
        with pytest.raises(ChecksumMismatchError):
            wrapper.verify()

        assert wrapper.is_verified is False

    def test_verify_silent_success(self):
        """Test verify_silent returns True on success."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = VerifyingStreamWrapper(stream, TEST_HASH_HELLO)

        list(wrapper)

        result = wrapper.verify_silent()
        assert result is True

    def test_verify_silent_failure(self):
        """Test verify_silent returns False on failure."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrong_hash = "a" * 64
        wrapper = VerifyingStreamWrapper(stream, wrong_hash)

        list(wrapper)

        result = wrapper.verify_silent()
        assert result is False

    def test_invalid_expected_hash_raises_error(self):
        """Test invalid expected hash raises error at construction."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)

        with pytest.raises(InvalidHashFormatError):
            VerifyingStreamWrapper(stream, INVALID_HASH_TOO_SHORT)

    def test_on_failure_callback(self):
        """Test on_failure callback is called on verification failure."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrong_hash = "a" * 64

        callback_called = []

        def on_failure(error):
            callback_called.append(error)

        wrapper = VerifyingStreamWrapper(stream, wrong_hash, on_failure=on_failure)

        list(wrapper)

        with pytest.raises(ChecksumMismatchError):
            wrapper.verify()

        assert len(callback_called) == 1
        assert isinstance(callback_called[0], ChecksumMismatchError)

    def test_get_actual_hash_after_iteration(self):
        """Test get_actual_hash returns hash after iteration."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrapper = VerifyingStreamWrapper(stream, TEST_HASH_HELLO)

        # Before iteration
        assert wrapper.get_actual_hash() is None

        list(wrapper)

        # After iteration
        assert wrapper.get_actual_hash() == TEST_HASH_HELLO

    def test_includes_context_in_error(self):
        """Test error includes artifact_id and s3_key."""
        stream = io.BytesIO(TEST_CONTENT_HELLO)
        wrong_hash = "a" * 64
        wrapper = VerifyingStreamWrapper(
            stream,
            wrong_hash,
            artifact_id="test-artifact",
            s3_key="test/key",
        )

        list(wrapper)

        with pytest.raises(ChecksumMismatchError) as exc_info:
            wrapper.verify()

        error = exc_info.value
        assert error.artifact_id == "test-artifact"
        assert error.s3_key == "test/key"


# =============================================================================
# Unit Tests - ChecksumMismatchError
# =============================================================================


class TestChecksumMismatchError:
    """Tests for ChecksumMismatchError class."""

    def test_to_dict(self):
        """Test to_dict returns proper dictionary."""
        error = ChecksumMismatchError(
            expected="a" * 64,
            actual="b" * 64,
            artifact_id="test-123",
            s3_key="test/key",
            size=1024,
        )

        result = error.to_dict()

        assert result["error"] == "checksum_mismatch"
        assert result["expected"] == "a" * 64
        assert result["actual"] == "b" * 64
        assert result["artifact_id"] == "test-123"
        assert result["s3_key"] == "test/key"
        assert result["size"] == 1024

    def test_message_format(self):
        """Test error message format."""
        error = ChecksumMismatchError(
            expected="a" * 64,
            actual="b" * 64,
        )

        assert "verification failed" in str(error).lower()
        assert "expected" in str(error).lower()

    def test_custom_message(self):
        """Test custom message is used."""
        error = ChecksumMismatchError(
            expected="a" * 64,
            actual="b" * 64,
            message="Custom error message",
        )

        assert str(error) == "Custom error message"


# =============================================================================
# Corruption Simulation Tests
# =============================================================================


class TestCorruptionDetection:
    """Tests for detecting corrupted content."""

    def test_detect_truncated_content(self):
        """Test detection of truncated content."""
        original = TEST_CONTENT_HELLO
        truncated = original[:-1]  # Remove last byte

        original_hash = compute_sha256(original)
        truncated_hash = compute_sha256(truncated)

        assert original_hash != truncated_hash
        assert verify_checksum(truncated, original_hash) is False

    def test_detect_extra_bytes(self):
        """Test detection of content with extra bytes."""
        original = TEST_CONTENT_HELLO
        extended = original + b"\x00"  # Add null byte

        original_hash = compute_sha256(original)

        assert verify_checksum(extended, original_hash) is False

    def test_detect_single_bit_flip(self):
        """Test detection of single bit flip."""
        original = TEST_CONTENT_HELLO
        # Flip first bit of first byte
        corrupted = bytes([original[0] ^ 0x01]) + original[1:]

        original_hash = compute_sha256(original)

        assert verify_checksum(corrupted, original_hash) is False

    def test_detect_wrong_content(self):
        """Test detection of completely different content."""
        original = TEST_CONTENT_HELLO
        different = b"Something completely different"

        original_hash = compute_sha256(original)

        assert verify_checksum(different, original_hash) is False

    def test_detect_empty_vs_nonempty(self):
        """Test detection of empty content vs non-empty."""
        original = TEST_CONTENT_HELLO
        empty = b""

        original_hash = compute_sha256(original)

        assert verify_checksum(empty, original_hash) is False

    def test_streaming_detection_of_corruption(self):
        """Test VerifyingStreamWrapper detects corruption."""
        original = b"Original content that will be corrupted"
        original_hash = compute_sha256(original)

        # Corrupt the content
        corrupted = b"Corrupted content that is different"
        stream = io.BytesIO(corrupted)

        wrapper = VerifyingStreamWrapper(stream, original_hash)
        list(wrapper)  # Consume

        with pytest.raises(ChecksumMismatchError):
            wrapper.verify()


# =============================================================================
# Edge Case Tests
# =============================================================================


class TestEdgeCases:
    """Tests for edge cases and boundary conditions."""

    def test_null_bytes_in_content(self):
        """Test content with null bytes."""
        content = b"\x00\x00\x00"
        hash_result = compute_sha256(content)

        assert verify_checksum(content, hash_result) is True

    def test_whitespace_only_content(self):
        """Test content with only whitespace."""
        content = b" \t\n\r "
        hash_result = compute_sha256(content)

        assert verify_checksum(content, hash_result) is True

    def test_large_content_streaming(self):
        """Test streaming verification of large content."""
        # 1MB of content
        large_content = b"x" * (1024 * 1024)
        expected_hash = compute_sha256(large_content)

        stream = io.BytesIO(large_content)
        wrapper = VerifyingStreamWrapper(stream, expected_hash)

        # Consume and verify
        chunks = list(wrapper)
        assert wrapper.verify() is True
        assert b"".join(chunks) == large_content

    def test_unicode_bytes_content(self):
        """Test content with unicode bytes."""
        content = "Hello, 世界! 🌍".encode("utf-8")
        hash_result = compute_sha256(content)

        assert verify_checksum(content, hash_result) is True

    def test_maximum_chunk_size_larger_than_content(self):
        """Test chunk size larger than content."""
        content = b"small"
        stream = io.BytesIO(content)
        wrapper = HashingStreamWrapper(stream, chunk_size=1024 * 1024)

        chunks = list(wrapper)

        assert len(chunks) == 1
        assert chunks[0] == content
        assert wrapper.get_hash() == compute_sha256(content)
|
||||
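For orientation, a minimal sketch of the helpers these tests exercise. The names and signatures are taken from the tests themselves; the real implementations live in the application code, so treat this as an illustrative assumption, not the shipped source:

    import hashlib

    def compute_sha256(content: bytes) -> str:
        # Hex digest of the full payload (sketch; the real helper lives elsewhere)
        return hashlib.sha256(content).hexdigest()

    def verify_checksum(content: bytes, expected_hash: str) -> bool:
        return compute_sha256(content) == expected_hash

    class VerifyingStreamWrapper:
        """Yield chunks while hashing; call verify() after full consumption."""

        def __init__(self, stream, expected_hash, artifact_id=None, s3_key=None,
                     chunk_size=8192):
            self._stream = stream
            self._expected = expected_hash
            self._hasher = hashlib.sha256()
            self._consumed = False
            self._chunk_size = chunk_size
            self.artifact_id = artifact_id
            self.s3_key = s3_key

        def __iter__(self):
            while True:
                chunk = self._stream.read(self._chunk_size)
                if not chunk:
                    break
                self._hasher.update(chunk)
                yield chunk
            self._consumed = True

        def get_actual_hash(self):
            # None before iteration, hex digest afterwards (matches the tests)
            return self._hasher.hexdigest() if self._consumed else None

        def verify(self):
            if self.get_actual_hash() != self._expected:
                # ChecksumMismatchError is the error type asserted on above
                raise ChecksumMismatchError(
                    expected=self._expected,
                    actual=self.get_actual_hash(),
                    artifact_id=self.artifact_id,
                    s3_key=self.s3_key,
                )
            return True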
1080 backend/tests/test_dependencies.py Normal file
File diff suppressed because it is too large
460 backend/tests/test_download_verification.py Normal file
@@ -0,0 +1,460 @@
"""
Integration tests for download verification API endpoints.

These tests verify:
- Checksum headers in download responses
- Pre-verification mode
- Streaming verification mode
- HEAD request headers
- Verification failure handling
"""

import pytest
import hashlib
import base64
import io


# =============================================================================
# Test Fixtures
# =============================================================================


@pytest.fixture
def upload_test_file(integration_client):
    """
    Factory fixture to upload a test file and return its artifact ID.

    Usage:
        artifact_id = upload_test_file(project, package, content, tag="v1.0")
    """

    def _upload(project_name: str, package_name: str, content: bytes, tag: str = None):
        files = {
            "file": ("test-file.bin", io.BytesIO(content), "application/octet-stream")
        }
        data = {}
        if tag:
            data["tag"] = tag

        response = integration_client.post(
            f"/api/v1/project/{project_name}/{package_name}/upload",
            files=files,
            data=data,
        )
        assert response.status_code == 200, f"Upload failed: {response.text}"
        return response.json()["artifact_id"]

    return _upload


# =============================================================================
# Integration Tests - Download Headers
# =============================================================================


class TestDownloadChecksumHeaders:
    """Tests for checksum headers in download responses."""

    @pytest.mark.integration
    def test_download_includes_sha256_header(
        self, integration_client, test_package, upload_test_file
    ):
        """Test download response includes X-Checksum-SHA256 header."""
        project_name, package_name = test_package
        content = b"Content for SHA256 header test"

        # Upload file
        artifact_id = upload_test_file(
            project_name, package_name, content, tag="sha256-header-test"
        )

        # Download with proxy mode
        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/sha256-header-test",
            params={"mode": "proxy"},
        )

        assert response.status_code == 200
        assert "X-Checksum-SHA256" in response.headers
        assert response.headers["X-Checksum-SHA256"] == artifact_id

    @pytest.mark.integration
    def test_download_includes_etag_header(
        self, integration_client, test_package, upload_test_file
    ):
        """Test download response includes ETag header."""
        project_name, package_name = test_package
        content = b"Content for ETag header test"

        artifact_id = upload_test_file(
            project_name, package_name, content, tag="etag-test"
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/etag-test",
            params={"mode": "proxy"},
        )

        assert response.status_code == 200
        assert "ETag" in response.headers
        # ETag should be quoted artifact ID
        assert response.headers["ETag"] == f'"{artifact_id}"'

    @pytest.mark.integration
    def test_download_includes_digest_header(
        self, integration_client, test_package, upload_test_file
    ):
        """Test download response includes RFC 3230 Digest header."""
        project_name, package_name = test_package
        content = b"Content for Digest header test"
        sha256 = hashlib.sha256(content).hexdigest()

        upload_test_file(project_name, package_name, content, tag="digest-test")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/digest-test",
            params={"mode": "proxy"},
        )

        assert response.status_code == 200
        assert "Digest" in response.headers

        # Verify Digest format: sha-256=<base64>
        digest = response.headers["Digest"]
        assert digest.startswith("sha-256=")

        # Verify base64 content matches
        b64_hash = digest.split("=", 1)[1]
        decoded = base64.b64decode(b64_hash)
        assert decoded == bytes.fromhex(sha256)

    @pytest.mark.integration
    def test_download_includes_content_length_header(
        self, integration_client, test_package, upload_test_file
    ):
        """Test download response includes X-Content-Length header."""
        project_name, package_name = test_package
        content = b"Content for X-Content-Length test"

        upload_test_file(project_name, package_name, content, tag="content-length-test")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/content-length-test",
            params={"mode": "proxy"},
        )

        assert response.status_code == 200
        assert "X-Content-Length" in response.headers
        assert response.headers["X-Content-Length"] == str(len(content))

    @pytest.mark.integration
    def test_download_includes_verified_header_false(
        self, integration_client, test_package, upload_test_file
    ):
        """Test download without verification has X-Verified: false."""
        project_name, package_name = test_package
        content = b"Content for X-Verified false test"

        upload_test_file(project_name, package_name, content, tag="verified-false-test")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/verified-false-test",
            params={"mode": "proxy", "verify": "false"},
        )

        assert response.status_code == 200
        assert "X-Verified" in response.headers
        assert response.headers["X-Verified"] == "false"


# =============================================================================
# Integration Tests - Pre-Verification Mode
# =============================================================================


class TestPreVerificationMode:
    """Tests for pre-verification download mode."""

    @pytest.mark.integration
    def test_pre_verify_success(
        self, integration_client, test_package, upload_test_file
    ):
        """Test pre-verification mode succeeds for valid content."""
        project_name, package_name = test_package
        content = b"Content for pre-verification success test"

        upload_test_file(project_name, package_name, content, tag="pre-verify-success")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/pre-verify-success",
            params={"mode": "proxy", "verify": "true", "verify_mode": "pre"},
        )

        assert response.status_code == 200
        assert response.content == content
        assert "X-Verified" in response.headers
        assert response.headers["X-Verified"] == "true"

    @pytest.mark.integration
    def test_pre_verify_returns_complete_content(
        self, integration_client, test_package, upload_test_file
    ):
        """Test pre-verification returns complete content."""
        project_name, package_name = test_package
        # Use binary content to verify no corruption
        content = bytes(range(256)) * 10  # 2560 bytes of all byte values

        upload_test_file(project_name, package_name, content, tag="pre-verify-content")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/pre-verify-content",
            params={"mode": "proxy", "verify": "true", "verify_mode": "pre"},
        )

        assert response.status_code == 200
        assert response.content == content


# =============================================================================
# Integration Tests - Streaming Verification Mode
# =============================================================================


class TestStreamingVerificationMode:
    """Tests for streaming verification download mode."""

    @pytest.mark.integration
    def test_stream_verify_success(
        self, integration_client, test_package, upload_test_file
    ):
        """Test streaming verification mode succeeds for valid content."""
        project_name, package_name = test_package
        content = b"Content for streaming verification success test"

        upload_test_file(
            project_name, package_name, content, tag="stream-verify-success"
        )

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/stream-verify-success",
            params={"mode": "proxy", "verify": "true", "verify_mode": "stream"},
        )

        assert response.status_code == 200
        assert response.content == content
        # X-Verified is "pending" for streaming mode (verified after transfer)
        assert "X-Verified" in response.headers

    @pytest.mark.integration
    def test_stream_verify_large_content(
        self, integration_client, test_package, upload_test_file
    ):
        """Test streaming verification with larger content."""
        project_name, package_name = test_package
        # 100KB of content
        content = b"x" * (100 * 1024)

        upload_test_file(project_name, package_name, content, tag="stream-verify-large")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/stream-verify-large",
            params={"mode": "proxy", "verify": "true", "verify_mode": "stream"},
        )

        assert response.status_code == 200
        assert response.content == content


# =============================================================================
# Integration Tests - HEAD Request Headers
# =============================================================================


class TestHeadRequestHeaders:
    """Tests for HEAD request checksum headers."""

    @pytest.mark.integration
    def test_head_includes_sha256_header(
        self, integration_client, test_package, upload_test_file
    ):
        """Test HEAD request includes X-Checksum-SHA256 header."""
        project_name, package_name = test_package
        content = b"Content for HEAD SHA256 test"

        artifact_id = upload_test_file(
            project_name, package_name, content, tag="head-sha256-test"
        )

        response = integration_client.head(
            f"/api/v1/project/{project_name}/{package_name}/+/head-sha256-test"
        )

        assert response.status_code == 200
        assert "X-Checksum-SHA256" in response.headers
        assert response.headers["X-Checksum-SHA256"] == artifact_id

    @pytest.mark.integration
    def test_head_includes_etag(
        self, integration_client, test_package, upload_test_file
    ):
        """Test HEAD request includes ETag header."""
        project_name, package_name = test_package
        content = b"Content for HEAD ETag test"

        artifact_id = upload_test_file(
            project_name, package_name, content, tag="head-etag-test"
        )

        response = integration_client.head(
            f"/api/v1/project/{project_name}/{package_name}/+/head-etag-test"
        )

        assert response.status_code == 200
        assert "ETag" in response.headers
        assert response.headers["ETag"] == f'"{artifact_id}"'

    @pytest.mark.integration
    def test_head_includes_digest(
        self, integration_client, test_package, upload_test_file
    ):
        """Test HEAD request includes Digest header."""
        project_name, package_name = test_package
        content = b"Content for HEAD Digest test"

        upload_test_file(project_name, package_name, content, tag="head-digest-test")

        response = integration_client.head(
            f"/api/v1/project/{project_name}/{package_name}/+/head-digest-test"
        )

        assert response.status_code == 200
        assert "Digest" in response.headers
        assert response.headers["Digest"].startswith("sha-256=")

    @pytest.mark.integration
    def test_head_includes_content_length(
        self, integration_client, test_package, upload_test_file
    ):
        """Test HEAD request includes X-Content-Length header."""
        project_name, package_name = test_package
        content = b"Content for HEAD Content-Length test"

        upload_test_file(project_name, package_name, content, tag="head-length-test")

        response = integration_client.head(
            f"/api/v1/project/{project_name}/{package_name}/+/head-length-test"
        )

        assert response.status_code == 200
        assert "X-Content-Length" in response.headers
        assert response.headers["X-Content-Length"] == str(len(content))

    @pytest.mark.integration
    def test_head_no_body(self, integration_client, test_package, upload_test_file):
        """Test HEAD request returns no body."""
        project_name, package_name = test_package
        content = b"Content for HEAD no-body test"

        upload_test_file(project_name, package_name, content, tag="head-no-body-test")

        response = integration_client.head(
            f"/api/v1/project/{project_name}/{package_name}/+/head-no-body-test"
        )

        assert response.status_code == 200
        assert response.content == b""


# =============================================================================
# Integration Tests - Range Requests
# =============================================================================


class TestRangeRequestHeaders:
    """Tests for range request handling with checksum headers."""

    @pytest.mark.integration
    def test_range_request_includes_checksum_headers(
        self, integration_client, test_package, upload_test_file
    ):
        """Test range request includes checksum headers."""
        project_name, package_name = test_package
        content = b"Content for range request checksum header test"

        upload_test_file(project_name, package_name, content, tag="range-checksum-test")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/range-checksum-test",
            headers={"Range": "bytes=0-9"},
            params={"mode": "proxy"},
        )

        assert response.status_code == 206
        assert "X-Checksum-SHA256" in response.headers
        # Checksum is for the FULL file, not the range
        assert len(response.headers["X-Checksum-SHA256"]) == 64


# =============================================================================
# Integration Tests - Client-Side Verification
# =============================================================================


class TestClientSideVerification:
    """Tests demonstrating client-side verification using headers."""

    @pytest.mark.integration
    def test_client_can_verify_downloaded_content(
        self, integration_client, test_package, upload_test_file
    ):
        """Test client can verify downloaded content using header."""
        project_name, package_name = test_package
        content = b"Content for client-side verification test"

        upload_test_file(project_name, package_name, content, tag="client-verify-test")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/client-verify-test",
            params={"mode": "proxy"},
        )

        assert response.status_code == 200

        # Get expected hash from header
        expected_hash = response.headers["X-Checksum-SHA256"]

        # Compute actual hash of downloaded content
        actual_hash = hashlib.sha256(response.content).hexdigest()

        # Verify match
        assert actual_hash == expected_hash

    @pytest.mark.integration
    def test_client_can_verify_using_digest_header(
        self, integration_client, test_package, upload_test_file
    ):
        """Test client can verify using RFC 3230 Digest header."""
        project_name, package_name = test_package
        content = b"Content for Digest header verification"

        upload_test_file(project_name, package_name, content, tag="digest-verify-test")

        response = integration_client.get(
            f"/api/v1/project/{project_name}/{package_name}/+/digest-verify-test",
            params={"mode": "proxy"},
        )

        assert response.status_code == 200

        # Parse Digest header
        digest_header = response.headers["Digest"]
        assert digest_header.startswith("sha-256=")
        b64_hash = digest_header.split("=", 1)[1]
        expected_hash_bytes = base64.b64decode(b64_hash)

        # Compute actual hash of downloaded content
        actual_hash_bytes = hashlib.sha256(response.content).digest()

        # Verify match
        assert actual_hash_bytes == expected_hash_bytes
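The last two tests double as a recipe for consumers. A hedged sketch of the same check from an external client (requests usage, base_url, and the project/package/tag names are illustrative assumptions; the endpoint shape and headers come from the tests above):

    import base64
    import hashlib

    import requests  # assumption: any HTTP client works the same way

    url = f"{base_url}/api/v1/project/myproj/mypkg/+/v1.0"  # hypothetical names
    resp = requests.get(url, params={"mode": "proxy"})
    resp.raise_for_status()

    # Verify against the hex header
    assert hashlib.sha256(resp.content).hexdigest() == resp.headers["X-Checksum-SHA256"]

    # Verify against the RFC 3230 Digest header (sha-256=<base64>)
    b64 = resp.headers["Digest"].split("=", 1)[1]  # maxsplit=1: base64 may end in '='
    assert hashlib.sha256(resp.content).digest() == base64.b64decode(b64)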
1946 backend/tests/test_upstream_caching.py Normal file
File diff suppressed because it is too large
0 backend/tests/unit/__init__.py Normal file
95 backend/tests/unit/test_auth.py Normal file
@@ -0,0 +1,95 @@
"""Unit tests for authentication module."""

import pytest
from unittest.mock import patch, MagicMock


class TestCreateDefaultAdmin:
    """Tests for the create_default_admin function."""

    def test_create_default_admin_with_env_password(self):
        """Test that ORCHARD_ADMIN_PASSWORD env var sets admin password."""
        from app.auth import create_default_admin, verify_password

        # Create mock settings with custom password
        mock_settings = MagicMock()
        mock_settings.admin_password = "my-custom-password-123"

        # Mock database session
        mock_db = MagicMock()
        mock_db.query.return_value.count.return_value = 0  # No existing users

        # Track the user that gets created
        created_user = None

        def capture_user(user):
            nonlocal created_user
            created_user = user

        mock_db.add.side_effect = capture_user

        with patch("app.auth.get_settings", return_value=mock_settings):
            admin = create_default_admin(mock_db)

        # Verify the user was created
        assert mock_db.add.called
        assert created_user is not None
        assert created_user.username == "admin"
        assert created_user.is_admin is True
        # Password should NOT require change when set via env var
        assert created_user.must_change_password is False
        # Verify password was hashed correctly
        assert verify_password("my-custom-password-123", created_user.password_hash)

    def test_create_default_admin_with_default_password(self):
        """Test that default password 'changeme123' is used when env var not set."""
        from app.auth import create_default_admin, verify_password

        # Create mock settings with empty password (default)
        mock_settings = MagicMock()
        mock_settings.admin_password = ""

        # Mock database session
        mock_db = MagicMock()
        mock_db.query.return_value.count.return_value = 0  # No existing users

        # Track the user that gets created
        created_user = None

        def capture_user(user):
            nonlocal created_user
            created_user = user

        mock_db.add.side_effect = capture_user

        with patch("app.auth.get_settings", return_value=mock_settings):
            admin = create_default_admin(mock_db)

        # Verify the user was created
        assert mock_db.add.called
        assert created_user is not None
        assert created_user.username == "admin"
        assert created_user.is_admin is True
        # Password SHOULD require change when using default
        assert created_user.must_change_password is True
        # Verify default password was used
        assert verify_password("changeme123", created_user.password_hash)

    def test_create_default_admin_skips_when_users_exist(self):
        """Test that no admin is created when users already exist."""
        from app.auth import create_default_admin

        # Create mock settings
        mock_settings = MagicMock()
        mock_settings.admin_password = "some-password"

        # Mock database session with existing users
        mock_db = MagicMock()
        mock_db.query.return_value.count.return_value = 1  # Users exist

        with patch("app.auth.get_settings", return_value=mock_settings):
            result = create_default_admin(mock_db)

        # Should return None and not create any user
        assert result is None
        assert not mock_db.add.called
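The behaviour these three tests pin down, condensed into a hedged sketch (User, hash_password, and the get_settings call are assumptions inferred from the asserts, not the shipped app.auth source):

    def create_default_admin(db):
        # Never create a second admin once any user exists
        if db.query(User).count() > 0:
            return None
        configured = get_settings().admin_password
        user = User(
            username="admin",
            is_admin=True,
            password_hash=hash_password(configured or "changeme123"),
            # Force a password change only when falling back to the default
            must_change_password=not configured,
        )
        db.add(user)
        return user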
271 backend/tests/unit/test_models.py Normal file
@@ -0,0 +1,271 @@
"""
Unit tests for SQLAlchemy models.

Tests cover:
- Model instantiation and defaults
- Property aliases (sha256, format_metadata)
- Relationship definitions
- Constraint definitions
"""

import pytest
import uuid
from datetime import datetime


class TestArtifactModel:
    """Tests for the Artifact model."""

    @pytest.mark.unit
    def test_artifact_sha256_property(self):
        """Test sha256 property is an alias for id."""
        from app.models import Artifact

        artifact = Artifact(
            id="a" * 64,
            size=1024,
            created_by="test-user",
            s3_key="fruits/aa/aa/test",
        )

        assert artifact.sha256 == artifact.id
        assert artifact.sha256 == "a" * 64

    @pytest.mark.unit
    def test_artifact_format_metadata_alias(self):
        """Test format_metadata is an alias for artifact_metadata."""
        from app.models import Artifact

        test_metadata = {"format": "tarball", "version": "1.0.0"}
        artifact = Artifact(
            id="b" * 64,
            size=2048,
            created_by="test-user",
            s3_key="fruits/bb/bb/test",
            artifact_metadata=test_metadata,
        )

        assert artifact.format_metadata == test_metadata
        assert artifact.format_metadata == artifact.artifact_metadata

    @pytest.mark.unit
    def test_artifact_format_metadata_setter(self):
        """Test format_metadata setter updates artifact_metadata."""
        from app.models import Artifact

        artifact = Artifact(
            id="c" * 64,
            size=512,
            created_by="test-user",
            s3_key="fruits/cc/cc/test",
        )

        new_metadata = {"type": "rpm", "arch": "x86_64"}
        artifact.format_metadata = new_metadata

        assert artifact.artifact_metadata == new_metadata
        assert artifact.format_metadata == new_metadata

    @pytest.mark.unit
    def test_artifact_default_ref_count(self):
        """Test artifact ref_count column has default value of 1."""
        from app.models import Artifact

        # Check the column definition has the right default
        ref_count_col = Artifact.__table__.columns["ref_count"]
        assert ref_count_col.default is not None
        assert ref_count_col.default.arg == 1

    @pytest.mark.unit
    def test_artifact_default_metadata_is_dict(self):
        """Test artifact default metadata is an empty dict."""
        from app.models import Artifact

        artifact = Artifact(
            id="e" * 64,
            size=100,
            created_by="test-user",
            s3_key="fruits/ee/ee/test",
        )

        # Default might be None until saved, but the column default is dict
        assert artifact.artifact_metadata is None or isinstance(
            artifact.artifact_metadata, dict
        )


class TestProjectModel:
    """Tests for the Project model."""

    @pytest.mark.unit
    def test_project_default_is_public(self):
        """Test project is_public column has default value of True."""
        from app.models import Project

        # Check the column definition has the right default
        is_public_col = Project.__table__.columns["is_public"]
        assert is_public_col.default is not None
        assert is_public_col.default.arg is True

    @pytest.mark.unit
    def test_project_uuid_generation(self):
        """Test project generates UUID by default."""
        from app.models import Project

        project = Project(
            name="uuid-test-project",
            created_by="test-user",
        )

        # UUID should be set by default function
        assert project.id is not None or hasattr(Project.id, "default")


class TestPackageModel:
    """Tests for the Package model."""

    @pytest.mark.unit
    def test_package_default_format(self):
        """Test package format column has default value of 'generic'."""
        from app.models import Package

        # Check the column definition has the right default
        format_col = Package.__table__.columns["format"]
        assert format_col.default is not None
        assert format_col.default.arg == "generic"

    @pytest.mark.unit
    def test_package_default_platform(self):
        """Test package platform column has default value of 'any'."""
        from app.models import Package

        # Check the column definition has the right default
        platform_col = Package.__table__.columns["platform"]
        assert platform_col.default is not None
        assert platform_col.default.arg == "any"


class TestTagModel:
    """Tests for the Tag model."""

    @pytest.mark.unit
    def test_tag_requires_package_id(self):
        """Test tag requires package_id."""
        from app.models import Tag

        tag = Tag(
            name="v1.0.0",
            package_id=uuid.uuid4(),
            artifact_id="f" * 64,
            created_by="test-user",
        )

        assert tag.package_id is not None
        assert tag.artifact_id == "f" * 64


class TestTagHistoryModel:
    """Tests for the TagHistory model."""

    @pytest.mark.unit
    def test_tag_history_default_change_type(self):
        """Test tag history change_type column has default value of 'update'."""
        from app.models import TagHistory

        # Check the column definition has the right default
        change_type_col = TagHistory.__table__.columns["change_type"]
        assert change_type_col.default is not None
        assert change_type_col.default.arg == "update"

    @pytest.mark.unit
    def test_tag_history_allows_null_old_artifact(self):
        """Test tag history allows null old_artifact_id (for create events)."""
        from app.models import TagHistory

        history = TagHistory(
            tag_id=uuid.uuid4(),
            old_artifact_id=None,
            new_artifact_id="h" * 64,
            change_type="create",
            changed_by="test-user",
        )

        assert history.old_artifact_id is None


class TestUploadModel:
    """Tests for the Upload model."""

    @pytest.mark.unit
    def test_upload_default_deduplicated_is_false(self):
        """Test upload deduplicated column has default value of False."""
        from app.models import Upload

        # Check the column definition has the right default
        deduplicated_col = Upload.__table__.columns["deduplicated"]
        assert deduplicated_col.default is not None
        assert deduplicated_col.default.arg is False

    @pytest.mark.unit
    def test_upload_default_checksum_verified_is_true(self):
        """Test upload checksum_verified column has default value of True."""
        from app.models import Upload

        # Check the column definition has the right default
        checksum_verified_col = Upload.__table__.columns["checksum_verified"]
        assert checksum_verified_col.default is not None
        assert checksum_verified_col.default.arg is True


class TestAccessPermissionModel:
    """Tests for the AccessPermission model."""

    @pytest.mark.unit
    def test_access_permission_levels(self):
        """Test valid access permission levels."""
        from app.models import AccessPermission

        # This tests the check constraint values
        valid_levels = ["read", "write", "admin"]

        for level in valid_levels:
            permission = AccessPermission(
                project_id=uuid.uuid4(),
                user_id="test-user",
                level=level,
            )
            assert permission.level == level


class TestAuditLogModel:
    """Tests for the AuditLog model."""

    @pytest.mark.unit
    def test_audit_log_required_fields(self):
        """Test audit log has all required fields."""
        from app.models import AuditLog

        log = AuditLog(
            action="project.create",
            resource="/projects/test-project",
            user_id="test-user",
        )

        assert log.action == "project.create"
        assert log.resource == "/projects/test-project"
        assert log.user_id == "test-user"

    @pytest.mark.unit
    def test_audit_log_optional_details(self):
        """Test audit log can have optional details JSON."""
        from app.models import AuditLog

        details = {"old_value": "v1", "new_value": "v2"}
        log = AuditLog(
            action="tag.update",
            resource="/projects/test/packages/pkg/tags/latest",
            user_id="test-user",
            details=details,
        )

        assert log.details == details
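A hedged sketch of the alias properties the Artifact tests rely on (an assumed shape of app.models.Artifact; the base class and column definitions are elided):

    class Artifact(Base):  # Base and the SQLAlchemy columns are assumptions
        @property
        def sha256(self) -> str:
            # The primary key *is* the content hash
            return self.id

        @property
        def format_metadata(self):
            return self.artifact_metadata

        @format_metadata.setter
        def format_metadata(self, value):
            self.artifact_metadata = value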
439 backend/tests/unit/test_storage.py Normal file
@@ -0,0 +1,439 @@
"""
Unit tests for S3 storage layer.

Tests cover:
- SHA256 hash calculation and consistency
- Hash format validation (64-char hex)
- S3 key generation pattern
- Deduplication behavior (_exists method)
- Storage result computation (MD5, SHA1, size)
- Edge cases (empty files, large files, binary content)
"""

import pytest
import hashlib
import io
from tests.factories import (
    compute_sha256,
    TEST_CONTENT_HELLO,
    TEST_HASH_HELLO,
    TEST_CONTENT_BINARY,
    TEST_HASH_BINARY,
)


# =============================================================================
# Hash Computation Tests
# =============================================================================


class TestHashComputation:
    """Unit tests for hash calculation functionality."""

    @pytest.mark.unit
    def test_sha256_consistent_results(self):
        """Test SHA256 hash produces consistent results for identical content."""
        content = b"test content for hashing"

        # Compute hash multiple times
        hash1 = compute_sha256(content)
        hash2 = compute_sha256(content)
        hash3 = compute_sha256(content)

        assert hash1 == hash2 == hash3

    @pytest.mark.unit
    def test_sha256_different_content_different_hash(self):
        """Test SHA256 produces different hashes for different content."""
        content1 = b"content version 1"
        content2 = b"content version 2"

        hash1 = compute_sha256(content1)
        hash2 = compute_sha256(content2)

        assert hash1 != hash2

    @pytest.mark.unit
    def test_sha256_format_64_char_hex(self):
        """Test SHA256 hash is always 64 character lowercase hexadecimal."""
        test_cases = [
            b"",  # Empty
            b"a",  # Single char
            b"Hello, World!",  # Normal string
            bytes(range(256)),  # All byte values
            b"x" * 10000,  # Larger content
        ]

        for content in test_cases:
            hash_value = compute_sha256(content)

            # Check length
            assert len(hash_value) == 64, (
                f"Hash length should be 64, got {len(hash_value)}"
            )

            # Check lowercase
            assert hash_value == hash_value.lower(), "Hash should be lowercase"

            # Check hexadecimal
            assert all(c in "0123456789abcdef" for c in hash_value), (
                "Hash should be hex"
            )

    @pytest.mark.unit
    def test_sha256_known_value(self):
        """Test SHA256 produces expected hash for known input."""
        assert compute_sha256(TEST_CONTENT_HELLO) == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_sha256_binary_content(self):
        """Test SHA256 handles binary content correctly."""
        assert compute_sha256(TEST_CONTENT_BINARY) == TEST_HASH_BINARY

        # Test with null bytes
        content_with_nulls = b"\x00\x00test\x00\x00"
        hash_value = compute_sha256(content_with_nulls)
        assert len(hash_value) == 64

    @pytest.mark.unit
    def test_sha256_streaming_computation(self):
        """Test SHA256 can be computed in chunks (streaming)."""
        # Large content
        chunk_size = 8192
        total_size = chunk_size * 10  # 80KB
        content = b"x" * total_size

        # Direct computation
        direct_hash = compute_sha256(content)

        # Streaming computation
        hasher = hashlib.sha256()
        for i in range(0, total_size, chunk_size):
            hasher.update(content[i : i + chunk_size])
        streaming_hash = hasher.hexdigest()

        assert direct_hash == streaming_hash

    @pytest.mark.unit
    def test_sha256_order_matters(self):
        """Test that content order affects hash (not just content set)."""
        content1 = b"AB"
        content2 = b"BA"

        assert compute_sha256(content1) != compute_sha256(content2)


# =============================================================================
# Storage Hash Computation Tests
# =============================================================================


class TestStorageHashComputation:
    """Tests for hash computation in the storage layer."""

    @pytest.mark.unit
    def test_storage_computes_sha256(self, mock_storage):
        """Test storage layer correctly computes SHA256 hash."""
        content = TEST_CONTENT_HELLO
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        assert result.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_storage_computes_md5(self, mock_storage):
        """Test storage layer also computes MD5 hash."""
        content = TEST_CONTENT_HELLO
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        expected_md5 = hashlib.md5(content).hexdigest()
        assert result.md5 == expected_md5

    @pytest.mark.unit
    def test_storage_computes_sha1(self, mock_storage):
        """Test storage layer also computes SHA1 hash."""
        content = TEST_CONTENT_HELLO
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        expected_sha1 = hashlib.sha1(content).hexdigest()
        assert result.sha1 == expected_sha1

    @pytest.mark.unit
    def test_storage_returns_correct_size(self, mock_storage):
        """Test storage layer returns correct file size."""
        content = b"test content with known size"
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        assert result.size == len(content)

    @pytest.mark.unit
    def test_storage_generates_correct_s3_key(self, mock_storage):
        """Test storage layer generates correct S3 key pattern."""
        content = TEST_CONTENT_HELLO
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        # Key should be: fruits/{hash[:2]}/{hash[2:4]}/{hash}
        expected_key = (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )
        assert result.s3_key == expected_key


# =============================================================================
# Hash Edge Cases
# =============================================================================


class TestHashEdgeCases:
    """Edge case tests for hash computation."""

    @pytest.mark.unit
    def test_hash_empty_content_rejected(self, mock_storage):
        """Test that empty content is rejected."""
        from app.storage import HashComputationError

        file_obj = io.BytesIO(b"")

        with pytest.raises(HashComputationError):
            mock_storage._store_simple(file_obj)

    @pytest.mark.unit
    def test_hash_large_file_streaming(self, mock_storage):
        """Test hash computation for large files uses streaming."""
        # Create a 10MB file
        size = 10 * 1024 * 1024
        content = b"x" * size
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        expected_hash = compute_sha256(content)
        assert result.sha256 == expected_hash

    @pytest.mark.unit
    def test_hash_special_bytes(self):
        """Test hash handles all byte values correctly."""
        # All possible byte values
        content = bytes(range(256))
        hash_value = compute_sha256(content)

        assert len(hash_value) == 64
        assert hash_value == TEST_HASH_BINARY


# =============================================================================
# S3 Existence Check Tests
# =============================================================================


class TestExistsMethod:
    """Tests for the _exists() method that checks S3 object existence."""

    @pytest.mark.unit
    def test_exists_returns_true_for_existing_key(self, mock_storage, mock_s3_client):
        """Test _exists() returns True when object exists."""
        # Pre-populate the mock storage
        test_key = "fruits/df/fd/test-hash"
        mock_s3_client.objects[test_key] = b"content"

        result = mock_storage._exists(test_key)

        assert result is True

    @pytest.mark.unit
    def test_exists_returns_false_for_nonexistent_key(self, mock_storage):
        """Test _exists() returns False when object doesn't exist."""
        result = mock_storage._exists("fruits/no/ne/nonexistent-key")

        assert result is False

    @pytest.mark.unit
    def test_exists_handles_404_error(self, mock_storage):
        """Test _exists() handles 404 errors gracefully."""
        # The mock client raises ClientError for nonexistent keys
        result = mock_storage._exists("fruits/xx/yy/does-not-exist")

        assert result is False


# =============================================================================
# S3 Key Generation Tests
# =============================================================================


class TestS3KeyGeneration:
    """Tests for S3 key pattern generation."""

    @pytest.mark.unit
    def test_s3_key_pattern(self):
        """Test S3 key follows pattern: fruits/{hash[:2]}/{hash[2:4]}/{hash}"""
        test_hash = "abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"

        expected_key = f"fruits/{test_hash[:2]}/{test_hash[2:4]}/{test_hash}"
        # Expected: fruits/ab/cd/abcdef1234567890...

        assert expected_key == f"fruits/ab/cd/{test_hash}"

    @pytest.mark.unit
    def test_s3_key_generation_in_storage(self, mock_storage):
        """Test storage layer generates correct S3 key."""
        content = TEST_CONTENT_HELLO
        file_obj = io.BytesIO(content)

        result = mock_storage._store_simple(file_obj)

        expected_key = (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )
        assert result.s3_key == expected_key

    @pytest.mark.unit
    def test_s3_key_uses_sha256_hash(self, mock_storage):
        """Test S3 key is derived from SHA256 hash."""
        content = b"unique test content for key test"
        file_obj = io.BytesIO(content)
        expected_hash = compute_sha256(content)

        result = mock_storage._store_simple(file_obj)

        # Key should contain the hash
        assert expected_hash in result.s3_key


# =============================================================================
# Deduplication Behavior Tests
# =============================================================================


class TestDeduplicationBehavior:
    """Tests for deduplication (skip upload when exists)."""

    @pytest.mark.unit
    def test_skips_upload_when_exists(self, mock_storage, mock_s3_client):
        """Test storage skips S3 upload when artifact already exists."""
        content = TEST_CONTENT_HELLO
        s3_key = (
            f"fruits/{TEST_HASH_HELLO[:2]}/{TEST_HASH_HELLO[2:4]}/{TEST_HASH_HELLO}"
        )

        # Pre-populate storage (simulate existing artifact)
        mock_s3_client.objects[s3_key] = content

        # Track put_object calls
        original_put = mock_s3_client.put_object
        put_called = []

        def tracked_put(*args, **kwargs):
            put_called.append(True)
            return original_put(*args, **kwargs)

        mock_s3_client.put_object = tracked_put

        # Store the same content
        file_obj = io.BytesIO(content)
        result = mock_storage._store_simple(file_obj)

        # put_object should NOT have been called (deduplication)
        assert len(put_called) == 0
        assert result.sha256 == TEST_HASH_HELLO

    @pytest.mark.unit
    def test_uploads_when_not_exists(self, mock_storage, mock_s3_client):
        """Test storage uploads to S3 when artifact doesn't exist."""
        content = b"brand new unique content"
        content_hash = compute_sha256(content)
        s3_key = f"fruits/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}"

        # Ensure object doesn't exist
        assert s3_key not in mock_s3_client.objects

        # Store the content
        file_obj = io.BytesIO(content)
        result = mock_storage._store_simple(file_obj)

        # Object should now exist in mock storage
        assert s3_key in mock_s3_client.objects
        assert mock_s3_client.objects[s3_key] == content

    @pytest.mark.unit
    def test_returns_same_hash_for_duplicate(self, mock_storage, mock_s3_client):
        """Test storing same content twice returns same hash."""
        content = b"content to be stored twice"

        # First store
        file1 = io.BytesIO(content)
        result1 = mock_storage._store_simple(file1)

        # Second store (duplicate)
        file2 = io.BytesIO(content)
        result2 = mock_storage._store_simple(file2)

        assert result1.sha256 == result2.sha256
        assert result1.s3_key == result2.s3_key  # gitleaks:allow

    @pytest.mark.unit
    def test_different_content_different_keys(self, mock_storage):
        """Test different content produces different S3 keys."""
        content1 = b"first content"
        content2 = b"second content"

        file1 = io.BytesIO(content1)
        result1 = mock_storage._store_simple(file1)

        file2 = io.BytesIO(content2)
        result2 = mock_storage._store_simple(file2)

        assert result1.sha256 != result2.sha256
        assert result1.s3_key != result2.s3_key  # gitleaks:allow


# =============================================================================
# Deduplication Edge Cases
# =============================================================================


class TestDeduplicationEdgeCases:
    """Edge case tests for deduplication."""

    @pytest.mark.unit
    def test_same_content_different_filenames(self, mock_storage):
        """Test same content with different metadata is deduplicated."""
        content = b"identical content"

        # Store with "filename1"
        file1 = io.BytesIO(content)
        result1 = mock_storage._store_simple(file1)

        # Store with "filename2" (same content)
        file2 = io.BytesIO(content)
        result2 = mock_storage._store_simple(file2)

        # Both should have same hash (content-addressable)
        assert result1.sha256 == result2.sha256

    @pytest.mark.unit
    def test_whitespace_only_difference(self, mock_storage):
        """Test content differing only by whitespace produces different hashes."""
        content1 = b"test content"
        content2 = b"test  content"  # extra space in the middle
        content3 = b"test content "  # trailing space

        file1 = io.BytesIO(content1)
        file2 = io.BytesIO(content2)
        file3 = io.BytesIO(content3)

        result1 = mock_storage._store_simple(file1)
        result2 = mock_storage._store_simple(file2)
        result3 = mock_storage._store_simple(file3)

        # All should be different (content-addressable)
        assert len({result1.sha256, result2.sha256, result3.sha256}) == 3
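Taken together, these tests describe a content-addressable store. A hedged sketch of a _store_simple that would satisfy them (StorageResult, HashComputationError, and the boto3-style client attributes are assumptions drawn from the test code; a real implementation would hash in chunks, as the 10MB streaming test suggests):

    import hashlib

    def _store_simple(self, file_obj):
        data = file_obj.read()
        if not data:
            # Empty uploads are rejected outright
            raise HashComputationError("refusing to hash empty content")
        sha256 = hashlib.sha256(data).hexdigest()
        s3_key = f"fruits/{sha256[:2]}/{sha256[2:4]}/{sha256}"
        if not self._exists(s3_key):
            # Deduplication: identical content is only uploaded once
            self.s3_client.put_object(Bucket=self.bucket, Key=s3_key, Body=data)
        return StorageResult(
            sha256=sha256,
            md5=hashlib.md5(data).hexdigest(),
            sha1=hashlib.sha1(data).hexdigest(),
            size=len(data),
            s3_key=s3_key,
        )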
213 backend/tests/unit/test_team_auth.py Normal file
@@ -0,0 +1,213 @@
"""
Unit tests for TeamAuthorizationService.
"""

import pytest
from unittest.mock import MagicMock, patch
import uuid


class TestTeamRoleHierarchy:
    """Tests for team role hierarchy functions."""

    def test_get_team_role_rank(self):
        """Test role ranking."""
        from app.auth import get_team_role_rank

        assert get_team_role_rank("member") == 0
        assert get_team_role_rank("admin") == 1
        assert get_team_role_rank("owner") == 2
        assert get_team_role_rank("invalid") == -1

    def test_has_sufficient_team_role(self):
        """Test role sufficiency checks."""
        from app.auth import has_sufficient_team_role

        # Same role should be sufficient
        assert has_sufficient_team_role("member", "member") is True
        assert has_sufficient_team_role("admin", "admin") is True
        assert has_sufficient_team_role("owner", "owner") is True

        # Higher role should be sufficient for lower requirements
        assert has_sufficient_team_role("admin", "member") is True
        assert has_sufficient_team_role("owner", "member") is True
        assert has_sufficient_team_role("owner", "admin") is True

        # Lower role should NOT be sufficient for higher requirements
        assert has_sufficient_team_role("member", "admin") is False
        assert has_sufficient_team_role("member", "owner") is False
        assert has_sufficient_team_role("admin", "owner") is False


class TestTeamAuthorizationService:
    """Tests for TeamAuthorizationService class."""

    @pytest.fixture
    def mock_db(self):
        """Create a mock database session."""
        return MagicMock()

    @pytest.fixture
    def mock_user(self):
        """Create a mock user."""
        user = MagicMock()
        user.id = uuid.uuid4()
        user.username = "testuser"
        user.is_admin = False
        return user

    @pytest.fixture
    def mock_admin_user(self):
        """Create a mock admin user."""
        user = MagicMock()
        user.id = uuid.uuid4()
        user.username = "adminuser"
        user.is_admin = True
        return user

    def test_get_user_team_role_no_user(self, mock_db):
        """Test that None is returned for anonymous users."""
        from app.auth import TeamAuthorizationService

        service = TeamAuthorizationService(mock_db)
        result = service.get_user_team_role("team-id", None)
        assert result is None

    def test_get_user_team_role_admin_user(self, mock_db, mock_admin_user):
        """Test that system admins who are not members get admin role."""
        from app.auth import TeamAuthorizationService

        # Mock no membership found
        mock_db.query.return_value.filter.return_value.first.return_value = None

        service = TeamAuthorizationService(mock_db)
        result = service.get_user_team_role("team-id", mock_admin_user)
        assert result == "admin"

    def test_get_user_team_role_member(self, mock_db, mock_user):
        """Test getting role for a team member."""
        from app.auth import TeamAuthorizationService

        # Mock the membership query
        mock_membership = MagicMock()
        mock_membership.role = "member"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership

        service = TeamAuthorizationService(mock_db)
        result = service.get_user_team_role("team-id", mock_user)
        assert result == "member"

    def test_get_user_team_role_not_member(self, mock_db, mock_user):
        """Test getting role for a non-member."""
        from app.auth import TeamAuthorizationService

        # Mock no membership found
        mock_db.query.return_value.filter.return_value.first.return_value = None

        service = TeamAuthorizationService(mock_db)
        result = service.get_user_team_role("team-id", mock_user)
        assert result is None

    def test_check_team_access_member(self, mock_db, mock_user):
        """Test access check for member requiring member role."""
        from app.auth import TeamAuthorizationService

        # Mock the membership query
        mock_membership = MagicMock()
        mock_membership.role = "member"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership

        service = TeamAuthorizationService(mock_db)

        # Member should have member access
        assert service.check_team_access("team-id", mock_user, "member") is True
        # Member should not have admin access
        assert service.check_team_access("team-id", mock_user, "admin") is False
        # Member should not have owner access
        assert service.check_team_access("team-id", mock_user, "owner") is False

    def test_check_team_access_admin(self, mock_db, mock_user):
        """Test access check for admin role."""
        from app.auth import TeamAuthorizationService

        # Mock admin membership
        mock_membership = MagicMock()
        mock_membership.role = "admin"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership

        service = TeamAuthorizationService(mock_db)

        assert service.check_team_access("team-id", mock_user, "member") is True
        assert service.check_team_access("team-id", mock_user, "admin") is True
        assert service.check_team_access("team-id", mock_user, "owner") is False

    def test_check_team_access_owner(self, mock_db, mock_user):
        """Test access check for owner role."""
        from app.auth import TeamAuthorizationService

        # Mock owner membership
        mock_membership = MagicMock()
        mock_membership.role = "owner"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership

        service = TeamAuthorizationService(mock_db)

        assert service.check_team_access("team-id", mock_user, "member") is True
        assert service.check_team_access("team-id", mock_user, "admin") is True
        assert service.check_team_access("team-id", mock_user, "owner") is True

    def test_can_create_project(self, mock_db, mock_user):
        """Test can_create_project requires admin role."""
        from app.auth import TeamAuthorizationService

        service = TeamAuthorizationService(mock_db)

        # Member cannot create projects
        mock_membership = MagicMock()
        mock_membership.role = "member"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership
        assert service.can_create_project("team-id", mock_user) is False

        # Admin can create projects
        mock_membership.role = "admin"
        assert service.can_create_project("team-id", mock_user) is True

        # Owner can create projects
        mock_membership.role = "owner"
        assert service.can_create_project("team-id", mock_user) is True

    def test_can_manage_members(self, mock_db, mock_user):
        """Test can_manage_members requires admin role."""
        from app.auth import TeamAuthorizationService

        service = TeamAuthorizationService(mock_db)

        # Member cannot manage members
        mock_membership = MagicMock()
        mock_membership.role = "member"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership
        assert service.can_manage_members("team-id", mock_user) is False

        # Admin can manage members
        mock_membership.role = "admin"
        assert service.can_manage_members("team-id", mock_user) is True

    def test_can_delete_team(self, mock_db, mock_user):
        """Test can_delete_team requires owner role."""
        from app.auth import TeamAuthorizationService

        service = TeamAuthorizationService(mock_db)

        # Member cannot delete team
        mock_membership = MagicMock()
        mock_membership.role = "member"
        mock_db.query.return_value.filter.return_value.first.return_value = mock_membership
        assert service.can_delete_team("team-id", mock_user) is False

        # Admin cannot delete team
        mock_membership.role = "admin"
        assert service.can_delete_team("team-id", mock_user) is False

        # Only owner can delete team
        mock_membership.role = "owner"
        assert service.can_delete_team("team-id", mock_user) is True
container-test.sh (new executable file, 7 lines)
@@ -0,0 +1,7 @@
#!/bin/sh

echo "testing container"

# Without a sleep, local testing shows no output because attaching to the logs
# happens after the container is done executing this script.
sleep 1
docker-compose.local.yml (new file, 155 lines)
@@ -0,0 +1,155 @@
version: '3.8'

services:
  orchard-server:
    build:
      context: .
      dockerfile: Dockerfile.local
    ports:
      - "0.0.0.0:8080:8080"
    environment:
      - ORCHARD_SERVER_HOST=0.0.0.0
      - ORCHARD_SERVER_PORT=8080
      - ORCHARD_DATABASE_HOST=postgres
      - ORCHARD_DATABASE_PORT=5432
      - ORCHARD_DATABASE_USER=orchard
      - ORCHARD_DATABASE_PASSWORD=orchard_secret
      - ORCHARD_DATABASE_DBNAME=orchard
      - ORCHARD_DATABASE_SSLMODE=disable
      - ORCHARD_S3_ENDPOINT=http://minio:9000
      - ORCHARD_S3_REGION=us-east-1
      - ORCHARD_S3_BUCKET=orchard-artifacts
      - ORCHARD_S3_ACCESS_KEY_ID=minioadmin
      - ORCHARD_S3_SECRET_ACCESS_KEY=minioadmin
      - ORCHARD_S3_USE_PATH_STYLE=true
      - ORCHARD_REDIS_HOST=redis
      - ORCHARD_REDIS_PORT=6379
      # Higher rate limit for local development/testing
      - ORCHARD_LOGIN_RATE_LIMIT=1000/minute
      # Admin password - set in .env file or environment (see .env.example)
      - ORCHARD_ADMIN_PASSWORD=${ORCHARD_ADMIN_PASSWORD:-}
    depends_on:
      postgres:
        condition: service_healthy
      minio:
        condition: service_healthy
      redis:
        condition: service_healthy
    networks:
      - orchard-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1G

  postgres:
    image: postgres:16-alpine
    environment:
      - POSTGRES_USER=orchard
      - POSTGRES_PASSWORD=orchard_secret
      - POSTGRES_DB=orchard
    volumes:
      - postgres-data-local:/var/lib/postgresql/data
      - ./migrations:/docker-entrypoint-initdb.d:ro
    ports:
      - "127.0.0.1:5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U orchard -d orchard"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - orchard-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M

  minio:
    image: minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin
    volumes:
      - minio-data-local:/data
    ports:
      - "127.0.0.1:9000:9000"
      - "127.0.0.1:9001:9001"
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - orchard-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 512M

  minio-init:
    image: minio/mc:latest
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: >
      /bin/sh -c "
      mc alias set myminio http://minio:9000 minioadmin minioadmin;
      mc mb myminio/orchard-artifacts --ignore-existing;
      mc anonymous set download myminio/orchard-artifacts;
      exit 0;
      "
    networks:
      - orchard-network
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 128M

  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes
    volumes:
      - redis-data-local:/data
    ports:
      - "127.0.0.1:6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - orchard-network
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: '0.25'
          memory: 256M

volumes:
  postgres-data-local:
  minio-data-local:
  redis-data-local:

networks:
  orchard-network:
    driver: bridge
docker-compose.yml (modified)
@@ -6,7 +6,7 @@ services:
       context: .
       dockerfile: Dockerfile
     ports:
-      - "8080:8080"
+      - "127.0.0.1:8080:8080"
     environment:
       - ORCHARD_SERVER_HOST=0.0.0.0
       - ORCHARD_SERVER_PORT=8080
@@ -34,9 +34,24 @@ services:
     networks:
       - orchard-network
     restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
+      interval: 30s
+      timeout: 3s
+      start_period: 10s
+      retries: 3
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+    deploy:
+      resources:
+        limits:
+          cpus: '1.0'
+          memory: 1G

   postgres:
-    image: postgres:16-alpine
+    image: containers.global.bsf.tools/postgres:16-alpine
     environment:
       - POSTGRES_USER=orchard
       - POSTGRES_PASSWORD=orchard_secret
@@ -45,7 +60,7 @@ services:
       - postgres-data:/var/lib/postgresql/data
       - ./migrations:/docker-entrypoint-initdb.d:ro
     ports:
-      - "5432:5432"
+      - "127.0.0.1:5432:5432"
     healthcheck:
       test: ["CMD-SHELL", "pg_isready -U orchard -d orchard"]
       interval: 10s
@@ -54,9 +69,18 @@ services:
     networks:
       - orchard-network
     restart: unless-stopped
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+    deploy:
+      resources:
+        limits:
+          cpus: '0.5'
+          memory: 512M

   minio:
-    image: minio/minio:latest
+    image: containers.global.bsf.tools/minio/minio:latest
     command: server /data --console-address ":9001"
     environment:
       - MINIO_ROOT_USER=minioadmin
@@ -64,8 +88,8 @@ services:
     volumes:
       - minio-data:/data
     ports:
-      - "9000:9000"
-      - "9001:9001"
+      - "127.0.0.1:9000:9000"
+      - "127.0.0.1:9001:9001"
     healthcheck:
       test: ["CMD", "mc", "ready", "local"]
       interval: 10s
@@ -74,9 +98,18 @@ services:
     networks:
       - orchard-network
     restart: unless-stopped
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+    deploy:
+      resources:
+        limits:
+          cpus: '0.5'
+          memory: 512M

   minio-init:
-    image: minio/mc:latest
+    image: containers.global.bsf.tools/minio/mc:latest
     depends_on:
       minio:
         condition: service_healthy
@@ -89,14 +122,23 @@ services:
       "
     networks:
       - orchard-network
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+    deploy:
+      resources:
+        limits:
+          cpus: '0.25'
+          memory: 128M

   redis:
-    image: redis:7-alpine
+    image: containers.global.bsf.tools/redis:7-alpine
     command: redis-server --appendonly yes
     volumes:
       - redis-data:/data
     ports:
-      - "6379:6379"
+      - "127.0.0.1:6379:6379"
     healthcheck:
       test: ["CMD", "redis-cli", "ping"]
       interval: 10s
@@ -105,6 +147,15 @@ services:
     networks:
       - orchard-network
     restart: unless-stopped
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+    deploy:
+      resources:
+        limits:
+          cpus: '0.25'
+          memory: 256M

 volumes:
   postgres-data:
docs/design/deduplication-design.md (new file, 575 lines)
@@ -0,0 +1,575 @@
# Deduplication Design Document

This document defines Orchard's content-addressable storage and deduplication approach using SHA256 hashes.

## Table of Contents

1. [Overview](#overview)
2. [Hash Algorithm Selection](#hash-algorithm-selection)
3. [Content-Addressable Storage Model](#content-addressable-storage-model)
4. [S3 Key Derivation](#s3-key-derivation)
5. [Duplicate Detection Strategy](#duplicate-detection-strategy)
6. [Reference Counting Lifecycle](#reference-counting-lifecycle)
7. [Edge Cases and Error Handling](#edge-cases-and-error-handling)
8. [Collision Handling](#collision-handling)
9. [Performance Considerations](#performance-considerations)
10. [Operations Runbook](#operations-runbook)

---

## Overview

Orchard uses **whole-file deduplication** based on content hashing. When a file is uploaded:

1. The SHA256 hash of the entire file content is computed
2. The hash becomes the artifact's primary identifier
3. If a file with the same hash already exists, no duplicate is stored
4. Multiple tags/references can point to the same artifact

**Scope:** Orchard implements whole-file deduplication only. Chunk-level or block-level deduplication is out of scope for MVP.

---

## Hash Algorithm Selection

### Decision: SHA256

| Criteria | SHA256 | SHA1 | MD5 | Blake3 |
|----------|--------|------|-----|--------|
| Security | Strong (256-bit) | Weak (broken) | Weak (broken) | Strong |
| Speed | ~400 MB/s | ~600 MB/s | ~800 MB/s | ~1500 MB/s |
| Collision Resistance | 2^128 | Broken | Broken | 2^128 |
| Industry Adoption | Universal | Legacy | Legacy | Emerging |
| Tool Ecosystem | Excellent | Good | Good | Growing |

### Rationale

1. **Security**: SHA256 has no known practical collision attacks. SHA1 and MD5 are cryptographically broken.

2. **Collision Resistance**: With 256-bit output, the probability of accidental collision is approximately 2^-128 (~3 × 10^-39). To have a 50% chance of collision, you would need approximately 2^128 unique files.

3. **Industry Standard**: SHA256 is the de facto standard for content-addressable storage (Git, Docker, npm, etc.).

4. **Performance**: While Blake3 is faster, SHA256 throughput (~400 MB/s) exceeds typical network bandwidth for uploads. The bottleneck is I/O, not hashing.

5. **Tooling**: Universal support in all languages, operating systems, and verification tools.

### Migration Path

If a future algorithm change is needed (e.g., SHA3 or Blake3):

1. **Database**: Add `hash_algorithm` column to artifacts table (default: 'sha256')
2. **S3 Keys**: New algorithm uses different prefix (e.g., `fruits-sha3/` vs `fruits/`)
3. **API**: Accept algorithm hint in upload, return algorithm in responses
4. **Migration**: Background job to re-hash existing artifacts if needed

**Current Implementation**: Single algorithm (SHA256), no algorithm versioning required for MVP.

---

## Content-Addressable Storage Model

### Core Principles

1. **Identity = Content**: The artifact ID IS the SHA256 hash of its content
2. **Immutability**: Content cannot change after storage (same hash = same content)
3. **Deduplication**: Same content uploaded twice results in single storage
4. **Metadata Independence**: Files with identical content but different names/types are deduplicated

### Data Model

```
Artifact {
    id: VARCHAR(64) PRIMARY KEY    -- SHA256 hash (lowercase hex)
    size: BIGINT                   -- File size in bytes
    ref_count: INTEGER             -- Number of references
    s3_key: VARCHAR(1024)          -- S3 storage path
    checksum_md5: VARCHAR(32)      -- Secondary checksum
    checksum_sha1: VARCHAR(40)     -- Secondary checksum
    ...
}

Tag {
    id: UUID PRIMARY KEY
    name: VARCHAR(255)
    package_id: UUID FK
    artifact_id: VARCHAR(64) FK    -- Points to Artifact.id (SHA256)
}
```

### Hash Format

- Algorithm: SHA256
- Output: 64 lowercase hexadecimal characters
- Example: `dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f`

---

## S3 Key Derivation

### Key Structure

```
fruits/{hash[0:2]}/{hash[2:4]}/{full_hash}
```

Example for hash `dffd6021bb2bd5b0...`:
```
fruits/df/fd/dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f
```

### Rationale for Prefix Sharding

1. **S3 Performance**: S3 partitions by key prefix. Distributing across prefixes improves throughput.

2. **Filesystem Compatibility**: When using filesystem-backed storage, avoids single directory with millions of files.

3. **Distribution**: With 2-character prefixes (256 combinations each level), provides 65,536 (256 x 256) top-level buckets.

### Bucket Distribution Analysis

Assuming uniformly distributed SHA256 hashes:

| Artifacts | Files per Prefix (avg) | Max per Prefix (99.9%) |
|-----------|------------------------|------------------------|
| 100,000 | 1.5 | 10 |
| 1,000,000 | 15 | 50 |
| 10,000,000 | 152 | 250 |
| 100,000,000 | 1,525 | 2,000 |

The two-level prefix provides excellent distribution up to hundreds of millions of artifacts.
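
The derivation itself is pure string slicing. A minimal sketch, assuming a validated lowercase-hex input (the helper name is illustrative, not the actual backend function):

```python
def derive_s3_key(sha256_hash: str) -> str:
    """Derive the sharded S3 key for an artifact from its SHA256 hash."""
    if len(sha256_hash) != 64 or not all(c in "0123456789abcdef" for c in sha256_hash):
        raise ValueError("expected 64 lowercase hex characters")
    return f"fruits/{sha256_hash[0:2]}/{sha256_hash[2:4]}/{sha256_hash}"

# derive_s3_key("dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f")
# -> "fruits/df/fd/dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
```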

---

## Duplicate Detection Strategy

### Upload Flow

```
┌─────────────────────────────────────────────────────────────────┐
│ UPLOAD REQUEST                                                  │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ 1. VALIDATE: Check file size limits (min/max)                   │
│    - Empty files (0 bytes)  → Reject with 422                   │
│    - Exceeds max_file_size  → Reject with 413                   │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ 2. COMPUTE HASH: Stream file through SHA256/MD5/SHA1            │
│    - Use 8MB chunks for memory efficiency                       │
│    - Single pass for all three hashes                           │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ 3. DERIVE S3 KEY: fruits/{hash[0:2]}/{hash[2:4]}/{hash}         │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ 4. CHECK EXISTENCE: HEAD request to S3 for derived key          │
│    - Retry up to 3 times on transient failures                  │
└─────────────────────────────────────────────────────────────────┘
                                │
                ┌───────────────┴───────────────┐
                ▼                               ▼
┌─────────────────────────┐   ┌─────────────────────────────────┐
│ EXISTS: Deduplicated    │   │ NOT EXISTS: Upload to S3        │
│ - Verify size matches   │   │ - PUT object (or multipart)     │
│ - Skip S3 upload        │   │ - Abort on failure              │
│ - Log saved bytes       │   └─────────────────────────────────┘
└─────────────────────────┘                   │
                │                             │
                └──────────────┬──────────────┘
                               ▼
┌─────────────────────────────────────────────────────────────────┐
│ 5. DATABASE: Create/update artifact record                      │
│    - Use row locking to prevent race conditions                 │
│    - ref_count managed by SQL triggers                          │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ 6. CREATE TAG: If tag provided, create/update tag               │
│    - SQL trigger increments ref_count                           │
└─────────────────────────────────────────────────────────────────┘
```

### Hash Computation

**Memory Requirements:**
- Chunk size: 8MB (`HASH_CHUNK_SIZE`)
- Working memory: ~25MB (8MB chunk + hash states)
- Independent of file size (streaming)

**Throughput:**
- SHA256 alone: ~400 MB/s on modern CPU
- With MD5 + SHA1: ~300 MB/s (parallel computation)
- Typical bottleneck: Network I/O, not CPU
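
A minimal sketch of the single-pass computation described above (the function name and return shape are assumptions; only the 8MB chunk size comes from this document, and the three digests are updated sequentially here for simplicity):

```python
import hashlib

HASH_CHUNK_SIZE = 8 * 1024 * 1024  # 8MB, per the design above

def compute_hashes(fileobj):
    """Stream a file once, computing SHA256, MD5, and SHA1 plus total size."""
    sha256, md5, sha1 = hashlib.sha256(), hashlib.md5(), hashlib.sha1()
    size = 0
    while chunk := fileobj.read(HASH_CHUNK_SIZE):
        sha256.update(chunk)
        md5.update(chunk)
        sha1.update(chunk)
        size += len(chunk)
    return sha256.hexdigest(), md5.hexdigest(), sha1.hexdigest(), size
```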

### Multipart Upload Threshold

Files larger than 100MB use S3 multipart upload:
- First pass: Stream to compute hashes
- If not duplicate: Seek to start, upload in 10MB parts
- On failure: Abort multipart upload (no orphaned parts)
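
A hedged boto3 sketch of that abort-on-failure contract (the surrounding service wiring is assumed; the 10MB part size comes from this document):

```python
import boto3

PART_SIZE = 10 * 1024 * 1024  # 10MB parts, per the design above

def multipart_upload(s3, bucket: str, key: str, fileobj) -> None:
    """Upload in parts; abort the multipart upload on any failure."""
    mpu = s3.create_multipart_upload(Bucket=bucket, Key=key)
    upload_id = mpu["UploadId"]
    try:
        parts = []
        part_number = 1
        while chunk := fileobj.read(PART_SIZE):
            resp = s3.upload_part(
                Bucket=bucket, Key=key, PartNumber=part_number,
                UploadId=upload_id, Body=chunk,
            )
            parts.append({"PartNumber": part_number, "ETag": resp["ETag"]})
            part_number += 1
        s3.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={"Parts": parts},
        )
    except Exception:
        # Leave no orphaned parts behind
        s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        raise
```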

---

## Reference Counting Lifecycle

### What Constitutes a "Reference"

A reference is a **Tag** pointing to an artifact. Each tag increments the ref_count by 1.

**Uploads do NOT directly increment ref_count** - only tag creation does.

### Lifecycle

```
┌─────────────────────────────────────────────────────────────────┐
│ CREATE: New artifact uploaded                                   │
│   - ref_count = 0 (no tags yet)                                 │
│   - Artifact exists but is "orphaned"                           │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ TAG CREATED: Tag points to artifact                             │
│   - SQL trigger: ref_count += 1                                 │
│   - Artifact is now referenced                                  │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ TAG UPDATED: Tag moved to different artifact                    │
│   - SQL trigger on old artifact: ref_count -= 1                 │
│   - SQL trigger on new artifact: ref_count += 1                 │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ TAG DELETED: Tag removed                                        │
│   - SQL trigger: ref_count -= 1                                 │
│   - If ref_count = 0, artifact is orphaned                      │
└─────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌─────────────────────────────────────────────────────────────────┐
│ GARBAGE COLLECTION: Clean up orphaned artifacts                 │
│   - Triggered manually via admin endpoint                       │
│   - Finds artifacts where ref_count = 0                         │
│   - Deletes from S3 and database                                │
└─────────────────────────────────────────────────────────────────┘
```

### SQL Triggers

Three triggers manage ref_count automatically:

1. **`tags_ref_count_insert_trigger`**: On tag INSERT, increment target artifact's ref_count
2. **`tags_ref_count_delete_trigger`**: On tag DELETE, decrement target artifact's ref_count
3. **`tags_ref_count_update_trigger`**: On tag UPDATE (artifact_id changed), decrement old, increment new

### Garbage Collection

**Trigger**: Manual admin endpoint (`POST /api/v1/admin/garbage-collect`)

**Process**:
1. Query artifacts where `ref_count = 0`
2. For each orphan:
   - Delete from S3 (`DELETE fruits/xx/yy/hash`)
   - Delete from database
   - Log deletion

**Safety**:
- Dry-run mode by default (`?dry_run=true`)
- Limit per run (`?limit=100`)
- Check constraint prevents ref_count < 0
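
A minimal sketch of the collection pass, assuming the SQLAlchemy `Artifact` model and a boto3 client are available (names are illustrative; the defaults mirror the safety knobs above):

```python
def garbage_collect(db, s3, bucket: str, dry_run: bool = True, limit: int = 100):
    """Delete up to `limit` orphaned artifacts (ref_count = 0); report ids."""
    orphans = (
        db.query(Artifact)
        .filter(Artifact.ref_count == 0)
        .limit(limit)
        .all()
    )
    deleted = []
    for artifact in orphans:
        if not dry_run:
            s3.delete_object(Bucket=bucket, Key=artifact.s3_key)
            db.delete(artifact)
        deleted.append(artifact.id)
    if not dry_run:
        db.commit()
    return deleted
```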

---

## Edge Cases and Error Handling

### Empty Files

- **Behavior**: Rejected with HTTP 422
- **Reason**: Empty content has deterministic hash but provides no value
- **Error**: "Empty files are not allowed"

### Maximum File Size

- **Default Limit**: 10GB (`ORCHARD_MAX_FILE_SIZE`)
- **Configurable**: Via environment variable
- **Behavior**: Rejected with HTTP 413 before upload begins
- **Error**: "File too large. Maximum size is 10GB"

### Concurrent Upload of Same Content

**Race Condition Scenario**: Two clients upload identical content simultaneously.

**Handling**:
1. **S3 Level**: Both compute same hash, both check existence, both may upload
2. **Database Level**: Row-level locking with `SELECT ... FOR UPDATE`
3. **Outcome**: One creates artifact, other sees it exists, both succeed
4. **Trigger Safety**: SQL triggers are atomic per row

**No Data Corruption**: Both writers put identical bytes to the same S3 key, so the final object is the same regardless of write order.
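
A minimal SQLAlchemy sketch of the locking step, assuming the `Artifact` model from the data model section (`HashCollisionError` is introduced under Collision Handling below; the helper name is illustrative):

```python
def get_or_create_artifact(db, sha256_hash: str, size: int, s3_key: str):
    """Serialize concurrent creators of the same artifact row."""
    artifact = (
        db.query(Artifact)
        .filter(Artifact.id == sha256_hash)
        .with_for_update()  # SELECT ... FOR UPDATE
        .first()
    )
    if artifact is None:
        # A concurrent INSERT of the same id surfaces as a PK conflict to retry
        artifact = Artifact(id=sha256_hash, size=size, ref_count=0, s3_key=s3_key)
        db.add(artifact)
    elif artifact.size != size:
        # Same hash, different size: treat as a potential collision
        raise HashCollisionError(f"Hash collision detected for {sha256_hash}: size mismatch")
    db.commit()
    return artifact
```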

### Upload Interrupted

**Scenario**: Upload fails after hash computed but before S3 write completes.

**Simple Upload**:
- S3 put_object is atomic - either completes or fails entirely
- No cleanup needed

**Multipart Upload**:
- On any failure, `abort_multipart_upload` is called
- S3 cleans up partial parts
- No orphaned data

### DB Exists but S3 Missing

**Detection**: Download request finds artifact in DB but S3 returns 404.

**Current Behavior**: Return 500 error to client.

**Recovery Options** (not yet implemented):
1. Mark artifact for re-upload (set flag, notify admins)
2. Decrement ref_count to trigger garbage collection
3. Return specific error code for client retry

**Recommended**: Log critical alert, return 503 with retry hint.

### S3 Exists but DB Missing

**Detection**: Orphan - file in S3 with no corresponding DB record.

**Cause**:
- Failed transaction after S3 upload
- Manual S3 manipulation
- Database restore from backup

**Recovery**:
- Garbage collection won't delete (no DB record to query)
- Requires S3 bucket scan + DB reconciliation
- Manual admin task (out of scope for MVP)

### Network Timeout During Existence Check

**Behavior**: Retry up to 3 times with adaptive backoff.

**After Retries Exhausted**: Raise `S3ExistenceCheckError`, return 503 to client.

**Rationale**: Don't upload without knowing if duplicate exists (prevents orphans).

---

## Collision Handling

### SHA256 Collision Probability

For random inputs, the probability of collision is:

```
P(collision) ≈ n² / 2^257

Where n = number of unique files
```

| Files | Collision Probability |
|-------|----------------------|
| 10^9 (1 billion) | 10^-59 |
| 10^12 (1 trillion) | 10^-53 |
| 10^18 | 10^-41 |

**Practical Assessment**: You would need on the order of 2^128 (~10^38) unique files to reach even a 50% collision chance - far beyond any conceivable deployment.
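
Plugging the first table row into the formula makes the scale concrete (the table rounds to the nearest order of magnitude):

```
n = 10^9:  P ≈ (10^9)² / 2^257 = 10^18 / (2.3 × 10^77) ≈ 4 × 10^-60
```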

### Detection Mechanism

Despite near-zero probability, we detect potential collisions by:

1. **Size Comparison**: If hash matches but sizes differ, CRITICAL alert
2. **ETag Verification**: S3 ETag provides secondary check

### Handling Procedure

If collision detected (size mismatch):

1. **Log CRITICAL alert** with full details
2. **Reject upload** with 500 error
3. **Do NOT overwrite** existing content
4. **Notify operations** for manual investigation

```python
raise HashCollisionError(
    f"Hash collision detected for {sha256_hash}: size mismatch"
)
```

### MVP Position

For MVP, we:
- Detect collisions via size mismatch
- Log and alert on detection
- Reject conflicting upload
- Accept that true collisions are practically impossible

No active mitigation (e.g., storing hash + size as composite key) is needed.

---

## Performance Considerations

### Hash Computation Overhead

| File Size | Hash Time | Upload Time (100 Mbps) | Overhead |
|-----------|-----------|------------------------|----------|
| 10 MB | 25ms | 800ms | 3% |
| 100 MB | 250ms | 8s | 3% |
| 1 GB | 2.5s | 80s | 3% |
| 10 GB | 25s | 800s | 3% |

**Conclusion**: Hash computation adds ~3% overhead regardless of file size. Network I/O dominates.

### Existence Check Overhead

- S3 HEAD request: ~50-100ms per call
- Cached in future: Could use Redis/memory cache for hot paths
- Current MVP: No caching (acceptable for expected load)

### Deduplication Savings

Example with 50% duplication rate:

| Metric | Without Dedup | With Dedup | Savings |
|--------|---------------|------------|---------|
| Storage (100K files, 10MB avg) | 1 TB | 500 GB | 50% |
| Upload bandwidth | 1 TB | 500 GB | 50% |
| S3 costs | $23/mo | $11.50/mo | 50% |

---

## Operations Runbook

### Monitoring Deduplication

```bash
# View deduplication stats
curl http://orchard:8080/api/v1/stats/deduplication

# Response includes:
# - deduplication_ratio
# - total_uploads, deduplicated_uploads
# - bytes_saved
```

### Checking for Orphaned Artifacts

```bash
# List orphaned artifacts (ref_count = 0)
curl http://orchard:8080/api/v1/admin/orphaned-artifacts

# Dry-run garbage collection
curl -X POST "http://orchard:8080/api/v1/admin/garbage-collect?dry_run=true"

# Execute garbage collection
curl -X POST "http://orchard:8080/api/v1/admin/garbage-collect?dry_run=false"
```

### Verifying Artifact Integrity

```bash
# Download and verify hash matches artifact ID
ARTIFACT_ID="dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"
curl -o downloaded_file http://orchard:8080/api/v1/artifact/$ARTIFACT_ID/download
COMPUTED=$(sha256sum downloaded_file | cut -d' ' -f1)
[ "$ARTIFACT_ID" = "$COMPUTED" ] && echo "OK" || echo "INTEGRITY FAILURE"
```

### Troubleshooting

| Symptom | Likely Cause | Resolution |
|---------|--------------|------------|
| "Hash computation error" | Empty file or read error | Check file content, retry |
| "Storage unavailable" | S3/MinIO down | Check S3 health, retry |
| "File too large" | Exceeds max_file_size | Adjust config or use chunked upload |
| "Hash collision detected" | Extremely rare | Investigate, do not ignore |
| Orphaned artifacts accumulating | Tags deleted, no GC run | Run garbage collection |
| Download returns 404 | S3 object missing | Check S3 bucket, restore from backup |

### Configuration Reference

| Variable | Default | Description |
|----------|---------|-------------|
| `ORCHARD_MAX_FILE_SIZE` | 10GB | Maximum upload size |
| `ORCHARD_MIN_FILE_SIZE` | 1 | Minimum upload size (rejects empty) |
| `ORCHARD_S3_MAX_RETRIES` | 3 | Retry attempts for S3 operations |
| `ORCHARD_S3_CONNECT_TIMEOUT` | 10s | S3 connection timeout |
| `ORCHARD_S3_READ_TIMEOUT` | 60s | S3 read timeout |

---

## Appendix: Decision Records

### ADR-001: SHA256 for Content Hashing

**Status**: Accepted

**Context**: Need deterministic content identifier for deduplication.

**Decision**: Use SHA256.

**Rationale**:
- Cryptographically strong (no known attacks)
- Universal adoption (Git, Docker, npm)
- Sufficient speed for I/O-bound workloads
- Excellent tooling

**Consequences**:
- 64-character artifact IDs (longer than UUIDs)
- CPU overhead ~3% of upload time
- Future algorithm migration requires versioning

### ADR-002: Whole-File Deduplication Only

**Status**: Accepted

**Context**: Could implement chunk-level deduplication for better savings.

**Decision**: Whole-file only for MVP.

**Rationale**:
- Simpler implementation
- No chunking algorithm complexity
- Sufficient for build artifact use case
- Can add chunk-level later if needed

**Consequences**:
- Files with partial overlap stored entirely
- Large files with small changes not deduplicated
- Acceptable for binary artifact workloads

### ADR-003: SQL Triggers for ref_count

**Status**: Accepted

**Context**: ref_count must be accurate for garbage collection.

**Decision**: Use PostgreSQL triggers, not application code.

**Rationale**:
- Atomic with tag operations
- Cannot be bypassed
- Works regardless of client (API, direct SQL, migrations)
- Simpler application code

**Consequences**:
- Trigger logic in SQL (less visible)
- Must maintain triggers across schema changes
- Debugging requires database access
docs/design/integrity-verification.md (new file, 504 lines)
@@ -0,0 +1,504 @@
# Integrity Verification Workflow Design

This document defines the process for SHA256 checksum verification on artifact downloads, including failure handling and retry mechanisms.

## Overview

Orchard uses content-addressable storage where the artifact ID is the SHA256 hash of the content. This design leverages that property to provide configurable integrity verification during downloads.

## Current State

| Aspect | Status |
|--------|--------|
| Download streams content directly from S3 | ✅ Implemented |
| Artifact ID is the SHA256 hash | ✅ Implemented |
| S3 key derived from SHA256 hash | ✅ Implemented |
| Verification during download | ❌ Not implemented |
| Checksum headers in response | ❌ Not implemented |
| Retry mechanism on failure | ❌ Not implemented |
| Failure handling beyond S3 errors | ❌ Not implemented |

## Verification Modes

The verification mode is selected via query parameter `?verify=<mode>` or server-wide default via `ORCHARD_VERIFY_MODE`.

| Mode | Performance | Integrity | Use Case |
|------|-------------|-----------|----------|
| `none` | ⚡ Fastest | Client-side | Trusted networks, high throughput |
| `header` | ⚡ Fast | Client-side | Standard downloads, client verification |
| `stream` | 🔄 Moderate | Post-hoc server | Logging/auditing, non-blocking |
| `pre` | 🐢 Slower | Guaranteed | Critical downloads, untrusted storage |
| `strict` | 🐢 Slower | Guaranteed + Alert | Security-sensitive, compliance |

### Mode: None (Default)

**Behavior:**
- Stream content directly from S3 with no server-side processing
- Maximum download performance
- Client is responsible for verification

**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
```

**Flow:**
```
Client Request → Lookup Artifact → Stream from S3 → Client
```

### Mode: Header

**Behavior:**
- Stream content directly from S3
- Include comprehensive checksum headers
- Client performs verification using headers

**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
Digest: sha-256=<base64_encoded_hash>
ETag: "<sha256_hash>"
X-Content-SHA256: <expected_hash>
```

**Flow:**
```
Client Request → Lookup Artifact → Add Headers → Stream from S3 → Client Verifies
```

**Client Verification Example:**
```bash
# Download and verify (replace downloaded_file with the saved filename)
curl -OJ https://orchard/project/foo/bar/+/v1.0.0
EXPECTED=$(curl -sI https://orchard/project/foo/bar/+/v1.0.0 | grep -i X-Checksum-SHA256 | tr -d '\r' | cut -d' ' -f2)
ACTUAL=$(sha256sum downloaded_file | cut -d' ' -f1)
[ "$EXPECTED" = "$ACTUAL" ] && echo "OK" || echo "MISMATCH"
```

### Mode: Stream (Post-Hoc Verification)

**Behavior:**
- Wrap S3 stream with `HashingStreamWrapper`
- Compute SHA256 incrementally while streaming to client
- Verify hash after stream completes
- Log verification result
- Cannot reject content (already sent to client)

**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: stream
Trailer: X-Verified
```

**Trailers (if client supports):**
```
X-Verified: true|false
X-Computed-SHA256: <computed_hash>
```

**Flow:**
```
Client Request → Lookup Artifact → Wrap Stream → Stream to Client
                                        ↓
                          Compute Hash Incrementally
                                        ↓
                        Verify After Complete → Log Result
```

**Implementation:**
```python
import hashlib
from typing import Callable


class HashingStreamWrapper:
    def __init__(self, stream, expected_hash: str, on_complete: Callable):
        self.stream = stream
        self.hasher = hashlib.sha256()
        self.expected_hash = expected_hash
        self.on_complete = on_complete

    def __iter__(self):
        for chunk in self.stream:
            self.hasher.update(chunk)
            yield chunk
        # Stream complete, verify
        computed = self.hasher.hexdigest()
        self.on_complete(computed == self.expected_hash, computed)
```
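
For illustration, a sketch of how the wrapper could back a streaming response. FastAPI's `StreamingResponse` and the `log_verification` helper are assumptions about the serving stack, not the actual handler:

```python
from fastapi.responses import StreamingResponse

def stream_verified(s3_body, artifact):
    def on_complete(ok: bool, computed: str) -> None:
        # Post-hoc only: bytes are already sent, so just record the outcome
        log_verification(artifact.id, verified=ok, computed=computed)  # assumed helper

    wrapped = HashingStreamWrapper(s3_body, artifact.id, on_complete)
    return StreamingResponse(
        wrapped,
        headers={
            "X-Checksum-SHA256": artifact.id,
            "X-Verify-Mode": "stream",
        },
    )
```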

### Mode: Pre-Verify (Blocking)

**Behavior:**
- Download entire content from S3 to memory/temp file
- Compute SHA256 hash before sending to client
- On match: stream verified content to client
- On mismatch: retry from S3 (up to N times)
- If retries exhausted: return 500 error

**Headers Returned:**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: pre
X-Verified: true
```

**Flow:**
```
Client Request → Lookup Artifact → Download from S3 → Compute Hash
                                                           ↓
                                                     Hash Matches?
                                                     ↓          ↓
                                                    Yes         No
                                                     ↓          ↓
                                           Stream to Client   Retry?
                                                                ↓
                                                            Yes → Loop
                                                            No  → 500 Error
```

**Memory Considerations:**
- For files < `ORCHARD_VERIFY_MEMORY_LIMIT` (default 100MB): buffer in memory
- For larger files: use temporary file with streaming hash computation
- Cleanup temp files after response sent

### Mode: Strict

**Behavior:**
- Same as pre-verify but with no retries
- Fail immediately on any mismatch
- Quarantine artifact on failure (mark as potentially corrupted)
- Trigger alert/notification on failure
- For security-critical downloads

**Headers Returned (on success):**
```
X-Checksum-SHA256: <expected_hash>
Content-Length: <expected_size>
X-Verify-Mode: strict
X-Verified: true
```

**Error Response (on failure):**
```json
{
  "error": "integrity_verification_failed",
  "message": "Artifact content does not match expected checksum",
  "expected_hash": "<expected>",
  "computed_hash": "<computed>",
  "artifact_id": "<id>",
  "action_taken": "quarantined"
}
```

**Quarantine Process:**
1. Mark artifact `status = 'quarantined'` in database
2. Log security event to audit_logs
3. Optionally notify via webhook/email
4. Artifact becomes unavailable for download until resolved
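
A minimal sketch of those four steps, assuming an `Artifact.status` column and an `AuditLog` model as described above (the webhook argument mirrors `ORCHARD_VERIFY_ALERT_WEBHOOK`; names are illustrative):

```python
import httpx

def quarantine_artifact(db, artifact, computed_hash: str, webhook_url: str | None = None) -> None:
    """Mark a failed-verification artifact unavailable and record the event."""
    artifact.status = "quarantined"
    db.add(AuditLog(  # assumed model backing the audit_logs table
        action="verification.quarantine",
        resource=f"artifact/{artifact.id}",
        details={"expected_hash": artifact.id, "computed_hash": computed_hash},
    ))
    db.commit()
    if webhook_url:
        httpx.post(webhook_url, json={"artifact_id": artifact.id, "computed_hash": computed_hash})
```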

## Failure Detection

### Failure Types

| Failure Type | Detection Method | Severity |
|--------------|------------------|----------|
| Hash mismatch | Computed SHA256 ≠ Expected | Critical |
| Size mismatch | Actual bytes ≠ `Content-Length` | High |
| S3 read error | boto3 exception | Medium |
| Truncated content | Stream ends early | High |
| S3 object missing | `NoSuchKey` error | Critical |
| ETag mismatch | S3 ETag ≠ expected | Medium |

### Detection Implementation

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class VerificationResult:
    success: bool
    failure_type: Optional[str]  # hash_mismatch, size_mismatch, etc.
    expected_hash: str
    computed_hash: Optional[str]
    expected_size: int
    actual_size: Optional[int]
    error_message: Optional[str]
    retry_count: int
```

## Retry Mechanism

### Configuration

| Environment Variable | Default | Description |
|---------------------|---------|-------------|
| `ORCHARD_VERIFY_MAX_RETRIES` | 3 | Maximum retry attempts |
| `ORCHARD_VERIFY_RETRY_DELAY_MS` | 100 | Base delay between retries |
| `ORCHARD_VERIFY_RETRY_BACKOFF` | 2.0 | Exponential backoff multiplier |
| `ORCHARD_VERIFY_RETRY_MAX_DELAY_MS` | 5000 | Maximum delay cap |

### Backoff Formula

```
delay = min(base_delay * (backoff ^ attempt), max_delay)
```

Example with defaults (the exponent is zero-based, so the first retry waits the base delay):
- Attempt 1: 100ms
- Attempt 2: 200ms
- Attempt 3: 400ms
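
The formula transcribes directly into Python; a minimal sketch in milliseconds, using the helper name the retry flow below relies on:

```python
def calculate_backoff(
    attempt: int,
    base_delay: float = 100.0,   # ORCHARD_VERIFY_RETRY_DELAY_MS
    backoff: float = 2.0,        # ORCHARD_VERIFY_RETRY_BACKOFF
    max_delay: float = 5000.0,   # ORCHARD_VERIFY_RETRY_MAX_DELAY_MS
) -> float:
    """delay = min(base_delay * (backoff ** attempt), max_delay), in ms."""
    return min(base_delay * (backoff ** attempt), max_delay)

# calculate_backoff(0) -> 100.0, calculate_backoff(1) -> 200.0, calculate_backoff(2) -> 400.0
```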

### Retry Flow

```python
import asyncio

async def download_with_retry(artifact, max_retries=3):
    for attempt in range(max_retries + 1):
        try:
            content = await fetch_from_s3(artifact.s3_key)
            computed_hash = compute_sha256(content)

            if computed_hash == artifact.id:
                return content  # Success

            # Hash mismatch
            log.warning(f"Verification failed, attempt {attempt + 1}/{max_retries + 1}")

            if attempt < max_retries:
                delay = calculate_backoff(attempt)
                await asyncio.sleep(delay / 1000)
            else:
                raise IntegrityError("Max retries exceeded")

        except S3Error:
            if attempt < max_retries:
                delay = calculate_backoff(attempt)
                await asyncio.sleep(delay / 1000)
            else:
                raise
```

### Retryable vs Non-Retryable Failures

**Retryable:**
- S3 read timeout
- S3 connection error
- Hash mismatch (may be transient S3 issue)
- Truncated content

**Non-Retryable:**
- S3 object not found (404)
- S3 access denied (403)
- Artifact not in database
- Strict mode failures

## Configuration Reference

### Environment Variables

```bash
# Verification mode (none, header, stream, pre, strict)
ORCHARD_VERIFY_MODE=none

# Retry settings
ORCHARD_VERIFY_MAX_RETRIES=3
ORCHARD_VERIFY_RETRY_DELAY_MS=100
ORCHARD_VERIFY_RETRY_BACKOFF=2.0
ORCHARD_VERIFY_RETRY_MAX_DELAY_MS=5000

# Memory limit for pre-verify buffering (bytes)
ORCHARD_VERIFY_MEMORY_LIMIT=104857600  # 100MB

# Strict mode settings
ORCHARD_VERIFY_QUARANTINE_ON_FAILURE=true
ORCHARD_VERIFY_ALERT_WEBHOOK=https://alerts.example.com/webhook

# Allow per-request mode override
ORCHARD_VERIFY_ALLOW_OVERRIDE=true
```

### Per-Request Override

When `ORCHARD_VERIFY_ALLOW_OVERRIDE=true`, clients can specify verification mode:

```
GET /api/v1/project/foo/bar/+/v1.0.0?verify=pre
GET /api/v1/project/foo/bar/+/v1.0.0?verify=none
```

## API Changes

### Download Endpoint

**Request:**
```
GET /api/v1/project/{project}/{package}/+/{ref}?verify={mode}
```

**New Query Parameters:**

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `verify` | string | from config | Verification mode |

**New Response Headers:**

| Header | Description |
|--------|-------------|
| `X-Checksum-SHA256` | Expected SHA256 hash |
| `X-Verify-Mode` | Active verification mode |
| `X-Verified` | `true` if server verified content |
| `Digest` | RFC 3230 digest header |

### New Endpoint: Verify Artifact

**Request:**
```
POST /api/v1/project/{project}/{package}/+/{ref}/verify
```

**Response:**
```json
{
  "artifact_id": "abc123...",
  "verified": true,
  "expected_hash": "abc123...",
  "computed_hash": "abc123...",
  "size_match": true,
  "expected_size": 1048576,
  "actual_size": 1048576,
  "verification_time_ms": 45
}
```

## Logging and Monitoring

### Log Events

| Event | Level | When |
|-------|-------|------|
| `verification.success` | INFO | Hash verified successfully |
| `verification.failure` | ERROR | Hash mismatch detected |
| `verification.retry` | WARN | Retry attempt initiated |
| `verification.quarantine` | ERROR | Artifact quarantined |
| `verification.skip` | DEBUG | Verification skipped (mode=none) |

### Metrics

| Metric | Type | Description |
|--------|------|-------------|
| `orchard_verification_total` | Counter | Total verification attempts |
| `orchard_verification_failures` | Counter | Failed verifications |
| `orchard_verification_retries` | Counter | Retry attempts |
| `orchard_verification_duration_ms` | Histogram | Verification time |

### Audit Log Entry

```json
{
  "action": "artifact.download.verified",
  "resource": "project/foo/package/bar/artifact/abc123",
  "user_id": "user@example.com",
  "details": {
    "verification_mode": "pre",
    "verified": true,
    "retry_count": 0,
    "duration_ms": 45
  }
}
```

## Security Considerations

1. **Strict Mode for Sensitive Data**: Use strict mode for artifacts containing credentials, certificates, or security-critical code.

2. **Quarantine Isolation**: Quarantined artifacts should be moved to a separate S3 prefix or bucket for forensic analysis.

3. **Alert on Repeated Failures**: Multiple verification failures for the same artifact may indicate storage corruption or tampering.

4. **Audit Trail**: All verification events should be logged for compliance and forensic purposes.

5. **Client Trust**: In `none` and `header` modes, clients must implement their own verification for security guarantees.

## Implementation Phases

### Phase 1: Headers Only
- Add `X-Checksum-SHA256` header to all downloads
- Add `verify=header` mode support
- Add configuration options

### Phase 2: Stream Verification
- Implement `HashingStreamWrapper`
- Add `verify=stream` mode
- Add verification logging

### Phase 3: Pre-Verification
- Implement buffered verification
- Add retry mechanism
- Add `verify=pre` mode

### Phase 4: Strict Mode
- Implement quarantine mechanism
- Add alerting integration
- Add `verify=strict` mode

## Client Integration Examples

### curl with Verification
```bash
#!/bin/bash
URL="https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0"

# Get expected hash from headers
EXPECTED=$(curl -sI "$URL" | grep -i "X-Checksum-SHA256" | tr -d '\r' | cut -d' ' -f2)

# Download file
curl -sO "$URL"
FILENAME=$(basename "$URL")

# Verify
ACTUAL=$(sha256sum "$FILENAME" | cut -d' ' -f1)

if [ "$EXPECTED" = "$ACTUAL" ]; then
    echo "✓ Verification passed"
else
    echo "✗ Verification FAILED"
    echo "  Expected: $EXPECTED"
    echo "  Actual:   $ACTUAL"
    exit 1
fi
```

### Python Client
```python
import hashlib
import requests

def download_verified(url: str) -> bytes:
    # Get headers first
    head = requests.head(url)
    expected_hash = head.headers.get('X-Checksum-SHA256')
    expected_size = int(head.headers.get('Content-Length', 0))

    # Download content
    response = requests.get(url)
    content = response.content

    # Verify size
    if len(content) != expected_size:
        raise ValueError(f"Size mismatch: {len(content)} != {expected_size}")

    # Verify hash
    actual_hash = hashlib.sha256(content).hexdigest()
    if actual_hash != expected_hash:
        raise ValueError(f"Hash mismatch: {actual_hash} != {expected_hash}")

    return content
```

### Server-Side Verification
```bash
# Force server to verify before sending
curl -O "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0?verify=pre"

# Check if verification was performed
curl -I "https://orchard.example.com/api/v1/project/myproject/mypackage/+/v1.0.0?verify=pre" | grep X-Verified
# X-Verified: true
```
docs/epic-upstream-caching.md (new file, 672 lines)
@@ -0,0 +1,672 @@
# Epic: Upstream Artifact Caching for Hermetic Builds

## Overview

Orchard will act as a permanent, content-addressable cache for upstream artifacts (npm, PyPI, Maven, Docker, etc.). Once an artifact is cached, it is stored forever by SHA256 hash - enabling reproducible builds years later regardless of whether the upstream source still exists.

## Problem Statement

Build reproducibility is critical for enterprise environments:
- Packages get deleted, yanked, or modified upstream
- Registries go down or change URLs
- Version constraints resolve differently over time
- Air-gapped environments cannot access the public internet

Teams need to guarantee that a build from 5 years ago produces the exact same output today.

## Solution

Orchard becomes "the cache that never forgets":

1. **Fetch once, store forever** - When a build needs `lodash@4.17.21`, Orchard fetches it from npm, stores it by SHA256 hash, and never deletes it
2. **Content-addressable** - Same hash = same bytes, guaranteed
3. **Format-agnostic** - Orchard doesn't need to understand npm/PyPI/Maven protocols; the client provides the URL, Orchard fetches and stores
4. **Air-gap support** - Disable public internet entirely, only allow configured private upstreams

## User Workflow

```
1. Build tool resolves dependencies    npm install / pip install / mvn resolve
                ↓
2. Generate lockfile with URLs         package-lock.json / requirements.txt
                ↓
3. Cache all URLs in Orchard           orchard cache --file urls.txt
                ↓
4. Pin by SHA256 hash                  lodash = "sha256:abc123..."
                ↓
5. Future builds fetch by hash         Always get exact same bytes
```

## Key Features

- **Multiple upstream sources** - Configure npm, PyPI, Maven Central, private Artifactory, etc.
- **Per-source authentication** - Basic auth, bearer tokens, API keys
- **System cache projects** - `_npm`, `_pypi`, `_maven` organize cached packages by format
- **Cross-referencing** - Link cached artifacts to user projects for visibility
- **URL tracking** - Know which URLs map to which hashes, audit provenance
- **Air-gap mode** - Global kill switch for all public internet access
- **Environment variable config** - 12-factor friendly for containerized deployments

## Architecture

```
┌─────────────────────────────────────────────────────────────────┐
│                         Orchard Server                          │
├─────────────────────────────────────────────────────────────────┤
│ POST /api/v1/cache                                              │
│   ├── Check if URL already cached (url_hash lookup)             │
│   ├── Match URL to upstream source (get auth)                   │
│   ├── Fetch via UpstreamClient (stream + compute SHA256)        │
│   ├── Store artifact in S3 (content-addressable)                │
│   ├── Create tag in system project (_npm/lodash:4.17.21)        │
│   ├── Optionally create tag in user project                     │
│   └── Record in cached_urls table (provenance)                  │
├─────────────────────────────────────────────────────────────────┤
│ Tables                                                          │
│   ├── upstream_sources (npm-public, pypi-public, artifactory)   │
│   ├── cache_settings (allow_public_internet, etc.)              │
│   ├── cached_urls (url → artifact_id mapping)                   │
│   └── projects.is_system (for _npm, _pypi, etc.)                │
└─────────────────────────────────────────────────────────────────┘
```

## Issues Summary

| Issue | Title | Status | Dependencies |
|-------|-------|--------|--------------|
| #68 | Schema: Upstream Sources & Cache Tracking | ✅ Complete | None |
| #69 | HTTP Client: Generic URL Fetcher | Pending | None |
| #70 | Cache API Endpoint | Pending | #68, #69 |
| #71 | System Projects (Cache Namespaces) | Pending | #68, #70 |
| #72 | Upstream Sources Admin API | Pending | #68 |
| #73 | Global Cache Settings API | Pending | #68 |
| #74 | Environment Variable Overrides | Pending | #68, #72, #73 |
| #75 | Frontend: Upstream Sources Management | Pending | #72, #73 |
| #105 | Frontend: System Projects Integration | Pending | #71 |
| #77 | CLI: Cache Command | Pending | #70 |

## Implementation Phases

**Phase 1 - Core (MVP):**
- #68 Schema ✅
- #69 HTTP Client
- #70 Cache API
- #71 System Projects

**Phase 2 - Admin:**
- #72 Upstream Sources API
- #73 Cache Settings API
- #74 Environment Variables

**Phase 3 - Frontend:**
- #75 Upstream Sources UI
- #105 System Projects UI

**Phase 4 - CLI:**
- #77 Cache Command

---

# Issue #68: Schema - Upstream Sources & Cache Tracking

**Status: ✅ Complete**

## Description

Create database schema for flexible multi-source upstream configuration and URL-to-artifact tracking. This replaces the previous singleton proxy_config design with a more flexible model supporting multiple upstream sources, air-gap mode, and provenance tracking.

## Acceptance Criteria

- [x] `upstream_sources` table:
  - id (UUID, primary key)
  - name (VARCHAR(255), unique, e.g., "npm-public", "artifactory-private")
  - source_type (VARCHAR(50), enum: npm, pypi, maven, docker, helm, nuget, deb, rpm, generic)
  - url (VARCHAR(2048), base URL of upstream)
  - enabled (BOOLEAN, default false)
  - is_public (BOOLEAN, true if this is a public internet source)
  - auth_type (VARCHAR(20), enum: none, basic, bearer, api_key)
  - username (VARCHAR(255), nullable)
  - password_encrypted (BYTEA, nullable, Fernet encrypted)
  - headers_encrypted (BYTEA, nullable, for custom headers like API keys)
  - priority (INTEGER, default 100, lower = checked first)
  - created_at, updated_at timestamps
- [x] `cache_settings` table (singleton, id always 1):
  - id (INTEGER, primary key, check id = 1)
  - allow_public_internet (BOOLEAN, default true, air-gap kill switch)
  - auto_create_system_projects (BOOLEAN, default true)
  - created_at, updated_at timestamps
- [x] `cached_urls` table:
  - id (UUID, primary key)
  - url (VARCHAR(4096), original URL fetched)
  - url_hash (VARCHAR(64), SHA256 of URL for fast lookup, indexed; see the sketch below)
  - artifact_id (VARCHAR(64), FK to artifacts)
  - source_id (UUID, FK to upstream_sources, nullable for manual imports)
  - fetched_at (TIMESTAMP WITH TIME ZONE)
  - response_headers (JSONB, original upstream headers for provenance)
  - created_at timestamp
- [x] Add `is_system` BOOLEAN column to projects table (default false)
- [x] Migration SQL file in migrations/
- [x] Runtime migration in database.py
- [x] SQLAlchemy models for all new tables
- [x] Pydantic schemas for API input/output (passwords write-only)
- [x] Encryption helpers for password/headers fields
- [x] Seed default upstream sources (disabled by default):
  - npm-public: https://registry.npmjs.org
  - pypi-public: https://pypi.org/simple
  - maven-central: https://repo1.maven.org/maven2
  - docker-hub: https://registry-1.docker.io
- [x] Unit tests for models and schemas
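
The `url_hash` column above is what makes cache-hit checks cheap. A minimal sketch of the lookup, assuming the `CachedUrl` SQLAlchemy model from this issue (the helper name is illustrative):

```python
import hashlib

def find_cached(db, url: str):
    """Return the existing cached_urls row for a URL, if any."""
    url_hash = hashlib.sha256(url.encode("utf-8")).hexdigest()
    return db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
```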
|
||||
|
||||
## Files Modified
|
||||
|
||||
- `migrations/010_upstream_caching.sql`
|
||||
- `backend/app/database.py` (migrations 016-020)
|
||||
- `backend/app/models.py` (UpstreamSource, CacheSettings, CachedUrl, Project.is_system)
|
||||
- `backend/app/schemas.py` (all caching schemas)
|
||||
- `backend/app/encryption.py` (renamed env var)
|
||||
- `backend/app/config.py` (renamed setting)
|
||||
- `backend/tests/test_upstream_caching.py` (37 tests)
|
||||
- `frontend/src/components/Layout.tsx` (footer tagline)
|
||||
- `CHANGELOG.md`
|
||||
|
||||
---
|
||||
|
||||
# Issue #69: HTTP Client - Generic URL Fetcher

**Status: Pending**

## Description

Create a reusable HTTP client for fetching artifacts from upstream sources. It supports multiple auth methods, streams large files, and computes the SHA256 hash while downloading.

## Acceptance Criteria

- [ ] `UpstreamClient` class in `backend/app/upstream.py`
- [ ] `fetch(url)` method that:
  - Streams the response body (doesn't load large files into memory)
  - Computes the SHA256 hash while streaming
  - Returns file content, hash, size, and response headers
- [ ] Auth support based on upstream source configuration:
  - None (anonymous)
  - Basic auth (username/password)
  - Bearer token (Authorization: Bearer {token})
  - API key (custom header name/value)
- [ ] URL-to-source matching:
  - Match URL to configured upstream source by URL prefix
  - Apply auth from the matched source
  - Respect source priority when multiple sources match
- [ ] Configuration options:
  - Timeout (connect and read, default 30s/300s)
  - Max retries (default 3)
  - Follow redirects (default true, max 5)
  - Max file size (reject if Content-Length exceeds the limit)
- [ ] Respect `allow_public_internet` setting:
  - If false, reject URLs matching `is_public=true` sources
  - If false, reject URLs not matching any configured source
- [ ] Capture response headers for provenance tracking
- [ ] Proper error handling:
  - Connection errors (retry with backoff)
  - HTTP errors (4xx, 5xx)
  - Timeout errors
  - SSL/TLS errors
- [ ] Logging for debugging (URL, source matched, status, timing)
- [ ] Unit tests with mocked HTTP responses
- [ ] Integration tests against httpbin.org or similar (optional, marked)

## Technical Notes

- Use `httpx` for async HTTP support (already in requirements)
- Stream to a temp file to avoid memory issues with large artifacts
- Consider checksum verification when the upstream provides one (e.g., npm provides a shasum)
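
A minimal synchronous sketch of the streaming fetch described above, using `httpx`; the real `UpstreamClient` is expected to be async, and the function name and return shape here are illustrative assumptions:

```python
# Sync sketch for brevity; the real client would be async and add retries/auth.
import hashlib
import tempfile

import httpx


def fetch_url(url: str, headers: dict[str, str] | None = None):
    """Stream a URL to a temp file, computing SHA256 and size on the fly."""
    sha256 = hashlib.sha256()
    size = 0
    tmp = tempfile.NamedTemporaryFile(delete=False)
    timeout = httpx.Timeout(30.0, read=300.0)  # 30s connect / 300s read, per the criteria
    with httpx.Client(follow_redirects=True, max_redirects=5, timeout=timeout) as client:
        with client.stream("GET", url, headers=headers) as response:
            response.raise_for_status()
            for chunk in response.iter_bytes():
                sha256.update(chunk)
                size += len(chunk)
                tmp.write(chunk)
    tmp.close()
    # Response headers are kept for provenance (cached_urls.response_headers in Issue #68)
    return tmp.name, sha256.hexdigest(), size, dict(response.headers)
```
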
---
# Issue #70: Cache API Endpoint

**Status: Pending**

## Description

API endpoint to cache an artifact from an upstream URL. This is the core endpoint that fetches from upstream, stores the artifact in Orchard, and creates the appropriate tags.

## Acceptance Criteria

- [ ] `POST /api/v1/cache` endpoint
- [ ] Request body:

  ```json
  {
    "url": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
    "source_type": "npm",
    "package_name": "lodash",
    "tag": "4.17.21",
    "user_project": "my-app",
    "user_package": "npm-deps",
    "user_tag": "lodash-4.17.21",
    "expected_hash": "sha256:abc123..."
  }
  ```

  - `url` (required): URL to fetch
  - `source_type` (required): Determines the system project (_npm, _pypi, etc.)
  - `package_name` (optional): Package name in the system project, derived from the URL if not provided
  - `tag` (optional): Tag name in the system project, derived from the URL if not provided
  - `user_project`, `user_package`, `user_tag` (optional): Cross-reference in the user's project
  - `expected_hash` (optional): Verify the downloaded content matches
- [ ] Response:

  ```json
  {
    "artifact_id": "abc123...",
    "sha256": "abc123...",
    "size": 12345,
    "content_type": "application/gzip",
    "already_cached": false,
    "source_url": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
    "source_name": "npm-public",
    "system_project": "_npm",
    "system_package": "lodash",
    "system_tag": "4.17.21",
    "user_reference": "my-app/npm-deps:lodash-4.17.21"
  }
  ```

- [ ] Behavior:
  - Check whether the URL is already cached (by url_hash in cached_urls)
  - If cached: return the existing artifact, optionally create the user tag
  - If not cached: fetch via UpstreamClient, store the artifact, create tags
  - Create/get the system project if needed (e.g., `_npm`)
  - Create the package in the system project (e.g., `_npm/lodash`)
  - Create the tag in the system project (e.g., `_npm/lodash:4.17.21`)
  - If a user reference is provided, create the tag in the user's project
  - Record in the cached_urls table with provenance
- [ ] Error handling:
  - 400: Invalid request (bad URL format, missing required fields)
  - 403: Air-gap mode enabled and URL is from a public source
  - 404: Upstream returned 404
  - 409: Hash mismatch (if expected_hash provided)
  - 502: Upstream fetch failed (connection error, timeout)
  - 503: Upstream source disabled
- [ ] Authentication required (any authenticated user can cache)
- [ ] Audit logging for cache operations
- [ ] Integration tests covering success and error cases

## Technical Notes

- URL parsing for package_name/tag derivation is format-specific (see the sketch below):
  - npm: `/{package}/-/{package}-{version}.tgz` → package=lodash, tag=4.17.21
  - pypi: `/packages/.../requests-2.28.0.tar.gz` → package=requests, tag=2.28.0
  - maven: `/{group}/{artifact}/{version}/{artifact}-{version}.jar`
- Deduplication: if the same SHA256 already exists, just create a new tag pointing to it
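
An illustrative sketch of the npm pattern above; the regex and helper name are assumptions, not the shipped parser:

```python
# Covers plain and scoped packages (e.g. @types/node); hypothetical helper.
import re
from urllib.parse import urlparse

NPM_TARBALL = re.compile(r"^/(?P<package>(?:@[^/]+/)?[^/]+)/-/[^/]+-(?P<version>\d[^/]*)\.tgz$")

def derive_npm_package_and_tag(url: str) -> tuple[str, str] | None:
    match = NPM_TARBALL.match(urlparse(url).path)
    return (match.group("package"), match.group("version")) if match else None

# derive_npm_package_and_tag("https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz")
# -> ("lodash", "4.17.21")
```
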
---
# Issue #71: System Projects (Cache Namespaces)

**Status: Pending**

## Description

Implement auto-created system projects for organizing cached artifacts by format type. These are special projects that provide a browsable namespace for all cached upstream packages.

## Acceptance Criteria

- [ ] System project names: `_npm`, `_pypi`, `_maven`, `_docker`, `_helm`, `_nuget`, `_deb`, `_rpm`, `_generic`
- [ ] Auto-creation:
  - Created automatically on the first cache request for that format
  - Created by the cache endpoint, not at startup
  - Uses the system user as creator (`created_by = "system"`)
- [ ] System project properties:
  - `is_system = true`
  - `is_public = true` (readable by all authenticated users)
  - `description` = "System cache for {format} packages"
- [ ] Restrictions:
  - Cannot be deleted (return 403 with a message)
  - Cannot be renamed
  - Cannot change `is_public` to false
  - Only admins can modify the description
- [ ] Helper function: `get_or_create_system_project(source_type)` in routes.py or a new cache.py module
- [ ] Update the project deletion endpoint to check the `is_system` flag
- [ ] Update the project update endpoint to enforce restrictions
- [ ] Query helper: list all system projects for the UI dropdown
- [ ] Unit tests for restrictions
- [ ] Integration tests for auto-creation and restrictions

## Technical Notes

- System projects are identified by `is_system=true`, not just the naming convention
- The `_` prefix is a convention for display purposes
- Packages within system projects follow upstream naming (e.g., `_npm/lodash`, `_npm/@types/node`)
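
A minimal sketch of the helper named above, assuming a SQLAlchemy `Session` (`db`) and the `Project` model with the columns added in Issue #68; not the shipped implementation:

```python
# Hedged sketch: assumes Project has is_system/is_public/created_by columns.
def get_or_create_system_project(db, source_type: str):
    name = f"_{source_type}"  # e.g. "_npm"
    project = db.query(Project).filter(Project.name == name).first()
    if project is None:
        project = Project(
            name=name,
            is_system=True,
            is_public=True,
            description=f"System cache for {source_type} packages",
            created_by="system",
        )
        db.add(project)
        db.commit()
    return project
```
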
---
# Issue #72: Upstream Sources Admin API

**Status: Pending**

## Description

CRUD API endpoints for managing the upstream sources configuration. Admin-only access.

## Acceptance Criteria

- [ ] `GET /api/v1/admin/upstream-sources` - List all upstream sources
  - Returns an array of sources with id, name, source_type, url, enabled, is_public, auth_type, priority, has_credentials, created_at, updated_at
  - Supports `?enabled=true/false` filter
  - Supports `?source_type=npm,pypi` filter
  - Passwords/tokens are never returned
- [ ] `POST /api/v1/admin/upstream-sources` - Create upstream source
  - Request: name, source_type, url, enabled, is_public, auth_type, username, password, headers, priority
  - Validates unique name
  - Validates URL format
  - Encrypts password/headers before storage
  - Returns the created source (without secrets)
- [ ] `GET /api/v1/admin/upstream-sources/{id}` - Get source details
  - Returns the source with a `has_credentials` boolean, not the actual credentials
- [ ] `PUT /api/v1/admin/upstream-sources/{id}` - Update source
  - Partial update supported
  - If a password is provided, re-encrypt; if omitted, keep the existing one
  - The special value `password: null` clears credentials
- [ ] `DELETE /api/v1/admin/upstream-sources/{id}` - Delete source
  - Returns 400 if the source has cached_urls referencing it (optional: cascade or reassign)
- [ ] `POST /api/v1/admin/upstream-sources/{id}/test` - Test connectivity
  - Attempts a HEAD request to the source URL
  - Returns success/failure with status code and timing
  - Does not cache anything
- [ ] All endpoints require the admin role
- [ ] Audit logging for all mutations
- [ ] Pydantic schemas: UpstreamSourceCreate, UpstreamSourceUpdate, UpstreamSourceResponse
- [ ] Integration tests for all endpoints

## Technical Notes

- The test endpoint should respect the auth configuration to verify that credentials work
- Consider adding `last_used_at` and `last_error` fields for observability (future enhancement)
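
A sketch of what the test endpoint might do internally, assuming `httpx`; the result shape (`ok`/`status_code`/`elapsed_ms`) is an assumption, not the final schema:

```python
# Hypothetical internals of the /test endpoint; real code would apply the
# source's configured auth headers before calling.
import time

import httpx


def test_upstream(url: str, headers: dict[str, str] | None = None) -> dict:
    start = time.monotonic()
    try:
        response = httpx.head(url, headers=headers, follow_redirects=True, timeout=10.0)
        return {
            "ok": response.status_code < 500,
            "status_code": response.status_code,
            "elapsed_ms": int((time.monotonic() - start) * 1000),
        }
    except httpx.HTTPError as exc:
        return {"ok": False, "error": str(exc), "elapsed_ms": int((time.monotonic() - start) * 1000)}
```
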
---
# Issue #73: Global Cache Settings API

**Status: Pending**

## Description

API endpoints for managing global cache settings, including air-gap mode.

## Acceptance Criteria

- [ ] `GET /api/v1/admin/cache-settings` - Get current settings
  - Returns: allow_public_internet, auto_create_system_projects, created_at, updated_at
- [ ] `PUT /api/v1/admin/cache-settings` - Update settings
  - Partial update supported
  - Returns the updated settings
- [ ] Settings fields:
  - `allow_public_internet` (boolean): When false, blocks all requests to sources marked `is_public=true`
  - `auto_create_system_projects` (boolean): When false, system projects must be created manually
- [ ] Admin-only access
- [ ] Audit logging for changes (especially air-gap mode changes)
- [ ] Pydantic schemas: CacheSettingsResponse, CacheSettingsUpdate
- [ ] Initialize the singleton row on first access if it does not exist
- [ ] Integration tests

## Technical Notes

- Air-gap mode changes should be logged prominently (security-relevant)
- Consider requiring a confirmation header for disabling air-gap mode (similar to factory reset)
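
The singleton initialization could look roughly like this sketch, assuming the `CacheSettings` model from Issue #68 and a SQLAlchemy `Session`:

```python
# Hedged sketch: not the shipped helper, just the get-or-create pattern.
def get_cache_settings(db):
    settings = db.get(CacheSettings, 1)  # singleton row, id always 1
    if settings is None:
        settings = CacheSettings(id=1)  # column defaults apply (allow_public_internet=True, ...)
        db.add(settings)
        db.commit()
    return settings
```
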
---
# Issue #74: Environment Variable Overrides

**Status: Pending**

## Description

Allow cache and upstream configuration via environment variables for containerized deployments. Environment variables override database settings, following 12-factor app principles.

## Acceptance Criteria

- [ ] Global settings overrides:
  - `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET=true/false`
  - `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS=true/false`
  - `ORCHARD_CACHE_ENCRYPTION_KEY` (Fernet key for credential encryption)
- [ ] Upstream source definition via env vars:
  - `ORCHARD_UPSTREAM__{NAME}__URL` (double underscore as separator)
  - `ORCHARD_UPSTREAM__{NAME}__TYPE` (npm, pypi, maven, etc.)
  - `ORCHARD_UPSTREAM__{NAME}__ENABLED` (true/false)
  - `ORCHARD_UPSTREAM__{NAME}__IS_PUBLIC` (true/false)
  - `ORCHARD_UPSTREAM__{NAME}__AUTH_TYPE` (none, basic, bearer, api_key)
  - `ORCHARD_UPSTREAM__{NAME}__USERNAME`
  - `ORCHARD_UPSTREAM__{NAME}__PASSWORD`
  - `ORCHARD_UPSTREAM__{NAME}__PRIORITY`
  - Example: `ORCHARD_UPSTREAM__NPM_PRIVATE__URL=https://npm.corp.com`
- [ ] Env var sources:
  - Loaded at startup
  - Merged with database sources
  - Env var sources carry a `source = "env"` marker
  - Cannot be modified via the API (return 400)
  - Cannot be deleted via the API (return 400)
- [ ] Update the Settings class in config.py
- [ ] Update the get/list endpoints to include env-defined sources
- [ ] Document all env vars in CLAUDE.md
- [ ] Unit tests for env var parsing
- [ ] Integration tests with env vars set

## Technical Notes

- The double underscore (`__`) separator allows source names with single underscores (see the parsing sketch below)
- Env-defined sources should appear in API responses but be marked read-only
- Consider startup validation that warns about invalid env var combinations
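
An illustrative parser for the `ORCHARD_UPSTREAM__{NAME}__{FIELD}` convention; the grouping logic is an assumption about how the Settings class might implement it:

```python
# Hypothetical helper; field names (url, type, enabled, ...) never contain "__".
import os
from collections import defaultdict

PREFIX = "ORCHARD_UPSTREAM__"

def parse_upstream_env(environ: dict[str, str] | None = None) -> dict[str, dict[str, str]]:
    environ = dict(os.environ) if environ is None else environ
    sources: dict[str, dict[str, str]] = defaultdict(dict)
    for key, value in environ.items():
        if not key.startswith(PREFIX):
            continue
        # Split on the LAST double underscore so names like NPM_PRIVATE stay intact
        name, sep, field = key[len(PREFIX):].rpartition("__")
        if sep and name and field:
            sources[name.lower()][field.lower()] = value
    return dict(sources)

# {"ORCHARD_UPSTREAM__NPM_PRIVATE__URL": "https://npm.corp.com"}
# -> {"npm_private": {"url": "https://npm.corp.com"}}
```
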
---
# Issue #75: Frontend - Upstream Sources Management

**Status: Pending**

## Description

Admin UI for managing upstream sources and cache settings.

## Acceptance Criteria

- [ ] New admin page: `/admin/cache` or `/admin/upstream-sources`
- [ ] Upstream sources section:
  - Table listing all sources with: name, type, URL, enabled toggle, public badge, priority, actions
  - Visual distinction for env-defined sources (locked icon, no edit/delete)
  - Create button opens a modal/form
  - Edit button for DB-defined sources
  - Delete with a confirmation modal
  - Test connection button with a status indicator
- [ ] Create/edit form fields:
  - Name (text, required)
  - Source type (dropdown)
  - URL (text, required)
  - Priority (number)
  - Is public (checkbox)
  - Enabled (checkbox)
  - Auth type (dropdown: none, basic, bearer, api_key)
  - Conditional auth fields based on type:
    - Basic: username, password
    - Bearer: token
    - API key: header name, header value
  - Password fields masked, "unchanged" placeholder on edit
- [ ] Cache settings section:
  - Air-gap mode toggle with warning
  - Auto-create system projects toggle
  - Air-gap mode shows a prominent warning banner when enabled
- [ ] Link from the main admin navigation
- [ ] Loading and error states
- [ ] Success/error toast notifications

## Technical Notes

- Use the existing admin page patterns from user management
- The air-gap toggle should require confirmation (a modal with warning text)

---
# Issue #105: Frontend - System Projects Integration

**Status: Pending**

## Description

Integrate system projects into the frontend UI with appropriate visual treatment and navigation.

## Acceptance Criteria

- [ ] Home page project dropdown:
  - System projects shown in a separate "Cached Packages" section
  - Visual distinction (icon, different background, or badge)
  - Format icon for each type (npm, pypi, maven, etc.)
- [ ] Project list/grid:
  - System projects can be filtered: "Show system projects" toggle
  - Or a separate tab: "Projects" | "Package Cache"
- [ ] System project page:
  - "System Cache" badge in the header
  - Description explains that this is an auto-managed cache
  - Settings/delete buttons hidden or disabled
  - Shows the format type prominently
- [ ] Package page within a system project:
  - Shows "Cached from" with the source URL (linked)
  - Shows the "First cached" timestamp
  - Shows which upstream source provided it
- [ ] Artifact page:
  - If the artifact came from the cache, show provenance:
    - Original URL
    - Upstream source name
    - Fetch timestamp
- [ ] Search includes system projects (with a filter option)

## Technical Notes

- Use React context or query params for system project filtering
- Consider a dedicated route: `/cache/npm/lodash` as an alias for `/_npm/lodash`

---
# Issue #77: CLI - Cache Command

**Status: Pending**

## Description

Add a new `orchard cache` command to the existing CLI for caching artifacts from upstream URLs. It integrates with the new cache API endpoint and can optionally update `orchard.ensure` with cached artifacts.

## Acceptance Criteria

- [ ] New command: `orchard cache <url>` in `orchard/commands/cache.py`
- [ ] Basic usage:

  ```bash
  # Cache a URL, print artifact info
  orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz

  # Output:
  # Caching https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz...
  # Source type: npm
  # Package: lodash
  # Version: 4.17.21
  #
  # Successfully cached artifact
  # Artifact ID: abc123...
  # Size: 1.2 MB
  # System project: _npm
  # System package: lodash
  # System tag: 4.17.21
  ```

- [ ] Options:

  | Option | Description |
  |--------|-------------|
  | `--type, -t TYPE` | Source type: npm, pypi, maven, docker, helm, generic (auto-detected from URL if not provided) |
  | `--package, -p NAME` | Package name in system project (auto-derived from URL if not provided) |
  | `--tag TAG` | Tag name in system project (auto-derived from URL if not provided) |
  | `--project PROJECT` | Also create tag in this user project |
  | `--user-package PKG` | Package name in user project (required if --project specified) |
  | `--user-tag TAG` | Tag name in user project (default: same as system tag) |
  | `--expected-hash HASH` | Verify downloaded content matches this SHA256 |
  | `--add` | Add to orchard.ensure after caching |
  | `--add-path PATH` | Extraction path for --add (default: `<package>/`) |
  | `--file, -f FILE` | Path to orchard.ensure file |
  | `--verbose, -v` | Show detailed output |

- [ ] URL type auto-detection (see the sketch after this list):
  - `registry.npmjs.org` → npm
  - `pypi.org` or `files.pythonhosted.org` → pypi
  - `repo1.maven.org` or contains `/maven2/` → maven
  - `registry-1.docker.io` or `docker.io` → docker
  - Otherwise → generic
- [ ] Package/version extraction from URL patterns:
  - npm: `/{package}/-/{package}-{version}.tgz`
  - pypi: `/packages/.../requests-{version}.tar.gz`
  - maven: `/{group}/{artifact}/{version}/{artifact}-{version}.jar`
- [ ] Add `cache_artifact()` function to `orchard/api.py`
- [ ] Integration with the `--add` flag:
  - Parse the existing orchard.ensure
  - Add a new dependency entry pointing to the cached artifact
  - Use artifact_id (SHA256) for hermetic pinning
- [ ] Batch mode: `orchard cache --file urls.txt`
  - One URL per line
  - Lines starting with `#` are comments
  - Report success/failure for each
- [ ] Exit codes:
  - 0: Success (or already cached)
  - 1: Fetch failed
  - 2: Hash mismatch
  - 3: Air-gap mode blocked the request
- [ ] Error handling consistent with existing CLI patterns
- [ ] Unit tests in `test/test_cache.py`
- [ ] Update README.md with cache command documentation
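
A sketch of the auto-detection rules from the acceptance criteria above; the function name is illustrative:

```python
# Maps hostnames/paths to source types per the detection table; hypothetical helper.
from urllib.parse import urlparse

def detect_source_type(url: str) -> str:
    parsed = urlparse(url)
    host = parsed.hostname or ""
    if host == "registry.npmjs.org":
        return "npm"
    if host in ("pypi.org", "files.pythonhosted.org"):
        return "pypi"
    if host == "repo1.maven.org" or "/maven2/" in parsed.path:
        return "maven"
    if host in ("registry-1.docker.io", "docker.io"):
        return "docker"
    return "generic"
```
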
## Technical Notes

- Follow the existing Click patterns from other commands
- Use `get_auth_headers()` from `orchard/auth.py`
- URL parsing can use `urllib.parse`
- Consider adding a URL pattern registry for extensibility
- The `--add` flag should integrate with the existing ensure file parsing in `orchard/ensure.py`

## Example Workflows

```bash
# Simple: cache a single URL
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz

# Cache and add to orchard.ensure for the current project
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz \
  --add --add-path libs/lodash/

# Cache with explicit metadata
orchard cache https://internal.corp/files/custom-lib.tar.gz \
  --type generic \
  --package custom-lib \
  --tag v1.0.0

# Cache and cross-reference to a user project
orchard cache https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz \
  --project my-app \
  --user-package npm-deps \
  --user-tag lodash-4.17.21

# Batch cache from a file
orchard cache --file deps-urls.txt

# Verify the hash while caching
orchard cache https://example.com/file.tar.gz \
  --expected-hash sha256:abc123...
```

---
## Out of Scope (Future Enhancements)

- Automatic transitive dependency resolution (client's responsibility)
- Lockfile parsing (`package-lock.json`, `requirements.txt`) - stretch goal for the CLI
- Cache eviction policies (we cache forever by design)
- Mirroring/sync between Orchard instances
- Format-specific metadata extraction (npm package.json parsing, etc.)

## Success Criteria

- [ ] Can cache any URL and retrieve it by SHA256 hash
- [ ] Cached artifacts persist indefinitely
- [ ] Air-gap mode blocks all public internet access
- [ ] Multiple upstream sources with different auth
- [ ] System projects organize cached packages by format
- [ ] CLI can cache URLs and update orchard.ensure
- [ ] Admin UI for upstream source management
294
docs/integrity-verification.md
Normal file

@@ -0,0 +1,294 @@

# Integrity Verification

Orchard uses content-addressable storage with SHA256 hashing to ensure artifact integrity. This document describes how integrity verification works and how to use it.

## How It Works

### Content-Addressable Storage

Orchard stores artifacts using their SHA256 hash as the unique identifier. This provides several benefits:

1. **Automatic deduplication**: Identical content is stored only once
2. **Built-in integrity**: The artifact ID *is* the content hash
3. **Tamper detection**: Any modification changes the hash, making corruption detectable

When you upload a file:

1. Orchard computes the SHA256 hash of the content
2. The hash becomes the artifact ID (64-character hex string)
3. The file is stored in S3 at `fruits/{hash[0:2]}/{hash[2:4]}/{hash}`
4. The hash and metadata are recorded in the database
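
A small sketch of steps 1-3, showing how the fan-out storage key is derived from the content hash (`compute_storage_key` is an illustrative name, not Orchard's internal API):

```python
# The first two byte-pairs of the hash form the S3 prefix fan-out.
import hashlib

def compute_storage_key(content: bytes) -> str:
    digest = hashlib.sha256(content).hexdigest()  # 64-char lowercase hex artifact ID
    return f"fruits/{digest[0:2]}/{digest[2:4]}/{digest}"

# compute_storage_key(b"hello")
# -> "fruits/2c/f2/2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
```
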
### Hash Format

- Algorithm: SHA256
- Format: 64-character lowercase hexadecimal string
- Example: `dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f`

## Client-Side Verification

### Before Upload

Compute the hash locally before uploading to verify that the server received your content correctly:

```python
import hashlib

import requests

def compute_sha256(content: bytes) -> str:
    return hashlib.sha256(content).hexdigest()

# Compute hash before upload
content = open("myfile.tar.gz", "rb").read()
local_hash = compute_sha256(content)

# Upload the file
response = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
)
result = response.json()

# Verify the server computed the same hash
assert result["artifact_id"] == local_hash, "Hash mismatch!"
```

### Providing an Expected Hash on Upload

You can provide the expected hash in the upload request. The server will reject the upload if the computed hash doesn't match:

```python
response = requests.post(
    f"{base_url}/api/v1/project/{project}/{package}/upload",
    files={"file": ("myfile.tar.gz", content)},
    headers={"X-Checksum-SHA256": local_hash},
)

# Returns 422 if the hash doesn't match
if response.status_code == 422:
    print("Checksum mismatch - upload rejected")
```

### After Download

Verify that the downloaded content matches the expected hash using the response headers:

```python
response = requests.get(
    f"{base_url}/api/v1/project/{project}/{package}/+/{tag}",
    params={"mode": "proxy"},
)

# Get the expected hash from the header
expected_hash = response.headers.get("X-Checksum-SHA256")

# Compute the hash of the downloaded content
actual_hash = compute_sha256(response.content)

# Verify
if actual_hash != expected_hash:
    raise Exception(f"Integrity check failed! Expected {expected_hash}, got {actual_hash}")
```

### Response Headers for Verification

Download responses include several headers for verification:

| Header | Format | Description |
|--------|--------|-------------|
| `X-Checksum-SHA256` | Hex string | SHA256 hash (64 chars) |
| `ETag` | `"<hash>"` | SHA256 hash in quotes |
| `Digest` | `sha-256=<base64>` | RFC 3230 format (base64-encoded) |
| `Content-Length` | Integer | File size in bytes |
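
The `Digest` header carries the same SHA256 as `X-Checksum-SHA256`, just base64-encoded; a small illustrative helper for cross-checking the two:

```python
# Converts the hex hash into the RFC 3230 Digest form; hypothetical helper name.
import base64

def digest_from_hex(sha256_hex: str) -> str:
    return "sha-256=" + base64.b64encode(bytes.fromhex(sha256_hex)).decode("ascii")
```
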
### Server-Side Verification on Download

Request server-side verification during download:

```bash
# Pre-verification: the server verifies before streaming (returns 500 if corrupt)
curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=pre"

# Stream verification: the server verifies while streaming (logs an error if corrupt)
curl "${base_url}/api/v1/project/${project}/${package}/+/${tag}?mode=proxy&verify=true&verify_mode=stream"
```

The `X-Verified` header indicates whether server-side verification was performed:

- `X-Verified: true` - Content was verified by the server

## Server-Side Consistency Check

### Consistency Check Endpoint

Administrators can run a consistency check to verify all stored artifacts:

```bash
curl "${base_url}/api/v1/admin/consistency-check"
```

Response:

```json
{
  "total_artifacts_checked": 1234,
  "healthy": true,
  "orphaned_s3_objects": 0,
  "missing_s3_objects": 0,
  "size_mismatches": 0,
  "orphaned_s3_keys": [],
  "missing_s3_keys": [],
  "size_mismatch_artifacts": []
}
```

### What the Check Verifies

1. **Missing S3 objects**: Database records with no corresponding S3 object
2. **Orphaned S3 objects**: S3 objects with no database record
3. **Size mismatches**: The S3 object size doesn't match the database record

### Running Consistency Checks

**Manual check:**

```bash
# Check all artifacts
curl "${base_url}/api/v1/admin/consistency-check"

# Limit results (for large deployments)
curl "${base_url}/api/v1/admin/consistency-check?limit=100"
```

**Scheduled checks (recommended):**

Set up a cron job or Kubernetes CronJob to run periodic checks:

```yaml
# Kubernetes CronJob example
apiVersion: batch/v1
kind: CronJob
metadata:
  name: orchard-consistency-check
spec:
  schedule: "0 2 * * *"  # Daily at 2 AM
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: check
              image: curlimages/curl
              command:
                - /bin/sh
                - -c
                - |
                  response=$(curl -s "${ORCHARD_URL}/api/v1/admin/consistency-check")
                  # curlimages/curl does not ship jq, so check the flag with grep
                  if ! echo "$response" | grep -q '"healthy": *true'; then
                    echo "ALERT: Consistency check failed!"
                    echo "$response"
                    exit 1
                  fi
                  echo "Consistency check passed"
          restartPolicy: OnFailure
```

## Recovery Procedures

### Corrupted Artifact (Size Mismatch)

If the consistency check reports size mismatches:

1. **Identify affected artifacts:**

   ```bash
   curl "${base_url}/api/v1/admin/consistency-check" | jq '.size_mismatch_artifacts'
   ```

2. **Check whether the artifact can be re-uploaded:**
   - If the original content is available, delete the corrupted artifact and re-upload
   - The same content will produce the same artifact ID

3. **If the original content is lost:**
   - The artifact data is corrupted and cannot be recovered
   - Delete the artifact record and notify affected users
   - Consider restoring from backup if available

### Missing S3 Object

If database records exist but S3 objects are missing:

1. **Identify affected artifacts:**

   ```bash
   curl "${base_url}/api/v1/admin/consistency-check" | jq '.missing_s3_keys'
   ```

2. **Check the S3 bucket:**
   - Verify the S3 bucket exists and is accessible
   - Check S3 access logs for deletion events
   - Check whether objects were moved or lifecycle-deleted

3. **Recovery options:**
   - Restore from S3 versioning (if enabled)
   - Restore from backup
   - Re-upload the original content (if available)
   - Delete orphaned database records

### Orphaned S3 Objects

If S3 objects exist without database records:

1. **Identify orphaned objects:**

   ```bash
   curl "${base_url}/api/v1/admin/consistency-check" | jq '.orphaned_s3_keys'
   ```

2. **Investigate the cause:**
   - Upload interrupted before the database commit?
   - Database record deleted but S3 cleanup failed?

3. **Resolution:**
   - If the content is needed, create the database record manually
   - If the content is not needed, delete the S3 object to reclaim storage

### Preventive Measures

1. **Enable S3 versioning** to recover from accidental deletions
2. **Regular backups** of both the database and the S3 bucket
3. **Scheduled consistency checks** to detect issues early
4. **Monitoring and alerting** on consistency check failures
5. **Audit logging** to track all artifact operations

## Verification in CI/CD

### Verifying Artifacts in Pipelines

```bash
#!/bin/bash
# Download and verify an artifact in a CI pipeline

ARTIFACT_URL="${ORCHARD_URL}/api/v1/project/${PROJECT}/${PACKAGE}/+/${TAG}"

# Download, capturing the response headers
response=$(curl -s -D - "${ARTIFACT_URL}?mode=proxy" -o artifact.tar.gz)
expected_hash=$(echo "$response" | grep -i "X-Checksum-SHA256" | cut -d: -f2 | tr -d ' \r')

# Compute the actual hash
actual_hash=$(sha256sum artifact.tar.gz | cut -d' ' -f1)

# Verify
if [ "$actual_hash" != "$expected_hash" ]; then
  echo "ERROR: Integrity check failed!"
  echo "Expected: $expected_hash"
  echo "Actual: $actual_hash"
  exit 1
fi

echo "Integrity verified: $actual_hash"
```

### Using Server-Side Verification

For critical deployments, use server-side pre-verification:

```bash
# The server verifies before streaming - returns 500 if corrupt
curl -f "${ARTIFACT_URL}?mode=proxy&verify=true&verify_mode=pre" -o artifact.tar.gz
```

This ensures the artifact is verified before any bytes are streamed to your pipeline.
@@ -2,9 +2,9 @@
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
    <link rel="icon" type="image/svg+xml" href="/orchard.svg" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Orchard - Content-Addressable Storage</title>
    <title>Orchard</title>
  </head>
  <body>
    <div id="root"></div>
4717
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
@@ -6,18 +6,34 @@
  "scripts": {
    "dev": "vite",
    "build": "tsc && vite build",
    "preview": "vite preview"
    "preview": "vite preview",
    "test": "vitest",
    "test:run": "vitest run",
    "test:coverage": "vitest run --coverage"
  },
  "dependencies": {
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
    "react-router-dom": "^6.21.3"
    "react-router-dom": "6.28.0"
  },
  "devDependencies": {
    "@testing-library/jest-dom": "^6.4.2",
    "@testing-library/react": "^14.2.1",
    "@testing-library/user-event": "^14.5.2",
    "@types/react": "^18.2.48",
    "@types/react-dom": "^18.2.18",
    "@vitejs/plugin-react": "^4.2.1",
    "@vitest/coverage-v8": "^1.3.1",
    "jsdom": "^24.0.0",
    "typescript": "^5.3.3",
    "vite": "^5.0.12"
    "vite": "^5.0.12",
    "vitest": "^1.3.1"
  },
  "overrides": {
    "ws": "8.18.0",
    "ufo": "1.5.4",
    "rollup": "4.52.4",
    "caniuse-lite": "1.0.30001692",
    "baseline-browser-mapping": "2.9.5"
  }
}
18
frontend/public/orchard.svg
Normal file

@@ -0,0 +1,18 @@

<svg width="32" height="32" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
  <!-- Green background -->
  <rect width="24" height="24" rx="4" fill="#4CAF50"/>
  <!-- Three fruit trees representing an orchard - shifted down to center -->
  <g transform="translate(0, 2)">
    <!-- Left tree - rounded canopy -->
    <path d="M6 14 Q6 8 3 8 Q6 4 6 4 Q6 4 9 8 Q6 8 6 14" fill="white" opacity="0.7"/>
    <rect x="5.25" y="13" width="1.5" height="4" fill="white" opacity="0.7"/>
    <!-- Center tree - larger rounded canopy -->
    <path d="M12 12 Q12 5 8 5 Q12 1 12 1 Q12 1 16 5 Q12 5 12 12" fill="white"/>
    <rect x="11.25" y="11" width="1.5" height="5" fill="white"/>
    <!-- Right tree - rounded canopy -->
    <path d="M18 14 Q18 8 15 8 Q18 4 18 4 Q18 4 21 8 Q18 8 18 14" fill="white" opacity="0.7"/>
    <rect x="17.25" y="13" width="1.5" height="4" fill="white" opacity="0.7"/>
    <!-- Ground -->
    <ellipse cx="12" cy="18" rx="8" ry="1.5" fill="white" opacity="0.4"/>
  </g>
</svg>
@@ -1,18 +1,80 @@
import { Routes, Route } from 'react-router-dom';
import { Routes, Route, Navigate, useLocation } from 'react-router-dom';
import { AuthProvider, useAuth } from './contexts/AuthContext';
import { TeamProvider } from './contexts/TeamContext';
import Layout from './components/Layout';
import Home from './pages/Home';
import ProjectPage from './pages/ProjectPage';
import PackagePage from './pages/PackagePage';
import Dashboard from './pages/Dashboard';
import LoginPage from './pages/LoginPage';
import ChangePasswordPage from './pages/ChangePasswordPage';
import APIKeysPage from './pages/APIKeysPage';
import AdminUsersPage from './pages/AdminUsersPage';
import AdminOIDCPage from './pages/AdminOIDCPage';
import AdminCachePage from './pages/AdminCachePage';
import ProjectSettingsPage from './pages/ProjectSettingsPage';
import TeamsPage from './pages/TeamsPage';
import TeamDashboardPage from './pages/TeamDashboardPage';
import TeamSettingsPage from './pages/TeamSettingsPage';
import TeamMembersPage from './pages/TeamMembersPage';

// Component that checks if user must change password
function RequirePasswordChange({ children }: { children: React.ReactNode }) {
  const { user, loading } = useAuth();
  const location = useLocation();

  if (loading) {
    return null;
  }

  // If user is logged in and must change password, redirect to change password page
  if (user?.must_change_password && location.pathname !== '/change-password') {
    return <Navigate to="/change-password" replace />;
  }

  return <>{children}</>;
}

function AppRoutes() {
  return (
    <Routes>
      <Route path="/login" element={<LoginPage />} />
      <Route path="/change-password" element={<ChangePasswordPage />} />
      <Route
        path="*"
        element={
          <RequirePasswordChange>
            <Layout>
              <Routes>
                <Route path="/" element={<Home />} />
                <Route path="/dashboard" element={<Dashboard />} />
                <Route path="/settings/api-keys" element={<APIKeysPage />} />
                <Route path="/admin/users" element={<AdminUsersPage />} />
                <Route path="/admin/oidc" element={<AdminOIDCPage />} />
                <Route path="/admin/cache" element={<AdminCachePage />} />
                <Route path="/teams" element={<TeamsPage />} />
                <Route path="/teams/:slug" element={<TeamDashboardPage />} />
                <Route path="/teams/:slug/settings" element={<TeamSettingsPage />} />
                <Route path="/teams/:slug/members" element={<TeamMembersPage />} />
                <Route path="/project/:projectName" element={<ProjectPage />} />
                <Route path="/project/:projectName/settings" element={<ProjectSettingsPage />} />
                <Route path="/project/:projectName/:packageName" element={<PackagePage />} />
              </Routes>
            </Layout>
          </RequirePasswordChange>
        }
      />
    </Routes>
  );
}

function App() {
  return (
    <Layout>
      <Routes>
        <Route path="/" element={<Home />} />
        <Route path="/project/:projectName" element={<ProjectPage />} />
        <Route path="/project/:projectName/:packageName" element={<PackagePage />} />
      </Routes>
    </Layout>
    <AuthProvider>
      <TeamProvider>
        <AppRoutes />
      </TeamProvider>
    </AuthProvider>
  );
}
@@ -1,22 +1,176 @@
|
||||
import { Project, Package, Tag, Artifact, UploadResponse } from './types';
|
||||
import {
|
||||
Project,
|
||||
Package,
|
||||
Tag,
|
||||
TagDetail,
|
||||
Artifact,
|
||||
ArtifactDetail,
|
||||
UploadResponse,
|
||||
PaginatedResponse,
|
||||
ListParams,
|
||||
TagListParams,
|
||||
PackageListParams,
|
||||
ArtifactListParams,
|
||||
ProjectListParams,
|
||||
GlobalSearchResponse,
|
||||
Stats,
|
||||
DeduplicationStats,
|
||||
TimelineStats,
|
||||
CrossProjectStats,
|
||||
User,
|
||||
LoginCredentials,
|
||||
APIKey,
|
||||
APIKeyCreate,
|
||||
APIKeyCreateResponse,
|
||||
AdminUser,
|
||||
UserCreate,
|
||||
UserUpdate,
|
||||
AccessPermission,
|
||||
AccessPermissionCreate,
|
||||
AccessPermissionUpdate,
|
||||
AccessLevel,
|
||||
OIDCConfig,
|
||||
OIDCConfigUpdate,
|
||||
OIDCStatus,
|
||||
PackageVersion,
|
||||
ArtifactDependenciesResponse,
|
||||
ReverseDependenciesResponse,
|
||||
DependencyResolutionResponse,
|
||||
TeamDetail,
|
||||
TeamMember,
|
||||
TeamCreate,
|
||||
TeamUpdate,
|
||||
TeamMemberCreate,
|
||||
TeamMemberUpdate,
|
||||
UpstreamSource,
|
||||
UpstreamSourceCreate,
|
||||
UpstreamSourceUpdate,
|
||||
UpstreamSourceTestResult,
|
||||
} from './types';
|
||||
|
||||
const API_BASE = '/api/v1';
|
||||
|
||||
// Custom error classes for better error handling
|
||||
export class ApiError extends Error {
|
||||
status: number;
|
||||
|
||||
constructor(message: string, status: number) {
|
||||
super(message);
|
||||
this.name = 'ApiError';
|
||||
this.status = status;
|
||||
}
|
||||
}
|
||||
|
||||
export class UnauthorizedError extends ApiError {
|
||||
constructor(message: string = 'Not authenticated') {
|
||||
super(message, 401);
|
||||
this.name = 'UnauthorizedError';
|
||||
}
|
||||
}
|
||||
|
||||
export class ForbiddenError extends ApiError {
|
||||
constructor(message: string = 'Access denied') {
|
||||
super(message, 403);
|
||||
this.name = 'ForbiddenError';
|
||||
}
|
||||
}
|
||||
|
||||
async function handleResponse<T>(response: Response): Promise<T> {
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
const message = error.detail || `HTTP ${response.status}`;
|
||||
|
||||
if (response.status === 401) {
|
||||
throw new UnauthorizedError(message);
|
||||
}
|
||||
if (response.status === 403) {
|
||||
throw new ForbiddenError(message);
|
||||
}
|
||||
throw new ApiError(message, response.status);
|
||||
}
|
||||
return response.json();
|
||||
}
|
||||
|
||||
// Project API
|
||||
export async function listProjects(): Promise<Project[]> {
|
||||
const response = await fetch(`${API_BASE}/projects`);
|
||||
return handleResponse<Project[]>(response);
|
||||
function buildQueryString(params: Record<string, unknown>): string {
|
||||
const searchParams = new URLSearchParams();
|
||||
Object.entries(params).forEach(([key, value]) => {
|
||||
if (value !== undefined && value !== null && value !== '') {
|
||||
searchParams.append(key, String(value));
|
||||
}
|
||||
});
|
||||
const query = searchParams.toString();
|
||||
return query ? `?${query}` : '';
|
||||
}
|
||||
|
||||
export async function createProject(data: { name: string; description?: string; is_public?: boolean }): Promise<Project> {
|
||||
// Auth API
|
||||
export async function login(credentials: LoginCredentials): Promise<User> {
|
||||
const response = await fetch(`${API_BASE}/auth/login`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(credentials),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<User>(response);
|
||||
}
|
||||
|
||||
export async function logout(): Promise<void> {
|
||||
const response = await fetch(`${API_BASE}/auth/logout`, {
|
||||
method: 'POST',
|
||||
credentials: 'include',
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
export async function changePassword(currentPassword: string, newPassword: string): Promise<void> {
|
||||
const response = await fetch(`${API_BASE}/auth/change-password`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ current_password: currentPassword, new_password: newPassword }),
|
||||
credentials: 'include',
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
export async function getCurrentUser(): Promise<User | null> {
|
||||
try {
|
||||
const response = await fetch(`${API_BASE}/auth/me`, {
|
||||
credentials: 'include',
|
||||
});
|
||||
if (response.status === 401) {
|
||||
return null;
|
||||
}
|
||||
return handleResponse<User>(response);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Global Search API
|
||||
export async function globalSearch(query: string, limit: number = 5): Promise<GlobalSearchResponse> {
|
||||
const params = buildQueryString({ q: query, limit });
|
||||
const response = await fetch(`${API_BASE}/search${params}`);
|
||||
return handleResponse<GlobalSearchResponse>(response);
|
||||
}
|
||||
|
||||
// Project API
|
||||
export async function listProjects(params: ProjectListParams = {}): Promise<PaginatedResponse<Project>> {
|
||||
const query = buildQueryString(params as Record<string, unknown>);
|
||||
const response = await fetch(`${API_BASE}/projects${query}`);
|
||||
return handleResponse<PaginatedResponse<Project>>(response);
|
||||
}
|
||||
|
||||
export async function listProjectsSimple(params: ListParams = {}): Promise<Project[]> {
|
||||
const data = await listProjects(params);
|
||||
return data.items;
|
||||
}
|
||||
|
||||
export async function createProject(data: { name: string; description?: string; is_public?: boolean; team_id?: string }): Promise<Project> {
|
||||
const response = await fetch(`${API_BASE}/projects`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -30,10 +184,45 @@ export async function getProject(name: string): Promise<Project> {
|
||||
return handleResponse<Project>(response);
|
||||
}
|
||||
|
||||
export async function updateProject(
|
||||
projectName: string,
|
||||
data: { description?: string; is_public?: boolean }
|
||||
): Promise<Project> {
|
||||
const response = await fetch(`${API_BASE}/projects/${projectName}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(data),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<Project>(response);
|
||||
}
|
||||
|
||||
export async function deleteProject(projectName: string): Promise<void> {
|
||||
const response = await fetch(`${API_BASE}/projects/${projectName}`, {
|
||||
method: 'DELETE',
|
||||
credentials: 'include',
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Package API
|
||||
export async function listPackages(projectName: string): Promise<Package[]> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/packages`);
|
||||
return handleResponse<Package[]>(response);
|
||||
export async function listPackages(projectName: string, params: PackageListParams = {}): Promise<PaginatedResponse<Package>> {
|
||||
const query = buildQueryString(params as Record<string, unknown>);
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/packages${query}`);
|
||||
return handleResponse<PaginatedResponse<Package>>(response);
|
||||
}
|
||||
|
||||
export async function listPackagesSimple(projectName: string, params: PackageListParams = {}): Promise<Package[]> {
|
||||
const data = await listPackages(projectName, params);
|
||||
return data.items;
|
||||
}
|
||||
|
||||
export async function getPackage(projectName: string, packageName: string): Promise<Package> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/packages/${packageName}`);
|
||||
return handleResponse<Package>(response);
|
||||
}
|
||||
|
||||
export async function createPackage(projectName: string, data: { name: string; description?: string }): Promise<Package> {
|
||||
@@ -46,9 +235,20 @@ export async function createPackage(projectName: string, data: { name: string; d
|
||||
}
|
||||
|
||||
// Tag API
|
||||
export async function listTags(projectName: string, packageName: string): Promise<Tag[]> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/tags`);
|
||||
return handleResponse<Tag[]>(response);
|
||||
export async function listTags(projectName: string, packageName: string, params: TagListParams = {}): Promise<PaginatedResponse<TagDetail>> {
|
||||
const query = buildQueryString(params as Record<string, unknown>);
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/tags${query}`);
|
||||
return handleResponse<PaginatedResponse<TagDetail>>(response);
|
||||
}
|
||||
|
||||
export async function listTagsSimple(projectName: string, packageName: string, params: TagListParams = {}): Promise<TagDetail[]> {
|
||||
const data = await listTags(projectName, packageName, params);
|
||||
return data.items;
|
||||
}
|
||||
|
||||
export async function getTag(projectName: string, packageName: string, tagName: string): Promise<TagDetail> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/tags/${tagName}`);
|
||||
return handleResponse<TagDetail>(response);
|
||||
}
|
||||
|
||||
export async function createTag(projectName: string, packageName: string, data: { name: string; artifact_id: string }): Promise<Tag> {
|
||||
@@ -61,18 +261,37 @@ export async function createTag(projectName: string, packageName: string, data:
|
||||
}
|
||||
|
||||
// Artifact API
|
||||
export async function getArtifact(artifactId: string): Promise<Artifact> {
|
||||
export async function getArtifact(artifactId: string): Promise<ArtifactDetail> {
|
||||
const response = await fetch(`${API_BASE}/artifact/${artifactId}`);
|
||||
return handleResponse<Artifact>(response);
|
||||
return handleResponse<ArtifactDetail>(response);
|
||||
}
|
||||
|
||||
export async function listPackageArtifacts(
|
||||
projectName: string,
|
||||
packageName: string,
|
||||
params: ArtifactListParams = {}
|
||||
): Promise<PaginatedResponse<Artifact & { tags: string[] }>> {
|
||||
const query = buildQueryString(params as Record<string, unknown>);
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/artifacts${query}`);
|
||||
return handleResponse<PaginatedResponse<Artifact & { tags: string[] }>>(response);
|
||||
}
|
||||
|
||||
// Upload
|
||||
export async function uploadArtifact(projectName: string, packageName: string, file: File, tag?: string): Promise<UploadResponse> {
|
||||
export async function uploadArtifact(
|
||||
projectName: string,
|
||||
packageName: string,
|
||||
file: File,
|
||||
tag?: string,
|
||||
version?: string
|
||||
): Promise<UploadResponse> {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file);
|
||||
if (tag) {
|
||||
formData.append('tag', tag);
|
||||
}
|
||||
if (version) {
|
||||
formData.append('version', version);
|
||||
}
|
||||
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/upload`, {
|
||||
method: 'POST',
|
||||
@@ -85,3 +304,445 @@ export async function uploadArtifact(projectName: string, packageName: string, f
|
||||
export function getDownloadUrl(projectName: string, packageName: string, ref: string): string {
|
||||
return `${API_BASE}/project/${projectName}/${packageName}/+/${ref}`;
|
||||
}
|
||||
|
||||
// Stats API
|
||||
export async function getStats(): Promise<Stats> {
|
||||
const response = await fetch(`${API_BASE}/stats`);
|
||||
return handleResponse<Stats>(response);
|
||||
}
|
||||
|
||||
export async function getDeduplicationStats(): Promise<DeduplicationStats> {
|
||||
const response = await fetch(`${API_BASE}/stats/deduplication`);
|
||||
return handleResponse<DeduplicationStats>(response);
|
||||
}
|
||||
|
||||
export async function getTimelineStats(
|
||||
period: 'day' | 'week' | 'month' = 'day',
|
||||
fromDate?: string,
|
||||
toDate?: string
|
||||
): Promise<TimelineStats> {
|
||||
const params = buildQueryString({ period, from_date: fromDate, to_date: toDate });
|
||||
const response = await fetch(`${API_BASE}/stats/timeline${params}`);
|
||||
return handleResponse<TimelineStats>(response);
|
||||
}
|
||||
|
||||
export async function getCrossProjectStats(): Promise<CrossProjectStats> {
|
||||
const response = await fetch(`${API_BASE}/stats/cross-project`);
|
||||
return handleResponse<CrossProjectStats>(response);
|
||||
}
|
||||
|
||||
export async function listAPIKeys(): Promise<APIKey[]> {
|
||||
const response = await fetch(`${API_BASE}/auth/keys`, {
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<APIKey[]>(response);
|
||||
}
|
||||
|
||||
export async function createAPIKey(data: APIKeyCreate): Promise<APIKeyCreateResponse> {
|
||||
const response = await fetch(`${API_BASE}/auth/keys`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(data),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<APIKeyCreateResponse>(response);
|
||||
}
|
||||
|
||||
export async function deleteAPIKey(id: string): Promise<void> {
|
||||
const response = await fetch(`${API_BASE}/auth/keys/${id}`, {
|
||||
method: 'DELETE',
|
||||
credentials: 'include',
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Admin User Management API
|
||||
export async function listUsers(): Promise<AdminUser[]> {
|
||||
const response = await fetch(`${API_BASE}/admin/users`, {
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<AdminUser[]>(response);
|
||||
}
|
||||
|
||||
export async function createUser(data: UserCreate): Promise<AdminUser> {
|
||||
const response = await fetch(`${API_BASE}/admin/users`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(data),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<AdminUser>(response);
|
||||
}
|
||||
|
||||
export async function updateUser(username: string, data: UserUpdate): Promise<AdminUser> {
|
||||
const response = await fetch(`${API_BASE}/admin/users/${username}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(data),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<AdminUser>(response);
|
||||
}
|
||||
|
||||
export async function resetUserPassword(username: string, newPassword: string): Promise<void> {
|
||||
const response = await fetch(`${API_BASE}/admin/users/${username}/reset-password`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ new_password: newPassword }),
|
||||
credentials: 'include',
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Access Permission API
|
||||
export interface MyAccessResponse {
|
||||
project: string;
|
||||
access_level: AccessLevel | null;
|
||||
is_owner: boolean;
|
||||
}
|
||||
|
||||
export async function getMyProjectAccess(projectName: string): Promise<MyAccessResponse> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/my-access`, {
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<MyAccessResponse>(response);
|
||||
}
|
||||
|
||||
export async function listProjectPermissions(projectName: string): Promise<AccessPermission[]> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/permissions`, {
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<AccessPermission[]>(response);
|
||||
}
|
||||
|
||||
export async function grantProjectAccess(
|
||||
projectName: string,
|
||||
data: AccessPermissionCreate
|
||||
): Promise<AccessPermission> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/permissions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(data),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<AccessPermission>(response);
|
||||
}
|
||||
|
||||
export async function updateProjectAccess(
|
||||
projectName: string,
|
||||
username: string,
|
||||
data: AccessPermissionUpdate
|
||||
): Promise<AccessPermission> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/permissions/${username}`, {
|
||||
method: 'PUT',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(data),
|
||||
credentials: 'include',
|
||||
});
|
||||
return handleResponse<AccessPermission>(response);
|
||||
}
|
||||
|
||||
export async function revokeProjectAccess(projectName: string, username: string): Promise<void> {
|
||||
const response = await fetch(`${API_BASE}/project/${projectName}/permissions/${username}`, {
|
||||
method: 'DELETE',
|
||||
credentials: 'include',
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
|
||||
throw new Error(error.detail || `HTTP ${response.status}`);
|
||||
}
|
||||
}
|
||||
|
||||
// OIDC API
export async function getOIDCStatus(): Promise<OIDCStatus> {
  const response = await fetch(`${API_BASE}/auth/oidc/status`);
  return handleResponse<OIDCStatus>(response);
}

export async function getOIDCConfig(): Promise<OIDCConfig> {
  const response = await fetch(`${API_BASE}/auth/oidc/config`, {
    credentials: 'include',
  });
  return handleResponse<OIDCConfig>(response);
}

export async function updateOIDCConfig(data: OIDCConfigUpdate): Promise<OIDCConfig> {
  const response = await fetch(`${API_BASE}/auth/oidc/config`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<OIDCConfig>(response);
}

export function getOIDCLoginUrl(returnTo?: string): string {
  const params = new URLSearchParams();
  if (returnTo) {
    params.set('return_to', returnTo);
  }
  const query = params.toString();
  return `${API_BASE}/auth/oidc/login${query ? `?${query}` : ''}`;
}
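Note that getOIDCLoginUrl is the one helper here that builds a URL instead of issuing a fetch, since the OIDC flow is a full-page redirect. A hedged caller sketch, assuming OIDCStatus exposes an enabled flag (that field is not shown in this diff):

import { getOIDCStatus, getOIDCLoginUrl } from './api';

// Only redirect when the server reports OIDC as enabled.
// `status.enabled` is an assumed field on OIDCStatus.
async function startOIDCLogin(): Promise<void> {
  const status = await getOIDCStatus();
  if (status.enabled) {
    window.location.href = getOIDCLoginUrl(window.location.pathname);
  }
}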
// Version API
export async function listVersions(
  projectName: string,
  packageName: string,
  params: ListParams = {}
): Promise<PaginatedResponse<PackageVersion>> {
  const query = buildQueryString(params as Record<string, unknown>);
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/versions${query}`);
  return handleResponse<PaginatedResponse<PackageVersion>>(response);
}

export async function getVersion(
  projectName: string,
  packageName: string,
  version: string
): Promise<PackageVersion> {
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/versions/${version}`);
  return handleResponse<PackageVersion>(response);
}

export async function deleteVersion(
  projectName: string,
  packageName: string,
  version: string
): Promise<void> {
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/versions/${version}`, {
    method: 'DELETE',
    credentials: 'include',
  });
  if (!response.ok) {
    const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
    throw new Error(error.detail || `HTTP ${response.status}`);
  }
}
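A hedged sketch of how the version endpoints compose. The `items` field on PaginatedResponse is an assumption (its shape is not shown in this diff), and the names are placeholders.

import { listVersions, deleteVersion } from './api';

// List the first page of versions for a package, then delete one of them.
// 'demo-project'/'libfoo'/'1.0.0' are placeholder values.
async function pruneVersion(): Promise<void> {
  const page = await listVersions('demo-project', 'libfoo', { page: 1, limit: 20 });
  console.log(page.items.length, 'versions on the first page'); // `items` is assumed
  await deleteVersion('demo-project', 'libfoo', '1.0.0');
}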
// Dependency API
export async function getArtifactDependencies(artifactId: string): Promise<ArtifactDependenciesResponse> {
  const response = await fetch(`${API_BASE}/artifact/${artifactId}/dependencies`);
  return handleResponse<ArtifactDependenciesResponse>(response);
}

export async function getDependenciesByRef(
  projectName: string,
  packageName: string,
  ref: string
): Promise<ArtifactDependenciesResponse> {
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/+/${ref}/dependencies`);
  return handleResponse<ArtifactDependenciesResponse>(response);
}

export async function getReverseDependencies(
  projectName: string,
  packageName: string,
  params: { page?: number; limit?: number } = {}
): Promise<ReverseDependenciesResponse> {
  const query = buildQueryString(params as Record<string, unknown>);
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/reverse-dependencies${query}`);
  return handleResponse<ReverseDependenciesResponse>(response);
}

export async function resolveDependencies(
  projectName: string,
  packageName: string,
  ref: string
): Promise<DependencyResolutionResponse> {
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/+/${ref}/resolve`);
  return handleResponse<DependencyResolutionResponse>(response);
}

export async function getEnsureFile(
  projectName: string,
  packageName: string,
  ref: string
): Promise<string> {
  const response = await fetch(`${API_BASE}/project/${projectName}/${packageName}/+/${ref}/ensure`);
  if (!response.ok) {
    const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
    throw new ApiError(error.detail || `HTTP ${response.status}`, response.status);
  }
  return response.text();
}
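Note the asymmetry above: resolveDependencies returns parsed JSON via handleResponse, while getEnsureFile returns the raw body with response.text(). A small sketch using placeholder names:

import { resolveDependencies, getEnsureFile } from './api';

// Resolve the dependency closure for a ref, then fetch the plain-text ensure
// file for the same ref. 'demo-project'/'libfoo'/'v1.2.3' are placeholders.
async function inspectRef(): Promise<void> {
  const resolution = await resolveDependencies('demo-project', 'libfoo', 'v1.2.3');
  console.log(resolution);
  const ensureText = await getEnsureFile('demo-project', 'libfoo', 'v1.2.3');
  console.log(ensureText); // raw text, not JSON
}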
// Team API
export async function listTeams(params: ListParams = {}): Promise<PaginatedResponse<TeamDetail>> {
  const query = buildQueryString(params as Record<string, unknown>);
  const response = await fetch(`${API_BASE}/teams${query}`, {
    credentials: 'include',
  });
  return handleResponse<PaginatedResponse<TeamDetail>>(response);
}

export async function createTeam(data: TeamCreate): Promise<TeamDetail> {
  const response = await fetch(`${API_BASE}/teams`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<TeamDetail>(response);
}

export async function getTeam(slug: string): Promise<TeamDetail> {
  const response = await fetch(`${API_BASE}/teams/${slug}`, {
    credentials: 'include',
  });
  return handleResponse<TeamDetail>(response);
}

export async function updateTeam(slug: string, data: TeamUpdate): Promise<TeamDetail> {
  const response = await fetch(`${API_BASE}/teams/${slug}`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<TeamDetail>(response);
}

export async function deleteTeam(slug: string): Promise<void> {
  const response = await fetch(`${API_BASE}/teams/${slug}`, {
    method: 'DELETE',
    credentials: 'include',
  });
  if (!response.ok) {
    const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
    throw new ApiError(error.detail || `HTTP ${response.status}`, response.status);
  }
}

export async function listTeamMembers(slug: string): Promise<TeamMember[]> {
  const response = await fetch(`${API_BASE}/teams/${slug}/members`, {
    credentials: 'include',
  });
  return handleResponse<TeamMember[]>(response);
}

export async function addTeamMember(slug: string, data: TeamMemberCreate): Promise<TeamMember> {
  const response = await fetch(`${API_BASE}/teams/${slug}/members`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<TeamMember>(response);
}

export async function updateTeamMember(
  slug: string,
  username: string,
  data: TeamMemberUpdate
): Promise<TeamMember> {
  const response = await fetch(`${API_BASE}/teams/${slug}/members/${username}`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<TeamMember>(response);
}

export async function removeTeamMember(slug: string, username: string): Promise<void> {
  const response = await fetch(`${API_BASE}/teams/${slug}/members/${username}`, {
    method: 'DELETE',
    credentials: 'include',
  });
  if (!response.ok) {
    const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
    throw new ApiError(error.detail || `HTTP ${response.status}`, response.status);
  }
}

export async function listTeamProjects(
  slug: string,
  params: ProjectListParams = {}
): Promise<PaginatedResponse<Project>> {
  const query = buildQueryString(params as Record<string, unknown>);
  const response = await fetch(`${API_BASE}/teams/${slug}/projects${query}`, {
    credentials: 'include',
  });
  return handleResponse<PaginatedResponse<Project>>(response);
}
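A hedged sketch of the team lifecycle. The TeamCreate and TeamMemberCreate field names used here (name, slug, username, role) are assumptions for illustration; the actual interfaces are not shown in this diff.

import { createTeam, addTeamMember, listTeamMembers } from './api';

// Create a team, add a member, then list members. Payload field names are
// assumed shapes, not confirmed by this diff.
async function bootstrapTeam(): Promise<void> {
  const team = await createTeam({ name: 'Platform', slug: 'platform' });
  await addTeamMember(team.slug, { username: 'alice', role: 'admin' });
  console.log(await listTeamMembers(team.slug));
}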
// User search (for autocomplete)
export interface UserSearchResult {
  id: string;
  username: string;
  is_admin: boolean;
}

export async function searchUsers(query: string, limit: number = 10): Promise<UserSearchResult[]> {
  const response = await fetch(`${API_BASE}/users/search?q=${encodeURIComponent(query)}&limit=${limit}`, {
    credentials: 'include',
  });
  return handleResponse<UserSearchResult[]>(response);
}
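Since this endpoint backs an autocomplete, callers would typically debounce it. A minimal sketch; the 250 ms delay is an arbitrary illustrative choice:

import { searchUsers } from './api';

// Debounce searchUsers for an autocomplete input.
let searchTimer: ReturnType<typeof setTimeout> | undefined;

function onQueryChange(q: string, render: (usernames: string[]) => void): void {
  clearTimeout(searchTimer);
  searchTimer = setTimeout(async () => {
    const users = await searchUsers(q, 10);
    render(users.map((u) => u.username));
  }, 250);
}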
// Upstream Sources Admin API
export interface UpstreamSourceListParams {
  enabled?: boolean;
  source_type?: string;
}

export async function listUpstreamSources(params: UpstreamSourceListParams = {}): Promise<UpstreamSource[]> {
  const query = buildQueryString(params as Record<string, unknown>);
  const response = await fetch(`${API_BASE}/admin/upstream-sources${query}`, {
    credentials: 'include',
  });
  return handleResponse<UpstreamSource[]>(response);
}

export async function createUpstreamSource(data: UpstreamSourceCreate): Promise<UpstreamSource> {
  const response = await fetch(`${API_BASE}/admin/upstream-sources`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<UpstreamSource>(response);
}

export async function getUpstreamSource(id: string): Promise<UpstreamSource> {
  const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
    credentials: 'include',
  });
  return handleResponse<UpstreamSource>(response);
}

export async function updateUpstreamSource(id: string, data: UpstreamSourceUpdate): Promise<UpstreamSource> {
  const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data),
    credentials: 'include',
  });
  return handleResponse<UpstreamSource>(response);
}

export async function deleteUpstreamSource(id: string): Promise<void> {
  const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}`, {
    method: 'DELETE',
    credentials: 'include',
  });
  if (!response.ok) {
    const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
    throw new ApiError(error.detail || `HTTP ${response.status}`, response.status);
  }
}

export async function testUpstreamSource(id: string): Promise<UpstreamSourceTestResult> {
  const response = await fetch(`${API_BASE}/admin/upstream-sources/${id}/test`, {
    method: 'POST',
    credentials: 'include',
  });
  return handleResponse<UpstreamSourceTestResult>(response);
}
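A hedged sketch that registers an upstream source and immediately tests connectivity. The UpstreamSourceCreate fields shown (name, source_type, url) are assumed; only source_type is confirmed by the list-params interface above.

import { createUpstreamSource, testUpstreamSource } from './api';

// Register a source, then hit its /test endpoint. Field names on the create
// payload are illustrative assumptions.
async function addAndTest(): Promise<void> {
  const source = await createUpstreamSource({
    name: 'pypi',
    source_type: 'pypi',
    url: 'https://pypi.org/simple',
  });
  const result = await testUpstreamSource(source.id);
  console.log(result);
}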
145
frontend/src/components/AccessManagement.css
Normal file
@@ -0,0 +1,145 @@
.access-management {
  margin-top: 1.5rem;
}

.access-management__header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  margin-bottom: 1rem;
}

.access-management__header h3 {
  margin: 0;
}

.access-management__form {
  background: var(--bg-tertiary);
  padding: 1rem;
  border-radius: 6px;
  margin-bottom: 1rem;
}

.access-management__form .form-row {
  display: flex;
  gap: 1rem;
  align-items: flex-end;
}

.access-management__form .form-group {
  flex: 1;
}

.access-management__form .form-group:last-of-type {
  flex: 0 0 auto;
}

.access-management__list {
  margin-top: 1rem;
}

.access-table {
  width: 100%;
  border-collapse: collapse;
}

.access-table th,
.access-table td {
  padding: 0.75rem;
  text-align: left;
  border-bottom: 1px solid var(--border-color);
}

.access-table th {
  font-weight: 600;
  color: var(--text-secondary);
  font-size: 0.875rem;
}

.access-table td.actions {
  display: flex;
  gap: 0.5rem;
}

.access-badge {
  display: inline-block;
  padding: 0.25rem 0.5rem;
  border-radius: 4px;
  font-size: 0.75rem;
  font-weight: 600;
  text-transform: capitalize;
}

.access-badge--read {
  background: var(--bg-tertiary);
  color: var(--text-secondary);
}

.access-badge--write {
  background: var(--color-info-bg);
  color: var(--color-info);
}

.access-badge--admin {
  background: var(--color-success-bg);
  color: var(--color-success);
}

.btn-sm {
  padding: 0.25rem 0.5rem;
  font-size: 0.875rem;
}

.btn-danger {
  background: var(--color-error);
  color: white;
}

.btn-danger:hover {
  background: #c0392b;
}

/* Expired permission styling */
.expired {
  color: var(--color-error);
  font-weight: 500;
}

/* Date input styling in table */
.access-table input[type="date"] {
  padding: 0.25rem 0.5rem;
  background: var(--bg-tertiary);
  border: 1px solid var(--border-primary);
  border-radius: 4px;
  font-size: 0.875rem;
  color: var(--text-primary);
}

/* Access source styling */
.access-source {
  display: inline-block;
  padding: 0.2rem 0.4rem;
  border-radius: 4px;
  font-size: 0.75rem;
  font-weight: 500;
}

.access-source--explicit {
  background: var(--bg-tertiary);
  color: var(--text-secondary);
}

.access-source--team {
  background: var(--color-info-bg, #e3f2fd);
  color: var(--color-info, #1976d2);
}

/* Team access row styling */
.team-access-row {
  background: var(--bg-secondary, #fafafa);
}

.team-access-row td.actions .text-muted {
  font-size: 0.8125rem;
  font-style: italic;
}
315
frontend/src/components/AccessManagement.tsx
Normal file
@@ -0,0 +1,315 @@
import { useState, useEffect, useCallback } from 'react';
import { AccessPermission, AccessLevel } from '../types';
import {
  listProjectPermissions,
  grantProjectAccess,
  updateProjectAccess,
  revokeProjectAccess,
} from '../api';
import './AccessManagement.css';

interface AccessManagementProps {
  projectName: string;
}

export function AccessManagement({ projectName }: AccessManagementProps) {
  const [permissions, setPermissions] = useState<AccessPermission[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [success, setSuccess] = useState<string | null>(null);

  // Form state
  const [showAddForm, setShowAddForm] = useState(false);
  const [newUsername, setNewUsername] = useState('');
  const [newLevel, setNewLevel] = useState<AccessLevel>('read');
  const [newExpiresAt, setNewExpiresAt] = useState('');
  const [submitting, setSubmitting] = useState(false);

  // Edit state
  const [editingUser, setEditingUser] = useState<string | null>(null);
  const [editLevel, setEditLevel] = useState<AccessLevel>('read');
  const [editExpiresAt, setEditExpiresAt] = useState('');

  const loadPermissions = useCallback(async () => {
    try {
      setLoading(true);
      const data = await listProjectPermissions(projectName);
      setPermissions(data);
      setError(null);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to load permissions');
    } finally {
      setLoading(false);
    }
  }, [projectName]);

  useEffect(() => {
    loadPermissions();
  }, [loadPermissions]);

  const handleGrant = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!newUsername.trim()) return;

    try {
      setSubmitting(true);
      setError(null);
      await grantProjectAccess(projectName, {
        username: newUsername.trim(),
        level: newLevel,
        expires_at: newExpiresAt || undefined,
      });
      setSuccess(`Access granted to ${newUsername}`);
      setNewUsername('');
      setNewLevel('read');
      setNewExpiresAt('');
      setShowAddForm(false);
      await loadPermissions();
      setTimeout(() => setSuccess(null), 3000);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to grant access');
    } finally {
      setSubmitting(false);
    }
  };

  const handleUpdate = async (username: string) => {
    try {
      setSubmitting(true);
      setError(null);
      await updateProjectAccess(projectName, username, {
        level: editLevel,
        expires_at: editExpiresAt || null,
      });
      setSuccess(`Updated access for ${username}`);
      setEditingUser(null);
      await loadPermissions();
      setTimeout(() => setSuccess(null), 3000);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to update access');
    } finally {
      setSubmitting(false);
    }
  };

  const handleRevoke = async (username: string) => {
    if (!confirm(`Revoke access for ${username}?`)) return;

    try {
      setSubmitting(true);
      setError(null);
      await revokeProjectAccess(projectName, username);
      setSuccess(`Access revoked for ${username}`);
      await loadPermissions();
      setTimeout(() => setSuccess(null), 3000);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to revoke access');
    } finally {
      setSubmitting(false);
    }
  };

  const startEdit = (permission: AccessPermission) => {
    setEditingUser(permission.user_id);
    setEditLevel(permission.level as AccessLevel);
    // Convert ISO date to local date format for date input
    setEditExpiresAt(permission.expires_at ? permission.expires_at.split('T')[0] : '');
  };

  const cancelEdit = () => {
    setEditingUser(null);
    setEditExpiresAt('');
  };

  const formatExpiration = (expiresAt: string | null) => {
    if (!expiresAt) return 'Never';
    const date = new Date(expiresAt);
    const now = new Date();
    const isExpired = date < now;
    return (
      <span className={isExpired ? 'expired' : ''}>
        {date.toLocaleDateString()}
        {isExpired && ' (Expired)'}
      </span>
    );
  };

  if (loading) {
    return <div className="access-management loading">Loading permissions...</div>;
  }

  return (
    <div className="access-management card">
      <div className="access-management__header">
        <h3>Access Management</h3>
        <button
          className="btn btn-primary btn-sm"
          onClick={() => setShowAddForm(!showAddForm)}
        >
          {showAddForm ? 'Cancel' : '+ Add User'}
        </button>
      </div>

      {error && <div className="error-message">{error}</div>}
      {success && <div className="success-message">{success}</div>}

      {showAddForm && (
        <form className="access-management__form" onSubmit={handleGrant}>
          <div className="form-row">
            <div className="form-group">
              <label htmlFor="username">Username</label>
              <input
                id="username"
                type="text"
                value={newUsername}
                onChange={(e) => setNewUsername(e.target.value)}
                placeholder="Enter username"
                required
                disabled={submitting}
              />
            </div>
            <div className="form-group">
              <label htmlFor="level">Access Level</label>
              <select
                id="level"
                value={newLevel}
                onChange={(e) => setNewLevel(e.target.value as AccessLevel)}
                disabled={submitting}
              >
                <option value="read">Read</option>
                <option value="write">Write</option>
                <option value="admin">Admin</option>
              </select>
            </div>
            <div className="form-group">
              <label htmlFor="expires_at">Expires (optional)</label>
              <input
                id="expires_at"
                type="date"
                value={newExpiresAt}
                onChange={(e) => setNewExpiresAt(e.target.value)}
                disabled={submitting}
                min={new Date().toISOString().split('T')[0]}
              />
            </div>
            <button type="submit" className="btn btn-primary" disabled={submitting}>
              {submitting ? 'Granting...' : 'Grant Access'}
            </button>
          </div>
        </form>
      )}

      <div className="access-management__list">
        {permissions.length === 0 ? (
          <p className="text-muted">No explicit permissions set. Only the project owner has access.</p>
        ) : (
          <table className="access-table">
            <thead>
              <tr>
                <th>User</th>
                <th>Access Level</th>
                <th>Source</th>
                <th>Granted</th>
                <th>Expires</th>
                <th>Actions</th>
              </tr>
            </thead>
            <tbody>
              {permissions.map((p) => {
                const isTeamBased = p.source === 'team';
                return (
                  <tr key={p.id} className={isTeamBased ? 'team-access-row' : ''}>
                    <td>{p.user_id}</td>
                    <td>
                      {editingUser === p.user_id && !isTeamBased ? (
                        <select
                          value={editLevel}
                          onChange={(e) => setEditLevel(e.target.value as AccessLevel)}
                          disabled={submitting}
                        >
                          <option value="read">Read</option>
                          <option value="write">Write</option>
                          <option value="admin">Admin</option>
                        </select>
                      ) : (
                        <span className={`access-badge access-badge--${p.level}`}>
                          {p.level}
                        </span>
                      )}
                    </td>
                    <td>
                      {isTeamBased ? (
                        <span className="access-source access-source--team" title={`Team role: ${p.team_role}`}>
                          Team: {p.team_slug}
                        </span>
                      ) : (
                        <span className="access-source access-source--explicit">
                          Explicit
                        </span>
                      )}
                    </td>
                    <td>{new Date(p.created_at).toLocaleDateString()}</td>
                    <td>
                      {editingUser === p.user_id && !isTeamBased ? (
                        <input
                          type="date"
                          value={editExpiresAt}
                          onChange={(e) => setEditExpiresAt(e.target.value)}
                          disabled={submitting}
                          min={new Date().toISOString().split('T')[0]}
                        />
                      ) : (
                        formatExpiration(p.expires_at)
                      )}
                    </td>
                    <td className="actions">
                      {isTeamBased ? (
                        <span className="text-muted" title="Manage access via team settings">
                          Via team
                        </span>
                      ) : editingUser === p.user_id ? (
                        <>
                          <button
                            className="btn btn-sm btn-primary"
                            onClick={() => handleUpdate(p.user_id)}
                            disabled={submitting}
                          >
                            Save
                          </button>
                          <button
                            className="btn btn-sm"
                            onClick={cancelEdit}
                            disabled={submitting}
                          >
                            Cancel
                          </button>
                        </>
                      ) : (
                        <>
                          <button
                            className="btn btn-sm"
                            onClick={() => startEdit(p)}
                            disabled={submitting}
                          >
                            Edit
                          </button>
                          <button
                            className="btn btn-sm btn-danger"
                            onClick={() => handleRevoke(p.user_id)}
                            disabled={submitting}
                          >
                            Revoke
                          </button>
                        </>
                      )}
                    </td>
                  </tr>
                );
              })}
            </tbody>
          </table>
        )}
      </div>
    </div>
  );
}
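A minimal mounting sketch for the component above; the host ProjectSettingsPage is invented for illustration:

import { AccessManagement } from './components/AccessManagement';

// Hypothetical settings page that embeds the panel for one project.
export function ProjectSettingsPage({ projectName }: { projectName: string }) {
  return (
    <section>
      <h2>{projectName} settings</h2>
      <AccessManagement projectName={projectName} />
    </section>
  );
}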
43
frontend/src/components/Badge.css
Normal file
@@ -0,0 +1,43 @@
/* Badge Component */
.badge {
  display: inline-flex;
  align-items: center;
  padding: 4px 10px;
  border-radius: 100px;
  font-weight: 500;
  font-size: 0.75rem;
  text-transform: uppercase;
  letter-spacing: 0.02em;
}

.badge--default {
  background: var(--bg-tertiary);
  color: var(--text-secondary);
  border: 1px solid var(--border-primary);
}

.badge--success,
.badge--public {
  background: var(--success-bg);
  color: var(--success);
  border: 1px solid rgba(34, 197, 94, 0.2);
}

.badge--warning,
.badge--private {
  background: var(--warning-bg);
  color: var(--warning);
  border: 1px solid rgba(245, 158, 11, 0.2);
}

.badge--error {
  background: var(--error-bg);
  color: var(--error);
  border: 1px solid rgba(239, 68, 68, 0.2);
}

.badge--info {
  background: rgba(59, 130, 246, 0.1);
  color: #3b82f6;
  border: 1px solid rgba(59, 130, 246, 0.2);
}
17
frontend/src/components/Badge.tsx
Normal file
@@ -0,0 +1,17 @@
import './Badge.css';

type BadgeVariant = 'default' | 'success' | 'warning' | 'error' | 'info' | 'public' | 'private';

interface BadgeProps {
  children: React.ReactNode;
  variant?: BadgeVariant;
  className?: string;
}

export function Badge({ children, variant = 'default', className = '' }: BadgeProps) {
  return (
    <span className={`badge badge--${variant} ${className}`.trim()}>
      {children}
    </span>
  );
}
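A short usage sketch: the public/private variants reuse the success/warning palettes per the CSS above. VisibilityBadge is an invented wrapper for illustration:

import { Badge } from './components/Badge';

// Hypothetical wrapper mapping a boolean onto the public/private variants.
export function VisibilityBadge({ isPublic }: { isPublic: boolean }) {
  return (
    <Badge variant={isPublic ? 'public' : 'private'}>
      {isPublic ? 'Public' : 'Private'}
    </Badge>
  );
}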
38
frontend/src/components/Breadcrumb.css
Normal file
@@ -0,0 +1,38 @@
/* Breadcrumb Component */
.breadcrumb {
  margin-bottom: 24px;
}

.breadcrumb__list {
  display: flex;
  align-items: center;
  gap: 8px;
  list-style: none;
  padding: 0;
  margin: 0;
  font-size: 0.875rem;
}

.breadcrumb__item {
  display: flex;
  align-items: center;
  gap: 8px;
}

.breadcrumb__link {
  color: var(--text-secondary);
  transition: color var(--transition-fast);
}

.breadcrumb__link:hover {
  color: var(--accent-primary);
}

.breadcrumb__separator {
  color: var(--text-muted);
}

.breadcrumb__current {
  color: var(--text-primary);
  font-weight: 500;
}
38
frontend/src/components/Breadcrumb.tsx
Normal file
@@ -0,0 +1,38 @@
import { Link } from 'react-router-dom';
import './Breadcrumb.css';

interface BreadcrumbItem {
  label: string;
  href?: string;
}

interface BreadcrumbProps {
  items: BreadcrumbItem[];
  className?: string;
}

export function Breadcrumb({ items, className = '' }: BreadcrumbProps) {
  return (
    <nav className={`breadcrumb ${className}`.trim()} aria-label="Breadcrumb">
      <ol className="breadcrumb__list">
        {items.map((item, index) => {
          const isLast = index === items.length - 1;
          return (
            <li key={index} className="breadcrumb__item">
              {!isLast && item.href ? (
                <>
                  <Link to={item.href} className="breadcrumb__link">
                    {item.label}
                  </Link>
                  <span className="breadcrumb__separator">/</span>
                </>
              ) : (
                <span className="breadcrumb__current">{item.label}</span>
              )}
            </li>
          );
        })}
      </ol>
    </nav>
  );
}
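A usage sketch: every item except the last needs an href to render as a link; the trailing item falls through to the plain-text current style. The paths below are placeholders:

import { Breadcrumb } from './components/Breadcrumb';

// Hypothetical trail for a project settings page.
export function SettingsCrumbs() {
  return (
    <Breadcrumb
      items={[
        { label: 'Projects', href: '/projects' },
        { label: 'demo-project', href: '/project/demo-project' },
        { label: 'Settings' },
      ]}
    />
  );
}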
78
frontend/src/components/Card.css
Normal file
@@ -0,0 +1,78 @@
/* Card Component */
.card {
  background: var(--bg-secondary);
  border: 1px solid var(--border-primary);
  border-radius: var(--radius-lg);
  padding: 24px;
  transition: all var(--transition-normal);
}

.card--elevated {
  box-shadow: var(--shadow-md);
}

.card--accent {
  background: linear-gradient(135deg, rgba(16, 185, 129, 0.05) 0%, rgba(5, 150, 105, 0.05) 100%);
  border: 1px solid rgba(16, 185, 129, 0.2);
}

.card--clickable {
  display: block;
  color: inherit;
  position: relative;
  overflow: hidden;
  cursor: pointer;
}

.card--clickable::before {
  content: '';
  position: absolute;
  inset: 0;
  background: var(--accent-gradient);
  opacity: 0;
  transition: opacity var(--transition-normal);
  border-radius: var(--radius-lg);
}

.card--clickable:hover {
  border-color: var(--border-secondary);
  transform: translateY(-2px);
  box-shadow: var(--shadow-lg);
  color: inherit;
}

.card--clickable:hover::before {
  opacity: 0.03;
}

.card__header {
  margin-bottom: 16px;
}

.card__header h3 {
  color: var(--text-primary);
  font-size: 1.125rem;
  font-weight: 600;
  margin-bottom: 4px;
}

.card__header p {
  color: var(--text-secondary);
  font-size: 0.875rem;
  line-height: 1.5;
}

.card__body {
  color: var(--text-secondary);
  font-size: 0.875rem;
}

.card__footer {
  display: flex;
  justify-content: space-between;
  align-items: center;
  font-size: 0.75rem;
  padding-top: 16px;
  border-top: 1px solid var(--border-primary);
  margin-top: 16px;
}
59
frontend/src/components/Card.tsx
Normal file
@@ -0,0 +1,59 @@
import { ReactNode } from 'react';
import './Card.css';

interface CardProps {
  children: ReactNode;
  className?: string;
  onClick?: () => void;
  href?: string;
  variant?: 'default' | 'elevated' | 'accent';
}

export function Card({ children, className = '', onClick, href, variant = 'default' }: CardProps) {
  const baseClass = `card card--${variant} ${className}`.trim();

  if (href) {
    return (
      <a href={href} className={`${baseClass} card--clickable`}>
        {children}
      </a>
    );
  }

  if (onClick) {
    return (
      <div className={`${baseClass} card--clickable`} onClick={onClick} role="button" tabIndex={0}>
        {children}
      </div>
    );
  }

  return <div className={baseClass}>{children}</div>;
}

interface CardHeaderProps {
  children: ReactNode;
  className?: string;
}

export function CardHeader({ children, className = '' }: CardHeaderProps) {
  return <div className={`card__header ${className}`.trim()}>{children}</div>;
}

interface CardBodyProps {
  children: ReactNode;
  className?: string;
}

export function CardBody({ children, className = '' }: CardBodyProps) {
  return <div className={`card__body ${className}`.trim()}>{children}</div>;
}

interface CardFooterProps {
  children: ReactNode;
  className?: string;
}

export function CardFooter({ children, className = '' }: CardFooterProps) {
  return <div className={`card__footer ${className}`.trim()}>{children}</div>;
}
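A composition sketch: passing href renders the card as an anchor with the clickable hover treatment, and the header/body/footer subcomponents are optional. All strings are placeholders:

import { Card, CardHeader, CardBody, CardFooter } from './components/Card';

// Hypothetical project card; href switches Card to its anchor form.
export function DemoProjectCard() {
  return (
    <Card variant="elevated" href="/project/demo-project">
      <CardHeader>
        <h3>demo-project</h3>
        <p>A placeholder description.</p>
      </CardHeader>
      <CardBody>42 packages</CardBody>
      <CardFooter>
        <span>public</span>
        <span>updated today</span>
      </CardFooter>
    </Card>
  );
}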
155
frontend/src/components/DataTable.css
Normal file
@@ -0,0 +1,155 @@
/* DataTable Component */
.data-table {
  background: var(--bg-secondary);
  border: 1px solid var(--border-primary);
  border-radius: var(--radius-lg);
  overflow: hidden;
}

.data-table table {
  width: 100%;
  border-collapse: collapse;
}

.data-table th,
.data-table td {
  padding: 14px 20px;
  text-align: left;
  border-bottom: 1px solid var(--border-primary);
}

.data-table th {
  background: var(--bg-tertiary);
  font-weight: 600;
  font-size: 0.75rem;
  text-transform: uppercase;
  letter-spacing: 0.05em;
  color: var(--text-tertiary);
}

.data-table__th--sortable {
  cursor: pointer;
  user-select: none;
  transition: color var(--transition-fast);
}

.data-table__th--sortable:hover {
  color: var(--text-primary);
}

.data-table__th-content {
  display: flex;
  align-items: center;
  gap: 6px;
}

.data-table__sort-icon {
  transition: transform var(--transition-fast);
}

.data-table__sort-icon--desc {
  transform: rotate(180deg);
}

.data-table tbody tr:last-child td {
  border-bottom: none;
}

.data-table tbody tr {
  transition: background var(--transition-fast);
}

.data-table tbody tr:hover {
  background: var(--bg-tertiary);
}

.data-table td strong {
  color: var(--accent-primary);
  font-weight: 600;
}

/* Empty state */
.data-table__empty {
  text-align: center;
  padding: 48px 32px;
  color: var(--text-tertiary);
  background: var(--bg-secondary);
  border: 1px dashed var(--border-secondary);
  border-radius: var(--radius-lg);
}

.data-table__empty p {
  font-size: 0.9375rem;
}

/* Utility classes for cells */
.data-table .cell-mono {
  font-family: 'JetBrains Mono', 'Fira Code', 'Consolas', monospace;
  font-size: 0.8125rem;
  color: var(--text-tertiary);
  background: var(--bg-tertiary);
  padding: 4px 8px;
  border-radius: var(--radius-sm);
}

.data-table .cell-truncate {
  max-width: 200px;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}

/* Clickable rows */
.data-table__row--clickable {
  cursor: pointer;
}

.data-table__row--clickable:hover {
  background: var(--bg-hover);
}

/* Responsive table wrapper */
.data-table--responsive {
  overflow-x: auto;
  -webkit-overflow-scrolling: touch;
}

.data-table--responsive table {
  min-width: 800px;
}

/* Cell with name and icon */
.data-table .cell-name {
  display: flex;
  align-items: center;
  gap: 8px;
  font-weight: 500;
  color: var(--text-primary);
}

.data-table .cell-name:hover {
  color: var(--accent-primary);
}

/* Date cells */
.data-table .cell-date {
  color: var(--text-tertiary);
  font-size: 0.8125rem;
  white-space: nowrap;
}

/* Description cell */
.data-table .cell-description {
  max-width: 300px;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
  color: var(--text-secondary);
  font-size: 0.875rem;
}

/* Owner cell */
.data-table .cell-owner {
  color: var(--text-secondary);
  font-size: 0.875rem;
}
92
frontend/src/components/DataTable.tsx
Normal file
@@ -0,0 +1,92 @@
import { ReactNode } from 'react';
import './DataTable.css';

interface Column<T> {
  key: string;
  header: string;
  render: (item: T) => ReactNode;
  className?: string;
  sortable?: boolean;
}

interface DataTableProps<T> {
  data: T[];
  columns: Column<T>[];
  keyExtractor: (item: T) => string;
  emptyMessage?: string;
  className?: string;
  onSort?: (key: string) => void;
  sortKey?: string;
  sortOrder?: 'asc' | 'desc';
  onRowClick?: (item: T) => void;
}

export function DataTable<T>({
  data,
  columns,
  keyExtractor,
  emptyMessage = 'No data available',
  className = '',
  onSort,
  sortKey,
  sortOrder,
  onRowClick,
}: DataTableProps<T>) {
  if (data.length === 0) {
    return (
      <div className="data-table__empty">
        <p>{emptyMessage}</p>
      </div>
    );
  }

  return (
    <div className={`data-table ${className}`.trim()}>
      <table>
        <thead>
          <tr>
            {columns.map((column) => (
              <th
                key={column.key}
                className={`${column.className || ''} ${column.sortable ? 'data-table__th--sortable' : ''}`}
                onClick={() => column.sortable && onSort?.(column.key)}
              >
                <span className="data-table__th-content">
                  {column.header}
                  {column.sortable && sortKey === column.key && (
                    <svg
                      className={`data-table__sort-icon ${sortOrder === 'desc' ? 'data-table__sort-icon--desc' : ''}`}
                      width="12"
                      height="12"
                      viewBox="0 0 24 24"
                      fill="none"
                      stroke="currentColor"
                      strokeWidth="2"
                    >
                      <polyline points="18 15 12 9 6 15" />
                    </svg>
                  )}
                </span>
              </th>
            ))}
          </tr>
        </thead>
        <tbody>
          {data.map((item) => (
            <tr
              key={keyExtractor(item)}
              onClick={() => onRowClick?.(item)}
              className={onRowClick ? 'data-table__row--clickable' : ''}
            >
              {columns.map((column) => (
                <td key={column.key} className={column.className}>
                  {column.render(item)}
                </td>
              ))}
            </tr>
          ))}
        </tbody>
      </table>
    </div>
  );
}
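A usage sketch for the generic table. The Row shape below is invented for illustration; the cell-* class names come from DataTable.css above:

import { DataTable } from './components/DataTable';

// Hypothetical row type standing in for a real entity from ../types.
interface Row {
  name: string;
  owner: string;
}

export function ProjectTable({ rows }: { rows: Row[] }) {
  return (
    <DataTable<Row>
      data={rows}
      keyExtractor={(r) => r.name}
      emptyMessage="No projects yet"
      columns={[
        { key: 'name', header: 'Name', sortable: true, className: 'cell-name', render: (r) => <strong>{r.name}</strong> },
        { key: 'owner', header: 'Owner', className: 'cell-owner', render: (r) => r.owner },
      ]}
    />
  );
}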
Some files were not shown because too many files have changed in this diff.