Add `needs: [integration_test_stage, changelog]` to the `release` job override. This ensures the tag (which triggers the prod deploy) is created only after the stage integration tests pass, preventing a race between the main-branch pipeline and the tag pipeline.
538 lines
17 KiB
YAML
538 lines
17 KiB
YAML
# Shared pipeline definition pulled from the Prosper project.
# The pinned ref currently has the same value as PROSPER_VERSION in `variables`.
include:
  - project: "esv/bsf/pypi/prosper"
    file: "/prosper/templates/projects/docker.yml"
    ref: "v0.64.1"
|
|
|
|
# Pipeline-wide variables, visible to every job.
variables:
  # renovate: datasource=gitlab-tags depName=esv/bsf/pypi/prosper versioning=semver registryUrl=https://gitlab.global.bsf.tools
  PROSPER_VERSION: v0.64.1

  # Package index: the internal proxy, so pip never reaches the public internet.
  PIP_INDEX_URL: "https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple"

  # Base URLs of the long-lived environments (read by the deploy and test jobs).
  STAGE_URL: "https://orchard-stage.common.global.bsf.tools"
  PROD_URL: "https://orchard.common.global.bsf.tools"

  # AWS resources backing the stage environment.
  # NOTE(review): described as "used by reset job", but no job visible in this
  # file references these five values - confirm whether they are still needed.
  STAGE_RDS_HOST: "orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com"
  STAGE_RDS_DBNAME: "postgres"
  STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
  STAGE_S3_BUCKET: "orchard-artifacts-stage"
  AWS_REGION: "us-gov-west-1"

  # pip download cache; lives inside the project dir so the jobs' `cache:`
  # entries (path .pip-cache/) can persist it.
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
|
|
|
|
# Pipeline creation policy: never create merge-request pipelines, create every
# other kind. This avoids a duplicate pipeline when a branch has an open MR.
workflow:
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
      when: never
    - when: always
|
|
|
|
# Stage order for the whole pipeline (Prosper's stages plus our custom ones).
# Jobs defined in this file use `test` (unit/frontend tests) and `deploy`
# (deployments, post-deploy tests, stage reset, cleanup).
stages:
  - ".pre"
  - "lint"
  - "build"
  - "test"
  - "analyze"
  - "deploy"
|
|
|
|
# Overrides of Prosper template jobs: this job and the similar overrides that
# follow are excluded from tag pipelines. A tag pipeline only runs deploy_prod
# and smoke_test_prod, because the image was already built on main.
build_image:
  rules:
    # Never on tags ...
    - if: "$CI_COMMIT_TAG"
      when: never
    # ... otherwise run once earlier jobs have succeeded.
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
test_image:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
hadolint:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: sets KICS_CONFIG to kics.config (presumably consumed
# by the template's KICS job - confirm) and skips tag pipelines.
kics:
  variables:
    KICS_CONFIG: "kics.config"
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
secrets:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
app_deps_scan:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
cve_scan:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
app_sbom_analysis:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Template job override: skipped on tag pipelines, runs on success elsewhere.
cve_sbom_analysis:
  rules:
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Release gate: the template's release job must wait for the stage integration
# tests (and the changelog job) before it creates the tag. The tag is what
# triggers the production deploy, so prod is only reached after stage passes.
# NOTE(review): integration_test_stage only exists in main-branch pipelines;
# confirm the template's release job is likewise limited to main, otherwise
# this `needs` entry refers to a job that is absent from the pipeline.
release:
  needs:
    - integration_test_stage
    - changelog
|
|
|
|
# Template for the full pytest integration suite, run against an already
# deployed environment (feature and stage deployments).
# Jobs using this template supply ORCHARD_TEST_URL (the server under test) and
# a `needs` entry pointing at the deployment they follow.
.integration_test_template: &integration_test_template
  image: deps.global.bsf.tools/docker/python:3.12-slim
  # Same stage as the deployments; ordering comes from each job's `needs`.
  stage: deploy
  # The full suite takes longer than the smoke tests.
  timeout: 20m
  # One automatic retry to absorb network flakiness.
  retry: 1
  # May be cancelled when a newer pipeline starts.
  interruptible: true
  cache:
    key: "pip-$CI_COMMIT_REF_SLUG"
    policy: pull-push
    paths:
      - ".pip-cache/"
  before_script:
    - 'pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt'
    - 'pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio httpx'
  script:
    - cd backend
    # Marker expression excludes:
    #   - large / slow tests
    #   - requires_direct_s3 tests (MinIO is not reachable from outside the
    #     K8s cluster)
    # Auth tests work here because dev/stage deployments have relaxed rate
    # limits.
    - >-
      python -m pytest tests/integration/ -v
      --junitxml=integration-report.xml
      -m "not large and not slow and not requires_direct_s3"
      --tb=short
  artifacts:
    # Keep the report even when the tests fail.
    when: always
    expire_in: 1 week
    reports:
      junit: backend/integration-report.xml
    paths:
      - backend/integration-report.xml
|
|
|
|
# Lightweight, read-only smoke test template (for production - creates no
# test data). Jobs using it must set ORCHARD_TEST_URL.
#
# The inline script performs three GET checks (health, project list, frontend
# root), collects every failure, prints a summary and exits 1 if any failed.
# A transport error or an unparsable body is recorded as a failure of that
# check instead of aborting the script, so the remaining checks still run and
# the summary is always printed.
.smoke_test_template: &smoke_test_template
  stage: deploy
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
    - |
      python - <<'PYTEST_SCRIPT'
      import os
      import sys

      import httpx

      BASE_URL = os.environ.get("ORCHARD_TEST_URL")
      if not BASE_URL:
          print("ERROR: ORCHARD_TEST_URL not set")
          sys.exit(1)

      print(f"Running smoke tests against {BASE_URL}")

      errors = []


      def fetch(client, path, label):
          """GET `path`; on a transport-level failure record it and return None."""
          try:
              return client.get(path)
          except httpx.HTTPError as exc:
              errors.append(f"{label} failed: {exc!r}")
              return None


      # The context manager closes the connection pool when the checks finish.
      with httpx.Client(base_url=BASE_URL, timeout=30.0) as client:
          # Test 1: Health endpoint
          print("\n=== Test 1: Health endpoint ===")
          r = fetch(client, "/health", "Health check")
          if r is not None:
              if r.status_code == 200:
                  print("PASS: Health check passed")
              else:
                  errors.append(f"Health check failed: {r.status_code}")

          # Test 2: API responds (list projects)
          print("\n=== Test 2: API responds ===")
          r = fetch(client, "/api/v1/projects", "API check")
          if r is not None:
              if r.status_code == 200:
                  try:
                      projects = r.json()
                      print(f"PASS: API responding, found {len(projects)} project(s)")
                  except (ValueError, TypeError) as exc:
                      # 200 with a body that is not a JSON collection.
                      errors.append(f"API check failed: unexpected body ({exc!r})")
              else:
                  errors.append(f"API check failed: {r.status_code}")

          # Test 3: Frontend served
          print("\n=== Test 3: Frontend served ===")
          r = fetch(client, "/", "Frontend check")
          if r is not None:
              if r.status_code == 200 and "</html>" in r.text:
                  print("PASS: Frontend is being served")
              else:
                  errors.append(f"Frontend check failed: {r.status_code}")

      # Report results
      print("\n" + "=" * 50)
      if errors:
          print(f"FAILED: {len(errors)} error(s)")
          for e in errors:
              print(f" FAIL: {e}")
          sys.exit(1)
      else:
          print("SUCCESS: All smoke tests passed!")
          sys.exit(0)
      PYTEST_SCRIPT
|
|
|
|
# Full integration suite against the stage environment.
# Runs only for the main branch, once deploy_stage has finished.
integration_test_stage:
  <<: *integration_test_template
  variables:
    ORCHARD_TEST_URL: "$STAGE_URL"
  needs:
    - deploy_stage
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
|
|
|
|
# Reset the stage environment after the integration tests so the next run
# starts from a clean slate. The script logs in as admin and POSTs to
# /api/v1/admin/factory-reset, which handles the DB and S3 cleanup server-side.
#
# Credentials: read from STAGE_ADMIN_USER / STAGE_ADMIN_PASSWORD when those
# CI/CD variables are defined (define the password as a masked variable).
# The previous hard-coded values remain as fallbacks so existing pipelines
# keep working; remove the fallbacks once the variables exist.
#
# NOTE(review): with `needs` + `when: on_success` this job is skipped when
# integration_test_stage fails, leaving test data behind - confirm that is
# intended.
reset_stage:
  stage: deploy
  needs: [integration_test_stage]
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  retry: 1  # Retry once on transient failures
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
    - |
      python - <<'RESET_SCRIPT'
      import os
      import sys
      import time

      import httpx

      BASE_URL = os.environ.get("STAGE_URL", "")
      # Prefer CI/CD variables; fall back to the historical defaults.
      ADMIN_USER = os.environ.get("STAGE_ADMIN_USER") or "admin"
      ADMIN_PASS = os.environ.get("STAGE_ADMIN_PASSWORD") or "changeme123"
      MAX_RETRIES = 3
      RETRY_DELAY = 5  # seconds

      if not BASE_URL:
          print("ERROR: STAGE_URL environment variable not set")
          sys.exit(1)

      print(f"=== Resetting stage environment at {BASE_URL} ===")


      def do_reset():
          """Log in and trigger the factory reset; raise on any non-200 reply."""
          with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
              # Login as admin (the same client is reused for the reset call).
              print("Logging in as admin...")
              login_response = client.post(
                  "/api/v1/auth/login",
                  json={"username": ADMIN_USER, "password": ADMIN_PASS},
              )
              if login_response.status_code != 200:
                  raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
              print("Login successful")

              # Call factory reset endpoint
              print("Calling factory reset endpoint...")
              reset_response = client.post(
                  "/api/v1/admin/factory-reset",
                  headers={"X-Confirm-Reset": "yes-delete-all-data"},
              )
              if reset_response.status_code != 200:
                  raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")

              # The reset has already happened at this point. Summarising the
              # response must never raise, otherwise the retry loop below
              # would repeat the destructive reset.
              try:
                  payload = reset_response.json()
              except ValueError:
                  payload = {}
              summary = payload.get("results", {}) if isinstance(payload, dict) else {}
              if not isinstance(summary, dict):
                  summary = {}

              print("Factory reset successful!")
              print(f" Database tables dropped: {summary.get('database_tables_dropped', 'unknown')}")
              print(f" S3 objects deleted: {summary.get('s3_objects_deleted', 'unknown')}")
              print(f" Database reinitialized: {summary.get('database_reinitialized', 'unknown')}")
              print(f" Seeded: {summary.get('seeded', 'unknown')}")
              return True


      # Retry loop (sys.exit raises SystemExit, which `except Exception`
      # does not catch, so a successful exit is never retried).
      for attempt in range(1, MAX_RETRIES + 1):
          try:
              print(f"Attempt {attempt}/{MAX_RETRIES}")
              if do_reset():
                  sys.exit(0)
          except Exception as e:
              print(f"Attempt {attempt} failed: {e}")
              if attempt < MAX_RETRIES:
                  print(f"Retrying in {RETRY_DELAY} seconds...")
                  time.sleep(RETRY_DELAY)
              else:
                  print("All retry attempts failed")
                  sys.exit(1)
      RESET_SCRIPT
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
  allow_failure: true  # Don't fail pipeline if reset has issues
|
|
|
|
# Full integration suite against a feature-branch deployment.
# Runs for every branch except main, once deploy_feature has finished; the
# target URL is derived from the branch slug.
integration_test_feature:
  <<: *integration_test_template
  variables:
    ORCHARD_TEST_URL: "https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools"
  needs:
    - deploy_feature
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success
|
|
|
|
# Backend unit tests with coverage. Integration tests are not run here; they
# run post-deployment against a live environment.
python_unit_tests:
  image: deps.global.bsf.tools/docker/python:3.12-slim
  stage: test
  # Empty needs: start immediately, in parallel with the build.
  needs: []
  timeout: 15m
  # May be cancelled when a newer pipeline starts.
  interruptible: true
  cache:
    key: "pip-$CI_COMMIT_REF_SLUG"
    policy: pull-push
    paths:
      - ".pip-cache/"
  before_script:
    - 'pip install --index-url "$PIP_INDEX_URL" -r backend/requirements.txt'
    - 'pip install --index-url "$PIP_INDEX_URL" pytest pytest-asyncio pytest-cov httpx'
  script:
    - cd backend
    # Coverage is emitted three ways: terminal (parsed by `coverage:` below),
    # XML (coverage report artifact) and HTML (browsable artifact).
    - >-
      python -m pytest tests/unit/ -v
      --cov=app
      --cov-report=term
      --cov-report=xml:coverage.xml
      --cov-report=html:coverage_html
      --junitxml=pytest-report.xml
  # Extracts the percentage from the TOTAL line of the terminal report.
  coverage: '/TOTAL.*\s+(\d+%)/'
  artifacts:
    # Keep reports even when tests fail.
    when: always
    expire_in: 1 week
    reports:
      junit: backend/pytest-report.xml
      coverage_report:
        coverage_format: cobertura
        path: backend/coverage.xml
    paths:
      - backend/coverage.xml
      - backend/coverage_html/
      - backend/pytest-report.xml
  rules:
    # Not on tag pipelines.
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Frontend test suite with coverage.
frontend_tests:
  image: deps.global.bsf.tools/docker/node:20-alpine
  stage: test
  # Empty needs: start immediately, in parallel with the build.
  needs: []
  timeout: 15m
  # May be cancelled when a newer pipeline starts.
  interruptible: true
  cache:
    key: "npm-$CI_COMMIT_REF_SLUG"
    policy: pull-push
    paths:
      - "frontend/node_modules/"
  before_script:
    # The working directory set here carries over into `script`.
    - cd frontend
    # Install through the internal npm registry proxy.
    - npm config set registry https://deps.global.bsf.tools/artifactory/api/npm/registry.npmjs.org
    - npm ci --verbose
  script:
    - npm run test -- --run --reporter=verbose --coverage
  # Extracts the figure from the "All files" row of the coverage table.
  coverage: '/All files[^|]*\|[^|]*\s+([\d\.]+)/'
  artifacts:
    # Keep coverage output even when tests fail.
    when: always
    expire_in: 1 week
    reports:
      coverage_report:
        coverage_format: cobertura
        path: frontend/coverage/cobertura-coverage.xml
    paths:
      - frontend/coverage/
  rules:
    # Not on tag pipelines.
    - if: "$CI_COMMIT_TAG"
      when: never
    - when: on_success
|
|
|
|
# Base configuration shared by deploy_stage and deploy_feature: the deploy
# image plus the full list of build, test and scan jobs that must finish
# before a deployment may start.
.deploy_template: &deploy_template
  stage: deploy
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  needs:
    - build_image
    - test_image
    - kics
    - hadolint
    - python_unit_tests
    - frontend_tests
    - secrets
    - app_deps_scan
    - cve_scan
    - cve_sbom_analysis
    - app_sbom_analysis
|
|
|
|
# Reusable before_script steps for the deploy jobs: print the Helm version,
# then resolve the chart's dependencies. These steps leave the shell in
# helm/orchard, which is why the deploy scripts cd back to $CI_PROJECT_DIR.
.helm_setup: &helm_setup
  - "helm version"
  - "cd helm/orchard"
  # Charts are OCI-based and come from the internal registry, so no
  # `helm repo add` is required first.
  - "helm dependency update"
|
|
|
|
# Simplified deployment verification - health check only. The full
# API/frontend checks are done by the integration tests after deployment.
#
# Polls "$BASE_URL/health" up to 30 times, 10 s apart (certificates may take a
# few minutes to become valid). Exits 1 if the endpoint never answers or if
# BASE_URL is empty. On success it does NOT call `exit`: the commands of a
# job's script run in one shell, so `exit 0` here would silently skip any
# command placed after this alias.
.verify_deployment: &verify_deployment |
  if [ -z "${BASE_URL:-}" ]; then
    echo "BASE_URL is not set - cannot verify deployment"
    exit 1
  fi
  echo "=== Waiting for health endpoint (certs may take a few minutes) ==="
  health_ok=0
  for i in $(seq 1 30); do
    if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then
      health_ok=1
      break
    fi
    echo "Attempt $i/30 - waiting 10s..."
    sleep 10
  done
  if [ "$health_ok" -ne 1 ]; then
    echo "Health check failed after 30 attempts"
    exit 1
  fi
  echo "Health check passed!"
  echo "Deployment URL: $BASE_URL"
|
|
|
|
# Stage deployment - main branch only.
# Installs/upgrades the `orchard-stage` Helm release with the image built for
# this commit, waits for the rollout, then polls the health endpoint.
deploy_stage:
  <<: *deploy_template
  variables:
    NAMESPACE: "orch-stage-namespace"
    VALUES_FILE: "helm/orchard/values-stage.yaml"
    # Read by the verify_deployment step.
    BASE_URL: "$STAGE_URL"
  environment:
    name: stage
    url: "$STAGE_URL"
    kubernetes:
      agent: "esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage"
  before_script:
    - "kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage"
    - *helm_setup
  script:
    - 'echo "Deploying to stage environment"'
    # helm_setup left the shell in helm/orchard; return to the repo root.
    - cd $CI_PROJECT_DIR
    # --atomic rolls the release back if the upgrade fails.
    - >-
      helm upgrade --install orchard-stage ./helm/orchard
      --namespace $NAMESPACE
      -f $VALUES_FILE
      --set image.tag=git.linux-amd64-$CI_COMMIT_SHA
      --wait
      --atomic
      --timeout 10m
    - kubectl rollout status deployment/orchard-stage-server -n $NAMESPACE --timeout=10m
    - *verify_deployment
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
|
|
|
|
# Feature-branch deployment into the dev namespace - every branch except main.
# Each branch gets its own Helm release, ingress host and TLS secrets, all
# derived from $CI_COMMIT_REF_SLUG. The review environment stops after one
# week or when cleanup_feature is run.
#
# Every argument containing a variable or an index such as [0] is
# double-quoted so the shell performs neither word splitting nor pathname
# expansion on it (an unquoted `hosts[0]` is a glob pattern).
deploy_feature:
  <<: *deploy_template
  variables:
    NAMESPACE: orch-dev-namespace
    VALUES_FILE: helm/orchard/values-dev.yaml
  before_script:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard
    - *helm_setup
  script:
    - echo "Deploying feature branch $CI_COMMIT_REF_SLUG"
    # helm_setup left the shell in helm/orchard; return to the repo root.
    - cd "$CI_PROJECT_DIR"
    - |
      helm upgrade --install "orchard-$CI_COMMIT_REF_SLUG" ./helm/orchard \
        --namespace "$NAMESPACE" \
        -f "$VALUES_FILE" \
        --set "image.tag=git.linux-amd64-$CI_COMMIT_SHA" \
        --set "ingress.hosts[0].host=orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools" \
        --set "ingress.tls[0].hosts[0]=orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools" \
        --set "ingress.tls[0].secretName=orchard-$CI_COMMIT_REF_SLUG-tls" \
        --set "minioIngress.host=minio-$CI_COMMIT_REF_SLUG.common.global.bsf.tools" \
        --set "minioIngress.tls.secretName=minio-$CI_COMMIT_REF_SLUG-tls" \
        --wait \
        --atomic \
        --timeout 10m
    - kubectl rollout status "deployment/orchard-$CI_COMMIT_REF_SLUG-server" -n "$NAMESPACE" --timeout=10m
    # BASE_URL is read by the verify_deployment step that follows.
    - export BASE_URL="https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools"
    - *verify_deployment
  environment:
    name: review/$CI_COMMIT_REF_SLUG
    url: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
    on_stop: cleanup_feature
    auto_stop_in: 1 week
    kubernetes:
      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success
|
|
|
|
# Tear down a feature-branch deployment. This is the stop action of the
# review/<slug> environment: manual, non-blocking, and standalone (empty
# `needs`, so it does not depend on the deploy prerequisites).
cleanup_feature:
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  stage: deploy
  needs: []
  timeout: 5m
  variables:
    NAMESPACE: "orch-dev-namespace"
    # Skip the checkout: no source is needed and the branch may be deleted.
    GIT_STRATEGY: "none"
  environment:
    name: "review/$CI_COMMIT_REF_SLUG"
    action: stop
    kubernetes:
      agent: "esv/bsf/bsf-integration/orchard/orchard-mvp:orchard"
  before_script:
    - "kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard"
  script:
    - 'echo "Cleaning up feature deployment orchard-$CI_COMMIT_REF_SLUG"'
    # `|| true`: a failed uninstall (e.g. release already gone) is ignored.
    - "helm uninstall orchard-$CI_COMMIT_REF_SLUG --namespace $NAMESPACE || true"
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: manual
  allow_failure: true
|
|
|
|
# Production deployment - semantic version tags only.
# Installs/upgrades the `orchard-prod` Helm release using the image tagged
# with this commit's SHA, waits for the rollout, then polls the health
# endpoint. A failure here fails the pipeline.
deploy_prod:
  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  stage: deploy
  # Tag pipelines run no build/test jobs - the image was already built when
  # the commit was on main - so there is nothing to wait for.
  needs: []
  variables:
    NAMESPACE: "orch-namespace"
    VALUES_FILE: "helm/orchard/values-prod.yaml"
    # Read by the verify_deployment step.
    BASE_URL: "$PROD_URL"
  environment:
    name: production
    url: "$PROD_URL"
    kubernetes:
      agent: "esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod"
  before_script:
    - "kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod"
    - *helm_setup
  script:
    - 'echo "Deploying to PRODUCTION - version $CI_COMMIT_TAG"'
    # helm_setup left the shell in helm/orchard; return to the repo root.
    - cd $CI_PROJECT_DIR
    # --atomic rolls the release back if the upgrade fails.
    - >-
      helm upgrade --install orchard-prod ./helm/orchard
      --namespace $NAMESPACE
      -f $VALUES_FILE
      --set image.tag=git.linux-amd64-$CI_COMMIT_SHA
      --wait
      --atomic
      --timeout 10m
    - kubectl rollout status deployment/orchard-prod-server -n $NAMESPACE --timeout=10m
    - *verify_deployment
  rules:
    # Matches tags of the exact form vMAJOR.MINOR.PATCH (v1.0.0, v1.2.3, ...).
    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
      when: on_success
  allow_failure: false
|
|
|
|
# Read-only smoke tests against production (no test data is created).
# Runs after deploy_prod, on the same semantic-version tags.
smoke_test_prod:
  <<: *smoke_test_template
  variables:
    ORCHARD_TEST_URL: "$PROD_URL"
  needs:
    - deploy_prod
  rules:
    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
      when: on_success
|