Improve CI pipeline consistency and efficiency

- Rename integration_test_prod to smoke_test_prod for clarity
- Share pip cache between unit and integration tests
- Add interruptible: true to test jobs (cancel on new pipeline)
- Add retry: 1 to integration tests (handle network flakiness)
- Simplify verify_deployment to health check only (integration tests do full checks)
- Make cleanup_feature standalone (doesn't need deploy dependencies)
- Extract environment URLs (STAGE_URL, PROD_URL) to global variables
- Add timeout: 5m to cleanup_feature
- Remove -x flag from integration tests (show all failures)
This commit is contained in:
Mondo Diaz
2026-01-16 20:56:31 +00:00
parent 257756dfcc
commit 194d624ba9
2 changed files with 33 additions and 48 deletions

View File

@@ -8,6 +8,11 @@ variables:
PROSPER_VERSION: v0.64.1
# Use internal PyPI proxy instead of public internet
PIP_INDEX_URL: https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple
# Environment URLs (used by deploy and test jobs)
STAGE_URL: https://orchard-stage.common.global.bsf.tools
PROD_URL: https://orchard.common.global.bsf.tools
# Shared pip cache directory
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
# Prevent duplicate pipelines for MRs
workflow:
@@ -35,10 +40,10 @@ kics:
stage: deploy # Runs in deploy stage, but after deployment due to 'needs'
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 20m # Full suite takes longer than smoke tests
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
interruptible: true # Cancel if new pipeline starts
retry: 1 # Retry once on failure (network flakiness)
cache:
key: pip-integration-$CI_COMMIT_REF_SLUG
key: pip-$CI_COMMIT_REF_SLUG
paths:
- .pip-cache/
policy: pull-push
@@ -53,8 +58,7 @@ kics:
python -m pytest tests/integration/ -v \
--junitxml=integration-report.xml \
-m "not large and not slow" \
--tb=short \
-x # Stop on first failure for faster feedback
--tb=short
artifacts:
when: always
expire_in: 1 week
@@ -129,7 +133,7 @@ integration_test_stage:
<<: *integration_test_template
needs: [deploy_stage]
variables:
ORCHARD_TEST_URL: https://orchard-stage.common.global.bsf.tools
ORCHARD_TEST_URL: $STAGE_URL
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
@@ -150,8 +154,7 @@ python_unit_tests:
needs: [] # Run in parallel with build
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 15m
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
interruptible: true # Cancel if new pipeline starts
cache:
key: pip-$CI_COMMIT_REF_SLUG
paths:
@@ -184,6 +187,7 @@ frontend_tests:
needs: [] # Run in parallel with build
image: deps.global.bsf.tools/docker/node:20-alpine
timeout: 15m
interruptible: true # Cancel if new pipeline starts
cache:
key: npm-$CI_COMMIT_REF_SLUG
paths:
@@ -218,47 +222,21 @@ frontend_tests:
# OCI-based charts from internal registry - no repo add needed
- helm dependency update
# Simplified deployment verification - just health check
# Full API/frontend checks are done by the post-deployment test jobs (integration tests on stage, smoke tests on prod)
.verify_deployment: &verify_deployment |
echo "=== Waiting for health endpoint (certs may take a few minutes) ==="
for i in $(seq 1 30); do
if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then
echo "Health check passed!"
break
echo "Deployment URL: $BASE_URL"
exit 0
fi
echo "Attempt $i/30 - waiting 10s..."
sleep 10
done
# Verify health endpoint
echo ""
echo "=== Health Check ==="
curl -sf "$BASE_URL/health" || { echo "Health check failed"; exit 1; }
echo ""
# Verify API is responding
echo ""
echo "=== API Check (GET /api/v1/projects) ==="
HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" "$BASE_URL/api/v1/projects")
if [ "$HTTP_CODE" = "200" ]; then
echo "API responding: HTTP $HTTP_CODE"
else
echo "API check failed: HTTP $HTTP_CODE"
exit 1
fi
# Verify frontend is served
echo ""
echo "=== Frontend Check ==="
if curl -sf "$BASE_URL/" | grep -q "</html>"; then
echo "Frontend is being served"
else
echo "Frontend check failed"
exit 1
fi
echo ""
echo "=== All checks passed! ==="
echo "Deployment URL: $BASE_URL"
echo "Health check failed after 30 attempts"
exit 1
# Deploy to stage (main branch)
deploy_stage:
@@ -266,7 +244,7 @@ deploy_stage:
variables:
NAMESPACE: orch-stage-namespace
VALUES_FILE: helm/orchard/values-stage.yaml
BASE_URL: https://orchard-stage.common.global.bsf.tools
BASE_URL: $STAGE_URL
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
- *helm_setup
@@ -285,7 +263,7 @@ deploy_stage:
- *verify_deployment
environment:
name: stage
url: https://orchard-stage.common.global.bsf.tools
url: $STAGE_URL
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
rules:
@@ -331,10 +309,12 @@ deploy_feature:
- if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
when: on_success
# Cleanup feature branch deployment
# Cleanup feature branch deployment (standalone - doesn't need deploy dependencies)
cleanup_feature:
<<: *deploy_template
stage: deploy
needs: []
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
timeout: 5m
variables:
NAMESPACE: orch-dev-namespace
GIT_STRATEGY: none # No source needed, branch may be deleted
@@ -363,7 +343,7 @@ deploy_prod:
variables:
NAMESPACE: orch-prod-namespace
VALUES_FILE: helm/orchard/values-prod.yaml
BASE_URL: https://orchard.common.global.bsf.tools
BASE_URL: $PROD_URL
before_script:
- kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
- *helm_setup
@@ -382,7 +362,7 @@ deploy_prod:
- *verify_deployment
environment:
name: production
url: https://orchard.common.global.bsf.tools
url: $PROD_URL
kubernetes:
agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod
rules:
@@ -392,11 +372,11 @@ deploy_prod:
allow_failure: false
# Smoke tests for production deployment (read-only, no test data creation)
integration_test_prod:
smoke_test_prod:
<<: *smoke_test_template
needs: [deploy_prod]
variables:
ORCHARD_TEST_URL: https://orchard.common.global.bsf.tools
ORCHARD_TEST_URL: $PROD_URL
rules:
- if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
when: on_success

View File

@@ -63,6 +63,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- CI integration tests now run full pytest suite (~350 tests) against deployed environment instead of 3 smoke tests
- CI production deployment uses lightweight smoke tests only (no test data creation in prod)
- CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests
- Simplified deploy verification to a health check only (fuller checks are done by the post-deployment test jobs: integration tests on stage, smoke tests on prod)
- Extracted environment URLs to global variables for maintainability
- Made `cleanup_feature` job standalone (no longer inherits deploy template dependencies)
- Renamed `integration_test_prod` to `smoke_test_prod` for clarity
- Updated download ref resolution to check versions before tags (version → tag → artifact ID) (#56)
- Deploy jobs now require all security scans to pass before deployment (added test_image, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis to dependencies) (#63)
- Increased deploy job timeout from 5m to 10m (#63)