From 194d624ba9d7368539397b5c60bce99c1990b97f Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Fri, 16 Jan 2026 20:56:31 +0000 Subject: [PATCH] Improve CI pipeline consistency and efficiency - Rename integration_test_prod to smoke_test_prod for clarity - Share pip cache between unit and integration tests - Add interruptible: true to test jobs (cancel on new pipeline) - Add retry: 1 to integration tests (handle network flakiness) - Simplify verify_deployment to health check only (integration tests do full checks) - Make cleanup_feature standalone (doesn't need deploy dependencies) - Extract environment URLs (STAGE_URL, PROD_URL) to global variables - Add timeout: 5m to cleanup_feature - Remove -x flag from integration tests (show all failures) --- .gitlab-ci.yml | 76 +++++++++++++++++++------------------------------- CHANGELOG.md | 5 ++++ 2 files changed, 33 insertions(+), 48 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 07d6ff7..635f889 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,6 +8,11 @@ variables: PROSPER_VERSION: v0.64.1 # Use internal PyPI proxy instead of public internet PIP_INDEX_URL: https://deps.global.bsf.tools/artifactory/api/pypi/pypi.org/simple + # Environment URLs (used by deploy and test jobs) + STAGE_URL: https://orchard-stage.common.global.bsf.tools + PROD_URL: https://orchard.common.global.bsf.tools + # Shared pip cache directory + PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache" # Prevent duplicate pipelines for MRs workflow: @@ -35,10 +40,10 @@ kics: stage: deploy # Runs in deploy stage, but after deployment due to 'needs' image: deps.global.bsf.tools/docker/python:3.12-slim timeout: 20m # Full suite takes longer than smoke tests - variables: - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache" + interruptible: true # Cancel if new pipeline starts + retry: 1 # Retry once on failure (network flakiness) cache: - key: pip-integration-$CI_COMMIT_REF_SLUG + key: pip-$CI_COMMIT_REF_SLUG paths: - .pip-cache/ policy: pull-push @@ -53,8 +58,7 @@ kics: python -m pytest tests/integration/ -v \ --junitxml=integration-report.xml \ -m "not large and not slow" \ - --tb=short \ - -x # Stop on first failure for faster feedback + --tb=short artifacts: when: always expire_in: 1 week @@ -129,7 +133,7 @@ integration_test_stage: <<: *integration_test_template needs: [deploy_stage] variables: - ORCHARD_TEST_URL: https://orchard-stage.common.global.bsf.tools + ORCHARD_TEST_URL: $STAGE_URL rules: - if: '$CI_COMMIT_BRANCH == "main"' when: on_success @@ -150,8 +154,7 @@ python_unit_tests: needs: [] # Run in parallel with build image: deps.global.bsf.tools/docker/python:3.12-slim timeout: 15m - variables: - PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache" + interruptible: true # Cancel if new pipeline starts cache: key: pip-$CI_COMMIT_REF_SLUG paths: @@ -184,6 +187,7 @@ frontend_tests: needs: [] # Run in parallel with build image: deps.global.bsf.tools/docker/node:20-alpine timeout: 15m + interruptible: true # Cancel if new pipeline starts cache: key: npm-$CI_COMMIT_REF_SLUG paths: @@ -218,47 +222,21 @@ frontend_tests: # OCI-based charts from internal registry - no repo add needed - helm dependency update +# Simplified deployment verification - just health check +# Full API/frontend checks are done by integration tests post-deployment .verify_deployment: &verify_deployment | echo "=== Waiting for health endpoint (certs may take a few minutes) ===" for i in $(seq 1 30); do if curl -sf --max-time 10 "$BASE_URL/health" > /dev/null 2>&1; then echo "Health check passed!" - break + echo "Deployment URL: $BASE_URL" + exit 0 fi echo "Attempt $i/30 - waiting 10s..." sleep 10 done - - # Verify health endpoint - echo "" - echo "=== Health Check ===" - curl -sf "$BASE_URL/health" || { echo "Health check failed"; exit 1; } - echo "" - - # Verify API is responding - echo "" - echo "=== API Check (GET /api/v1/projects) ===" - HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" "$BASE_URL/api/v1/projects") - if [ "$HTTP_CODE" = "200" ]; then - echo "API responding: HTTP $HTTP_CODE" - else - echo "API check failed: HTTP $HTTP_CODE" - exit 1 - fi - - # Verify frontend is served - echo "" - echo "=== Frontend Check ===" - if curl -sf "$BASE_URL/" | grep -q ""; then - echo "Frontend is being served" - else - echo "Frontend check failed" - exit 1 - fi - - echo "" - echo "=== All checks passed! ===" - echo "Deployment URL: $BASE_URL" + echo "Health check failed after 30 attempts" + exit 1 # Deploy to stage (main branch) deploy_stage: @@ -266,7 +244,7 @@ deploy_stage: variables: NAMESPACE: orch-stage-namespace VALUES_FILE: helm/orchard/values-stage.yaml - BASE_URL: https://orchard-stage.common.global.bsf.tools + BASE_URL: $STAGE_URL before_script: - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage - *helm_setup @@ -285,7 +263,7 @@ deploy_stage: - *verify_deployment environment: name: stage - url: https://orchard-stage.common.global.bsf.tools + url: $STAGE_URL kubernetes: agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage rules: @@ -331,10 +309,12 @@ deploy_feature: - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"' when: on_success -# Cleanup feature branch deployment +# Cleanup feature branch deployment (standalone - doesn't need deploy dependencies) cleanup_feature: - <<: *deploy_template + stage: deploy needs: [] + image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12 + timeout: 5m variables: NAMESPACE: orch-dev-namespace GIT_STRATEGY: none # No source needed, branch may be deleted @@ -363,7 +343,7 @@ deploy_prod: variables: NAMESPACE: orch-prod-namespace VALUES_FILE: helm/orchard/values-prod.yaml - BASE_URL: https://orchard.common.global.bsf.tools + BASE_URL: $PROD_URL before_script: - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod - *helm_setup @@ -382,7 +362,7 @@ deploy_prod: - *verify_deployment environment: name: production - url: https://orchard.common.global.bsf.tools + url: $PROD_URL kubernetes: agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-prod rules: @@ -392,11 +372,11 @@ deploy_prod: allow_failure: false # Smoke tests for production deployment (read-only, no test data creation) -integration_test_prod: +smoke_test_prod: <<: *smoke_test_template needs: [deploy_prod] variables: - ORCHARD_TEST_URL: https://orchard.common.global.bsf.tools + ORCHARD_TEST_URL: $PROD_URL rules: - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/' when: on_success diff --git a/CHANGELOG.md b/CHANGELOG.md index 11b8c13..cdf8da3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,6 +63,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - CI integration tests now run full pytest suite (~350 tests) against deployed environment instead of 3 smoke tests - CI production deployment uses lightweight smoke tests only (no test data creation in prod) +- CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests +- Simplified deploy verification to health check only (full checks done by integration tests) +- Extracted environment URLs to global variables for maintainability +- Made `cleanup_feature` job standalone (no longer inherits deploy template dependencies) +- Renamed `integration_test_prod` to `smoke_test_prod` for clarity - Updated download ref resolution to check versions before tags (version → tag → artifact ID) (#56) - Deploy jobs now require all security scans to pass before deployment (added test_image, app_deps_scan, cve_scan, cve_sbom_analysis, app_sbom_analysis to dependencies) (#63) - Increased deploy job timeout from 5m to 10m (#63)