Add upstream caching infrastructure and refactor CI pipeline

Upstream Caching (Epic #68-#75, #105):
- Add upstream_sources and cache_settings tables with migrations
- Add cache management API endpoints (CRUD for sources, settings)
- Add environment variable overrides for upstream sources and cache settings
- Add encryption module for storing credentials securely
- Add frontend Admin Cache Management page
- Add is_system field to projects for system cache distinction
- Add purge_seed_data for transitioning to production-like environments

CI Pipeline Refactoring:
- Remove reset jobs (reset_stage_pre, reset_stage)
- Add ephemeral orchard-test deployment for main branch testing
- Run integration tests on ephemeral deployment before promoting to stage
- Stage is now long-running pre-prod (smoke tests only)
- Disable prosper_setup for tag pipelines
2026-01-29 11:28:59 -06:00
parent c92895ffe9
commit a3a49ac9c3
24 changed files with 7271 additions and 103 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -11,13 +11,6 @@ variables:
  # Environment URLs (used by deploy and test jobs)
  STAGE_URL: https://orchard-stage.common.global.bsf.tools
  PROD_URL: https://orchard.common.global.bsf.tools
-  # Stage environment AWS resources (used by reset job)
-  STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
-  STAGE_RDS_DBNAME: postgres
-  STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
-  STAGE_AUTH_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:orchard-stage-creds-SMqvQx"
-  STAGE_S3_BUCKET: orchard-artifacts-stage
-  AWS_REGION: us-gov-west-1
  # Shared pip cache directory
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"

@@ -95,10 +88,18 @@ cve_sbom_analysis:
      when: never
    - when: on_success

-# Override release job to wait for stage integration tests before creating tag
+# Disable prosper_setup for tag pipelines since no build/analysis jobs run
+# (image is already built when commit was on main, and deploy uses helm directly)
+prosper_setup:
+  rules:
+    - if: '$CI_COMMIT_TAG'  # tag pipeline: never run this job
+      when: never
+    - when: on_success  # every other pipeline: run as usual
+
+# Override release job to wait for stage deployment and smoke tests before creating tag
 # This ensures the tag (which triggers prod deploy) is only created after stage passes
 release:
-  needs: [integration_test_stage, changelog]
+  needs: [smoke_test_stage, changelog]  # gated on the stage smoke tests instead of the full integration suite

 # Full integration test suite template (for feature/stage deployments)
 # Runs the complete pytest integration test suite against the deployed environment
@@ -200,107 +201,91 @@ release:
          sys.exit(0)
      PYTEST_SCRIPT

-# Reset stage template - runs from CI runner, uses CI variable for auth
-# Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
-.reset_stage_template: &reset_stage_template
-  stage: deploy
-  image: deps.global.bsf.tools/docker/python:3.12-slim
-  timeout: 5m
-  retry: 1
+# Ephemeral test deployment in stage namespace (main branch only)
+# Runs integration tests before promoting to long-running stage
+deploy_test:
+  <<: *deploy_template
+  variables:
+    NAMESPACE: orch-stage-namespace  # same namespace that deploy_stage targets
+    VALUES_FILE: helm/orchard/values-dev.yaml  # dev values file, not values-stage.yaml
+    BASE_URL: https://orchard-test.common.global.bsf.tools  # host also set on the ingress by the helm command in this job
  before_script:
-    - pip install --index-url "$PIP_INDEX_URL" httpx
+    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
+    - *helm_setup
  script:
+    - echo "Deploying ephemeral test environment"
+    - cd $CI_PROJECT_DIR
    - |
-      python - <<'RESET_SCRIPT'
-      import httpx
-      import sys
-      import os
-      import time
-
-      BASE_URL = os.environ.get("STAGE_URL", "")
-      ADMIN_USER = "admin"
-      ADMIN_PASS = os.environ.get("STAGE_ADMIN_PASSWORD", "")
-      MAX_RETRIES = 3
-      RETRY_DELAY = 5
-
-      if not BASE_URL:
-          print("ERROR: STAGE_URL not set")
-          sys.exit(1)
-
-      if not ADMIN_PASS:
-          print("ERROR: STAGE_ADMIN_PASSWORD not set")
-          sys.exit(1)
-
-      print(f"=== Resetting stage environment at {BASE_URL} ===")
-
-      def do_reset():
-          with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
-              print("Logging in as admin...")
-              login_response = client.post(
-                  "/api/v1/auth/login",
-                  json={"username": ADMIN_USER, "password": ADMIN_PASS},
-              )
-              if login_response.status_code != 200:
-                  raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
-              print("Login successful")
-
-              print("Calling factory reset endpoint...")
-              reset_response = client.post(
-                  "/api/v1/admin/factory-reset",
-                  headers={"X-Confirm-Reset": "yes-delete-all-data"},
-              )
-
-              if reset_response.status_code == 200:
-                  result = reset_response.json()
-                  print("Factory reset successful!")
-                  print(f"  Database tables dropped: {result['results']['database_tables_dropped']}")
-                  print(f"  S3 objects deleted: {result['results']['s3_objects_deleted']}")
-                  print(f"  Database reinitialized: {result['results']['database_reinitialized']}")
-                  print(f"  Seeded: {result['results']['seeded']}")
-                  return True
-              else:
-                  raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
-
-      for attempt in range(1, MAX_RETRIES + 1):
-          try:
-              print(f"Attempt {attempt}/{MAX_RETRIES}")
-              if do_reset():
-                  sys.exit(0)
-          except Exception as e:
-              print(f"Attempt {attempt} failed: {e}")
-              if attempt < MAX_RETRIES:
-                  print(f"Retrying in {RETRY_DELAY} seconds...")
-                  time.sleep(RETRY_DELAY)
-              else:
-                  print("All retry attempts failed")
-                  sys.exit(1)
-      RESET_SCRIPT
+      helm upgrade --install orchard-test ./helm/orchard \
+        --namespace "$NAMESPACE" \
+        -f "$VALUES_FILE" \
+        --set image.tag="git.linux-amd64-$CI_COMMIT_SHA" \
+        --set-string orchard.auth.adminPassword="$STAGE_ADMIN_PASSWORD" \
+        --set 'ingress.hosts[0].host=orchard-test.common.global.bsf.tools' \
+        --set 'ingress.tls[0].hosts[0]=orchard-test.common.global.bsf.tools' \
+        --set 'ingress.tls[0].secretName=orchard-test-tls' \
+        --set minioIngress.host=minio-test.common.global.bsf.tools \
+        --set minioIngress.tls.secretName=minio-test-tls \
+        --wait \
+        --atomic \
+        --timeout 10m
+    - kubectl rollout status deployment/orchard-test-server -n "$NAMESPACE" --timeout=10m  # explicit rollout check in addition to helm --wait
+    - *verify_deployment
+  environment:
+    name: test
+    url: https://orchard-test.common.global.bsf.tools
+    on_stop: cleanup_test  # cleanup_test (action: stop) tears this environment down
+    kubernetes:
+      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

-# Reset stage BEFORE integration tests (ensure known state)
-reset_stage_pre:
-  <<: *reset_stage_template
-  needs: [deploy_stage]
-
-# Integration tests for stage deployment
-# Uses CI variable STAGE_ADMIN_PASSWORD (set in GitLab CI/CD settings)
-integration_test_stage:
-  <<: *integration_test_template
-  needs: [reset_stage_pre]
+# Cleanup ephemeral test deployment after integration tests
+cleanup_test:
+  stage: deploy
+  needs: [integration_test_main]  # tear down only after the integration test job
+  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
+  timeout: 5m
+  variables:
+    NAMESPACE: orch-stage-namespace
+    GIT_STRATEGY: none  # no repository checkout; the script below only calls helm
+  before_script:
+    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
+  script:
+    - echo "Cleaning up ephemeral test deployment orchard-test"
+    - helm uninstall orchard-test --namespace $NAMESPACE || true  # best-effort: a failed uninstall does not fail the script
+  environment:
+    name: test
+    action: stop  # stop job for the "test" environment; deploy_test names it in on_stop
+    kubernetes:
+      agent: esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "main"'
+      when: on_success  # NOTE(review): skipped when integration_test_main fails, so orchard-test would linger - confirm intended
+  allow_failure: true  # a failure here is tolerated rather than failing the pipeline
+
+# Integration tests for ephemeral test deployment (main branch)
+# Runs against orchard-test before promoting to long-running stage
+integration_test_main:
+  <<: *integration_test_template
+  needs: [deploy_test]  # runs once the ephemeral deployment job has finished
+  variables:
+    ORCHARD_TEST_URL: https://orchard-test.common.global.bsf.tools  # ingress host configured by deploy_test
    ORCHARD_TEST_PASSWORD: $STAGE_ADMIN_PASSWORD
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

-# Reset stage AFTER integration tests (clean slate for next run)
-reset_stage:
-  <<: *reset_stage_template
-  needs: [integration_test_stage]
-  allow_failure: true  # Don't fail pipeline if reset has issues
+# Smoke test for long-running stage (after promotion)
+smoke_test_stage:
+  <<: *smoke_test_template
+  needs: [deploy_stage]  # runs once the long-running stage deploy job has finished
+  variables:
+    ORCHARD_TEST_URL: $STAGE_URL  # STAGE_URL comes from the top-level variables block
+  rules:
+    - if: '$CI_COMMIT_BRANCH == "main"'  # main branch pipelines only
+      when: on_success

 # Integration tests for feature deployment (full suite)
 # Uses DEV_ADMIN_PASSWORD CI variable (same as deploy_feature)
@@ -412,9 +397,12 @@ frontend_tests:
  echo "Health check failed after 30 attempts"
  exit 1

-# Deploy to stage (main branch)
+# Deploy to long-running stage (main branch, after ephemeral tests pass)
 deploy_stage:
-  <<: *deploy_template
+  stage: deploy
+  # Promote only after cleanup_test, which runs once integration_test_main has passed
+  needs: [cleanup_test]
+  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
  variables:
    NAMESPACE: orch-stage-namespace
    VALUES_FILE: helm/orchard/values-stage.yaml
@@ -423,7 +411,7 @@ deploy_stage:
    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
    - *helm_setup
  script:
-    - echo "Deploying to stage environment"
+    - echo "Deploying to long-running stage environment"
    - cd $CI_PROJECT_DIR
    - |
      helm upgrade --install orchard-stage ./helm/orchard \