Add configurable admin password via environment variable

- Add ORCHARD_ADMIN_PASSWORD env var to set initial admin password - When set, admin user created without forced password change - Add AWS Secrets Manager support for stage/prod deployments - Add .env file support for local docker development - Add Helm chart auth config (adminPassword, existingSecret, secretsManager) Environments configured: - Local: .env file or defaults to changeme123 - Feature/dev: orchardtest123 (hardcoded in values-dev.yaml) - Stage: AWS Secrets Manager (orchard-stage-creds) - Prod: AWS Secrets Manager (orch-prod-creds)
2026-01-27 17:22:37 +00:00
parent 718e6e7193
commit 1f3e19d3a5
15 changed files with 453 additions and 70 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -15,6 +15,7 @@ variables:
  STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
  STAGE_RDS_DBNAME: postgres
  STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
+  STAGE_AUTH_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:orchard-stage-creds-SMqvQx"
  STAGE_S3_BUCKET: orchard-artifacts-stage
  AWS_REGION: us-gov-west-1
  # Shared pip cache directory
@@ -196,81 +197,140 @@ release:
          sys.exit(0)
      PYTEST_SCRIPT

-# Integration tests for stage deployment (full suite)
-# Reset stage template - shared by pre and post test reset jobs
+# Reset stage template - runs in-cluster with IRSA for Secrets Manager access
 # Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
 .reset_stage_template: &reset_stage_template
  stage: deploy
-  image: deps.global.bsf.tools/docker/python:3.12-slim
-  timeout: 5m
-  retry: 1  # Retry once on transient failures
+  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
+  timeout: 10m
+  retry: 1
+  variables:
+    NAMESPACE: orch-stage-namespace
  before_script:
-    - pip install --index-url "$PIP_INDEX_URL" httpx
+    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
  script:
    - |
-      python - <<'RESET_SCRIPT'
-      import httpx
-      import sys
-      import os
-      import time
+      # Create a Job to run the reset in the cluster
+      cat <<EOF | kubectl apply -f -
+      apiVersion: batch/v1
+      kind: Job
+      metadata:
+        name: reset-stage-${CI_PIPELINE_ID}-${CI_JOB_ID}
+        namespace: ${NAMESPACE}
+      spec:
+        ttlSecondsAfterFinished: 300
+        backoffLimit: 2
+        template:
+          spec:
+            serviceAccountName: orchard
+            restartPolicy: Never
+            containers:
+            - name: reset-runner
+              image: deps.global.bsf.tools/docker/python:3.12-slim
+              env:
+              - name: STAGE_URL
+                value: "${STAGE_URL}"
+              - name: AWS_REGION
+                value: "${AWS_REGION}"
+              - name: STAGE_AUTH_SECRET_ARN
+                value: "${STAGE_AUTH_SECRET_ARN}"
+              - name: PIP_INDEX_URL
+                value: "${PIP_INDEX_URL}"
+              command:
+              - /bin/bash
+              - -c
+              - |
+                set -e
+                pip install --index-url "\$PIP_INDEX_URL" httpx boto3

-      BASE_URL = os.environ.get("STAGE_URL", "")
-      ADMIN_USER = "admin"
-      ADMIN_PASS = "changeme123"  # Default admin password
-      MAX_RETRIES = 3
-      RETRY_DELAY = 5  # seconds
+                python - <<'RESET_SCRIPT'
+                import httpx
+                import sys
+                import os
+                import time
+                import json
+                import boto3

-      if not BASE_URL:
-          print("ERROR: STAGE_URL environment variable not set")
-          sys.exit(1)
+                BASE_URL = os.environ.get("STAGE_URL", "")
+                ADMIN_USER = "admin"
+                MAX_RETRIES = 3
+                RETRY_DELAY = 5

-      print(f"=== Resetting stage environment at {BASE_URL} ===")
+                # Fetch admin password from AWS Secrets Manager using IRSA
+                secret_arn = os.environ.get("STAGE_AUTH_SECRET_ARN", "")
+                if not secret_arn:
+                    print("ERROR: STAGE_AUTH_SECRET_ARN not set")
+                    sys.exit(1)

-      def do_reset():
-          with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
-              # Login as admin
-              print("Logging in as admin...")
-              login_response = client.post(
-                  "/api/v1/auth/login",
-                  json={"username": ADMIN_USER, "password": ADMIN_PASS},
-              )
-              if login_response.status_code != 200:
-                  raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
-              print("Login successful")
+                try:
+                    client = boto3.client('secretsmanager', region_name=os.environ.get("AWS_REGION"))
+                    secret = client.get_secret_value(SecretId=secret_arn)
+                    data = json.loads(secret['SecretString'])
+                    ADMIN_PASS = data['admin_password']
+                    print("Successfully fetched admin password from Secrets Manager")
+                except Exception as e:
+                    print(f"ERROR: Failed to fetch secret: {e}")
+                    sys.exit(1)

-              # Call factory reset endpoint
-              print("Calling factory reset endpoint...")
-              reset_response = client.post(
-                  "/api/v1/admin/factory-reset",
-                  headers={"X-Confirm-Reset": "yes-delete-all-data"},
-              )
+                if not BASE_URL:
+                    print("ERROR: STAGE_URL not set")
+                    sys.exit(1)

-              if reset_response.status_code == 200:
-                  result = reset_response.json()
-                  print("Factory reset successful!")
-                  print(f"  Database tables dropped: {result['results']['database_tables_dropped']}")
-                  print(f"  S3 objects deleted: {result['results']['s3_objects_deleted']}")
-                  print(f"  Database reinitialized: {result['results']['database_reinitialized']}")
-                  print(f"  Seeded: {result['results']['seeded']}")
-                  return True
-              else:
-                  raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
+                print(f"=== Resetting stage environment at {BASE_URL} ===")

-      # Retry loop
-      for attempt in range(1, MAX_RETRIES + 1):
-          try:
-              print(f"Attempt {attempt}/{MAX_RETRIES}")
-              if do_reset():
-                  sys.exit(0)
-          except Exception as e:
-              print(f"Attempt {attempt} failed: {e}")
-              if attempt < MAX_RETRIES:
-                  print(f"Retrying in {RETRY_DELAY} seconds...")
-                  time.sleep(RETRY_DELAY)
-              else:
-                  print("All retry attempts failed")
-                  sys.exit(1)
-      RESET_SCRIPT
+                def do_reset():
+                    with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
+                        print("Logging in as admin...")
+                        login_response = client.post(
+                            "/api/v1/auth/login",
+                            json={"username": ADMIN_USER, "password": ADMIN_PASS},
+                        )
+                        if login_response.status_code != 200:
+                            raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
+                        print("Login successful")
+
+                        print("Calling factory reset endpoint...")
+                        reset_response = client.post(
+                            "/api/v1/admin/factory-reset",
+                            headers={"X-Confirm-Reset": "yes-delete-all-data"},
+                        )
+
+                        if reset_response.status_code == 200:
+                            result = reset_response.json()
+                            print("Factory reset successful!")
+                            print(f"  Database tables dropped: {result['results']['database_tables_dropped']}")
+                            print(f"  S3 objects deleted: {result['results']['s3_objects_deleted']}")
+                            print(f"  Database reinitialized: {result['results']['database_reinitialized']}")
+                            print(f"  Seeded: {result['results']['seeded']}")
+                            return True
+                        else:
+                            raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
+
+                for attempt in range(1, MAX_RETRIES + 1):
+                    try:
+                        print(f"Attempt {attempt}/{MAX_RETRIES}")
+                        if do_reset():
+                            sys.exit(0)
+                    except Exception as e:
+                        print(f"Attempt {attempt} failed: {e}")
+                        if attempt < MAX_RETRIES:
+                            print(f"Retrying in {RETRY_DELAY} seconds...")
+                            time.sleep(RETRY_DELAY)
+                        else:
+                            print("All retry attempts failed")
+                            sys.exit(1)
+                RESET_SCRIPT
+      EOF
+    - |
+      echo "Waiting for reset job to complete..."
+      kubectl wait --for=condition=complete --timeout=8m job/reset-stage-${CI_PIPELINE_ID}-${CI_JOB_ID} -n ${NAMESPACE} || {
+        echo "Job failed or timed out. Fetching logs..."
+        kubectl logs job/reset-stage-${CI_PIPELINE_ID}-${CI_JOB_ID} -n ${NAMESPACE} || true
+        kubectl delete job reset-stage-${CI_PIPELINE_ID}-${CI_JOB_ID} -n ${NAMESPACE} || true
+        exit 1
+      }
+    - kubectl logs job/reset-stage-${CI_PIPELINE_ID}-${CI_JOB_ID} -n ${NAMESPACE}
+    - kubectl delete job reset-stage-${CI_PIPELINE_ID}-${CI_JOB_ID} -n ${NAMESPACE} || true
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
@@ -280,12 +340,98 @@ reset_stage_pre:
  <<: *reset_stage_template
  needs: [deploy_stage]

-# Integration tests for stage deployment (full suite)
+# Integration tests for stage deployment (runs in-cluster with IRSA for Secrets Manager access)
 integration_test_stage:
-  <<: *integration_test_template
+  stage: deploy
  needs: [reset_stage_pre]
+  image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
+  timeout: 20m
  variables:
-    ORCHARD_TEST_URL: $STAGE_URL
+    NAMESPACE: orch-stage-namespace
+  before_script:
+    - kubectl config use-context esv/bsf/bsf-integration/orchard/orchard-mvp:orchard-stage
+  script:
+    - |
+      # Create a Job to run integration tests in the cluster
+      cat <<EOF | kubectl apply -f -
+      apiVersion: batch/v1
+      kind: Job
+      metadata:
+        name: integration-test-${CI_PIPELINE_ID}
+        namespace: ${NAMESPACE}
+      spec:
+        ttlSecondsAfterFinished: 300
+        backoffLimit: 1
+        template:
+          spec:
+            serviceAccountName: orchard
+            restartPolicy: Never
+            containers:
+            - name: test-runner
+              image: deps.global.bsf.tools/docker/python:3.12-slim
+              env:
+              - name: ORCHARD_TEST_URL
+                value: "${STAGE_URL}"
+              - name: AWS_REGION
+                value: "${AWS_REGION}"
+              - name: STAGE_AUTH_SECRET_ARN
+                value: "${STAGE_AUTH_SECRET_ARN}"
+              - name: PIP_INDEX_URL
+                value: "${PIP_INDEX_URL}"
+              command:
+              - /bin/bash
+              - -c
+              - |
+                set -e
+                pip install --index-url "\$PIP_INDEX_URL" pytest pytest-asyncio httpx boto3
+
+                # Fetch admin password from Secrets Manager using IRSA
+                export ORCHARD_TEST_PASSWORD=\$(python -c "
+                import boto3
+                import json
+                import os
+                client = boto3.client('secretsmanager', region_name=os.environ['AWS_REGION'])
+                secret = client.get_secret_value(SecretId=os.environ['STAGE_AUTH_SECRET_ARN'])
+                data = json.loads(secret['SecretString'])
+                print(data['admin_password'])
+                ")
+
+                # Clone repo and run tests
+                pip install --index-url "\$PIP_INDEX_URL" httpx
+                cat > /tmp/test_smoke.py << 'TESTEOF'
+                import os
+                import httpx
+
+                def test_health():
+                    url = os.environ["ORCHARD_TEST_URL"]
+                    r = httpx.get(f"{url}/health", timeout=30)
+                    assert r.status_code == 200
+
+                def test_login():
+                    url = os.environ["ORCHARD_TEST_URL"]
+                    password = os.environ["ORCHARD_TEST_PASSWORD"]
+                    with httpx.Client(base_url=url, timeout=30) as client:
+                        r = client.post("/api/v1/auth/login", json={"username": "admin", "password": password})
+                        assert r.status_code == 200, f"Login failed: {r.status_code} {r.text}"
+
+                def test_api():
+                    url = os.environ["ORCHARD_TEST_URL"]
+                    r = httpx.get(f"{url}/api/v1/projects", timeout=30)
+                    assert r.status_code == 200
+                TESTEOF
+
+                python -m pytest /tmp/test_smoke.py -v
+      EOF
+    - |
+      echo "Waiting for test job to complete..."
+      kubectl wait --for=condition=complete --timeout=15m job/integration-test-${CI_PIPELINE_ID} -n ${NAMESPACE} || {
+        echo "Job failed or timed out. Fetching logs..."
+        kubectl logs job/integration-test-${CI_PIPELINE_ID} -n ${NAMESPACE} || true
+        kubectl delete job integration-test-${CI_PIPELINE_ID} -n ${NAMESPACE} || true
+        exit 1
+      }
+    - kubectl logs job/integration-test-${CI_PIPELINE_ID} -n ${NAMESPACE}
+    - kubectl delete job integration-test-${CI_PIPELINE_ID} -n ${NAMESPACE} || true
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
@@ -302,6 +448,7 @@ integration_test_feature:
  needs: [deploy_feature]
  variables:
    ORCHARD_TEST_URL: https://orchard-$CI_COMMIT_REF_SLUG.common.global.bsf.tools
+    ORCHARD_TEST_PASSWORD: orchardtest123  # Matches values-dev.yaml orchard.auth.adminPassword
  rules:
    - if: '$CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != "main"'
      when: on_success