5 Commits

Author SHA1 Message Date
Mondo Diaz
e96dc5cde8 Fix reset_stage job to read STAGE_URL from environment 2026-01-21 22:25:04 +00:00
Mondo Diaz
cba5bac383 Merge branch 'feature/stage-reset-job' into 'main'
Add factory reset endpoint for stage environment cleanup (#54)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!37
2026-01-21 16:00:02 -06:00
Mondo Diaz
535280a783 Add factory reset endpoint for stage environment cleanup (#54) 2026-01-21 16:00:02 -06:00
Dane Moss
c9026e1950 Merge branch 'fix/s3-irsa-credentials' into 'main'
Fix S3 client to support IRSA credentials (#54)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!36
2026-01-21 13:42:53 -07:00
Mondo Diaz
fedbd95cf4 Fix S3 client to support IRSA credentials (#54) 2026-01-21 13:42:53 -07:00
4 changed files with 241 additions and 9 deletions

View File

@@ -11,6 +11,12 @@ variables:
  # Environment URLs (used by deploy and test jobs)
  STAGE_URL: https://orchard-stage.common.global.bsf.tools
  PROD_URL: https://orchard.common.global.bsf.tools
  # Stage environment AWS resources (used by reset job)
  STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
  STAGE_RDS_DBNAME: postgres
  STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
  STAGE_S3_BUCKET: orchard-artifacts-stage
  AWS_REGION: us-gov-west-1
  # Shared pip cache directory
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
@@ -141,6 +147,69 @@ integration_test_stage:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Reset stage environment after integration tests (clean slate for next run).
# Calls the /api/v1/admin/factory-reset endpoint, which handles DB and S3 cleanup.
reset_stage:
  stage: deploy
  needs: [integration_test_stage]
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
    - |
      python - <<'RESET_SCRIPT'
      import os
      import sys

      import httpx

      BASE_URL = os.environ.get("STAGE_URL", "")
      ADMIN_USER = "admin"
      ADMIN_PASS = "changeme123"  # Default admin password

      if not BASE_URL:
          print("ERROR: STAGE_URL environment variable not set")
          sys.exit(1)

      print(f"=== Resetting stage environment at {BASE_URL} ===")
      client = httpx.Client(base_url=BASE_URL, timeout=60.0)

      # Log in as admin (the session cookie is kept on the client)
      print("Logging in as admin...")
      login_response = client.post(
          "/api/v1/auth/login",
          json={"username": ADMIN_USER, "password": ADMIN_PASS},
      )
      if login_response.status_code != 200:
          print(f"Login failed: {login_response.status_code} - {login_response.text}")
          sys.exit(1)
      print("Login successful")

      # Call the factory reset endpoint
      print("Calling factory reset endpoint...")
      reset_response = client.post(
          "/api/v1/admin/factory-reset",
          headers={"X-Confirm-Reset": "yes-delete-all-data"},
      )
      if reset_response.status_code == 200:
          result = reset_response.json()
          print("Factory reset successful!")
          print(f"  Database tables dropped: {result['results']['database_tables_dropped']}")
          print(f"  S3 objects deleted: {result['results']['s3_objects_deleted']}")
          print(f"  Database reinitialized: {result['results']['database_reinitialized']}")
          print(f"  Seeded: {result['results']['seeded']}")
          sys.exit(0)
      else:
          print(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
          sys.exit(1)
      RESET_SCRIPT
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
  allow_failure: true  # Don't fail the pipeline if the reset has issues

# Integration tests for feature deployment (full suite)
integration_test_feature:
  <<: *integration_test_template

View File

@@ -6,6 +6,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- Added factory reset endpoint `POST /api/v1/admin/factory-reset` for test environment cleanup (#54)
  - Requires admin authentication and the `X-Confirm-Reset: yes-delete-all-data` header
  - Drops all database tables, clears the S3 bucket, reinitializes the schema, and re-seeds default data
  - The CI pipeline calls this automatically after integration tests on stage (a sketch of the endpoint's response shape follows below)
- Added `delete_all()` method to the storage backend for bulk S3 object deletion (#54)
- Added AWS Secrets Manager CSI driver support for database credentials (#54)
- Added SecretProviderClass template for Secrets Manager integration (#54)
- Added IRSA service account annotations for prod and stage environments (#54)

### Changed
- Configured stage and prod to use AWS RDS instead of the PostgreSQL subchart (#54)
- Configured stage and prod to use AWS S3 instead of the MinIO subchart (#54)
- Changed prod deployment from manual to automatic on version tags (#54)
- Updated S3 client to support IRSA credentials when no explicit keys are provided (#54)
- Changed prod image pullPolicy to Always (#54)
- Added proxy-body-size annotation to prod ingress for large uploads (#54)

### Removed
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)

### Added
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
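
For client code consuming the new endpoint, the success response has roughly the shape sketched below; the type names are illustrative and not part of the codebase. Note that `database_tables_dropped` is reported as the string "all" on success rather than a count, so clients should not assume an integer:

from typing import TypedDict, Union

# Hypothetical client-side model of the factory-reset response; the field
# names mirror the handler's return value, the class names are invented.
class ResetResults(TypedDict):
    database_tables_dropped: Union[int, str]  # reported as "all" on success
    s3_objects_deleted: int
    database_reinitialized: bool
    seeded: bool

class ResetResponse(TypedDict):
    status: str  # "success"
    message: str
    results: ResetResults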

View File

@@ -6390,3 +6390,110 @@ def get_artifact_provenance(
        tags=tag_list,
        uploads=upload_history,
    )


# =============================================================================
# Factory Reset Endpoint (Admin Only)
# =============================================================================
@router.post("/api/v1/admin/factory-reset", tags=["admin"])
def factory_reset(
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    current_user: User = Depends(require_admin),
):
    """
    Factory reset - delete all data and restore to the initial state.

    This endpoint:
    1. Drops all database tables
    2. Deletes all objects from S3 storage
    3. Recreates the database schema
    4. Re-seeds with the default admin user

    Requires:
    - Admin authentication
    - X-Confirm-Reset header set to "yes-delete-all-data"

    WARNING: This is a destructive operation that cannot be undone.
    """
    # Require explicit confirmation header
    confirm_header = request.headers.get("X-Confirm-Reset")
    if confirm_header != "yes-delete-all-data":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Factory reset requires X-Confirm-Reset header set to 'yes-delete-all-data'",
        )

    logger.warning(f"Factory reset initiated by admin user: {current_user.username}")

    results = {
        "database_tables_dropped": 0,
        "s3_objects_deleted": 0,
        "database_reinitialized": False,
        "seeded": False,
    }

    try:
        # Step 1: Drop all tables in the public schema.
        # session_replication_role = 'replica' suppresses triggers (including
        # FK enforcement) so tables can be dropped in any order.
        logger.info("Dropping all database tables...")
        db.execute(
            text("""
                DO $$
                DECLARE
                    r RECORD;
                    table_count INT := 0;
                BEGIN
                    SET session_replication_role = 'replica';
                    FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
                        EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
                        table_count := table_count + 1;
                    END LOOP;
                    SET session_replication_role = 'origin';
                    RAISE NOTICE 'Dropped % tables', table_count;
                END $$;
            """)
        )
        db.commit()

        # Verify the drop: count tables still present in the public schema
        count_result = db.execute(
            text("SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'public'")
        )
        remaining_tables = count_result.scalar()
        results["database_tables_dropped"] = "all"
        logger.info(f"Database tables dropped, remaining: {remaining_tables}")

        # Step 2: Delete all S3 objects
        logger.info("Deleting all S3 objects...")
        results["s3_objects_deleted"] = storage.delete_all()

        # Step 3: Reinitialize the database schema
        logger.info("Reinitializing database schema...")
        from .database import init_db

        init_db()
        results["database_reinitialized"] = True

        # Step 4: Re-seed with default data
        logger.info("Seeding database with defaults...")
        from .seed import seed_database

        seed_database()
        results["seeded"] = True

        logger.warning(f"Factory reset completed by {current_user.username}")
        return {
            "status": "success",
            "message": "Factory reset completed successfully",
            "results": results,
        }
    except Exception as e:
        logger.error(f"Factory reset failed: {e}")
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Factory reset failed: {str(e)}",
        )
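
The confirmation guard above is easy to exercise in isolation. Below is a minimal sketch using FastAPI's TestClient; the app import path and the auth header are placeholders for illustration, not part of this diff, and an admin session is assumed to be established:

from fastapi.testclient import TestClient

from app.main import app  # hypothetical import path for the FastAPI app

client = TestClient(app)
admin_headers = {"Authorization": "Bearer <admin-token>"}  # placeholder credentials

# Missing confirmation header: rejected with 400 before any destructive work.
resp = client.post("/api/v1/admin/factory-reset", headers=admin_headers)
assert resp.status_code == 400

# Wrong header value: also rejected.
resp = client.post(
    "/api/v1/admin/factory-reset",
    headers={**admin_headers, "X-Confirm-Reset": "not-the-magic-value"},
)
assert resp.status_code == 400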

View File

@@ -242,15 +242,19 @@ class S3Storage:
             },
         )
-        self.client = boto3.client(
-            "s3",
-            endpoint_url=settings.s3_endpoint if settings.s3_endpoint else None,
-            region_name=settings.s3_region,
-            aws_access_key_id=settings.s3_access_key_id,
-            aws_secret_access_key=settings.s3_secret_access_key,
-            config=config,
-            verify=settings.s3_verify_ssl,  # SSL/TLS verification
-        )
+        # Build client kwargs - only include credentials if explicitly provided.
+        # This allows IRSA/IAM role credentials to be used when no explicit creds are set.
+        client_kwargs = {
+            "endpoint_url": settings.s3_endpoint if settings.s3_endpoint else None,
+            "region_name": settings.s3_region,
+            "config": config,
+            "verify": settings.s3_verify_ssl,
+        }
+        if settings.s3_access_key_id and settings.s3_secret_access_key:
+            client_kwargs["aws_access_key_id"] = settings.s3_access_key_id
+            client_kwargs["aws_secret_access_key"] = settings.s3_secret_access_key
+        self.client = boto3.client("s3", **client_kwargs)
         self.bucket = settings.s3_bucket
         # Store active multipart uploads for resumable support
         self._active_uploads: Dict[str, Dict[str, Any]] = {}
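
The effect of omitting explicit keys is that boto3 falls back to its default credential chain: environment variables, shared config, and, on EKS pods annotated for IRSA, the mounted web identity token. A quick way to confirm which mechanism resolved, sketched with plain boto3 (the printed method name varies by environment):

import boto3

# With no explicit keys passed to the client, botocore walks its default
# credential chain; under IRSA this resolves via the web identity token
# that EKS mounts into the pod.
session = boto3.Session()
creds = session.get_credentials()
if creds is None:
    print("No credentials resolved")
else:
    print(f"Credentials resolved via: {creds.method}")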
@@ -831,6 +835,36 @@ class S3Storage:
        except ClientError:
            return False

    def delete_all(self) -> int:
        """
        Delete all objects in the bucket.

        Returns:
            Number of objects deleted
        """
        deleted_count = 0
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=self.bucket):
                objects = page.get("Contents", [])
                if not objects:
                    continue
                # Delete each page in one call; list_objects_v2 pages hold at
                # most 1000 keys, matching the delete_objects per-request limit.
                delete_keys = [{"Key": obj["Key"]} for obj in objects]
                self.client.delete_objects(
                    Bucket=self.bucket, Delete={"Objects": delete_keys}
                )
                deleted_count += len(delete_keys)
                logger.info(f"Deleted {len(delete_keys)} objects from S3")
            logger.info(f"Total objects deleted from S3: {deleted_count}")
            return deleted_count
        except ClientError as e:
            logger.error(f"Failed to delete all S3 objects: {e}")
            raise

    def generate_presigned_url(
        self,
        s3_key: str,
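
One caveat on delete_all worth noting: delete_objects with bare keys only writes delete markers when bucket versioning is enabled, so a versioned bucket would not actually be emptied. Nothing in this diff indicates versioning on the stage bucket, but if it were ever enabled, the cleanup would also need to walk object versions; a sketch with a placeholder bucket name:

import boto3

s3 = boto3.client("s3")
bucket = "example-bucket"  # placeholder, not the real stage bucket

# On a versioned bucket, every version and delete marker must be removed
# explicitly; deleting by key alone only adds new delete markers.
paginator = s3.get_paginator("list_object_versions")
for page in paginator.paginate(Bucket=bucket):
    targets = [
        {"Key": v["Key"], "VersionId": v["VersionId"]}
        for section in ("Versions", "DeleteMarkers")
        for v in page.get(section, [])
    ]
    if targets:
        s3.delete_objects(Bucket=bucket, Delete={"Objects": targets})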