5 Commits

Author SHA1 Message Date
Mondo Diaz
e96dc5cde8 Fix reset_stage job to read STAGE_URL from environment 2026-01-21 22:25:04 +00:00
Mondo Diaz
cba5bac383 Merge branch 'feature/stage-reset-job' into 'main'
Add factory reset endpoint for stage environment cleanup (#54)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!37
2026-01-21 16:00:02 -06:00
Mondo Diaz
535280a783 Add factory reset endpoint for stage environment cleanup (#54) 2026-01-21 16:00:02 -06:00
Dane Moss
c9026e1950 Merge branch 'fix/s3-irsa-credentials' into 'main'
Fix S3 client to support IRSA credentials (#54)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!36
2026-01-21 13:42:53 -07:00
Mondo Diaz
fedbd95cf4 Fix S3 client to support IRSA credentials (#54) 2026-01-21 13:42:53 -07:00
4 changed files with 241 additions and 9 deletions

View File

@@ -11,6 +11,12 @@ variables:
  # Environment URLs (used by deploy and test jobs)
  STAGE_URL: https://orchard-stage.common.global.bsf.tools
  PROD_URL: https://orchard.common.global.bsf.tools
  # Stage environment AWS resources (used by reset job)
  STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
  STAGE_RDS_DBNAME: postgres
  STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
  STAGE_S3_BUCKET: orchard-artifacts-stage
  AWS_REGION: us-gov-west-1
  # Shared pip cache directory
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
@@ -141,6 +147,69 @@ integration_test_stage:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Reset stage environment after integration tests (clean slate for next run).
# Calls the /api/v1/admin/factory-reset endpoint, which handles DB and S3 cleanup.
reset_stage:
  stage: deploy
  needs: [integration_test_stage]
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
    - |
      python - <<'RESET_SCRIPT'
      import os
      import sys

      import httpx

      BASE_URL = os.environ.get("STAGE_URL", "")
      ADMIN_USER = "admin"
      ADMIN_PASS = "changeme123"  # Default admin password

      if not BASE_URL:
          print("ERROR: STAGE_URL environment variable not set")
          sys.exit(1)

      print(f"=== Resetting stage environment at {BASE_URL} ===")
      client = httpx.Client(base_url=BASE_URL, timeout=60.0)

      # Log in as admin (the session cookie is kept on the client)
      print("Logging in as admin...")
      login_response = client.post(
          "/api/v1/auth/login",
          json={"username": ADMIN_USER, "password": ADMIN_PASS},
      )
      if login_response.status_code != 200:
          print(f"Login failed: {login_response.status_code} - {login_response.text}")
          sys.exit(1)
      print("Login successful")

      # Call the factory reset endpoint
      print("Calling factory reset endpoint...")
      reset_response = client.post(
          "/api/v1/admin/factory-reset",
          headers={"X-Confirm-Reset": "yes-delete-all-data"},
      )
      if reset_response.status_code == 200:
          result = reset_response.json()
          print("Factory reset successful!")
          print(f"  Database tables dropped: {result['results']['database_tables_dropped']}")
          print(f"  S3 objects deleted: {result['results']['s3_objects_deleted']}")
          print(f"  Database reinitialized: {result['results']['database_reinitialized']}")
          print(f"  Seeded: {result['results']['seeded']}")
          sys.exit(0)
      else:
          print(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
          sys.exit(1)
      RESET_SCRIPT
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
  allow_failure: true  # Don't fail the pipeline if the reset has issues

# Integration tests for feature deployment (full suite)
integration_test_feature:
  <<: *integration_test_template

View File

@@ -6,6 +6,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- Added factory reset endpoint `POST /api/v1/admin/factory-reset` for test environment cleanup (#54)
  - Requires admin authentication and the `X-Confirm-Reset: yes-delete-all-data` header
  - Drops all database tables, clears the S3 bucket, reinitializes the schema, and re-seeds default data
  - The CI pipeline calls this automatically after integration tests on stage (a sketch of the endpoint's response shape follows below)
- Added `delete_all()` method to the storage backend for bulk S3 object deletion (#54)
- Added AWS Secrets Manager CSI driver support for database credentials (#54)
- Added SecretProviderClass template for Secrets Manager integration (#54)
- Added IRSA service account annotations for prod and stage environments (#54)

### Changed
- Configured stage and prod to use AWS RDS instead of the PostgreSQL subchart (#54)
- Configured stage and prod to use AWS S3 instead of the MinIO subchart (#54)
- Changed prod deployment from manual to automatic on version tags (#54)
- Updated S3 client to support IRSA credentials when no explicit keys are provided (#54)
- Changed prod image pullPolicy to Always (#54)
- Added proxy-body-size annotation to prod ingress for large uploads (#54)

### Removed
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)

### Added
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
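
For client code consuming the new endpoint, the success response has roughly the shape sketched below; the type names are illustrative and not part of the codebase. Note that `database_tables_dropped` is reported as the string "all" on success rather than a count, so clients should not assume an integer:

from typing import TypedDict, Union

# Hypothetical client-side model of the factory-reset response; the field
# names mirror the handler's return value, the class names are invented.
class ResetResults(TypedDict):
    database_tables_dropped: Union[int, str]  # reported as "all" on success
    s3_objects_deleted: int
    database_reinitialized: bool
    seeded: bool

class ResetResponse(TypedDict):
    status: str  # "success"
    message: str
    results: ResetResults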

View File

@@ -6390,3 +6390,110 @@ def get_artifact_provenance(
        tags=tag_list,
        uploads=upload_history,
    )


# =============================================================================
# Factory Reset Endpoint (Admin Only)
# =============================================================================
@router.post("/api/v1/admin/factory-reset", tags=["admin"])
def factory_reset(
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    current_user: User = Depends(require_admin),
):
    """
    Factory reset - delete all data and restore to the initial state.

    This endpoint:
    1. Drops all database tables
    2. Deletes all objects from S3 storage
    3. Recreates the database schema
    4. Re-seeds with the default admin user

    Requires:
    - Admin authentication
    - X-Confirm-Reset header set to "yes-delete-all-data"

    WARNING: This is a destructive operation that cannot be undone.
    """
    # Require explicit confirmation header
    confirm_header = request.headers.get("X-Confirm-Reset")
    if confirm_header != "yes-delete-all-data":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Factory reset requires X-Confirm-Reset header set to 'yes-delete-all-data'",
        )

    logger.warning(f"Factory reset initiated by admin user: {current_user.username}")

    results = {
        "database_tables_dropped": 0,
        "s3_objects_deleted": 0,
        "database_reinitialized": False,
        "seeded": False,
    }

    try:
        # Step 1: Drop all tables in the public schema.
        # session_replication_role = 'replica' suppresses triggers (including
        # FK enforcement) so tables can be dropped in any order.
        logger.info("Dropping all database tables...")
        db.execute(
            text("""
                DO $$
                DECLARE
                    r RECORD;
                    table_count INT := 0;
                BEGIN
                    SET session_replication_role = 'replica';
                    FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
                        EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
                        table_count := table_count + 1;
                    END LOOP;
                    SET session_replication_role = 'origin';
                    RAISE NOTICE 'Dropped % tables', table_count;
                END $$;
            """)
        )
        db.commit()

        # Verify the drop: count tables still present in the public schema
        count_result = db.execute(
            text("SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'public'")
        )
        remaining_tables = count_result.scalar()
        results["database_tables_dropped"] = "all"
        logger.info(f"Database tables dropped, remaining: {remaining_tables}")

        # Step 2: Delete all S3 objects
        logger.info("Deleting all S3 objects...")
        results["s3_objects_deleted"] = storage.delete_all()

        # Step 3: Reinitialize the database schema
        logger.info("Reinitializing database schema...")
        from .database import init_db

        init_db()
        results["database_reinitialized"] = True

        # Step 4: Re-seed with default data
        logger.info("Seeding database with defaults...")
        from .seed import seed_database

        seed_database()
        results["seeded"] = True

        logger.warning(f"Factory reset completed by {current_user.username}")
        return {
            "status": "success",
            "message": "Factory reset completed successfully",
            "results": results,
        }
    except Exception as e:
        logger.error(f"Factory reset failed: {e}")
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Factory reset failed: {str(e)}",
        )
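
The confirmation guard above is easy to exercise in isolation. Below is a minimal sketch using FastAPI's TestClient; the app import path and the auth header are placeholders for illustration, not part of this diff, and an admin session is assumed to be established:

from fastapi.testclient import TestClient

from app.main import app  # hypothetical import path for the FastAPI app

client = TestClient(app)
admin_headers = {"Authorization": "Bearer <admin-token>"}  # placeholder credentials

# Missing confirmation header: rejected with 400 before any destructive work.
resp = client.post("/api/v1/admin/factory-reset", headers=admin_headers)
assert resp.status_code == 400

# Wrong header value: also rejected.
resp = client.post(
    "/api/v1/admin/factory-reset",
    headers={**admin_headers, "X-Confirm-Reset": "not-the-magic-value"},
)
assert resp.status_code == 400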

View File

@@ -242,15 +242,19 @@ class S3Storage:
             },
         )
-        self.client = boto3.client(
-            "s3",
-            endpoint_url=settings.s3_endpoint if settings.s3_endpoint else None,
-            region_name=settings.s3_region,
-            aws_access_key_id=settings.s3_access_key_id,
-            aws_secret_access_key=settings.s3_secret_access_key,
-            config=config,
-            verify=settings.s3_verify_ssl,  # SSL/TLS verification
-        )
+        # Build client kwargs - only include credentials if explicitly provided.
+        # This allows IRSA/IAM role credentials to be used when no explicit creds are set.
+        client_kwargs = {
+            "endpoint_url": settings.s3_endpoint if settings.s3_endpoint else None,
+            "region_name": settings.s3_region,
+            "config": config,
+            "verify": settings.s3_verify_ssl,
+        }
+        if settings.s3_access_key_id and settings.s3_secret_access_key:
+            client_kwargs["aws_access_key_id"] = settings.s3_access_key_id
+            client_kwargs["aws_secret_access_key"] = settings.s3_secret_access_key
+        self.client = boto3.client("s3", **client_kwargs)
         self.bucket = settings.s3_bucket
         # Store active multipart uploads for resumable support
         self._active_uploads: Dict[str, Dict[str, Any]] = {}
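
The effect of omitting explicit keys is that boto3 falls back to its default credential chain: environment variables, shared config, and, on EKS pods annotated for IRSA, the mounted web identity token. A quick way to confirm which mechanism resolved, sketched with plain boto3 (the printed method name varies by environment):

import boto3

# With no explicit keys passed to the client, botocore walks its default
# credential chain; under IRSA this resolves via the web identity token
# that EKS mounts into the pod.
session = boto3.Session()
creds = session.get_credentials()
if creds is None:
    print("No credentials resolved")
else:
    print(f"Credentials resolved via: {creds.method}")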
@@ -831,6 +835,36 @@ class S3Storage:
        except ClientError:
            return False

    def delete_all(self) -> int:
        """
        Delete all objects in the bucket.

        Returns:
            Number of objects deleted
        """
        deleted_count = 0
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=self.bucket):
                objects = page.get("Contents", [])
                if not objects:
                    continue
                # Delete each page in one call; list_objects_v2 pages hold at
                # most 1000 keys, matching the delete_objects per-request limit.
                delete_keys = [{"Key": obj["Key"]} for obj in objects]
                self.client.delete_objects(
                    Bucket=self.bucket, Delete={"Objects": delete_keys}
                )
                deleted_count += len(delete_keys)
                logger.info(f"Deleted {len(delete_keys)} objects from S3")
            logger.info(f"Total objects deleted from S3: {deleted_count}")
            return deleted_count
        except ClientError as e:
            logger.error(f"Failed to delete all S3 objects: {e}")
            raise

    def generate_presigned_url(
        self,
        s3_key: str,
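
One caveat on delete_all worth noting: delete_objects with bare keys only writes delete markers when bucket versioning is enabled, so a versioned bucket would not actually be emptied. Nothing in this diff indicates versioning on the stage bucket, but if it were ever enabled, the cleanup would also need to walk object versions; a sketch with a placeholder bucket name:

import boto3

s3 = boto3.client("s3")
bucket = "example-bucket"  # placeholder, not the real stage bucket

# On a versioned bucket, every version and delete marker must be removed
# explicitly; deleting by key alone only adds new delete markers.
paginator = s3.get_paginator("list_object_versions")
for page in paginator.paginate(Bucket=bucket):
    targets = [
        {"Key": v["Key"], "VersionId": v["VersionId"]}
        for section in ("Versions", "DeleteMarkers")
        for v in page.get(section, [])
    ]
    if targets:
        s3.delete_objects(Bucket=bucket, Delete={"Objects": targets})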