Compare commits

9 Commits
c9026e1950...fix/factor

| Author | SHA1 | Date |
|---|---|---|
|  | b0bb3ed569 |  |
|  | 1ac75e1017 |  |
|  | 693613f111 |  |
|  | 9da4ae8c0d |  |
|  | 7ffdc64364 |  |
|  | 6abc0c88b0 |  |
|  | e96dc5cde8 |  |
|  | cba5bac383 |  |
|  | 535280a783 |  |
@@ -11,6 +11,12 @@ variables:
  # Environment URLs (used by deploy and test jobs)
  STAGE_URL: https://orchard-stage.common.global.bsf.tools
  PROD_URL: https://orchard.common.global.bsf.tools
  # Stage environment AWS resources (used by reset job)
  STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
  STAGE_RDS_DBNAME: postgres
  STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
  STAGE_S3_BUCKET: orchard-artifacts-stage
  AWS_REGION: us-gov-west-1
  # Shared pip cache directory
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"

@@ -141,6 +147,86 @@ integration_test_stage:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success

# Reset stage environment after integration tests (clean slate for next run)
# Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
reset_stage:
  stage: deploy
  needs: [integration_test_stage]
  image: deps.global.bsf.tools/docker/python:3.12-slim
  timeout: 5m
  retry: 1  # Retry once on transient failures
  before_script:
    - pip install --index-url "$PIP_INDEX_URL" httpx
  script:
    - |
      python - <<'RESET_SCRIPT'
      import httpx
      import sys
      import os
      import time

      BASE_URL = os.environ.get("STAGE_URL", "")
      ADMIN_USER = "admin"
      ADMIN_PASS = "changeme123"  # Default admin password
      MAX_RETRIES = 3
      RETRY_DELAY = 5  # seconds

      if not BASE_URL:
          print("ERROR: STAGE_URL environment variable not set")
          sys.exit(1)

      print(f"=== Resetting stage environment at {BASE_URL} ===")

      def do_reset():
          with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
              # Login as admin
              print("Logging in as admin...")
              login_response = client.post(
                  "/api/v1/auth/login",
                  json={"username": ADMIN_USER, "password": ADMIN_PASS},
              )
              if login_response.status_code != 200:
                  raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
              print("Login successful")

              # Call factory reset endpoint
              print("Calling factory reset endpoint...")
              reset_response = client.post(
                  "/api/v1/admin/factory-reset",
                  headers={"X-Confirm-Reset": "yes-delete-all-data"},
              )

              if reset_response.status_code == 200:
                  result = reset_response.json()
                  print("Factory reset successful!")
                  print(f" Database tables dropped: {result['results']['database_tables_dropped']}")
                  print(f" S3 objects deleted: {result['results']['s3_objects_deleted']}")
                  print(f" Database reinitialized: {result['results']['database_reinitialized']}")
                  print(f" Seeded: {result['results']['seeded']}")
                  return True
              else:
                  raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")

      # Retry loop
      for attempt in range(1, MAX_RETRIES + 1):
          try:
              print(f"Attempt {attempt}/{MAX_RETRIES}")
              if do_reset():
                  sys.exit(0)
          except Exception as e:
              print(f"Attempt {attempt} failed: {e}")
              if attempt < MAX_RETRIES:
                  print(f"Retrying in {RETRY_DELAY} seconds...")
                  time.sleep(RETRY_DELAY)
              else:
                  print("All retry attempts failed")
                  sys.exit(1)
      RESET_SCRIPT
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: on_success
  allow_failure: true  # Don't fail pipeline if reset has issues

# Integration tests for feature deployment (full suite)
integration_test_feature:
  <<: *integration_test_template

@@ -4,6 +4,7 @@
# False positive: s3_key is an attribute name in test assertions, not a secret
# These are historical commits - files have since been deleted or updated with inline comments
7e68baed0886a3c928644cd01aa3b39f92d4f976:backend/tests/test_duplicate_detection.py:generic-api-key:154
81458b3bcb5ace97109ba4c16f4afa6e55b1b8bd:backend/tests/test_duplicate_detection.py:generic-api-key:154
2f1891cf0126ec0e7d4c789d872a2cb2dd3a1745:backend/tests/unit/test_storage.py:generic-api-key:381
10d36947948de796f0bacea3827f4531529c405d:backend/tests/unit/test_storage.py:generic-api-key:381
bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:generic-api-key:381
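
Each allowlisted finding above uses gitleaks' fingerprint layout of `<commit-sha>:<file-path>:<rule-id>:<line-number>`. The following is a minimal sketch of splitting such an entry into its fields; it assumes only the colon-separated layout visible above, and the helper name is illustrative rather than part of the repository.

```python
# Illustrative only: parse a .gitleaksignore fingerprint of the form
# <commit-sha>:<file-path>:<rule-id>:<line-number>, as in the entries above.
def parse_fingerprint(entry: str) -> dict:
    # Split the rule id and line number off the right so the file path stays intact.
    prefix, rule_id, line_no = entry.rsplit(":", 2)
    commit, path = prefix.split(":", 1)
    return {"commit": commit, "path": path, "rule": rule_id, "line": int(line_no)}


print(parse_fingerprint(
    "2f1891cf0126ec0e7d4c789d872a2cb2dd3a1745:backend/tests/unit/test_storage.py:generic-api-key:381"
))
# {'commit': '2f1891cf0126ec0e7d4c789d872a2cb2dd3a1745',
#  'path': 'backend/tests/unit/test_storage.py',
#  'rule': 'generic-api-key', 'line': 381}
```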
CHANGELOG.md (15 changed lines)
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- Added factory reset endpoint `POST /api/v1/admin/factory-reset` for test environment cleanup (#54)
  - Requires admin authentication and `X-Confirm-Reset: yes-delete-all-data` header
  - Drops all database tables, clears S3 bucket, reinitializes schema, re-seeds default data
  - CI pipeline automatically calls this after integration tests on stage
- Added `delete_all()` method to storage backend for bulk S3 object deletion (#54)
- Added AWS Secrets Manager CSI driver support for database credentials (#54)
- Added SecretProviderClass template for Secrets Manager integration (#54)
- Added IRSA service account annotations for prod and stage environments (#54)

@@ -23,6 +28,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)

### Fixed
- Fixed factory reset not creating default admin user after reset (#60)
  - Admin user was only created at server startup, not after factory reset
  - CI reset job would fail to login because admin user didn't exist
- Improved reset_stage CI job reliability (#60)
  - Added application-level retry logic (3 attempts with 5s delay)
  - Added job-level retry for transient failures
  - Fixed httpx client to use proper context manager
  - Increased timeout to 120s for reset operations

### Added
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
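
For context on the first changelog entry, this is roughly what a manual invocation of the endpoint looks like outside CI. It is a sketch, not project tooling: the base URL is a placeholder (the CI job targets `$STAGE_URL`), and it mirrors the reset_stage script above by logging in with the default admin credentials and sending the required confirmation header within the same `httpx.Client`.

```python
import httpx

BASE_URL = "http://localhost:8000"  # placeholder; the CI job uses $STAGE_URL instead

with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
    # Authenticate as the default admin; the client keeps the resulting session.
    client.post(
        "/api/v1/auth/login",
        json={"username": "admin", "password": "changeme123"},
    ).raise_for_status()

    # Trigger the reset; without the explicit header the API responds with 400.
    response = client.post(
        "/api/v1/admin/factory-reset",
        headers={"X-Confirm-Reset": "yes-delete-all-data"},
    )
    response.raise_for_status()
    print(response.json()["results"])
```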
@@ -6390,3 +6390,121 @@ def get_artifact_provenance(
        tags=tag_list,
        uploads=upload_history,
    )


# =============================================================================
# Factory Reset Endpoint (Admin Only)
# =============================================================================


@router.post("/api/v1/admin/factory-reset", tags=["admin"])
def factory_reset(
    request: Request,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
    current_user: User = Depends(require_admin),
):
    """
    Factory reset - delete all data and restore to initial state.

    This endpoint:
    1. Drops all database tables
    2. Deletes all objects from S3 storage
    3. Recreates the database schema
    4. Re-seeds with default admin user

    Requires:
    - Admin authentication
    - X-Confirm-Reset header set to "yes-delete-all-data"

    WARNING: This is a destructive operation that cannot be undone.
    """
    # Require explicit confirmation header
    confirm_header = request.headers.get("X-Confirm-Reset")
    if confirm_header != "yes-delete-all-data":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Factory reset requires X-Confirm-Reset header set to 'yes-delete-all-data'",
        )

    # Capture username before we drop tables (user object will become invalid)
    admin_username = current_user.username
    logger.warning(f"Factory reset initiated by admin user: {admin_username}")

    results = {
        "database_tables_dropped": 0,
        "s3_objects_deleted": 0,
        "database_reinitialized": False,
        "seeded": False,
    }

    try:
        # Step 1: Drop all tables in public schema
        logger.info("Dropping all database tables...")
        drop_result = db.execute(
            text("""
                DO $$
                DECLARE
                    r RECORD;
                    table_count INT := 0;
                BEGIN
                    SET session_replication_role = 'replica';
                    FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
                        EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
                        table_count := table_count + 1;
                    END LOOP;
                    SET session_replication_role = 'origin';
                    RAISE NOTICE 'Dropped % tables', table_count;
                END $$;
            """)
        )
        db.commit()

        # Count tables that were dropped
        count_result = db.execute(
            text("SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'public'")
        )
        remaining_tables = count_result.scalar()
        results["database_tables_dropped"] = "all"
        logger.info(f"Database tables dropped, remaining: {remaining_tables}")

        # Step 2: Delete all S3 objects
        logger.info("Deleting all S3 objects...")
        results["s3_objects_deleted"] = storage.delete_all()

        # Step 3: Reinitialize database schema
        logger.info("Reinitializing database schema...")
        from .database import init_db, SessionLocal
        init_db()
        results["database_reinitialized"] = True

        # Step 4: Re-seed with default data (need fresh session after schema recreate)
        logger.info("Seeding database with defaults...")
        from .seed import seed_database
        from .auth import create_default_admin
        fresh_db = SessionLocal()
        try:
            # Create default admin user first (normally done at startup)
            create_default_admin(fresh_db)
            # Then seed other test data
            seed_database(fresh_db)
            fresh_db.commit()
        finally:
            fresh_db.close()
        results["seeded"] = True

        logger.warning(f"Factory reset completed by {admin_username}")

        return {
            "status": "success",
            "message": "Factory reset completed successfully",
            "results": results,
        }

    except Exception as e:
        logger.error(f"Factory reset failed: {e}")
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Factory reset failed: {str(e)}",
        )
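
A hedged sketch of how the confirmation-header guard above could be unit-tested with FastAPI's `TestClient`. The import paths for the application and its dependencies are assumptions (this diff does not show where the router is mounted), and the auth, database, and storage dependencies are overridden with stand-ins so only the header check executes.

```python
from unittest.mock import MagicMock

from fastapi.testclient import TestClient

# Hypothetical import paths -- adjust to wherever app, get_db, get_storage,
# and require_admin actually live in this repository.
from backend.app.main import app
from backend.app.api import get_db, get_storage, require_admin


def test_factory_reset_requires_confirmation_header():
    # Stub out auth/db/storage so the request reaches the endpoint body
    # without touching real infrastructure.
    app.dependency_overrides[require_admin] = lambda: MagicMock(username="admin")
    app.dependency_overrides[get_db] = lambda: MagicMock()
    app.dependency_overrides[get_storage] = lambda: MagicMock()
    try:
        client = TestClient(app)
        # No X-Confirm-Reset header: the endpoint must refuse before deleting anything.
        response = client.post("/api/v1/admin/factory-reset")
        assert response.status_code == 400
        assert "X-Confirm-Reset" in response.json()["detail"]
    finally:
        app.dependency_overrides.clear()
```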
@@ -835,6 +835,36 @@ class S3Storage:
        except ClientError:
            return False

    def delete_all(self) -> int:
        """
        Delete all objects in the bucket.

        Returns:
            Number of objects deleted
        """
        deleted_count = 0
        try:
            paginator = self.client.get_paginator("list_objects_v2")
            for page in paginator.paginate(Bucket=self.bucket):
                objects = page.get("Contents", [])
                if not objects:
                    continue

                # Delete objects in batches of 1000 (S3 limit)
                delete_keys = [{"Key": obj["Key"]} for obj in objects]
                if delete_keys:
                    self.client.delete_objects(
                        Bucket=self.bucket, Delete={"Objects": delete_keys}
                    )
                    deleted_count += len(delete_keys)
                    logger.info(f"Deleted {len(delete_keys)} objects from S3")

            logger.info(f"Total objects deleted from S3: {deleted_count}")
            return deleted_count
        except ClientError as e:
            logger.error(f"Failed to delete all S3 objects: {e}")
            raise

    def generate_presigned_url(
        self,
        s3_key: str,
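
A hedged sketch of exercising `delete_all()` against moto's in-memory S3, assuming `moto` and `boto3` are available. The `S3Storage` import path and the hand-wiring of the `client`/`bucket` attributes are assumptions, since the constructor is not shown in this diff.

```python
import boto3
from moto import mock_aws

from backend.app.storage import S3Storage  # hypothetical import path


@mock_aws
def test_delete_all_empties_the_bucket():
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="test-bucket")
    for i in range(5):
        s3.put_object(Bucket="test-bucket", Key=f"artifacts/{i}.bin", Body=b"x")

    # Wire the storage object by hand; the real test suite would use its own
    # S3Storage fixture instead of bypassing the constructor like this.
    storage = S3Storage.__new__(S3Storage)
    storage.client = s3
    storage.bucket = "test-bucket"

    assert storage.delete_all() == 5
    assert "Contents" not in s3.list_objects_v2(Bucket="test-bucket")
```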