19 Commits

Author SHA1 Message Date
Mondo Diaz
b2a860bafd Add pre-test stage reset to ensure known environment state
- Add reset_stage_pre job that runs after deploy_stage but before integration tests
- Extract reset script into reusable .reset_stage_template
- Ensures stage environment is in known state even if manually modified
- Pipeline flow: deploy_stage → reset_stage_pre → integration_test_stage → reset_stage
2026-01-23 23:06:47 +00:00
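The ordering described above is enforced through each job's `needs:` entry; a minimal sketch of the resulting chain, distilled from the .gitlab-ci.yml diff below:

reset_stage_pre:
  needs: [deploy_stage]
integration_test_stage:
  needs: [reset_stage_pre]
reset_stage:
  needs: [integration_test_stage]
  allow_failure: true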
Dane Moss
b5579f1643 Merge branch 'release_0.5.1' into 'main'
add CL entry to bump version

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!43
2026-01-23 15:37:09 -07:00
Dane Moss
fafa03e4ce add CL entry to bump version 2026-01-23 15:37:09 -07:00
Mondo Diaz
d4b2da3232 Merge branch 'fix/release-wait-for-stage-tests' into 'main'
Add gitleaks fingerprint for test file false positive

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!42
2026-01-23 16:16:03 -06:00
Mondo Diaz
7b04bbdf05 Add gitleaks fingerprint for test file false positive 2026-01-23 16:16:02 -06:00
Mondo Diaz
3a807870a3 Merge branch 'fix/ci-prod-namespace' into 'main'
Fix production CI deployment and simplify tag pipeline

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!41
2026-01-23 15:50:24 -06:00
Mondo Diaz
f966fde7df Fix production CI deployment and simplify tag pipeline 2026-01-23 15:50:24 -06:00
Mondo Diaz
133d9cbfd6 Merge branch 'bump_version' into 'main'
add changelog entry to cut a new release

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!40
2026-01-23 13:00:51 -06:00
Dane Moss
276b4f2743 add changelog entry to cut a new release 2026-01-23 10:46:20 -07:00
Mondo Diaz
67ac6bb3f8 Merge branch 'fix/factory-reset-admin-user' into 'main'
Update CHANGELOG with factory reset fixes (#60)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!39
2026-01-23 09:33:01 -06:00
Mondo Diaz
b0bb3ed569 Update CHANGELOG with factory reset fixes (#60) 2026-01-21 23:44:45 +00:00
Mondo Diaz
1ac75e1017 Fix factory reset and improve reset_stage CI job
- Add create_default_admin() call to factory reset (admin user wasn't being
  created after reset, only on server restart)
- Add retry logic to reset_stage CI job (3 attempts with 5s delay)
- Use proper context manager for httpx client
- Increase timeout to 120s for reset operation
- Add retry: 1 at job level for transient failures
2026-01-21 23:20:48 +00:00
Mondo Diaz
693613f111 Fix factory reset - capture username before dropping tables 2026-01-21 23:18:29 +00:00
Mondo Diaz
9da4ae8c0d Add gitleaks fingerprint for test file false positive 2026-01-21 22:59:08 +00:00
Mondo Diaz
7ffdc64364 Fix seed_database call in factory reset - pass fresh db session 2026-01-21 22:51:03 +00:00
Mondo Diaz
6abc0c88b0 Merge branch 'feature/stage-reset-job' into 'main'
Fix reset_stage job to read STAGE_URL from environment

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!38
2026-01-21 16:39:39 -06:00
Mondo Diaz
e96dc5cde8 Fix reset_stage job to read STAGE_URL from environment 2026-01-21 22:25:04 +00:00
Mondo Diaz
cba5bac383 Merge branch 'feature/stage-reset-job' into 'main'
Add factory reset endpoint for stage environment cleanup (#54)

See merge request esv/bsf/bsf-integration/orchard/orchard-mvp!37
2026-01-21 16:00:02 -06:00
Mondo Diaz
535280a783 Add factory reset endpoint for stage environment cleanup (#54) 2026-01-21 16:00:02 -06:00
6 changed files with 358 additions and 19 deletions

.gitlab-ci.yml

@@ -11,6 +11,12 @@ variables:
# Environment URLs (used by deploy and test jobs)
STAGE_URL: https://orchard-stage.common.global.bsf.tools
PROD_URL: https://orchard.common.global.bsf.tools
# Stage environment AWS resources (used by reset job)
STAGE_RDS_HOST: orchard-stage.cluster-cvw3jzjkozoc.us-gov-west-1.rds.amazonaws.com
STAGE_RDS_DBNAME: postgres
STAGE_SECRET_ARN: "arn:aws-us-gov:secretsmanager:us-gov-west-1:052673043337:secret:rds!cluster-a573672b-1a38-4665-a654-1b7df37b5297-IaeFQL"
STAGE_S3_BUCKET: orchard-artifacts-stage
AWS_REGION: us-gov-west-1
# Shared pip cache directory
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.pip-cache"
@@ -30,9 +36,68 @@ stages:
- analyze
- deploy
# Override Prosper template jobs to exclude tag pipelines
# Tags only run deploy_prod and smoke_test_prod (image already built on main)
build_image:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
test_image:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
hadolint:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
kics:
variables:
KICS_CONFIG: kics.config
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
secrets:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
app_deps_scan:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
cve_scan:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
app_sbom_analysis:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
cve_sbom_analysis:
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
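# A hypothetical refactor, not part of this change: the identical tag-exclusion
# rules repeated above could be defined once and shared via a YAML anchor:
#
#   .skip_on_tag: &skip_on_tag
#     rules:
#       - if: '$CI_COMMIT_TAG'
#         when: never
#       - when: on_success
#
#   build_image:
#     <<: *skip_on_tag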
# Override release job to wait for stage integration tests before creating tag
# This ensures the tag (which triggers prod deploy) is only created after stage passes
release:
needs: [integration_test_stage, changelog]
# Full integration test suite template (for feature/stage deployments)
# Runs the complete pytest integration test suite against the deployed environment
@@ -131,16 +196,106 @@ kics:
sys.exit(0)
PYTEST_SCRIPT
-# Integration tests for stage deployment (full suite)
# Reset stage template - shared by pre and post test reset jobs
# Calls the /api/v1/admin/factory-reset endpoint which handles DB and S3 cleanup
.reset_stage_template: &reset_stage_template
stage: deploy
image: deps.global.bsf.tools/docker/python:3.12-slim
timeout: 5m
retry: 1 # Retry once on transient failures
before_script:
- pip install --index-url "$PIP_INDEX_URL" httpx
script:
- |
python - <<'RESET_SCRIPT'
import httpx
import sys
import os
import time
BASE_URL = os.environ.get("STAGE_URL", "")
ADMIN_USER = "admin"
ADMIN_PASS = "changeme123" # Default admin password
MAX_RETRIES = 3
RETRY_DELAY = 5 # seconds
if not BASE_URL:
print("ERROR: STAGE_URL environment variable not set")
sys.exit(1)
print(f"=== Resetting stage environment at {BASE_URL} ===")
def do_reset():
with httpx.Client(base_url=BASE_URL, timeout=120.0) as client:
# Login as admin
print("Logging in as admin...")
login_response = client.post(
"/api/v1/auth/login",
json={"username": ADMIN_USER, "password": ADMIN_PASS},
)
if login_response.status_code != 200:
raise Exception(f"Login failed: {login_response.status_code} - {login_response.text}")
print("Login successful")
# Call factory reset endpoint
print("Calling factory reset endpoint...")
reset_response = client.post(
"/api/v1/admin/factory-reset",
headers={"X-Confirm-Reset": "yes-delete-all-data"},
)
if reset_response.status_code == 200:
result = reset_response.json()
print("Factory reset successful!")
print(f" Database tables dropped: {result['results']['database_tables_dropped']}")
print(f" S3 objects deleted: {result['results']['s3_objects_deleted']}")
print(f" Database reinitialized: {result['results']['database_reinitialized']}")
print(f" Seeded: {result['results']['seeded']}")
return True
else:
raise Exception(f"Factory reset failed: {reset_response.status_code} - {reset_response.text}")
# Retry loop
for attempt in range(1, MAX_RETRIES + 1):
try:
print(f"Attempt {attempt}/{MAX_RETRIES}")
if do_reset():
sys.exit(0)
except Exception as e:
print(f"Attempt {attempt} failed: {e}")
if attempt < MAX_RETRIES:
print(f"Retrying in {RETRY_DELAY} seconds...")
time.sleep(RETRY_DELAY)
else:
print("All retry attempts failed")
sys.exit(1)
RESET_SCRIPT
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage BEFORE integration tests (ensure known state)
reset_stage_pre:
<<: *reset_stage_template
needs: [deploy_stage]
# Integration tests for stage deployment (full suite)
integration_test_stage:
<<: *integration_test_template
-needs: [deploy_stage]
+needs: [reset_stage_pre]
variables:
ORCHARD_TEST_URL: $STAGE_URL
rules:
- if: '$CI_COMMIT_BRANCH == "main"'
when: on_success
# Reset stage AFTER integration tests (clean slate for next run)
reset_stage:
<<: *reset_stage_template
needs: [integration_test_stage]
allow_failure: true # Don't fail pipeline if reset has issues
# Integration tests for feature deployment (full suite)
integration_test_feature:
<<: *integration_test_template
@@ -183,6 +338,10 @@ python_unit_tests:
coverage_format: cobertura
path: backend/coverage.xml
coverage: '/TOTAL.*\s+(\d+%)/'
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
# Run frontend tests
frontend_tests:
@@ -212,6 +371,10 @@ frontend_tests:
coverage_format: cobertura
path: frontend/coverage/cobertura-coverage.xml
coverage: '/All files[^|]*\|[^|]*\s+([\d\.]+)/'
rules:
- if: '$CI_COMMIT_TAG'
when: never
- when: on_success
# Shared deploy configuration
.deploy_template: &deploy_template
@@ -339,12 +502,11 @@ cleanup_feature:
# Deploy to production (version tags only)
deploy_prod:
stage: deploy
-# For tag pipelines, most jobs don't run (trusting main was tested)
-# We only need build_image to have the image available
-needs: [build_image]
+# For tag pipelines, no other jobs run - image was already built when commit was on main
+needs: []
image: deps.global.bsf.tools/registry-1.docker.io/alpine/k8s:1.29.12
variables:
-NAMESPACE: orch-prod-namespace
+NAMESPACE: orch-namespace
VALUES_FILE: helm/orchard/values-prod.yaml
BASE_URL: $PROD_URL
before_script:

.gitleaks.toml Normal file

@@ -0,0 +1,8 @@
# Gitleaks configuration
# https://github.com/gitleaks/gitleaks#configuration
[allowlist]
# Test files that contain variable names matching secret patterns (e.g., s3_key)
paths = [
'''backend/tests/.*\.py''',
]
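
To check the allowlist locally, gitleaks can be run against the working tree with this config (flag names per the gitleaks CLI):

gitleaks detect --source . --config .gitleaks.toml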

.gitleaksignore

@@ -4,6 +4,7 @@
# False positive: s3_key is an attribute name in test assertions, not a secret
# These are historical commits - files have since been deleted or updated with inline comments
7e68baed0886a3c928644cd01aa3b39f92d4f976:backend/tests/test_duplicate_detection.py:generic-api-key:154
81458b3bcb5ace97109ba4c16f4afa6e55b1b8bd:backend/tests/test_duplicate_detection.py:generic-api-key:154
2f1891cf0126ec0e7d4c789d872a2cb2dd3a1745:backend/tests/unit/test_storage.py:generic-api-key:381
10d36947948de796f0bacea3827f4531529c405d:backend/tests/unit/test_storage.py:generic-api-key:381
bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:generic-api-key:381
@@ -15,3 +16,4 @@ bccbc71c13570d14b8b26a11335c45f102fe3072:backend/tests/unit/test_storage.py:gene
08dce6cbb836b687002751fed4159bfc2da61f8b:backend/tests/unit/test_storage.py:generic-api-key:381
617bcbe89cff9a009d77e4f1f1864efed1820e63:backend/tests/unit/test_storage.py:generic-api-key:381
1cbd33544388e0fe6db752fa8886fab33cf9ce7c:backend/tests/unit/test_storage.py:generic-api-key:381
7cfad28f678f5a5b8b927d694a17b9ba446b7138:backend/tests/unit/test_storage.py:generic-api-key:381
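
The "inline comments" mentioned above refer to gitleaks' line-level allowlist marker. A hypothetical sketch of how such a test line would be annotated (the assertion itself is illustrative, not from the repo):

# s3_key here is a storage path attribute, not a credential
assert artifact.s3_key == "ab/cd/abcdef0123456789"  # gitleaks:allow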

CHANGELOG.md

@@ -6,24 +6,27 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Changed
- Added pre-test stage reset to ensure known environment state before integration tests (#54)
## [0.5.1] - 2026-01-23
### Changed
- Simplified tag pipeline to only run deploy and smoke tests (image already built on main) (#54)
### Fixed
- Fixed production CI deployment namespace to use correct `orch-namespace` (#54)
- Added gitleaks config to allowlist test files from secret scanning (#54)
## [0.5.0] - 2026-01-23
### Added
- Added factory reset endpoint `POST /api/v1/admin/factory-reset` for test environment cleanup (#54)
- Requires admin authentication and `X-Confirm-Reset: yes-delete-all-data` header
- Drops all database tables, clears S3 bucket, reinitializes schema, re-seeds default data
- CI pipeline automatically calls this after integration tests on stage
- Added `delete_all()` method to storage backend for bulk S3 object deletion (#54)
- Added AWS Secrets Manager CSI driver support for database credentials (#54)
- Added SecretProviderClass template for Secrets Manager integration (#54)
- Added IRSA service account annotations for prod and stage environments (#54)
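For context, a SecretProviderClass for the AWS provider generally follows the shape below (a sketch with hypothetical names; the repo's actual template is not shown in this view, and the ARN would presumably be the STAGE_SECRET_ARN defined in .gitlab-ci.yml):

apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: orchard-db-credentials  # hypothetical name
spec:
  provider: aws
  parameters:
    objects: |
      - objectName: "arn:aws-us-gov:secretsmanager:us-gov-west-1:...:secret:..."
        objectType: "secretsmanager"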
### Changed
- Configured stage and prod to use AWS RDS instead of PostgreSQL subchart (#54)
- Configured stage and prod to use AWS S3 instead of MinIO subchart (#54)
- Changed prod deployment from manual to automatic on version tags (#54)
- Updated S3 client to support IRSA credentials when no explicit keys provided (#54)
- Changed prod image pullPolicy to Always (#54)
- Added proxy-body-size annotation to prod ingress for large uploads (#54)
### Removed
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)
### Added
- Added comprehensive upload/download tests for size boundaries (1B to 1GB) (#38)
- Added concurrent upload/download tests (2, 5, 10 parallel operations) (#38)
- Added data integrity tests (binary, text, unicode, compressed content) (#38)
@@ -78,6 +81,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added internal proxy configuration for npm, pip, helm, and apt (#51)
### Changed
- Configured stage and prod to use AWS RDS instead of PostgreSQL subchart (#54)
- Configured stage and prod to use AWS S3 instead of MinIO subchart (#54)
- Changed prod deployment from manual to automatic on version tags (#54)
- Updated S3 client to support IRSA credentials when no explicit keys provided (#54)
- Changed prod image pullPolicy to Always (#54)
- Added proxy-body-size annotation to prod ingress for large uploads (#54)
- CI integration tests now run full pytest suite (~350 tests) against deployed environment instead of 3 smoke tests
- CI production deployment uses lightweight smoke tests only (no test data creation in prod)
- CI pipeline improvements: shared pip cache, `interruptible` flag on test jobs, retry on integration tests
@@ -98,6 +107,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improved pod naming: Orchard pods now named `orchard-{env}-server-*` for clarity (#51)
### Fixed
- Fixed factory reset not creating default admin user after reset (#60)
- Admin user was only created at server startup, not after factory reset
- CI reset job would fail to login because admin user didn't exist
- Improved reset_stage CI job reliability (#60)
- Added application-level retry logic (3 attempts with 5s delay)
- Added job-level retry for transient failures
- Fixed httpx client to use proper context manager
- Increased timeout to 120s for reset operations
- Fixed CI integration test rate limiting: added configurable `ORCHARD_LOGIN_RATE_LIMIT` env var, relaxed to 1000/minute for dev/stage
- Fixed duplicate `TestSecurityEdgeCases` class definition in test_auth_api.py
- Fixed integration tests auth: session-scoped client, configurable credentials via env vars, fail-fast on auth errors
@@ -118,6 +135,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Removed
- Removed unused `store_streaming()` method from storage.py (#51)
- Disabled PostgreSQL subchart for stage and prod environments (#54)
- Disabled MinIO subchart for stage and prod environments (#54)
## [0.4.0] - 2026-01-12
### Added

backend API routes (file path not shown in this view)

@@ -6390,3 +6390,121 @@ def get_artifact_provenance(
tags=tag_list,
uploads=upload_history,
)
# =============================================================================
# Factory Reset Endpoint (Admin Only)
# =============================================================================
@router.post("/api/v1/admin/factory-reset", tags=["admin"])
def factory_reset(
request: Request,
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
current_user: User = Depends(require_admin),
):
"""
Factory reset - delete all data and restore to initial state.
This endpoint:
1. Drops all database tables
2. Deletes all objects from S3 storage
3. Recreates the database schema
4. Re-seeds with default admin user
Requires:
- Admin authentication
- X-Confirm-Reset header set to "yes-delete-all-data"
WARNING: This is a destructive operation that cannot be undone.
"""
# Require explicit confirmation header
confirm_header = request.headers.get("X-Confirm-Reset")
if confirm_header != "yes-delete-all-data":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Factory reset requires X-Confirm-Reset header set to 'yes-delete-all-data'",
)
# Capture username before we drop tables (user object will become invalid)
admin_username = current_user.username
logger.warning(f"Factory reset initiated by admin user: {admin_username}")
results = {
"database_tables_dropped": 0,
"s3_objects_deleted": 0,
"database_reinitialized": False,
"seeded": False,
}
try:
# Step 1: Drop all tables in public schema
logger.info("Dropping all database tables...")
drop_result = db.execute(
text("""
DO $$
DECLARE
r RECORD;
table_count INT := 0;
BEGIN
SET session_replication_role = 'replica';
FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
table_count := table_count + 1;
END LOOP;
SET session_replication_role = 'origin';
RAISE NOTICE 'Dropped % tables', table_count;
END $$;
""")
)
db.commit()
# Count tables that were dropped
count_result = db.execute(
text("SELECT COUNT(*) FROM pg_tables WHERE schemaname = 'public'")
)
remaining_tables = count_result.scalar()
results["database_tables_dropped"] = "all"
logger.info(f"Database tables dropped, remaining: {remaining_tables}")
# Step 2: Delete all S3 objects
logger.info("Deleting all S3 objects...")
results["s3_objects_deleted"] = storage.delete_all()
# Step 3: Reinitialize database schema
logger.info("Reinitializing database schema...")
from .database import init_db, SessionLocal
init_db()
results["database_reinitialized"] = True
# Step 4: Re-seed with default data (need fresh session after schema recreate)
logger.info("Seeding database with defaults...")
from .seed import seed_database
from .auth import create_default_admin
fresh_db = SessionLocal()
try:
# Create default admin user first (normally done at startup)
create_default_admin(fresh_db)
# Then seed other test data
seed_database(fresh_db)
fresh_db.commit()
finally:
fresh_db.close()
results["seeded"] = True
logger.warning(f"Factory reset completed by {admin_username}")
return {
"status": "success",
"message": "Factory reset completed successfully",
"results": results,
}
except Exception as e:
logger.error(f"Factory reset failed: {e}")
db.rollback()
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Factory reset failed: {str(e)}",
)
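
For manual cleanup of a disposable environment, the endpoint can also be exercised by hand; a sketch with curl, assuming cookie-based sessions as implied by the CI reset script (log in first, then reuse the session cookie):

curl -c /tmp/orchard.cookies -X POST "$STAGE_URL/api/v1/auth/login" \
  -H 'Content-Type: application/json' \
  -d '{"username": "admin", "password": "<admin-password>"}'
curl -b /tmp/orchard.cookies -X POST "$STAGE_URL/api/v1/admin/factory-reset" \
  -H 'X-Confirm-Reset: yes-delete-all-data'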

storage.py

@@ -835,6 +835,36 @@ class S3Storage:
except ClientError:
return False
def delete_all(self) -> int:
"""
Delete all objects in the bucket.
Returns:
Number of objects deleted
"""
deleted_count = 0
try:
paginator = self.client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=self.bucket):
objects = page.get("Contents", [])
if not objects:
continue
# Delete objects in batches of 1000 (S3 limit)
delete_keys = [{"Key": obj["Key"]} for obj in objects]
if delete_keys:
self.client.delete_objects(
Bucket=self.bucket, Delete={"Objects": delete_keys}
)
deleted_count += len(delete_keys)
logger.info(f"Deleted {len(delete_keys)} objects from S3")
logger.info(f"Total objects deleted from S3: {deleted_count}")
return deleted_count
except ClientError as e:
logger.error(f"Failed to delete all S3 objects: {e}")
raise
def generate_presigned_url(
self,
s3_key: str,