""" Purge seed/demo data from the database. This is used when transitioning an environment from dev/test to production-like. Triggered by setting ORCHARD_PURGE_SEED_DATA=true environment variable. """ import logging import os from sqlalchemy.orm import Session from .models import ( Project, Package, Artifact, Upload, PackageVersion, ArtifactDependency, Team, TeamMembership, User, AccessPermission, ) from .storage import get_storage logger = logging.getLogger(__name__) # Seed data identifiers (from seed.py) SEED_PROJECT_NAMES = [ "frontend-libs", "backend-services", "mobile-apps", "internal-tools", ] SEED_TEAM_SLUG = "demo-team" SEED_USERNAMES = [ "alice", "bob", "charlie", "diana", "eve", "frank", ] def should_purge_seed_data() -> bool: """Check if seed data should be purged based on environment variable.""" return os.environ.get("ORCHARD_PURGE_SEED_DATA", "").lower() == "true" def purge_seed_data(db: Session) -> dict: """ Purge all seed/demo data from the database. Returns a dict with counts of deleted items. """ logger.warning("PURGING SEED DATA - This will delete demo projects, users, and teams") results = { "dependencies_deleted": 0, "versions_deleted": 0, "uploads_deleted": 0, "artifacts_deleted": 0, "packages_deleted": 0, "projects_deleted": 0, "permissions_deleted": 0, "team_memberships_deleted": 0, "users_deleted": 0, "teams_deleted": 0, "s3_objects_deleted": 0, } storage = get_storage() # Find seed projects seed_projects = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).all() seed_project_ids = [p.id for p in seed_projects] if not seed_projects: logger.info("No seed projects found, nothing to purge") return results logger.info(f"Found {len(seed_projects)} seed projects to purge") # Find packages in seed projects seed_packages = db.query(Package).filter(Package.project_id.in_(seed_project_ids)).all() seed_package_ids = [p.id for p in seed_packages] # Find artifacts in seed packages (via uploads) seed_uploads = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).all() seed_artifact_ids = list(set(u.artifact_id for u in seed_uploads)) # Delete in order (respecting foreign keys) # 1. Delete artifact dependencies if seed_artifact_ids: count = db.query(ArtifactDependency).filter( ArtifactDependency.artifact_id.in_(seed_artifact_ids) ).delete(synchronize_session=False) results["dependencies_deleted"] = count logger.info(f"Deleted {count} artifact dependencies") # 2. Delete package versions if seed_package_ids: count = db.query(PackageVersion).filter( PackageVersion.package_id.in_(seed_package_ids) ).delete(synchronize_session=False) results["versions_deleted"] = count logger.info(f"Deleted {count} package versions") # 3. Delete uploads if seed_package_ids: count = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).delete( synchronize_session=False ) results["uploads_deleted"] = count logger.info(f"Deleted {count} uploads") # 4. Delete S3 objects for seed artifacts if seed_artifact_ids: seed_artifacts = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).all() for artifact in seed_artifacts: if artifact.s3_key: try: storage.client.delete_object(Bucket=storage.bucket, Key=artifact.s3_key) results["s3_objects_deleted"] += 1 except Exception as e: logger.warning(f"Failed to delete S3 object {artifact.s3_key}: {e}") logger.info(f"Deleted {results['s3_objects_deleted']} S3 objects") # 5. Delete artifacts (only those with ref_count that would be 0 after our deletions) # Since we deleted all versions pointing to these artifacts, we can delete them if seed_artifact_ids: count = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).delete( synchronize_session=False ) results["artifacts_deleted"] = count logger.info(f"Deleted {count} artifacts") # 6. Delete packages if seed_package_ids: count = db.query(Package).filter(Package.id.in_(seed_package_ids)).delete( synchronize_session=False ) results["packages_deleted"] = count logger.info(f"Deleted {count} packages") # 7. Delete access permissions for seed projects if seed_project_ids: count = db.query(AccessPermission).filter( AccessPermission.project_id.in_(seed_project_ids) ).delete(synchronize_session=False) results["permissions_deleted"] = count logger.info(f"Deleted {count} access permissions") # 8. Delete seed projects count = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).delete( synchronize_session=False ) results["projects_deleted"] = count logger.info(f"Deleted {count} projects") # 9. Find and delete seed team seed_team = db.query(Team).filter(Team.slug == SEED_TEAM_SLUG).first() if seed_team: # Delete team memberships first count = db.query(TeamMembership).filter( TeamMembership.team_id == seed_team.id ).delete(synchronize_session=False) results["team_memberships_deleted"] = count logger.info(f"Deleted {count} team memberships") # Delete the team db.delete(seed_team) results["teams_deleted"] = 1 logger.info(f"Deleted team: {SEED_TEAM_SLUG}") # 10. Delete seed users (but NOT admin) seed_users = db.query(User).filter(User.username.in_(SEED_USERNAMES)).all() for user in seed_users: # Delete any remaining team memberships for this user db.query(TeamMembership).filter(TeamMembership.user_id == user.id).delete( synchronize_session=False ) # Delete any access permissions for this user # Note: AccessPermission.user_id is VARCHAR (username), not UUID db.query(AccessPermission).filter(AccessPermission.user_id == user.username).delete( synchronize_session=False ) db.delete(user) results["users_deleted"] += 1 if results["users_deleted"] > 0: logger.info(f"Deleted {results['users_deleted']} seed users") db.commit() logger.warning("SEED DATA PURGE COMPLETE") logger.info(f"Purge results: {results}") return results