Add feature flags, seed data utilities, and Angular frontend scaffold
Major enhancements:
- Feature flag system for cloud vs air-gapped deployment modes
- Automatic storage backend selection based on deployment mode
- Comprehensive seed data generation utilities
- Support for generating CSV, JSON, binary, and PCAP test files
- Quick seed script for easy data generation
- Complete setup documentation for the Angular 19 frontend
- Material Design UI component examples and configuration

Fixes:
- Resolve SQLAlchemy metadata column name conflict
- Rename metadata to custom_metadata throughout the codebase
- Fix API health check issues

Documentation:
- FEATURES.md - Complete feature overview
- FRONTEND_SETUP.md - Angular 19 setup guide with examples
- SUMMARY.md - Implementation summary

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
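Background on the metadata rename listed under Fixes: SQLAlchemy's declarative base reserves the `metadata` attribute on mapped classes, so a model column attribute named `metadata` raises an `InvalidRequestError` at import time, and renaming the attribute (for example to `custom_metadata`) is the usual fix. A minimal sketch of what such a rename looks like; the table name and the other columns here are assumptions taken from how the seed script below uses the `Artifact` model, not the project's actual model definition:

    # Sketch only: everything except the custom_metadata rename is assumed.
    from sqlalchemy import Column, Integer, String, JSON
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Artifact(Base):
        __tablename__ = "artifacts"

        id = Column(Integer, primary_key=True)
        filename = Column(String, nullable=False)

        # Before: `metadata = Column(JSON)` fails, because `Base.metadata`
        # is reserved by SQLAlchemy's declarative API.
        custom_metadata = Column(JSON, nullable=True)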
utils/seed_data.py (executable file, 335 lines added)
@@ -0,0 +1,335 @@
#!/usr/bin/env python
"""
Utility functions for generating seed data for testing the Data Lake.

This module provides functions to:
- Generate random test artifacts (CSV, JSON, binary, PCAP files)
- Upload them to the database and storage backend
- Clear all data for testing purposes
"""

import os
import sys
import io
import random
import json
import csv
from datetime import datetime, timedelta
from typing import List, Dict, Any
import uuid

# Add parent directory to path to import app modules
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from app.database import SessionLocal
from app.models.artifact import Artifact
from app.storage import get_storage_backend
from app.config import settings


# Sample data for generating realistic test artifacts
TEST_NAMES = [
    "user_authentication", "payment_processing", "data_validation",
    "api_endpoint_test", "database_connection", "file_upload",
    "performance_test", "stress_test", "security_scan",
    "regression_test", "smoke_test", "integration_test"
]

TEST_SUITES = [
    "authentication", "payments", "api", "database",
    "ui", "performance", "security", "integration"
]

TEST_RESULTS = ["pass", "fail", "skip", "error"]

TAGS = [
    "regression", "smoke", "critical", "high-priority",
    "automated", "manual", "performance", "security",
    "integration", "unit", "e2e", "api"
]


def generate_csv_content() -> bytes:
    """Generate random CSV test data"""
    output = io.StringIO()
    writer = csv.writer(output)

    # Header
    writer.writerow(["timestamp", "test_case", "result", "duration_ms", "error_message"])

    # Random rows
    num_rows = random.randint(10, 100)
    for i in range(num_rows):
        timestamp = datetime.now() - timedelta(minutes=random.randint(0, 1000))
        test_case = f"test_case_{random.randint(1, 50)}"
        result = random.choice(TEST_RESULTS)
        duration = random.randint(100, 5000)
        error = "" if result == "pass" else f"Error_{random.randint(1, 10)}"
        writer.writerow([timestamp.isoformat(), test_case, result, duration, error])

    return output.getvalue().encode('utf-8')


def generate_json_content() -> bytes:
    """Generate random JSON test configuration"""
    config = {
        "test_run_id": str(uuid.uuid4()),
        "timestamp": datetime.now().isoformat(),
        "environment": random.choice(["dev", "staging", "prod"]),
        "browser": random.choice(["chrome", "firefox", "safari", "edge"]),
        "timeout": random.randint(30, 300),
        "retries": random.randint(0, 3),
        "parallel_threads": random.randint(1, 10),
        "test_data": {
            "users": random.randint(10, 1000),
            "iterations": random.randint(1, 100),
            "success_rate": round(random.uniform(0.7, 1.0), 2)
        }
    }
    return json.dumps(config, indent=2).encode('utf-8')


def generate_binary_content() -> bytes:
    """Generate random binary data"""
    size = random.randint(1024, 10240)  # 1-10KB
    return os.urandom(size)


def generate_pcap_content() -> bytes:
    """Generate fake PCAP file header (simplified)"""
    # This is a simplified PCAP file header for demonstration
    # Real PCAP files would have proper packet data
    pcap_header = bytearray([
        0xd4, 0xc3, 0xb2, 0xa1,  # Magic number
        0x02, 0x00, 0x04, 0x00,  # Version
        0x00, 0x00, 0x00, 0x00,  # Timezone
        0x00, 0x00, 0x00, 0x00,  # Timestamp accuracy
        0xff, 0xff, 0x00, 0x00,  # Snapshot length
        0x01, 0x00, 0x00, 0x00   # Link-layer type
    ])
    # Add some random data to simulate packets
    pcap_header.extend(os.urandom(random.randint(500, 2000)))
    return bytes(pcap_header)
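
# Sanity check (illustrative note, based on the standard libpcap global header
# layout, not part of the original module): the 24 bytes above unpack as
# little-endian magic, version_major, version_minor, thiszone, sigfigs,
# snaplen, linktype, i.e. struct format '<IHHiIII'. They should therefore
# decode to magic 0xa1b2c3d4, version 2.4, snaplen 65535 and link type 1
# (Ethernet). For example:
#
#   import struct
#   magic, major, minor, _tz, _sig, snaplen, link = struct.unpack(
#       '<IHHiIII', generate_pcap_content()[:24])
#   assert (magic, major, minor, snaplen, link) == (0xa1b2c3d4, 2, 4, 65535, 1)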


def create_artifact_data(index: int) -> Dict[str, Any]:
    """Generate metadata for an artifact"""
    test_name = random.choice(TEST_NAMES)
    test_suite = random.choice(TEST_SUITES)
    test_result = random.choice(TEST_RESULTS)

    # Generate random tags (1-4 tags)
    num_tags = random.randint(1, 4)
    artifact_tags = random.sample(TAGS, num_tags)

    # Generate test config
    test_config = {
        "environment": random.choice(["dev", "staging", "prod"]),
        "timeout": random.randint(30, 300),
        "retries": random.randint(0, 3)
    }

    # Generate custom metadata
    custom_metadata = {
        "build_number": random.randint(1000, 9999),
        "commit_hash": uuid.uuid4().hex[:8],
        "triggered_by": random.choice(["manual", "scheduled", "webhook"])
    }

    # Random version
    version = f"v{random.randint(1, 5)}.{random.randint(0, 10)}.{random.randint(0, 20)}"

    # Random creation date (within last 30 days)
    created_days_ago = random.randint(0, 30)
    created_at = datetime.now() - timedelta(days=created_days_ago, hours=random.randint(0, 23))

    return {
        "test_name": test_name,
        "test_suite": test_suite,
        "test_result": test_result,
        "tags": artifact_tags,
        "test_config": test_config,
        "custom_metadata": custom_metadata,
        "version": version,
        "description": f"Test artifact {index} for {test_name}",
        "created_at": created_at
    }


async def upload_artifact_to_storage(file_content: bytes, filename: str) -> str:
    """Upload file to storage backend"""
    storage = get_storage_backend()
    file_extension = filename.split('.')[-1] if '.' in filename else ''
    object_name = f"{uuid.uuid4()}.{file_extension}" if file_extension else str(uuid.uuid4())

    storage_path = await storage.upload_file(
        io.BytesIO(file_content),
        object_name
    )
    return storage_path


def get_file_type(filename: str) -> str:
    """Determine file type from filename"""
    extension = filename.lower().split('.')[-1]
    type_mapping = {
        'csv': 'csv',
        'json': 'json',
        'pcap': 'pcap',
        'pcapng': 'pcap',
        'bin': 'binary',
        'dat': 'binary',
    }
    return type_mapping.get(extension, 'binary')


async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
    """
    Generate and upload seed data to the database and storage.

    Args:
        num_artifacts: Number of artifacts to generate (default: 50)

    Returns:
        List of created artifact IDs
    """
    db = SessionLocal()
    artifact_ids = []

    try:
        print(f"Generating {num_artifacts} seed artifacts...")
        print(f"Deployment mode: {settings.deployment_mode}")
        print(f"Storage backend: {settings.storage_backend}")

        for i in range(num_artifacts):
            # Randomly choose file type
            file_type_choice = random.choice(['csv', 'json', 'binary', 'pcap'])

            if file_type_choice == 'csv':
                filename = f"test_results_{i}.csv"
                content = generate_csv_content()
                content_type = "text/csv"
            elif file_type_choice == 'json':
                filename = f"test_config_{i}.json"
                content = generate_json_content()
                content_type = "application/json"
            elif file_type_choice == 'pcap':
                filename = f"network_capture_{i}.pcap"
                content = generate_pcap_content()
                content_type = "application/vnd.tcpdump.pcap"
            else:
                filename = f"test_data_{i}.bin"
                content = generate_binary_content()
                content_type = "application/octet-stream"

            # Upload to storage
            storage_path = await upload_artifact_to_storage(content, filename)

            # Generate metadata
            artifact_data = create_artifact_data(i)

            # Create database record
            artifact = Artifact(
                filename=filename,
                file_type=get_file_type(filename),
                file_size=len(content),
                storage_path=storage_path,
                content_type=content_type,
                test_name=artifact_data["test_name"],
                test_suite=artifact_data["test_suite"],
                test_config=artifact_data["test_config"],
                test_result=artifact_data["test_result"],
                custom_metadata=artifact_data["custom_metadata"],
                description=artifact_data["description"],
                tags=artifact_data["tags"],
                version=artifact_data["version"],
                created_at=artifact_data["created_at"],
                updated_at=artifact_data["created_at"]
            )

            db.add(artifact)
            db.commit()
            db.refresh(artifact)

            artifact_ids.append(artifact.id)

            if (i + 1) % 10 == 0:
                print(f"  Created {i + 1}/{num_artifacts} artifacts...")

        print(f"✓ Successfully created {len(artifact_ids)} artifacts")
        return artifact_ids

    except Exception as e:
        db.rollback()
        print(f"✗ Error generating seed data: {e}")
        raise
    finally:
        db.close()


async def clear_all_data():
    """
    Clear all artifacts from database and storage.
    WARNING: This will delete ALL data!
    """
    db = SessionLocal()
    storage = get_storage_backend()

    try:
        print("Clearing all artifacts...")

        # Get all artifacts
        artifacts = db.query(Artifact).all()
        count = len(artifacts)

        if count == 0:
            print("No artifacts to delete.")
            return

        print(f"Found {count} artifacts to delete...")

        # Delete from storage and database
        for i, artifact in enumerate(artifacts):
            try:
                # Delete from storage
                object_name = artifact.storage_path.split('/')[-1]
                await storage.delete_file(object_name)
            except Exception as e:
                print(f"  Warning: Could not delete {artifact.filename} from storage: {e}")

            # Delete from database
            db.delete(artifact)

            if (i + 1) % 10 == 0:
                print(f"  Deleted {i + 1}/{count} artifacts...")

        db.commit()
        print(f"✓ Successfully deleted {count} artifacts")

    except Exception as e:
        db.rollback()
        print(f"✗ Error clearing data: {e}")
        raise
    finally:
        db.close()


# CLI interface
if __name__ == "__main__":
    import asyncio
    import argparse

    parser = argparse.ArgumentParser(description="Generate or clear seed data for Data Lake")
    parser.add_argument("action", choices=["generate", "clear"], help="Action to perform")
    parser.add_argument("--count", type=int, default=50, help="Number of artifacts to generate (default: 50)")

    args = parser.parse_args()

    if args.action == "generate":
        asyncio.run(generate_seed_data(args.count))
    elif args.action == "clear":
        confirm = input("Are you sure you want to delete ALL data? (yes/no): ")
        if confirm.lower() == "yes":
            asyncio.run(clear_all_data())
        else:
            print("Aborted.")