Merge main into f/updates

Resolved conflicts by keeping f/updates changes:
- Keep Angular frontend with dark theme styling
- Keep updated quickstart scripts at root level
- Remove static HTML/JS files (replaced by Angular)
- Keep sim_source_id field implementation
- Merge backend improvements from main

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
pratik committed 2025-10-15 11:44:01 -05:00
8 changed files with 149 additions and 11 deletions


@@ -1,12 +1,15 @@
FROM python:3.11-slim
FROM python:3.11-alpine
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
# Install system dependencies for Alpine
# Alpine uses apk instead of apt-get and is lighter/faster
RUN apk add --no-cache \
    gcc \
    musl-dev \
    postgresql-dev \
    postgresql-client \
    && rm -rf /var/lib/apt/lists/*
    linux-headers
# Copy requirements and install Python dependencies
COPY requirements.txt .
@@ -18,8 +21,8 @@ COPY utils/ ./utils/
COPY alembic/ ./alembic/
COPY alembic.ini .
# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
# Create non-root user (Alpine uses adduser instead of useradd)
RUN adduser -D -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
# Expose port


@@ -1,4 +1,6 @@
# Test Artifact Data Lake
# Obsidian
**Enterprise Test Artifact Storage**
A lightweight, cloud-native API for storing and querying test artifacts including CSV files, JSON files, binary files, and packet captures (PCAP). Built with FastAPI and supports both AWS S3 and self-hosted MinIO storage backends.

alembic/.gitkeep (empty placeholder file)

alembic/env.py (84 lines added)

@@ -0,0 +1,84 @@
from logging.config import fileConfig
import os

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context

# Import your models Base
from app.models.artifact import Base

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Override sqlalchemy.url from environment variable
if os.getenv("DATABASE_URL"):
    config.set_main_option("sqlalchemy.url", os.getenv("DATABASE_URL"))

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
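Because env.py prefers DATABASE_URL over the URL in alembic.ini, migrations can also be driven programmatically. A minimal sketch, assuming alembic.ini sits in the working directory (it is copied into /app by the Dockerfile) and using an illustrative connection string:

```python
import os

from alembic import command
from alembic.config import Config

# Assumed DSN; env.py will pick DATABASE_URL up before falling back to alembic.ini.
os.environ["DATABASE_URL"] = "postgresql://user:password@localhost:5432/artifacts"

cfg = Config("alembic.ini")
command.upgrade(cfg, "head")  # apply all migrations up to the latest revision
```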


@@ -1,7 +1,7 @@
from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from typing import List, Optional
from typing import List, Optional, Dict
import uuid
import json
import io
@@ -36,6 +36,7 @@ async def upload_artifact(
    test_suite: Optional[str] = Form(None),
    test_config: Optional[str] = Form(None),
    test_result: Optional[str] = Form(None),
    sim_source_id: Optional[str] = Form(None),
    custom_metadata: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
    tags: Optional[str] = Form(None),
@@ -51,6 +52,7 @@ async def upload_artifact(
    - **test_suite**: Test suite identifier
    - **test_config**: JSON string of test configuration
    - **test_result**: Test result (pass, fail, skip, error)
    - **sim_source_id**: SIM source ID to group multiple artifacts
    - **custom_metadata**: JSON string of additional metadata
    - **description**: Text description of the artifact
    - **tags**: JSON array of tags (as string)
@@ -88,6 +90,7 @@ async def upload_artifact(
        test_suite=test_suite,
        test_config=test_config_dict,
        test_result=test_result,
        sim_source_id=sim_source_id,
        custom_metadata=metadata_dict,
        description=description,
        tags=tags_list,
@@ -194,6 +197,7 @@ async def query_artifacts(query: ArtifactQuery, db: Session = Depends(get_db)):
    - **test_name**: Filter by test name
    - **test_suite**: Filter by test suite
    - **test_result**: Filter by test result
    - **sim_source_id**: Filter by SIM source ID
    - **tags**: Filter by tags (must contain all specified tags)
    - **start_date**: Filter by creation date (from)
    - **end_date**: Filter by creation date (to)
@@ -212,6 +216,8 @@ async def query_artifacts(query: ArtifactQuery, db: Session = Depends(get_db)):
        q = q.filter(Artifact.test_suite == query.test_suite)
    if query.test_result:
        q = q.filter(Artifact.test_result == query.test_result)
    if query.sim_source_id:
        q = q.filter(Artifact.sim_source_id == query.sim_source_id)
    if query.tags:
        for tag in query.tags:
            q = q.filter(Artifact.tags.contains([tag]))
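With that filter in place, a client can scope a query to a single SIM run. A minimal sketch; the base URL and the /artifacts/query path are assumptions about how this router is mounted:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port

payload = {"sim_source_id": "sim_run_1a2b3c4d"}  # hypothetical ID from an earlier upload
resp = requests.post(f"{BASE_URL}/artifacts/query", json=payload)  # path is an assumption
resp.raise_for_status()

for artifact in resp.json():
    # field names assumed from the upload form fields above
    print(artifact["test_name"], artifact["test_result"])
```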
@@ -240,3 +246,20 @@ async def list_artifacts(
        Artifact.created_at.desc()
    ).offset(offset).limit(limit).all()
    return artifacts
@router.get("/grouped-by-sim-source", response_model=Dict[str, List[ArtifactResponse]])
async def get_artifacts_grouped_by_sim_source(
db: Session = Depends(get_db)
):
"""Get all artifacts grouped by SIM source ID"""
from collections import defaultdict
artifacts = db.query(Artifact).order_by(Artifact.created_at.desc()).all()
grouped = defaultdict(list)
for artifact in artifacts:
sim_source = artifact.sim_source_id or "ungrouped"
grouped[sim_source].append(artifact)
return dict(grouped)
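Taken together with the new form field, the intended flow is: upload artifacts tagged with the same sim_source_id, then fetch them back grouped. A minimal client-side sketch; the upload path, the name of the file part, and the port are assumptions about this deployment:

```python
import requests

BASE_URL = "http://localhost:8000"   # assumed host/port
SIM_SOURCE = "sim_run_1a2b3c4d"      # hypothetical SIM source ID shared by related artifacts

# Upload one artifact tagged with the SIM source ID (multipart form upload)
with open("throughput_results.csv", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/artifacts/upload",  # path is an assumption about the router prefix
        files={"file": ("throughput_results.csv", f, "text/csv")},
        data={
            "test_name": "throughput_test",
            "test_result": "pass",
            "sim_source_id": SIM_SOURCE,
        },
    )
    resp.raise_for_status()

# Retrieve all artifacts grouped by SIM source ID; artifacts without one land under "ungrouped"
grouped = requests.get(f"{BASE_URL}/artifacts/grouped-by-sim-source").json()
print(len(grouped.get(SIM_SOURCE, [])), "artifacts in this SIM run")
```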


@@ -2,7 +2,7 @@ version: '3.8'
services:
  postgres:
    image: postgres:15
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: user
      POSTGRES_PASSWORD: password


@@ -129,7 +129,7 @@ def generate_pcap_content() -> bytes:
    return bytes(pcap_header)


def create_artifact_data(index: int) -> Dict[str, Any]:
def create_artifact_data(index: int, sim_source_id: str = None) -> Dict[str, Any]:
    """Generate metadata for an artifact"""
    test_name = random.choice(TEST_NAMES)
    test_suite = random.choice(TEST_SUITES)
@@ -164,6 +164,7 @@ def create_artifact_data(index: int) -> Dict[str, Any]:
"test_name": test_name,
"test_suite": test_suite,
"test_result": test_result,
"sim_source_id": sim_source_id,
"tags": artifact_tags,
"test_config": test_config,
"custom_metadata": custom_metadata,
@@ -265,6 +266,27 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
print(f"Deployment mode: {settings.deployment_mode}")
print(f"Storage backend: {settings.storage_backend}")
# Generate SIM source IDs - each source will have 2-4 artifacts
num_sim_sources = max(num_artifacts // 3, 1)
sim_sources = [f"sim_run_{uuid.uuid4().hex[:8]}" for _ in range(num_sim_sources)]
# Pre-assign artifacts to SIM sources to ensure grouping
sim_source_assignments = []
for sim_source in sim_sources:
# Each SIM source gets 2-4 artifacts
num_artifacts_for_source = random.randint(2, 4)
sim_source_assignments.extend([sim_source] * num_artifacts_for_source)
# Pad remaining artifacts with None (ungrouped) or random sources
while len(sim_source_assignments) < num_artifacts:
if random.random() < 0.3: # 30% ungrouped
sim_source_assignments.append(None)
else:
sim_source_assignments.append(random.choice(sim_sources))
# Shuffle to randomize order
random.shuffle(sim_source_assignments)
for i in range(num_artifacts):
# Randomly choose file type
file_type_choice = random.choice(['csv', 'json', 'binary', 'pcap'])
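For a sense of what the pre-assignment produces, a quick worked example of the arithmetic with the default of 50 artifacts:

```python
num_artifacts = 50                             # default for generate_seed_data
num_sim_sources = max(num_artifacts // 3, 1)   # -> 16 SIM sources
# Each source is pre-assigned 2-4 artifacts, i.e. 32-64 slots in total.
# If fewer than 50 slots were pre-assigned, the padding loop fills the gap:
# roughly 30% of the padded entries stay ungrouped (None), the rest reuse an existing source.
```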
@@ -289,8 +311,11 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
        # Upload to storage
        storage_path = await upload_artifact_to_storage(content, filename)

        # Get pre-assigned SIM source ID for this artifact
        sim_source_id = sim_source_assignments[i]

        # Generate metadata
        artifact_data = create_artifact_data(i)
        artifact_data = create_artifact_data(i, sim_source_id)

        # Create database record
        artifact = Artifact(
@@ -303,6 +328,7 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
            test_suite=artifact_data["test_suite"],
            test_config=artifact_data["test_config"],
            test_result=artifact_data["test_result"],
            sim_source_id=artifact_data["sim_source_id"],
            custom_metadata=artifact_data["custom_metadata"],
            description=artifact_data["description"],
            tags=artifact_data["tags"],