Merge main into f/updates

Resolved conflicts by keeping f/updates changes:
- Keep Angular frontend with dark theme styling
- Keep updated quickstart scripts at root level
- Remove static HTML/JS files (replaced by Angular)
- Keep sim_source_id field implementation
- Merge backend improvements from main

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
pratik committed 2025-10-15 11:44:01 -05:00
8 changed files with 149 additions and 11 deletions


@@ -1,12 +1,15 @@
FROM python:3.11-slim
FROM python:3.11-alpine
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
# Install system dependencies for Alpine
# Alpine uses apk instead of apt-get and is lighter/faster
RUN apk add --no-cache \
    gcc \
    musl-dev \
    postgresql-dev \
    postgresql-client \
    && rm -rf /var/lib/apt/lists/*
    linux-headers
# Copy requirements and install Python dependencies
COPY requirements.txt .
@@ -18,8 +21,8 @@ COPY utils/ ./utils/
COPY alembic/ ./alembic/
COPY alembic.ini .
# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
# Create non-root user (Alpine uses adduser instead of useradd)
RUN adduser -D -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
# Expose port


@@ -1,4 +1,6 @@
# Test Artifact Data Lake
# Obsidian
**Enterprise Test Artifact Storage**
A lightweight, cloud-native API for storing and querying test artifacts including CSV files, JSON files, binary files, and packet captures (PCAP). Built with FastAPI and supports both AWS S3 and self-hosted MinIO storage backends.

alembic/.gitkeep (empty placeholder file)

alembic/env.py (84 lines added)

@@ -0,0 +1,84 @@
from logging.config import fileConfig
import os

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context

# Import your models Base
from app.models.artifact import Base

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Override sqlalchemy.url from environment variable
if os.getenv("DATABASE_URL"):
    config.set_main_option("sqlalchemy.url", os.getenv("DATABASE_URL"))

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
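Because env.py prefers DATABASE_URL over the URL in alembic.ini, migrations can also be driven programmatically. A minimal sketch, assuming alembic.ini sits in the working directory (it is copied into /app by the Dockerfile) and using an illustrative connection string:

```python
import os

from alembic import command
from alembic.config import Config

# Assumed DSN; env.py will pick DATABASE_URL up before falling back to alembic.ini.
os.environ["DATABASE_URL"] = "postgresql://user:password@localhost:5432/artifacts"

cfg = Config("alembic.ini")
command.upgrade(cfg, "head")  # apply all migrations up to the latest revision
```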


@@ -1,7 +1,7 @@
from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from typing import List, Optional
from typing import List, Optional, Dict
import uuid
import json
import io
@@ -36,6 +36,7 @@ async def upload_artifact(
    test_suite: Optional[str] = Form(None),
    test_config: Optional[str] = Form(None),
    test_result: Optional[str] = Form(None),
    sim_source_id: Optional[str] = Form(None),
    custom_metadata: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
    tags: Optional[str] = Form(None),
@@ -51,6 +52,7 @@ async def upload_artifact(
    - **test_suite**: Test suite identifier
    - **test_config**: JSON string of test configuration
    - **test_result**: Test result (pass, fail, skip, error)
    - **sim_source_id**: SIM source ID to group multiple artifacts
    - **custom_metadata**: JSON string of additional metadata
    - **description**: Text description of the artifact
    - **tags**: JSON array of tags (as string)
@@ -88,6 +90,7 @@ async def upload_artifact(
        test_suite=test_suite,
        test_config=test_config_dict,
        test_result=test_result,
        sim_source_id=sim_source_id,
        custom_metadata=metadata_dict,
        description=description,
        tags=tags_list,
@@ -194,6 +197,7 @@ async def query_artifacts(query: ArtifactQuery, db: Session = Depends(get_db)):
    - **test_name**: Filter by test name
    - **test_suite**: Filter by test suite
    - **test_result**: Filter by test result
    - **sim_source_id**: Filter by SIM source ID
    - **tags**: Filter by tags (must contain all specified tags)
    - **start_date**: Filter by creation date (from)
    - **end_date**: Filter by creation date (to)
@@ -212,6 +216,8 @@ async def query_artifacts(query: ArtifactQuery, db: Session = Depends(get_db)):
        q = q.filter(Artifact.test_suite == query.test_suite)
    if query.test_result:
        q = q.filter(Artifact.test_result == query.test_result)
    if query.sim_source_id:
        q = q.filter(Artifact.sim_source_id == query.sim_source_id)
    if query.tags:
        for tag in query.tags:
            q = q.filter(Artifact.tags.contains([tag]))
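With that filter in place, a client can scope a query to a single SIM run. A minimal sketch; the base URL and the /artifacts/query path are assumptions about how this router is mounted:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port

payload = {"sim_source_id": "sim_run_1a2b3c4d"}  # hypothetical ID from an earlier upload
resp = requests.post(f"{BASE_URL}/artifacts/query", json=payload)  # path is an assumption
resp.raise_for_status()

for artifact in resp.json():
    # field names assumed from the upload form fields above
    print(artifact["test_name"], artifact["test_result"])
```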
@@ -240,3 +246,20 @@ async def list_artifacts(
        Artifact.created_at.desc()
    ).offset(offset).limit(limit).all()
    return artifacts
@router.get("/grouped-by-sim-source", response_model=Dict[str, List[ArtifactResponse]])
async def get_artifacts_grouped_by_sim_source(
db: Session = Depends(get_db)
):
"""Get all artifacts grouped by SIM source ID"""
from collections import defaultdict
artifacts = db.query(Artifact).order_by(Artifact.created_at.desc()).all()
grouped = defaultdict(list)
for artifact in artifacts:
sim_source = artifact.sim_source_id or "ungrouped"
grouped[sim_source].append(artifact)
return dict(grouped)
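Taken together with the new form field, the intended flow is: upload artifacts tagged with the same sim_source_id, then fetch them back grouped. A minimal client-side sketch; the upload path, the name of the file part, and the port are assumptions about this deployment:

```python
import requests

BASE_URL = "http://localhost:8000"   # assumed host/port
SIM_SOURCE = "sim_run_1a2b3c4d"      # hypothetical SIM source ID shared by related artifacts

# Upload one artifact tagged with the SIM source ID (multipart form upload)
with open("throughput_results.csv", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/artifacts/upload",  # path is an assumption about the router prefix
        files={"file": ("throughput_results.csv", f, "text/csv")},
        data={
            "test_name": "throughput_test",
            "test_result": "pass",
            "sim_source_id": SIM_SOURCE,
        },
    )
    resp.raise_for_status()

# Retrieve all artifacts grouped by SIM source ID; artifacts without one land under "ungrouped"
grouped = requests.get(f"{BASE_URL}/artifacts/grouped-by-sim-source").json()
print(len(grouped.get(SIM_SOURCE, [])), "artifacts in this SIM run")
```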


@@ -2,7 +2,7 @@ version: '3.8'
services:
  postgres:
    image: postgres:15
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: user
      POSTGRES_PASSWORD: password


@@ -129,7 +129,7 @@ def generate_pcap_content() -> bytes:
    return bytes(pcap_header)


def create_artifact_data(index: int) -> Dict[str, Any]:
def create_artifact_data(index: int, sim_source_id: str = None) -> Dict[str, Any]:
    """Generate metadata for an artifact"""
    test_name = random.choice(TEST_NAMES)
    test_suite = random.choice(TEST_SUITES)
@@ -164,6 +164,7 @@ def create_artifact_data(index: int) -> Dict[str, Any]:
"test_name": test_name,
"test_suite": test_suite,
"test_result": test_result,
"sim_source_id": sim_source_id,
"tags": artifact_tags,
"test_config": test_config,
"custom_metadata": custom_metadata,
@@ -265,6 +266,27 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
print(f"Deployment mode: {settings.deployment_mode}")
print(f"Storage backend: {settings.storage_backend}")
# Generate SIM source IDs - each source will have 2-4 artifacts
num_sim_sources = max(num_artifacts // 3, 1)
sim_sources = [f"sim_run_{uuid.uuid4().hex[:8]}" for _ in range(num_sim_sources)]
# Pre-assign artifacts to SIM sources to ensure grouping
sim_source_assignments = []
for sim_source in sim_sources:
# Each SIM source gets 2-4 artifacts
num_artifacts_for_source = random.randint(2, 4)
sim_source_assignments.extend([sim_source] * num_artifacts_for_source)
# Pad remaining artifacts with None (ungrouped) or random sources
while len(sim_source_assignments) < num_artifacts:
if random.random() < 0.3: # 30% ungrouped
sim_source_assignments.append(None)
else:
sim_source_assignments.append(random.choice(sim_sources))
# Shuffle to randomize order
random.shuffle(sim_source_assignments)
for i in range(num_artifacts):
# Randomly choose file type
file_type_choice = random.choice(['csv', 'json', 'binary', 'pcap'])
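For a sense of what the pre-assignment produces, a quick worked example of the arithmetic with the default of 50 artifacts:

```python
num_artifacts = 50                             # default for generate_seed_data
num_sim_sources = max(num_artifacts // 3, 1)   # -> 16 SIM sources
# Each source is pre-assigned 2-4 artifacts, i.e. 32-64 slots in total.
# If fewer than 50 slots were pre-assigned, the padding loop fills the gap:
# roughly 30% of the padded entries stay ungrouped (None), the rest reuse an existing source.
```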
@@ -289,8 +311,11 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
        # Upload to storage
        storage_path = await upload_artifact_to_storage(content, filename)

        # Get pre-assigned SIM source ID for this artifact
        sim_source_id = sim_source_assignments[i]

        # Generate metadata
        artifact_data = create_artifact_data(i)
        artifact_data = create_artifact_data(i, sim_source_id)

        # Create database record
        artifact = Artifact(
@@ -303,6 +328,7 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
            test_suite=artifact_data["test_suite"],
            test_config=artifact_data["test_config"],
            test_result=artifact_data["test_result"],
            sim_source_id=artifact_data["sim_source_id"],
            custom_metadata=artifact_data["custom_metadata"],
            description=artifact_data["description"],
            tags=artifact_data["tags"],