diff --git a/Dockerfile b/Dockerfile
index 1741c6c..4a688cc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,15 @@
-FROM python:3.11-slim
+FROM python:3.11-alpine
 
 WORKDIR /app
 
-# Install system dependencies
-RUN apt-get update && apt-get install -y \
+# Install system dependencies for Alpine
+# Alpine uses apk instead of apt-get and is lighter/faster
+RUN apk add --no-cache \
     gcc \
+    musl-dev \
+    postgresql-dev \
     postgresql-client \
-    && rm -rf /var/lib/apt/lists/*
+    linux-headers
 
 # Copy requirements and install Python dependencies
 COPY requirements.txt .
@@ -18,8 +21,8 @@ COPY utils/ ./utils/
 COPY alembic/ ./alembic/
 COPY alembic.ini .
 
-# Create non-root user
-RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
+# Create non-root user (Alpine uses adduser instead of useradd)
+RUN adduser -D -u 1000 appuser && chown -R appuser:appuser /app
 USER appuser
 
 # Expose port
diff --git a/README.md b/README.md
index c2faf59..66ac471 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
-# Test Artifact Data Lake
+# Obsidian
+
+**Enterprise Test Artifact Storage**
 
 A lightweight, cloud-native API for storing and querying test artifacts including CSV files, JSON files, binary files, and packet captures (PCAP). Built with FastAPI and supports both AWS S3 and self-hosted MinIO storage backends.
 
diff --git a/alembic/.gitkeep b/alembic/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 0000000..4618cd6
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,84 @@
+from logging.config import fileConfig
+import os
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# Import your models Base
+from app.models.artifact import Base
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Override sqlalchemy.url from environment variable
+if os.getenv("DATABASE_URL"):
+    config.set_main_option("sqlalchemy.url", os.getenv("DATABASE_URL"))
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well. By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection, target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
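
The env.py above overrides sqlalchemy.url with DATABASE_URL whenever that variable is set, so the same migration scripts work in Docker Compose, CI, and local development without editing alembic.ini. A minimal sketch of how that override gets exercised, assuming migrations are driven from Python rather than the alembic CLI; the DSN below is a placeholder, not a value from this repo:

import os

from alembic import command
from alembic.config import Config

# Placeholder DSN for illustration only; in docker-compose this would point at
# the postgres service configured below.
os.environ.setdefault("DATABASE_URL", "postgresql://user:password@localhost:5432/artifacts")

alembic_cfg = Config("alembic.ini")   # the ini file copied into the image by the Dockerfile
command.upgrade(alembic_cfg, "head")  # env.py swaps in DATABASE_URL before connecting

This is equivalent to exporting DATABASE_URL and running `alembic upgrade head`.
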
+ + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/versions/.gitkeep b/alembic/versions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/app/api/artifacts.py b/app/api/artifacts.py index 1d6b3d4..593413e 100644 --- a/app/api/artifacts.py +++ b/app/api/artifacts.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException, Query from fastapi.responses import StreamingResponse from sqlalchemy.orm import Session -from typing import List, Optional +from typing import List, Optional, Dict import uuid import json import io @@ -36,6 +36,7 @@ async def upload_artifact( test_suite: Optional[str] = Form(None), test_config: Optional[str] = Form(None), test_result: Optional[str] = Form(None), + sim_source_id: Optional[str] = Form(None), custom_metadata: Optional[str] = Form(None), description: Optional[str] = Form(None), tags: Optional[str] = Form(None), @@ -51,6 +52,7 @@ async def upload_artifact( - **test_suite**: Test suite identifier - **test_config**: JSON string of test configuration - **test_result**: Test result (pass, fail, skip, error) + - **sim_source_id**: SIM source ID to group multiple artifacts - **custom_metadata**: JSON string of additional metadata - **description**: Text description of the artifact - **tags**: JSON array of tags (as string) @@ -88,6 +90,7 @@ async def upload_artifact( test_suite=test_suite, test_config=test_config_dict, test_result=test_result, + sim_source_id=sim_source_id, custom_metadata=metadata_dict, description=description, tags=tags_list, @@ -194,6 +197,7 @@ async def query_artifacts(query: ArtifactQuery, db: Session = Depends(get_db)): - **test_name**: Filter by test name - **test_suite**: Filter by test suite - **test_result**: Filter by test result + - **sim_source_id**: Filter by SIM source ID - **tags**: Filter by tags (must contain all specified tags) - **start_date**: Filter by creation date (from) - **end_date**: Filter by creation date (to) @@ -212,6 +216,8 @@ async def query_artifacts(query: ArtifactQuery, db: Session = Depends(get_db)): q = q.filter(Artifact.test_suite == query.test_suite) if query.test_result: q = q.filter(Artifact.test_result == query.test_result) + if query.sim_source_id: + q = q.filter(Artifact.sim_source_id == query.sim_source_id) if query.tags: for tag in query.tags: q = q.filter(Artifact.tags.contains([tag])) @@ -240,3 +246,20 @@ async def list_artifacts( Artifact.created_at.desc() ).offset(offset).limit(limit).all() return artifacts + + +@router.get("/grouped-by-sim-source", response_model=Dict[str, List[ArtifactResponse]]) +async def get_artifacts_grouped_by_sim_source( + db: Session = Depends(get_db) +): + """Get all artifacts grouped by SIM source ID""" + from collections import defaultdict + + artifacts = db.query(Artifact).order_by(Artifact.created_at.desc()).all() + grouped = defaultdict(list) + + for artifact in artifacts: + sim_source = artifact.sim_source_id or "ungrouped" + grouped[sim_source].append(artifact) + + return dict(grouped) diff --git a/docker-compose.yml b/docker-compose.yml index cf82292..5b7fa2b 100644 --- a/docker-compose.yml +++ 
diff --git a/docker-compose.yml b/docker-compose.yml
index cf82292..5b7fa2b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ version: '3.8'
 
 services:
   postgres:
-    image: postgres:15
+    image: postgres:15-alpine
     environment:
       POSTGRES_USER: user
       POSTGRES_PASSWORD: password
diff --git a/utils/seed_data.py b/utils/seed_data.py
index 07cf9a8..105bef5 100755
--- a/utils/seed_data.py
+++ b/utils/seed_data.py
@@ -129,7 +129,7 @@ def generate_pcap_content() -> bytes:
     return bytes(pcap_header)
 
 
-def create_artifact_data(index: int) -> Dict[str, Any]:
+def create_artifact_data(index: int, sim_source_id: str = None) -> Dict[str, Any]:
     """Generate metadata for an artifact"""
     test_name = random.choice(TEST_NAMES)
     test_suite = random.choice(TEST_SUITES)
@@ -164,6 +164,7 @@ def create_artifact_data(index: int) -> Dict[str, Any]:
         "test_name": test_name,
         "test_suite": test_suite,
         "test_result": test_result,
+        "sim_source_id": sim_source_id,
         "tags": artifact_tags,
         "test_config": test_config,
         "custom_metadata": custom_metadata,
@@ -265,6 +266,27 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
     print(f"Deployment mode: {settings.deployment_mode}")
     print(f"Storage backend: {settings.storage_backend}")
 
+    # Generate SIM source IDs - each source will have 2-4 artifacts
+    num_sim_sources = max(num_artifacts // 3, 1)
+    sim_sources = [f"sim_run_{uuid.uuid4().hex[:8]}" for _ in range(num_sim_sources)]
+
+    # Pre-assign artifacts to SIM sources to ensure grouping
+    sim_source_assignments = []
+    for sim_source in sim_sources:
+        # Each SIM source gets 2-4 artifacts
+        num_artifacts_for_source = random.randint(2, 4)
+        sim_source_assignments.extend([sim_source] * num_artifacts_for_source)
+
+    # Pad remaining artifacts with None (ungrouped) or random sources
+    while len(sim_source_assignments) < num_artifacts:
+        if random.random() < 0.3:  # 30% ungrouped
+            sim_source_assignments.append(None)
+        else:
+            sim_source_assignments.append(random.choice(sim_sources))
+
+    # Shuffle to randomize order
+    random.shuffle(sim_source_assignments)
+
     for i in range(num_artifacts):
         # Randomly choose file type
         file_type_choice = random.choice(['csv', 'json', 'binary', 'pcap'])
@@ -289,8 +311,11 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
         # Upload to storage
         storage_path = await upload_artifact_to_storage(content, filename)
 
+        # Get pre-assigned SIM source ID for this artifact
+        sim_source_id = sim_source_assignments[i]
+
         # Generate metadata
-        artifact_data = create_artifact_data(i)
+        artifact_data = create_artifact_data(i, sim_source_id)
 
         # Create database record
         artifact = Artifact(
@@ -303,6 +328,7 @@ async def generate_seed_data(num_artifacts: int = 50) -> List[int]:
            test_suite=artifact_data["test_suite"],
            test_config=artifact_data["test_config"],
            test_result=artifact_data["test_result"],
+           sim_source_id=artifact_data["sim_source_id"],
            custom_metadata=artifact_data["custom_metadata"],
            description=artifact_data["description"],
            tags=artifact_data["tags"],
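
The seed script and API both read and write Artifact.sim_source_id, but the model and migration that add the column are not part of this excerpt. For reference, a hypothetical Alembic revision for that column could look roughly like the following; the artifacts table name, the column type, and the index are assumptions inferred from how the field is used above, not code from this PR:

"""add sim_source_id to artifacts (hypothetical sketch, not from this PR)"""
import sqlalchemy as sa
from alembic import op

# Revision identifiers are normally generated by `alembic revision --autogenerate`.
revision = "0001_add_sim_source_id"  # placeholder
down_revision = None                 # placeholder
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Nullable, since seeded artifacts may have no SIM source and the API falls back
    # to an "ungrouped" bucket; indexed because the query endpoint filters on equality.
    op.add_column("artifacts", sa.Column("sim_source_id", sa.String(), nullable=True))
    op.create_index("ix_artifacts_sim_source_id", "artifacts", ["sim_source_id"])


def downgrade() -> None:
    op.drop_index("ix_artifacts_sim_source_id", table_name="artifacts")
    op.drop_column("artifacts", "sim_source_id")
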