From 38160f2e441b78661161a5264cf05126fcb24c9a Mon Sep 17 00:00:00 2001
From: Mondo Diaz
Date: Wed, 7 Jan 2026 12:42:50 -0600
Subject: [PATCH] Add structured logging module for verification events (#28)
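
Adds structured JSON logging for production, request-ID tracing via context
variables, and helpers for logging verification events. As a quick sketch of
the intended logging flow (the logger name and the extra field here are
illustrative, not part of this change):

    import logging
    from app.logging_config import setup_logging

    setup_logging()  # JSON output when running in production
    logging.getLogger("app.routes").info("Request completed", extra={"status": 200})

which, with the JSON formatter active, emits a single line along the lines of:

    {"timestamp": "...", "level": "INFO", "logger": "app.routes",
     "message": "Request completed", "extra": {"status": 200}}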
---
 CHANGELOG.md                                | 24 ++
 backend/app/config.py                       |  4 +
 backend/app/logging_config.py               | 285 ++++++++++++++++++++
 backend/tests/test_download_verification.py | 31 ++-
 4 files changed, 339 insertions(+), 5 deletions(-)
 create mode 100644 backend/app/logging_config.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41a8dec..0d057f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [Unreleased]
 
 ### Added
+- Added download verification with `verify` and `verify_mode` query parameters (#26)
+  - `?verify=true&verify_mode=pre` - Pre-verification: verify before streaming (guarantees no corrupt data is served)
+  - `?verify=true&verify_mode=stream` - Streaming verification: verify while streaming (logs an error on mismatch)
+- Added checksum response headers to all download endpoints (#27)
+  - `X-Checksum-SHA256` - SHA256 hash of the artifact
+  - `X-Content-Length` - File size in bytes
+  - `X-Checksum-MD5` - MD5 hash (if available)
+  - `ETag` - Artifact ID (SHA256)
+  - `Digest` - RFC 3230 sha-256 hash (base64)
+  - `X-Verified` - Verification status (true/false/pending)
+- Added `checksum.py` module with SHA256 utilities (#26)
+  - `compute_sha256()` and `compute_sha256_stream()` functions
+  - `HashingStreamWrapper` for incremental hash computation
+  - `VerifyingStreamWrapper` for stream verification
+  - `verify_checksum()` and `verify_checksum_strict()` functions
+  - `ChecksumMismatchError` exception with context
+- Added `get_verified()` and `get_stream_verified()` methods to the storage layer (#26)
+- Added `logging_config.py` module with structured logging (#28)
+  - JSON logging format for production
+  - Request ID tracking via context variables
+  - Verification failure logging with full context
+- Added `log_level` and `log_format` settings to configuration (#28)
+- Added 62 unit tests for checksum utilities and verification (#29)
+- Added 17 integration tests for the download verification API (#29)
 - Added global artifacts endpoint `GET /api/v1/artifacts` with project/package/tag/size/date filters (#18)
 - Added global tags endpoint `GET /api/v1/tags` with project/package/search/date filters (#18)
 - Added wildcard pattern matching (`*`) for tag filters across all endpoints (#18)
diff --git a/backend/app/config.py b/backend/app/config.py
index 2aa4469..fa78674 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
         3600  # Presigned URL expiry in seconds (default: 1 hour)
     )
 
+    # Logging settings
+    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
+    log_format: str = "auto"  # "json", "standard", or "auto" (json in production)
+
     @property
     def database_url(self) -> str:
         sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else ""
diff --git a/backend/app/logging_config.py b/backend/app/logging_config.py
new file mode 100644
index 0000000..558e17b
--- /dev/null
+++ b/backend/app/logging_config.py
@@ -0,0 +1,285 @@
+"""
+Structured logging configuration for Orchard.
+
+This module provides:
+- Structured JSON logging for production environments
+- Request tracing via the X-Request-ID header
+- Verification failure logging with context
+- Configurable log levels via environment
+
+Usage:
+    from app.logging_config import setup_logging, get_request_id
+
+    setup_logging()  # Call once at app startup
+    request_id = get_request_id()  # Get the current request's ID
+"""
+
+import logging
+import json
+import sys
+import uuid
+from datetime import datetime, timezone
+from typing import Optional, Any, Dict
+from contextvars import ContextVar
+
+# Context variable for the request ID (safe across threads and async tasks)
+_request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)
+
+from .config import get_settings
+
+
+def get_request_id() -> Optional[str]:
+    """Get the current request's ID from context."""
+    return _request_id_var.get()
+
+
+def set_request_id(request_id: Optional[str] = None) -> str:
+    """
+    Set the request ID for the current context.
+
+    If no ID is provided, generates a new UUID.
+    Returns the request ID that was set.
+    """
+    if request_id is None:
+        request_id = str(uuid.uuid4())
+    _request_id_var.set(request_id)
+    return request_id
+
+
+def clear_request_id():
+    """Clear the request ID from context."""
+    _request_id_var.set(None)
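+
+
+# Example wiring (illustrative; Orchard's actual middleware may differ): a
+# FastAPI-style HTTP middleware would set the request ID on the way in,
+# honoring any inbound X-Request-ID header, and clear it on the way out.
+#
+#     @app.middleware("http")
+#     async def request_id_middleware(request, call_next):
+#         set_request_id(request.headers.get("X-Request-ID"))
+#         try:
+#             response = await call_next(request)
+#             response.headers["X-Request-ID"] = get_request_id()
+#             return response
+#         finally:
+#             clear_request_id()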
+
+
+class JSONFormatter(logging.Formatter):
+    """
+    JSON log formatter for structured logging.
+
+    Output format:
+    {
+        "timestamp": "2025-01-01T00:00:00+00:00",
+        "level": "INFO",
+        "logger": "app.routes",
+        "message": "Request completed",
+        "request_id": "abc-123",
+        "extra": {...}
+    }
+    """
+
+    def format(self, record: logging.LogRecord) -> str:
+        log_entry: Dict[str, Any] = {
+            "timestamp": datetime.fromtimestamp(record.created, timezone.utc).isoformat(),
+            "level": record.levelname,
+            "logger": record.name,
+            "message": record.getMessage(),
+        }
+
+        # Add request ID if available
+        request_id = get_request_id()
+        if request_id:
+            log_entry["request_id"] = request_id
+
+        # Add exception info if present
+        if record.exc_info:
+            log_entry["exception"] = self.formatException(record.exc_info)
+
+        # Add extra fields from the record
+        extra_fields: Dict[str, Any] = {}
+        for key, value in record.__dict__.items():
+            if key not in (
+                "name",
+                "msg",
+                "args",
+                "created",
+                "filename",
+                "funcName",
+                "levelname",
+                "levelno",
+                "lineno",
+                "module",
+                "msecs",
+                "pathname",
+                "process",
+                "processName",
+                "relativeCreated",
+                "stack_info",
+                "exc_info",
+                "exc_text",
+                "thread",
+                "threadName",
+                "message",
+                "asctime",
+            ):
+                try:
+                    json.dumps(value)  # Ensure serializable
+                    extra_fields[key] = value
+                except (TypeError, ValueError):
+                    extra_fields[key] = str(value)
+
+        if extra_fields:
+            log_entry["extra"] = extra_fields
+
+        return json.dumps(log_entry)
+
+
+class StandardFormatter(logging.Formatter):
+    """
+    Standard log formatter for development.
+
+    Output format:
+    [2025-01-01 00:00:00] INFO [app.routes] [req-abc123] Request completed
+    """
+
+    def format(self, record: logging.LogRecord) -> str:
+        timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%d %H:%M:%S")
+        request_id = get_request_id()
+        req_str = f" [req-{request_id[:8]}]" if request_id else ""
+
+        base_msg = f"[{timestamp}] {record.levelname:5} [{record.name}]{req_str} {record.getMessage()}"
+
+        if record.exc_info:
+            base_msg += "\n" + self.formatException(record.exc_info)
+
+        return base_msg
+
+
+def setup_logging(log_level: Optional[str] = None, json_format: Optional[bool] = None):
+    """
+    Configure logging for the application.
+
+    Args:
+        log_level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
+            Defaults to the ORCHARD_LOG_LEVEL env var, else INFO.
+        json_format: Use JSON format. Defaults to True in production.
+    """
+    settings = get_settings()
+
+    # Determine log level
+    if log_level is None:
+        log_level = getattr(settings, "log_level", "INFO")
+    effective_level = log_level if log_level else "INFO"
+    level = getattr(logging, effective_level.upper(), logging.INFO)
+
+    # Determine format: "json"/"standard" explicit, "auto" means JSON in production
+    if json_format is None:
+        json_format = {"json": True, "standard": False}.get(settings.log_format, settings.is_production)
+
+    # Create handler
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+
+    # Set formatter
+    if json_format:
+        handler.setFormatter(JSONFormatter())
+    else:
+        handler.setFormatter(StandardFormatter())
+
+    # Configure root logger
+    root_logger = logging.getLogger()
+    root_logger.setLevel(level)
+
+    # Remove existing handlers
+    root_logger.handlers.clear()
+    root_logger.addHandler(handler)
+
+    # Configure specific loggers
+    for logger_name in ["app", "uvicorn", "uvicorn.access", "uvicorn.error"]:
+        logger = logging.getLogger(logger_name)
+        logger.setLevel(level)
+        logger.handlers.clear()
+        logger.addHandler(handler)
+        logger.propagate = False
+
+    # Quiet down noisy loggers
+    logging.getLogger("botocore").setLevel(logging.WARNING)
+    logging.getLogger("boto3").setLevel(logging.WARNING)
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+
+
+def log_verification_failure(
+    logger: logging.Logger,
+    expected_hash: str,
+    actual_hash: str,
+    artifact_id: Optional[str] = None,
+    s3_key: Optional[str] = None,
+    project: Optional[str] = None,
+    package: Optional[str] = None,
+    size: Optional[int] = None,
+    user_id: Optional[str] = None,
+    source_ip: Optional[str] = None,
+    verification_mode: Optional[str] = None,
+):
+    """
+    Log a verification failure with full context.
+
+    This creates a structured log entry with all relevant details
+    for debugging and alerting.
+    """
+    logger.error(
+        "Checksum verification failed",
+        extra={
+            "event": "verification_failure",
+            "expected_hash": expected_hash,
+            "actual_hash": actual_hash,
+            "artifact_id": artifact_id,
+            "s3_key": s3_key,
+            "project": project,
+            "package": package,
+            "size": size,
+            "user_id": user_id,
+            "source_ip": source_ip,
+            "verification_mode": verification_mode,
+            "hash_match": expected_hash == actual_hash,
+        },
+    )
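+
+
+# Illustrative call site (this download-route context is assumed, not part of
+# this module):
+#
+#     logger = logging.getLogger("app.routes.download")
+#     log_verification_failure(
+#         logger,
+#         expected_hash=artifact.sha256,
+#         actual_hash=computed_sha256,
+#         artifact_id=artifact.id,
+#         verification_mode="stream",
+#     )
+#
+# With JSON formatting enabled, the fields above land in the log entry's
+# "extra" object, so alerting can key on event == "verification_failure".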
+
+
+def log_verification_success(
+    logger: logging.Logger,
+    artifact_id: str,
+    size: Optional[int] = None,
+    verification_mode: Optional[str] = None,
+    duration_ms: Optional[float] = None,
+):
+    """Log a successful verification."""
+    logger.info(
+        f"Verification passed for artifact {artifact_id[:16]}...",
+        extra={
+            "event": "verification_success",
+            "artifact_id": artifact_id,
+            "size": size,
+            "verification_mode": verification_mode,
+            "duration_ms": duration_ms,
+        },
+    )
diff --git a/backend/tests/test_download_verification.py b/backend/tests/test_download_verification.py
index e9e6952..ddec899 100644
--- a/backend/tests/test_download_verification.py
+++ b/backend/tests/test_download_verification.py
@@ -12,6 +12,7 @@ These tests verify:
 import pytest
 import hashlib
 import base64
+import io
 
 
 # =============================================================================
@@ -20,11 +21,31 @@
 
 @pytest.fixture
-def test_content():
-    """Generate test content with known hash."""
-    content = b"Test content for download verification"
-    sha256 = hashlib.sha256(content).hexdigest()
-    return content, sha256
+def upload_test_file(integration_client):
+    """
+    Factory fixture to upload a test file and return its artifact ID.
+
+    Usage:
+        artifact_id = upload_test_file(project, package, content, tag="v1.0")
+    """
+
+    def _upload(project_name: str, package_name: str, content: bytes, tag: str | None = None):
+        files = {
+            "file": ("test-file.bin", io.BytesIO(content), "application/octet-stream")
+        }
+        data = {}
+        if tag:
+            data["tag"] = tag
+
+        response = integration_client.post(
+            f"/api/v1/project/{project_name}/{package_name}/upload",
+            files=files,
+            data=data,
+        )
+        assert response.status_code == 200, f"Upload failed: {response.text}"
+        return response.json()["artifact_id"]
+
+    return _upload
 
 
 # =============================================================================