From 38160f2e441b78661161a5264cf05126fcb24c9a Mon Sep 17 00:00:00 2001
From: Mondo Diaz
Date: Wed, 7 Jan 2026 12:42:50 -0600
Subject: [PATCH] Add structured logging module for verification events (#28)
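
Adds structured JSON logging for production, request-ID tracing via context
variables, and helpers for logging verification events. As a quick sketch of
the intended logging flow (the logger name and the extra field here are
illustrative, not part of this change):

    import logging
    from app.logging_config import setup_logging

    setup_logging()  # JSON output when running in production
    logging.getLogger("app.routes").info("Request completed", extra={"status": 200})

which, with the JSON formatter active, emits a single line along the lines of:

    {"timestamp": "...", "level": "INFO", "logger": "app.routes",
     "message": "Request completed", "extra": {"status": 200}}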
---
 CHANGELOG.md                                | 24 ++
 backend/app/config.py                       |  4 +
 backend/app/logging_config.py               | 285 ++++++++++++++++++++
 backend/tests/test_download_verification.py | 31 ++-
 4 files changed, 339 insertions(+), 5 deletions(-)
 create mode 100644 backend/app/logging_config.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41a8dec..0d057f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [Unreleased]
 
 ### Added
+- Added download verification with `verify` and `verify_mode` query parameters (#26)
+  - `?verify=true&verify_mode=pre` - Pre-verification: verify before streaming (guarantees no corrupt data is served)
+  - `?verify=true&verify_mode=stream` - Streaming verification: verify while streaming (logs an error on mismatch)
+- Added checksum response headers to all download endpoints (#27)
+  - `X-Checksum-SHA256` - SHA256 hash of the artifact
+  - `X-Content-Length` - File size in bytes
+  - `X-Checksum-MD5` - MD5 hash (if available)
+  - `ETag` - Artifact ID (SHA256)
+  - `Digest` - RFC 3230 sha-256 hash (base64)
+  - `X-Verified` - Verification status (true/false/pending)
+- Added `checksum.py` module with SHA256 utilities (#26)
+  - `compute_sha256()` and `compute_sha256_stream()` functions
+  - `HashingStreamWrapper` for incremental hash computation
+  - `VerifyingStreamWrapper` for stream verification
+  - `verify_checksum()` and `verify_checksum_strict()` functions
+  - `ChecksumMismatchError` exception with context
+- Added `get_verified()` and `get_stream_verified()` methods to the storage layer (#26)
+- Added `logging_config.py` module with structured logging (#28)
+  - JSON logging format for production
+  - Request ID tracking via context variables
+  - Verification failure logging with full context
+- Added `log_level` and `log_format` settings to configuration (#28)
+- Added 62 unit tests for checksum utilities and verification (#29)
+- Added 17 integration tests for the download verification API (#29)
 - Added global artifacts endpoint `GET /api/v1/artifacts` with project/package/tag/size/date filters (#18)
 - Added global tags endpoint `GET /api/v1/tags` with project/package/search/date filters (#18)
 - Added wildcard pattern matching (`*`) for tag filters across all endpoints (#18)
diff --git a/backend/app/config.py b/backend/app/config.py
index 2aa4469..fa78674 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
         3600  # Presigned URL expiry in seconds (default: 1 hour)
     )
 
+    # Logging settings
+    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
+    log_format: str = "auto"  # "json", "standard", or "auto" (json in production)
+
     @property
     def database_url(self) -> str:
         sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else ""
diff --git a/backend/app/logging_config.py b/backend/app/logging_config.py
new file mode 100644
index 0000000..558e17b
--- /dev/null
+++ b/backend/app/logging_config.py
@@ -0,0 +1,285 @@
+"""
+Structured logging configuration for Orchard.
+
+This module provides:
+- Structured JSON logging for production environments
+- Request tracing via the X-Request-ID header
+- Verification failure logging with context
+- Configurable log levels via environment
+
+Usage:
+    from app.logging_config import setup_logging, get_request_id
+
+    setup_logging()  # Call once at app startup
+    request_id = get_request_id()  # Get the current request's ID
+"""
+
+import logging
+import json
+import sys
+import uuid
+from datetime import datetime, timezone
+from typing import Optional, Any, Dict
+from contextvars import ContextVar
+
+# Context variable for the request ID (safe across threads and async tasks)
+_request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)
+
+from .config import get_settings
+
+
+def get_request_id() -> Optional[str]:
+    """Get the current request's ID from context."""
+    return _request_id_var.get()
+
+
+def set_request_id(request_id: Optional[str] = None) -> str:
+    """
+    Set the request ID for the current context.
+
+    If no ID is provided, generates a new UUID.
+    Returns the request ID that was set.
+    """
+    if request_id is None:
+        request_id = str(uuid.uuid4())
+    _request_id_var.set(request_id)
+    return request_id
+
+
+def clear_request_id():
+    """Clear the request ID from context."""
+    _request_id_var.set(None)
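+
+
+# Example wiring (illustrative; Orchard's actual middleware may differ): a
+# FastAPI-style HTTP middleware would set the request ID on the way in,
+# honoring any inbound X-Request-ID header, and clear it on the way out.
+#
+#     @app.middleware("http")
+#     async def request_id_middleware(request, call_next):
+#         set_request_id(request.headers.get("X-Request-ID"))
+#         try:
+#             response = await call_next(request)
+#             response.headers["X-Request-ID"] = get_request_id()
+#             return response
+#         finally:
+#             clear_request_id()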
+
+
+class JSONFormatter(logging.Formatter):
+    """
+    JSON log formatter for structured logging.
+
+    Output format:
+    {
+        "timestamp": "2025-01-01T00:00:00+00:00",
+        "level": "INFO",
+        "logger": "app.routes",
+        "message": "Request completed",
+        "request_id": "abc-123",
+        "extra": {...}
+    }
+    """
+
+    def format(self, record: logging.LogRecord) -> str:
+        log_entry: Dict[str, Any] = {
+            "timestamp": datetime.fromtimestamp(record.created, timezone.utc).isoformat(),
+            "level": record.levelname,
+            "logger": record.name,
+            "message": record.getMessage(),
+        }
+
+        # Add request ID if available
+        request_id = get_request_id()
+        if request_id:
+            log_entry["request_id"] = request_id
+
+        # Add exception info if present
+        if record.exc_info:
+            log_entry["exception"] = self.formatException(record.exc_info)
+
+        # Add extra fields from the record
+        extra_fields: Dict[str, Any] = {}
+        for key, value in record.__dict__.items():
+            if key not in (
+                "name",
+                "msg",
+                "args",
+                "created",
+                "filename",
+                "funcName",
+                "levelname",
+                "levelno",
+                "lineno",
+                "module",
+                "msecs",
+                "pathname",
+                "process",
+                "processName",
+                "relativeCreated",
+                "stack_info",
+                "exc_info",
+                "exc_text",
+                "thread",
+                "threadName",
+                "message",
+                "asctime",
+            ):
+                try:
+                    json.dumps(value)  # Ensure serializable
+                    extra_fields[key] = value
+                except (TypeError, ValueError):
+                    extra_fields[key] = str(value)
+
+        if extra_fields:
+            log_entry["extra"] = extra_fields
+
+        return json.dumps(log_entry)
+
+
+class StandardFormatter(logging.Formatter):
+    """
+    Standard log formatter for development.
+
+    Output format:
+    [2025-01-01 00:00:00] INFO [app.routes] [req-abc123] Request completed
+    """
+
+    def format(self, record: logging.LogRecord) -> str:
+        timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%d %H:%M:%S")
+        request_id = get_request_id()
+        req_str = f" [req-{request_id[:8]}]" if request_id else ""
+
+        base_msg = f"[{timestamp}] {record.levelname:5} [{record.name}]{req_str} {record.getMessage()}"
+
+        if record.exc_info:
+            base_msg += "\n" + self.formatException(record.exc_info)
+
+        return base_msg
+
+
+def setup_logging(log_level: Optional[str] = None, json_format: Optional[bool] = None):
+    """
+    Configure logging for the application.
+
+    Args:
+        log_level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
+            Defaults to the ORCHARD_LOG_LEVEL env var, else INFO.
+        json_format: Use JSON format. Defaults to True in production.
+    """
+    settings = get_settings()
+
+    # Determine log level
+    if log_level is None:
+        log_level = getattr(settings, "log_level", "INFO")
+    effective_level = log_level if log_level else "INFO"
+    level = getattr(logging, effective_level.upper(), logging.INFO)
+
+    # Determine format: "json"/"standard" explicit, "auto" means JSON in production
+    if json_format is None:
+        json_format = {"json": True, "standard": False}.get(settings.log_format, settings.is_production)
+
+    # Create handler
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+
+    # Set formatter
+    if json_format:
+        handler.setFormatter(JSONFormatter())
+    else:
+        handler.setFormatter(StandardFormatter())
+
+    # Configure root logger
+    root_logger = logging.getLogger()
+    root_logger.setLevel(level)
+
+    # Remove existing handlers
+    root_logger.handlers.clear()
+    root_logger.addHandler(handler)
+
+    # Configure specific loggers
+    for logger_name in ["app", "uvicorn", "uvicorn.access", "uvicorn.error"]:
+        logger = logging.getLogger(logger_name)
+        logger.setLevel(level)
+        logger.handlers.clear()
+        logger.addHandler(handler)
+        logger.propagate = False
+
+    # Quiet down noisy loggers
+    logging.getLogger("botocore").setLevel(logging.WARNING)
+    logging.getLogger("boto3").setLevel(logging.WARNING)
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+
+
+def log_verification_failure(
+    logger: logging.Logger,
+    expected_hash: str,
+    actual_hash: str,
+    artifact_id: Optional[str] = None,
+    s3_key: Optional[str] = None,
+    project: Optional[str] = None,
+    package: Optional[str] = None,
+    size: Optional[int] = None,
+    user_id: Optional[str] = None,
+    source_ip: Optional[str] = None,
+    verification_mode: Optional[str] = None,
+):
+    """
+    Log a verification failure with full context.
+
+    This creates a structured log entry with all relevant details
+    for debugging and alerting.
+    """
+    logger.error(
+        "Checksum verification failed",
+        extra={
+            "event": "verification_failure",
+            "expected_hash": expected_hash,
+            "actual_hash": actual_hash,
+            "artifact_id": artifact_id,
+            "s3_key": s3_key,
+            "project": project,
+            "package": package,
+            "size": size,
+            "user_id": user_id,
+            "source_ip": source_ip,
+            "verification_mode": verification_mode,
+            "hash_match": expected_hash == actual_hash,
+        },
+    )
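+
+
+# Illustrative call site (this download-route context is assumed, not part of
+# this module):
+#
+#     logger = logging.getLogger("app.routes.download")
+#     log_verification_failure(
+#         logger,
+#         expected_hash=artifact.sha256,
+#         actual_hash=computed_sha256,
+#         artifact_id=artifact.id,
+#         verification_mode="stream",
+#     )
+#
+# With JSON formatting enabled, the fields above land in the log entry's
+# "extra" object, so alerting can key on event == "verification_failure".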
+
+
+def log_verification_success(
+    logger: logging.Logger,
+    artifact_id: str,
+    size: Optional[int] = None,
+    verification_mode: Optional[str] = None,
+    duration_ms: Optional[float] = None,
+):
+    """Log a successful verification."""
+    logger.info(
+        f"Verification passed for artifact {artifact_id[:16]}...",
+        extra={
+            "event": "verification_success",
+            "artifact_id": artifact_id,
+            "size": size,
+            "verification_mode": verification_mode,
+            "duration_ms": duration_ms,
+        },
+    )
diff --git a/backend/tests/test_download_verification.py b/backend/tests/test_download_verification.py
index e9e6952..ddec899 100644
--- a/backend/tests/test_download_verification.py
+++ b/backend/tests/test_download_verification.py
@@ -12,6 +12,7 @@ These tests verify:
 import pytest
 import hashlib
 import base64
+import io
 
 
 # =============================================================================
@@ -20,11 +21,31 @@
 
 @pytest.fixture
-def test_content():
-    """Generate test content with known hash."""
-    content = b"Test content for download verification"
-    sha256 = hashlib.sha256(content).hexdigest()
-    return content, sha256
+def upload_test_file(integration_client):
+    """
+    Factory fixture to upload a test file and return its artifact ID.
+
+    Usage:
+        artifact_id = upload_test_file(project, package, content, tag="v1.0")
+    """
+
+    def _upload(project_name: str, package_name: str, content: bytes, tag: str | None = None):
+        files = {
+            "file": ("test-file.bin", io.BytesIO(content), "application/octet-stream")
+        }
+        data = {}
+        if tag:
+            data["tag"] = tag
+
+        response = integration_client.post(
+            f"/api/v1/project/{project_name}/{package_name}/upload",
+            files=files,
+            data=data,
+        )
+        assert response.status_code == 200, f"Upload failed: {response.text}"
+        return response.json()["artifact_id"]
+
+    return _upload
 
 
 # =============================================================================