Add structured logging module for verification events (#28)

CHANGELOG.md (24 additions)
@@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
+- Added download verification with `verify` and `verify_mode` query parameters (#26)
+  - `?verify=true&verify_mode=pre` - Pre-verification: verify before streaming (guarantees no corrupt data is streamed)
+  - `?verify=true&verify_mode=stream` - Streaming verification: verify while streaming (logs an error on mismatch)
+- Added checksum response headers to all download endpoints (#27)
+  - `X-Checksum-SHA256` - SHA256 hash of the artifact
+  - `X-Content-Length` - File size in bytes
+  - `X-Checksum-MD5` - MD5 hash (if available)
+  - `ETag` - Artifact ID (SHA256)
+  - `Digest` - RFC 3230 format sha-256 hash (base64)
+  - `X-Verified` - Verification status (true/false/pending)
+- Added `checksum.py` module with SHA256 utilities (#26)
+  - `compute_sha256()` and `compute_sha256_stream()` functions
+  - `HashingStreamWrapper` for incremental hash computation
+  - `VerifyingStreamWrapper` for stream verification
+  - `verify_checksum()` and `verify_checksum_strict()` functions
+  - `ChecksumMismatchError` exception with context
+- Added `get_verified()` and `get_stream_verified()` methods to the storage layer (#26)
+- Added `logging_config.py` module with structured logging (#28)
+  - JSON log format for production
+  - Request ID tracking via context variables
+  - Verification failure logging with full context
+- Added `log_level` and `log_format` settings to the configuration (#28)
+- Added 62 unit tests for checksum utilities and verification (#29)
+- Added 17 integration tests for the download verification API (#29)
 - Added global artifacts endpoint `GET /api/v1/artifacts` with project/package/tag/size/date filters (#18)
 - Added global tags endpoint `GET /api/v1/tags` with project/package/search/date filters (#18)
 - Added wildcard pattern matching (`*`) for tag filters across all endpoints (#18)
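
The changelog entries above describe the client-facing contract. As a quick illustration, here is a sketch of exercising it with `requests`; the host, project/package names, and download path are assumptions (the concrete route is not shown in this commit), while the query parameters and header names come straight from the entries above:

    # Sketch only: the URL is hypothetical; verify/verify_mode and the response
    # headers are the documented contract from the changelog.
    import requests

    resp = requests.get(
        "http://localhost:8000/api/v1/project/demo/app/download",  # assumed route
        params={"verify": "true", "verify_mode": "pre"},
    )
    resp.raise_for_status()
    print(resp.headers.get("X-Checksum-SHA256"))  # hex SHA256 of the artifact
    print(resp.headers.get("X-Verified"))         # "true", "false", or "pending"
    print(resp.headers.get("Digest"))             # RFC 3230: sha-256=<base64>
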
@@ -48,6 +48,10 @@ class Settings(BaseSettings):
         3600  # Presigned URL expiry in seconds (default: 1 hour)
     )
 
+    # Logging settings
+    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
+    log_format: str = "auto"  # "json", "standard", or "auto" (json in production)
+
     @property
     def database_url(self) -> str:
         sslmode = f"?sslmode={self.database_sslmode}" if self.database_sslmode else ""
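
These settings sit on a pydantic `BaseSettings` subclass, and the `setup_logging` docstring in the new module references an `ORCHARD_LOG_LEVEL` environment variable, so both values can presumably be overridden from the environment. A minimal sketch, with `ORCHARD_LOG_FORMAT` assumed by analogy:

    # Sketch: ORCHARD_LOG_LEVEL is named in setup_logging's docstring below;
    # ORCHARD_LOG_FORMAT is assumed by analogy and may not match the real prefix.
    import os

    os.environ["ORCHARD_LOG_LEVEL"] = "DEBUG"
    os.environ["ORCHARD_LOG_FORMAT"] = "standard"  # force human-readable output

    # Module path inferred from `from .config import get_settings` below.
    from app.config import get_settings
    print(get_settings().log_level)  # -> "DEBUG"
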

backend/app/logging_config.py (new file, 254 lines)
@@ -0,0 +1,254 @@
"""
Structured logging configuration for Orchard.

This module provides:
- Structured JSON logging for production environments
- Request tracing via X-Request-ID header
- Verification failure logging with context
- Configurable log levels via environment

Usage:
    from app.logging_config import setup_logging, get_request_id

    setup_logging()  # Call once at app startup
    request_id = get_request_id()  # Get current request's ID
"""

import logging
import json
import sys
import uuid
from datetime import datetime, timezone
from typing import Optional, Any, Dict
from contextvars import ContextVar

from .config import get_settings

# Context variable for request ID (thread-safe)
_request_id_var: ContextVar[Optional[str]] = ContextVar("request_id", default=None)


def get_request_id() -> Optional[str]:
    """Get the current request's ID from context."""
    return _request_id_var.get()


def set_request_id(request_id: Optional[str] = None) -> str:
    """
    Set the request ID for the current context.

    If no ID is provided, generates a new UUID.
    Returns the request ID that was set.
    """
    if request_id is None:
        request_id = str(uuid.uuid4())
    _request_id_var.set(request_id)
    return request_id


def clear_request_id():
    """Clear the request ID from context."""
    _request_id_var.set(None)


class JSONFormatter(logging.Formatter):
    """
    JSON log formatter for structured logging.

    Output format:
        {
            "timestamp": "2025-01-01T00:00:00.000Z",
            "level": "INFO",
            "logger": "app.routes",
            "message": "Request completed",
            "request_id": "abc-123",
            "extra": {...}
        }
    """

    def format(self, record: logging.LogRecord) -> str:
        log_entry: Dict[str, Any] = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Add request ID if available
        request_id = get_request_id()
        if request_id:
            log_entry["request_id"] = request_id

        # Add exception info if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        # Add extra fields from record
        extra_fields: Dict[str, Any] = {}
        for key, value in record.__dict__.items():
            if key not in (
                "name",
                "msg",
                "args",
                "created",
                "filename",
                "funcName",
                "levelname",
                "levelno",
                "lineno",
                "module",
                "msecs",
                "pathname",
                "process",
                "processName",
                "relativeCreated",
                "stack_info",
                "exc_info",
                "exc_text",
                "thread",
                "threadName",
                "message",
                "asctime",
            ):
                try:
                    json.dumps(value)  # Ensure serializable
                    extra_fields[key] = value
                except (TypeError, ValueError):
                    extra_fields[key] = str(value)

        if extra_fields:
            log_entry["extra"] = extra_fields

        return json.dumps(log_entry)


class StandardFormatter(logging.Formatter):
    """
    Standard log formatter for development.

    Output format:
        [2025-01-01 00:00:00] INFO  [app.routes] [req-abc123] Request completed
    """

    def format(self, record: logging.LogRecord) -> str:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        request_id = get_request_id()
        req_str = f" [req-{request_id[:8]}]" if request_id else ""

        base_msg = f"[{timestamp}] {record.levelname:5} [{record.name}]{req_str} {record.getMessage()}"

        if record.exc_info:
            base_msg += "\n" + self.formatException(record.exc_info)

        return base_msg


def setup_logging(log_level: Optional[str] = None, json_format: Optional[bool] = None):
    """
    Configure logging for the application.

    Args:
        log_level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            Defaults to ORCHARD_LOG_LEVEL env var or INFO.
        json_format: Use JSON format. Defaults to True in production.
    """
    settings = get_settings()

    # Determine log level
    if log_level is None:
        log_level = getattr(settings, "log_level", "INFO")
    effective_level = log_level if log_level else "INFO"
    level = getattr(logging, effective_level.upper(), logging.INFO)

    # Determine format
    if json_format is None:
        json_format = settings.is_production

    # Create handler
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(level)

    # Set formatter
    if json_format:
        handler.setFormatter(JSONFormatter())
    else:
        handler.setFormatter(StandardFormatter())

    # Configure root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Remove existing handlers
    root_logger.handlers.clear()
    root_logger.addHandler(handler)

    # Configure specific loggers
    for logger_name in ["app", "uvicorn", "uvicorn.access", "uvicorn.error"]:
        logger = logging.getLogger(logger_name)
        logger.setLevel(level)
        logger.handlers.clear()
        logger.addHandler(handler)
        logger.propagate = False

    # Quiet down noisy loggers
    logging.getLogger("botocore").setLevel(logging.WARNING)
    logging.getLogger("boto3").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)


def log_verification_failure(
    logger: logging.Logger,
    expected_hash: str,
    actual_hash: str,
    artifact_id: Optional[str] = None,
    s3_key: Optional[str] = None,
    project: Optional[str] = None,
    package: Optional[str] = None,
    size: Optional[int] = None,
    user_id: Optional[str] = None,
    source_ip: Optional[str] = None,
    verification_mode: Optional[str] = None,
):
    """
    Log a verification failure with full context.

    This creates a structured log entry with all relevant details
    for debugging and alerting.
    """
    logger.error(
        "Checksum verification failed",
        extra={
            "event": "verification_failure",
            "expected_hash": expected_hash,
            "actual_hash": actual_hash,
            "artifact_id": artifact_id,
            "s3_key": s3_key,
            "project": project,
            "package": package,
            "size": size,
            "user_id": user_id,
            "source_ip": source_ip,
            "verification_mode": verification_mode,
            "hash_match": expected_hash == actual_hash,
        },
    )


def log_verification_success(
    logger: logging.Logger,
    artifact_id: str,
    size: Optional[int] = None,
    verification_mode: Optional[str] = None,
    duration_ms: Optional[float] = None,
):
    """Log a successful verification."""
    logger.info(
        f"Verification passed for artifact {artifact_id[:16]}...",
        extra={
            "event": "verification_success",
            "artifact_id": artifact_id,
            "size": size,
            "verification_mode": verification_mode,
            "duration_ms": duration_ms,
        },
    )
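
The module exposes `set_request_id` and `clear_request_id`, but the HTTP wiring that feeds them is not part of this commit. Here is a sketch of how a middleware might honor the `X-Request-ID` header mentioned in the module docstring; FastAPI is an assumption suggested by the uvicorn logger names above:

    # Hypothetical wiring, not part of this commit. Assumes a FastAPI app.
    from fastapi import FastAPI, Request

    from app.logging_config import setup_logging, set_request_id, clear_request_id

    app = FastAPI()
    setup_logging()  # call once at startup, per the module docstring


    @app.middleware("http")
    async def request_id_middleware(request: Request, call_next):
        # Reuse the caller's X-Request-ID if present, else generate a UUID.
        request_id = set_request_id(request.headers.get("X-Request-ID"))
        try:
            response = await call_next(request)
            response.headers["X-Request-ID"] = request_id  # echo back for tracing
            return response
        finally:
            clear_request_id()  # avoid leaking the ID into unrelated log lines
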
@@ -12,6 +12,7 @@ These tests verify:
 import pytest
 import hashlib
 import base64
+import io
 
 
 # =============================================================================
@@ -20,11 +21,31 @@ import base64
 
 
 @pytest.fixture
-def test_content():
-    """Generate test content with known hash."""
-    content = b"Test content for download verification"
-    sha256 = hashlib.sha256(content).hexdigest()
-    return content, sha256
+def upload_test_file(integration_client):
+    """
+    Factory fixture to upload a test file and return its artifact ID.
+
+    Usage:
+        artifact_id = upload_test_file(project, package, content, tag="v1.0")
+    """
+
+    def _upload(project_name: str, package_name: str, content: bytes, tag: str = None):
+        files = {
+            "file": ("test-file.bin", io.BytesIO(content), "application/octet-stream")
+        }
+        data = {}
+        if tag:
+            data["tag"] = tag
+
+        response = integration_client.post(
+            f"/api/v1/project/{project_name}/{package_name}/upload",
+            files=files,
+            data=data,
+        )
+        assert response.status_code == 200, f"Upload failed: {response.text}"
+        return response.json()["artifact_id"]
+
+    return _upload
 
 
 # =============================================================================