Add security fixes: SHA256 hash validation and streaming file size enforcement

- Add field_validator to ResumableUploadInitRequest to validate expected_hash
  is a valid 64-character lowercase hex SHA256 hash (normalizes to lowercase)
- Add FileSizeExceededError exception for file size limit violations
- Enforce file size limits in storage layer during streaming (prevents
  Content-Length header spoofing)
- Add FileSizeExceededError handler in upload endpoint returning HTTP 413
- Add node_modules and frontend/dist to .gitignore
commit af66fd5845 (parent 55a38ad850)
Author: Mondo Diaz
Date:   2026-01-05 15:43:19 -06:00

4 changed files with 48 additions and 4 deletions
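
Taken together, the changes define a stricter contract for callers: the resumable-upload init request must carry a hex SHA-256 digest, and uploads are capped at settings.max_file_size on the server regardless of the Content-Length header. A minimal sketch of preparing a conforming init payload follows; only the field names (expected_hash, size, tag) come from this commit, everything else is illustrative.

# Minimal client-side sketch; only the field names come from this commit.
import hashlib
import os
from typing import Optional


def build_init_payload(path: str, tag: Optional[str] = None) -> dict:
    """Compute the file's SHA-256 locally and build a resumable-upload init payload."""
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        # Stream the file in chunks so large artifacts are never loaded into memory.
        for chunk in iter(lambda: f.read(8 * 1024 * 1024), b""):
            sha256.update(chunk)
    return {
        "expected_hash": sha256.hexdigest(),  # lowercase hex, accepted by the new validator
        "size": os.path.getsize(path),
        "tag": tag,
    }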

.gitignore

@@ -37,6 +37,10 @@ Thumbs.db
 # Build
 /build/
 /dist/
+frontend/dist/
+
+# Node
+node_modules/
 
 # Local config overrides
 config.local.yaml


@@ -28,6 +28,7 @@ from .storage import (
     MULTIPART_CHUNK_SIZE,
     StorageError,
     HashComputationError,
+    FileSizeExceededError,
     S3ExistenceCheckError,
     S3UploadError,
     S3StorageUnavailableError,
@@ -1033,6 +1034,12 @@ def upload_artifact(
             status_code=500,
             detail="Data integrity error detected. Please contact support.",
         )
+    except FileSizeExceededError as e:
+        logger.warning(f"File size exceeded during upload: {e}")
+        raise HTTPException(
+            status_code=413,
+            detail=f"File too large. Maximum size is {settings.max_file_size // (1024 * 1024 * 1024)}GB",
+        )
     except StorageError as e:
         logger.error(f"Storage error during upload: {e}")
         raise HTTPException(status_code=500, detail="Internal storage error")
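
From a client's point of view, the new handler makes oversize uploads distinguishable from generic storage failures. A hedged sketch of handling it is below; the endpoint path, the multipart field name, and the use of the requests library are assumptions for illustration, not part of this commit.

# Illustrative client-side handling of the new 413 response; the URL and field
# name below are assumptions, only the status codes come from this commit.
import requests


def upload_artifact_file(base_url: str, path: str) -> dict:
    with open(path, "rb") as f:
        resp = requests.post(f"{base_url}/artifacts/upload", files={"file": f})
    if resp.status_code == 413:
        # The server enforced its size limit mid-stream; retrying the same file will not help.
        raise RuntimeError(resp.json().get("detail", "File too large"))
    resp.raise_for_status()
    return resp.json()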


@@ -1,6 +1,6 @@
 from datetime import datetime
 from typing import Optional, List, Dict, Any, Generic, TypeVar
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 from uuid import UUID
 
 T = TypeVar("T")
@@ -266,6 +266,18 @@ class ResumableUploadInitRequest(BaseModel):
     size: int
     tag: Optional[str] = None
+
+    @field_validator("expected_hash")
+    @classmethod
+    def validate_sha256_hash(cls, v: str) -> str:
+        """Validate that expected_hash is a valid 64-character lowercase hex SHA256 hash."""
+        import re
+
+        if not re.match(r"^[a-f0-9]{64}$", v.lower()):
+            raise ValueError(
+                "expected_hash must be a valid 64-character lowercase hexadecimal SHA256 hash"
+            )
+        return v.lower()  # Normalize to lowercase
 
 
 class ResumableUploadInitResponse(BaseModel):
     """Response from initiating a resumable upload"""


@@ -176,6 +176,12 @@ class HashComputationError(StorageError):
     pass
+
+
+class FileSizeExceededError(StorageError):
+    """Raised when file exceeds maximum size during upload"""
+
+    pass
 
 
 class S3ExistenceCheckError(StorageError):
     """Raised when S3 existence check fails after retries"""
@@ -261,6 +267,7 @@ class S3Storage:
         Raises:
             HashComputationError: If hash computation fails
+            FileSizeExceededError: If file exceeds maximum size
             S3ExistenceCheckError: If S3 existence check fails after retries
             S3UploadError: If S3 upload fails
         """
@@ -270,11 +277,18 @@ class S3Storage:
             if not content:
                 raise HashComputationError("Empty file content")
+
+            size = len(content)
+            # Enforce file size limit (protection against Content-Length spoofing)
+            if size > settings.max_file_size:
+                raise FileSizeExceededError(
+                    f"File size {size} exceeds maximum {settings.max_file_size}"
+                )
+
             sha256_hash = hashlib.sha256(content).hexdigest()
             md5_hash = hashlib.md5(content).hexdigest()
             sha1_hash = hashlib.sha1(content).hexdigest()
-            size = len(content)
-        except HashComputationError:
+        except (HashComputationError, FileSizeExceededError):
             raise
         except Exception as e:
             logger.error(f"Hash computation failed: {e}")
@@ -349,6 +363,7 @@ class S3Storage:
         Raises:
             HashComputationError: If hash computation fails
+            FileSizeExceededError: If file exceeds maximum size
             S3ExistenceCheckError: If S3 existence check fails after retries
             S3UploadError: If S3 upload fails
         """
@@ -369,13 +384,19 @@ class S3Storage:
                 sha1_hasher.update(chunk)
                 size += len(chunk)
+
+                # Enforce file size limit during streaming (protection against spoofing)
+                if size > settings.max_file_size:
+                    raise FileSizeExceededError(
+                        f"File size exceeds maximum {settings.max_file_size}"
+                    )
 
             if size == 0:
                 raise HashComputationError("Empty file content")
 
             sha256_hash = sha256_hasher.hexdigest()
             md5_hash = md5_hasher.hexdigest()
             sha1_hash = sha1_hasher.hexdigest()
-        except HashComputationError:
+        except (HashComputationError, FileSizeExceededError):
             raise
         except Exception as e:
             logger.error(f"Hash computation failed for multipart upload: {e}")