Add security fixes: SHA256 hash validation and streaming file size enforcement

- Add field_validator to ResumableUploadInitRequest to validate that expected_hash
  is a 64-character hexadecimal SHA256 hash (normalized to lowercase)
- Add FileSizeExceededError exception for file size limit violations
- Enforce file size limits in storage layer during streaming (prevents
  Content-Length header spoofing)
- Add FileSizeExceededError handler in upload endpoint returning HTTP 413
- Add node_modules and frontend/dist to .gitignore

Author: Mondo Diaz
Date:   2026-01-05 15:43:19 -06:00
Parent: 55a38ad850
Commit: af66fd5845

4 changed files with 48 additions and 4 deletions

.gitignore

@@ -37,6 +37,10 @@ Thumbs.db
 # Build
 /build/
 /dist/
+frontend/dist/
+
+# Node
+node_modules/
 
 # Local config overrides
 config.local.yaml


@@ -28,6 +28,7 @@ from .storage import (
     MULTIPART_CHUNK_SIZE,
     StorageError,
     HashComputationError,
+    FileSizeExceededError,
     S3ExistenceCheckError,
     S3UploadError,
     S3StorageUnavailableError,
@@ -1033,6 +1034,12 @@ def upload_artifact(
             status_code=500,
             detail="Data integrity error detected. Please contact support.",
         )
+    except FileSizeExceededError as e:
+        logger.warning(f"File size exceeded during upload: {e}")
+        raise HTTPException(
+            status_code=413,
+            detail=f"File too large. Maximum size is {settings.max_file_size // (1024 * 1024 * 1024)}GB",
+        )
     except StorageError as e:
         logger.error(f"Storage error during upload: {e}")
         raise HTTPException(status_code=500, detail="Internal storage error")
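
For reference, a minimal, self-contained sketch of the mapping this hunk introduces: the storage layer raises FileSizeExceededError and the endpoint turns it into HTTP 413 instead of a generic 500. The route path, MAX_FILE_SIZE constant, and save_to_storage helper below are illustrative stand-ins, not the project's real names.

# Sketch only: illustrative names, not the project's code.
from fastapi import FastAPI, HTTPException, Request

app = FastAPI()
MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024  # stand-in for settings.max_file_size


class FileSizeExceededError(Exception):
    """Local stand-in for the storage-layer exception added in this commit."""


def save_to_storage(payload: bytes) -> None:
    # Stand-in for the real storage call, which streams and checks incrementally.
    if len(payload) > MAX_FILE_SIZE:
        raise FileSizeExceededError(f"File size {len(payload)} exceeds maximum {MAX_FILE_SIZE}")


@app.post("/upload")  # hypothetical path
async def upload(request: Request):
    body = await request.body()
    try:
        save_to_storage(body)
    except FileSizeExceededError:
        # Same mapping as above: size violations surface as 413, not 500.
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024 * 1024 * 1024)}GB",
        )
    return {"status": "stored"}

Returning 413 (Payload Too Large) rather than 500 lets clients tell an oversized upload apart from a server fault.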


@@ -1,6 +1,6 @@
 from datetime import datetime
 from typing import Optional, List, Dict, Any, Generic, TypeVar
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 from uuid import UUID
 
 T = TypeVar("T")
@@ -266,6 +266,18 @@ class ResumableUploadInitRequest(BaseModel):
     size: int
     tag: Optional[str] = None
 
+    @field_validator("expected_hash")
+    @classmethod
+    def validate_sha256_hash(cls, v: str) -> str:
+        """Validate that expected_hash is a valid 64-character lowercase hex SHA256 hash."""
+        import re
+
+        if not re.match(r"^[a-f0-9]{64}$", v.lower()):
+            raise ValueError(
+                "expected_hash must be a valid 64-character lowercase hexadecimal SHA256 hash"
+            )
+        return v.lower()  # Normalize to lowercase
+
 
 class ResumableUploadInitResponse(BaseModel):
     """Response from initiating a resumable upload"""

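A standalone sketch of how the new validator behaves, using a hypothetical HashedInitRequest model with only the relevant fields (the real ResumableUploadInitRequest has more):

# Sketch only: hypothetical model mirroring the validator added above.
import re

from pydantic import BaseModel, ValidationError, field_validator


class HashedInitRequest(BaseModel):  # stand-in for ResumableUploadInitRequest
    expected_hash: str
    size: int

    @field_validator("expected_hash")
    @classmethod
    def validate_sha256_hash(cls, v: str) -> str:
        if not re.match(r"^[a-f0-9]{64}$", v.lower()):
            raise ValueError("expected_hash must be a 64-character hexadecimal SHA256 hash")
        return v.lower()  # Normalize to lowercase


# Mixed-case input is accepted and normalized to lowercase.
req = HashedInitRequest(expected_hash="A" * 32 + "b" * 32, size=1024)
assert req.expected_hash == "a" * 32 + "b" * 32

# Anything that is not exactly 64 hex characters fails validation.
try:
    HashedInitRequest(expected_hash="deadbeef", size=1024)
except ValidationError as exc:
    print(exc.errors()[0]["msg"])

Normalizing to lowercase means later comparisons against hashes computed server-side (hexdigest() emits lowercase) do not depend on client casing.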

@@ -176,6 +176,12 @@ class HashComputationError(StorageError):
 
     pass
 
 
+class FileSizeExceededError(StorageError):
+    """Raised when file exceeds maximum size during upload"""
+
+    pass
+
+
 class S3ExistenceCheckError(StorageError):
     """Raised when S3 existence check fails after retries"""
@@ -261,6 +267,7 @@ class S3Storage:
 
         Raises:
             HashComputationError: If hash computation fails
+            FileSizeExceededError: If file exceeds maximum size
             S3ExistenceCheckError: If S3 existence check fails after retries
             S3UploadError: If S3 upload fails
         """
@@ -270,11 +277,18 @@ class S3Storage:
             if not content:
                 raise HashComputationError("Empty file content")
 
+            size = len(content)
+
+            # Enforce file size limit (protection against Content-Length spoofing)
+            if size > settings.max_file_size:
+                raise FileSizeExceededError(
+                    f"File size {size} exceeds maximum {settings.max_file_size}"
+                )
+
             sha256_hash = hashlib.sha256(content).hexdigest()
             md5_hash = hashlib.md5(content).hexdigest()
             sha1_hash = hashlib.sha1(content).hexdigest()
-            size = len(content)
-        except HashComputationError:
+        except (HashComputationError, FileSizeExceededError):
             raise
         except Exception as e:
             logger.error(f"Hash computation failed: {e}")
@@ -349,6 +363,7 @@ class S3Storage:
 
         Raises:
             HashComputationError: If hash computation fails
+            FileSizeExceededError: If file exceeds maximum size
             S3ExistenceCheckError: If S3 existence check fails after retries
             S3UploadError: If S3 upload fails
         """
@@ -369,13 +384,19 @@ class S3Storage:
                 sha1_hasher.update(chunk)
                 size += len(chunk)
 
+                # Enforce file size limit during streaming (protection against spoofing)
+                if size > settings.max_file_size:
+                    raise FileSizeExceededError(
+                        f"File size exceeds maximum {settings.max_file_size}"
+                    )
+
             if size == 0:
                 raise HashComputationError("Empty file content")
 
             sha256_hash = sha256_hasher.hexdigest()
             md5_hash = md5_hasher.hexdigest()
             sha1_hash = sha1_hasher.hexdigest()
-        except HashComputationError:
+        except (HashComputationError, FileSizeExceededError):
             raise
         except Exception as e:
             logger.error(f"Hash computation failed for multipart upload: {e}")
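
The pattern behind both storage-layer checks, pulled into a self-contained sketch: the running byte count comes from the chunks actually read, and the limit is enforced inside the loop, so a spoofed Content-Length header cannot bypass it. hash_stream, MAX_FILE_SIZE, and CHUNK_SIZE are illustrative names; the real code also computes MD5/SHA1 and uploads to S3.

# Sketch only: illustrative names and a trimmed-down version of the streaming check.
import hashlib
from typing import Iterable, Tuple

MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024  # stand-in for settings.max_file_size
CHUNK_SIZE = 8 * 1024 * 1024             # stand-in for MULTIPART_CHUNK_SIZE


class FileSizeExceededError(Exception):
    """Local stand-in for the storage-layer exception."""


def hash_stream(chunks: Iterable[bytes]) -> Tuple[str, int]:
    """Return (sha256 hex digest, total bytes), aborting as soon as the stream grows too large."""
    sha256 = hashlib.sha256()
    size = 0
    for chunk in chunks:
        sha256.update(chunk)
        size += len(chunk)
        # Checked on bytes actually received, inside the loop, so the limit holds
        # even if the client lies about Content-Length or never ends the stream.
        if size > MAX_FILE_SIZE:
            raise FileSizeExceededError(f"File size exceeds maximum {MAX_FILE_SIZE}")
    if size == 0:
        raise ValueError("Empty file content")
    return sha256.hexdigest(), size


# Example: a small in-memory stream of three chunks.
digest, total = hash_stream([b"a" * CHUNK_SIZE, b"b" * CHUNK_SIZE, b"tail"])
print(digest, total)

Aborting mid-stream also avoids hashing and buffering the remainder of an oversized upload.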