diff --git a/.gitignore b/.gitignore index ddf293e..4dbb618 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,10 @@ Thumbs.db # Build /build/ /dist/ +frontend/dist/ + +# Node +node_modules/ # Local config overrides config.local.yaml diff --git a/backend/app/routes.py b/backend/app/routes.py index 311d6ab..6bf87f3 100644 --- a/backend/app/routes.py +++ b/backend/app/routes.py @@ -28,6 +28,7 @@ from .storage import ( MULTIPART_CHUNK_SIZE, StorageError, HashComputationError, + FileSizeExceededError, S3ExistenceCheckError, S3UploadError, S3StorageUnavailableError, @@ -1033,6 +1034,12 @@ def upload_artifact( status_code=500, detail="Data integrity error detected. Please contact support.", ) + except FileSizeExceededError as e: + logger.warning(f"File size exceeded during upload: {e}") + raise HTTPException( + status_code=413, + detail=f"File too large. Maximum size is {settings.max_file_size // (1024 * 1024 * 1024)}GB", + ) except StorageError as e: logger.error(f"Storage error during upload: {e}") raise HTTPException(status_code=500, detail="Internal storage error") diff --git a/backend/app/schemas.py b/backend/app/schemas.py index 203c842..4c7db29 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -1,6 +1,6 @@ from datetime import datetime from typing import Optional, List, Dict, Any, Generic, TypeVar -from pydantic import BaseModel +from pydantic import BaseModel, field_validator from uuid import UUID T = TypeVar("T") @@ -266,6 +266,18 @@ class ResumableUploadInitRequest(BaseModel): size: int tag: Optional[str] = None + @field_validator("expected_hash") + @classmethod + def validate_sha256_hash(cls, v: str) -> str: + """Validate that expected_hash is a valid 64-character lowercase hex SHA256 hash.""" + import re + + if not re.match(r"^[a-f0-9]{64}$", v.lower()): + raise ValueError( + "expected_hash must be a valid 64-character lowercase hexadecimal SHA256 hash" + ) + return v.lower() # Normalize to lowercase + class ResumableUploadInitResponse(BaseModel): """Response from initiating a resumable upload""" diff --git a/backend/app/storage.py b/backend/app/storage.py index 6944855..ce7b142 100644 --- a/backend/app/storage.py +++ b/backend/app/storage.py @@ -176,6 +176,12 @@ class HashComputationError(StorageError): pass +class FileSizeExceededError(StorageError): + """Raised when file exceeds maximum size during upload""" + + pass + + class S3ExistenceCheckError(StorageError): """Raised when S3 existence check fails after retries""" @@ -261,6 +267,7 @@ class S3Storage: Raises: HashComputationError: If hash computation fails + FileSizeExceededError: If file exceeds maximum size S3ExistenceCheckError: If S3 existence check fails after retries S3UploadError: If S3 upload fails """ @@ -270,11 +277,18 @@ class S3Storage: if not content: raise HashComputationError("Empty file content") + size = len(content) + + # Enforce file size limit (protection against Content-Length spoofing) + if size > settings.max_file_size: + raise FileSizeExceededError( + f"File size {size} exceeds maximum {settings.max_file_size}" + ) + sha256_hash = hashlib.sha256(content).hexdigest() md5_hash = hashlib.md5(content).hexdigest() sha1_hash = hashlib.sha1(content).hexdigest() - size = len(content) - except HashComputationError: + except (HashComputationError, FileSizeExceededError): raise except Exception as e: logger.error(f"Hash computation failed: {e}") @@ -349,6 +363,7 @@ class S3Storage: Raises: HashComputationError: If hash computation fails + FileSizeExceededError: If file exceeds maximum size S3ExistenceCheckError: If S3 existence check fails after retries S3UploadError: If S3 upload fails """ @@ -369,13 +384,19 @@ class S3Storage: sha1_hasher.update(chunk) size += len(chunk) + # Enforce file size limit during streaming (protection against spoofing) + if size > settings.max_file_size: + raise FileSizeExceededError( + f"File size exceeds maximum {settings.max_file_size}" + ) + if size == 0: raise HashComputationError("Empty file content") sha256_hash = sha256_hasher.hexdigest() md5_hash = md5_hasher.hexdigest() sha1_hash = sha1_hasher.hexdigest() - except HashComputationError: + except (HashComputationError, FileSizeExceededError): raise except Exception as e: logger.error(f"Hash computation failed for multipart upload: {e}")