This commit is contained in:
@@ -22,6 +22,13 @@ from botocore.exceptions import (
|
||||
)
|
||||
|
||||
from .config import get_settings
|
||||
from .checksum import (
|
||||
ChecksumMismatchError,
|
||||
HashingStreamWrapper,
|
||||
VerifyingStreamWrapper,
|
||||
compute_sha256,
|
||||
is_valid_sha256,
|
||||
)
|
||||
|
||||
settings = get_settings()
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -876,6 +883,95 @@ class S3Storage:
|
||||
logger.error(f"Unexpected error during storage health check: {e}")
|
||||
return False
|
||||
|
||||
def get_verified(self, s3_key: str, expected_hash: str) -> bytes:
    """
    Download an object and verify it against an expected SHA256 hash.

    The whole object is fetched with ``self.get``, hashed with
    ``compute_sha256``, and returned only when the computed digest equals
    the expected one. The expected hash is lower-cased before comparison.

    Args:
        s3_key: The S3 storage key of the file
        expected_hash: Expected SHA256 hash (64 hex characters)

    Returns:
        File content as bytes (only if verification passes)

    Raises:
        ValueError: If expected_hash is invalid format
        ChecksumMismatchError: If computed hash doesn't match expected
        ClientError: If S3 operation fails
    """
    # Reject malformed hashes before doing any download work.
    if not is_valid_sha256(expected_hash):
        raise ValueError(f"Invalid SHA256 hash format: {expected_hash}")

    # Normalize once so the comparison and the error report agree.
    normalized_expected = expected_hash.lower()

    content = self.get(s3_key)
    actual_hash = compute_sha256(content)

    if actual_hash != normalized_expected:
        raise ChecksumMismatchError(
            expected=normalized_expected,
            actual=actual_hash,
            s3_key=s3_key,
            size=len(content),
        )

    # Deferred %-formatting: the message is only built when DEBUG is enabled.
    logger.debug("Verification passed for %s: %s...", s3_key, actual_hash[:16])
    return content
|
||||
|
||||
def get_stream_verified(
    self,
    s3_key: str,
    expected_hash: str,
    range_header: Optional[str] = None,
) -> Tuple[VerifyingStreamWrapper, int, Optional[str]]:
    """
    Get a hashing/verifying stream wrapper for an object.

    For full-object requests, returns a wrapper that computes the hash as
    chunks are read and can verify after streaming completes. Note that
    verification happens AFTER content has been streamed to the client.

    IMPORTANT: For range requests, verification is not supported because
    we cannot verify a partial download against the full file hash. In
    that case a plain HashingStreamWrapper is returned instead of a
    VerifyingStreamWrapper, and the caller should not attempt to verify.

    Args:
        s3_key: The S3 storage key of the file
        expected_hash: Expected SHA256 hash (64 hex characters)
        range_header: Optional HTTP Range header (verification disabled if set)

    Returns:
        Tuple of (wrapper, content_length, content_range).
        Full-object requests: wrapper is a VerifyingStreamWrapper, which
        has a verify() method to call after streaming.
        Range requests (range_header given, or the underlying stream
        reports a content_range): wrapper is a HashingStreamWrapper.

    Raises:
        ValueError: If expected_hash is invalid format
        ClientError: If S3 operation fails
    """
    # Validate first so no S3 stream is opened for a malformed hash.
    if not is_valid_sha256(expected_hash):
        raise ValueError(f"Invalid SHA256 hash format: {expected_hash}")

    # Get the S3 stream
    stream, content_length, content_range = self.get_stream(s3_key, range_header)

    # For range requests, we cannot verify (partial content), so hand back
    # a basic hashing wrapper that the caller should not verify.
    if range_header or content_range:
        logger.debug(
            "Range request for %s - verification disabled (partial content)",
            s3_key,
        )
        return HashingStreamWrapper(stream), content_length, content_range

    # Lower-case the expected hash so the streaming path compares against the
    # same normalized form that get_verified uses.
    # NOTE(review): VerifyingStreamWrapper may already normalize - confirm.
    verifying_wrapper = VerifyingStreamWrapper(
        stream=stream,
        expected_hash=expected_hash.lower(),
        s3_key=s3_key,
    )
    return verifying_wrapper, content_length, content_range
|
||||
|
||||
def verify_integrity(self, s3_key: str, expected_sha256: str) -> bool:
|
||||
"""
|
||||
Verify the integrity of a stored object by downloading and re-hashing.
|
||||
|
||||
Reference in New Issue
Block a user