@@ -378,10 +378,16 @@ class S3Storage:
        """
        # First pass: compute all hashes by streaming through file
        try:
            import time
            sha256_hasher = hashlib.sha256()
            md5_hasher = hashlib.md5()
            sha1_hasher = hashlib.sha1()
            size = 0
            hash_start_time = time.time()
            last_log_time = hash_start_time
            log_interval_seconds = 5  # Log progress every 5 seconds

            logger.info(f"Computing hashes for large file: expected_size={content_length}")

            # Read file in chunks to compute hashes
            while True:
@@ -393,6 +399,18 @@ class S3Storage:
                sha1_hasher.update(chunk)
                size += len(chunk)

                # Log hash computation progress periodically
                current_time = time.time()
                if current_time - last_log_time >= log_interval_seconds:
                    elapsed = current_time - hash_start_time
                    percent = (size / content_length) * 100 if content_length > 0 else 0
                    throughput = (size / (1024 * 1024)) / elapsed if elapsed > 0 else 0
                    logger.info(
                        f"Hash computation progress: bytes={size}/{content_length} ({percent:.1f}%) "
                        f"throughput={throughput:.2f}MB/s"
                    )
                    last_log_time = current_time

                # Enforce file size limit during streaming (protection against spoofing)
                if size > settings.max_file_size:
                    raise FileSizeExceededError(
@@ -405,6 +423,14 @@ class S3Storage:
            sha256_hash = sha256_hasher.hexdigest()
            md5_hash = md5_hasher.hexdigest()
            sha1_hash = sha1_hasher.hexdigest()

            # Log hash computation completion
            hash_elapsed = time.time() - hash_start_time
            hash_throughput = (size / (1024 * 1024)) / hash_elapsed if hash_elapsed > 0 else 0
            logger.info(
                f"Hash computation completed: hash={sha256_hash[:16]}... "
                f"size={size} duration={hash_elapsed:.2f}s throughput={hash_throughput:.2f}MB/s"
            )
        except (HashComputationError, FileSizeExceededError):
            raise
        except Exception as e:
@@ -458,8 +484,19 @@ class S3Storage:
        upload_id = mpu["UploadId"]

        try:
            import time
            parts = []
            part_number = 1
            bytes_uploaded = 0
            upload_start_time = time.time()
            last_log_time = upload_start_time
            log_interval_seconds = 5  # Log progress every 5 seconds

            total_parts = (content_length + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE
            logger.info(
                f"Starting multipart upload: hash={sha256_hash[:16]}... "
                f"size={content_length} parts={total_parts}"
            )

            while True:
                chunk = file.read(MULTIPART_CHUNK_SIZE)
@@ -479,8 +516,32 @@ class S3Storage:
                        "ETag": response["ETag"],
                    }
                )
                bytes_uploaded += len(chunk)

                # Log progress periodically
                current_time = time.time()
                if current_time - last_log_time >= log_interval_seconds:
                    elapsed = current_time - upload_start_time
                    percent = (bytes_uploaded / content_length) * 100
                    throughput = (bytes_uploaded / (1024 * 1024)) / elapsed if elapsed > 0 else 0
                    logger.info(
                        f"Upload progress: hash={sha256_hash[:16]}... "
                        f"part={part_number}/{total_parts} "
                        f"bytes={bytes_uploaded}/{content_length} ({percent:.1f}%) "
                        f"throughput={throughput:.2f}MB/s"
                    )
                    last_log_time = current_time

                part_number += 1

            # Log completion
            total_elapsed = time.time() - upload_start_time
            final_throughput = (content_length / (1024 * 1024)) / total_elapsed if total_elapsed > 0 else 0
            logger.info(
                f"Multipart upload completed: hash={sha256_hash[:16]}... "
                f"size={content_length} duration={total_elapsed:.2f}s throughput={final_throughput:.2f}MB/s"
            )

            # Complete multipart upload
            complete_response = self.client.complete_multipart_upload(
                Bucket=self.bucket,
@@ -502,12 +563,28 @@ class S3Storage:

        except Exception as e:
            # Abort multipart upload on failure
            logger.error(f"Multipart upload failed: {e}")
            self.client.abort_multipart_upload(
                Bucket=self.bucket,
                Key=s3_key,
                UploadId=upload_id,
            error_str = str(e).lower()
            is_client_disconnect = (
                isinstance(e, (ConnectionResetError, BrokenPipeError)) or
                "connection" in error_str or "broken pipe" in error_str or "reset" in error_str
            )
            if is_client_disconnect:
                logger.warning(
                    f"Multipart upload aborted (client disconnect): hash={sha256_hash[:16]}... "
                    f"parts_uploaded={len(parts)} bytes_uploaded={bytes_uploaded}"
                )
            else:
                logger.error(f"Multipart upload failed: hash={sha256_hash[:16]}... error={e}")

            try:
                self.client.abort_multipart_upload(
                    Bucket=self.bucket,
                    Key=s3_key,
                    UploadId=upload_id,
                )
                logger.info(f"Multipart upload aborted and cleaned up: upload_id={upload_id[:16]}...")
            except Exception as abort_error:
                logger.error(f"Failed to abort multipart upload: {abort_error}")
            raise

    def initiate_resumable_upload(self, expected_hash: str) -> Dict[str, Any]:
@@ -529,12 +606,17 @@ class S3Storage:
        mpu = self.client.create_multipart_upload(Bucket=self.bucket, Key=s3_key)
        upload_id = mpu["UploadId"]

        import time
        session = {
            "upload_id": upload_id,
            "s3_key": s3_key,
            "already_exists": False,
            "parts": [],
            "expected_hash": expected_hash,
            "started_at": time.time(),
            "bytes_uploaded": 0,
            "expected_size": None,  # Set when init provides size
            "status": "in_progress",
        }
        self._active_uploads[upload_id] = session
        return session
@@ -561,10 +643,57 @@ class S3Storage:
        part_info = {
            "PartNumber": part_number,
            "ETag": response["ETag"],
            "size": len(data),
        }
        session["parts"].append(part_info)
        session["bytes_uploaded"] = session.get("bytes_uploaded", 0) + len(data)
        return part_info

    def get_upload_progress(self, upload_id: str) -> Optional[Dict[str, Any]]:
        """
        Get progress information for a resumable upload.
        Returns None if upload not found.
        """
        import time
        session = self._active_uploads.get(upload_id)
        if not session:
            return None

        bytes_uploaded = session.get("bytes_uploaded", 0)
        expected_size = session.get("expected_size")
        started_at = session.get("started_at")

        progress = {
            "upload_id": upload_id,
            "status": session.get("status", "in_progress"),
            "bytes_uploaded": bytes_uploaded,
            "bytes_total": expected_size,
            "parts_uploaded": len(session.get("parts", [])),
            "parts_total": None,
            "started_at": started_at,
            "elapsed_seconds": None,
            "percent_complete": None,
            "throughput_mbps": None,
        }

        if expected_size and expected_size > 0:
            progress["percent_complete"] = round((bytes_uploaded / expected_size) * 100, 2)
            progress["parts_total"] = (expected_size + MULTIPART_CHUNK_SIZE - 1) // MULTIPART_CHUNK_SIZE

        if started_at:
            elapsed = time.time() - started_at
            progress["elapsed_seconds"] = round(elapsed, 2)
            if elapsed > 0 and bytes_uploaded > 0:
                progress["throughput_mbps"] = round((bytes_uploaded / (1024 * 1024)) / elapsed, 2)

        return progress

    def set_upload_expected_size(self, upload_id: str, size: int):
        """Set the expected size for an upload (for progress tracking)."""
        session = self._active_uploads.get(upload_id)
        if session:
            session["expected_size"] = size

    def complete_resumable_upload(self, upload_id: str) -> Tuple[str, str]:
        """
        Complete a resumable upload.
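For context, a minimal sketch of how a caller might drive the progress-tracking methods added in this diff. The helper function, its arguments, and the way `storage` is obtained are illustrative assumptions, not part of the commit; only `initiate_resumable_upload`, `set_upload_expected_size`, and `get_upload_progress` come from the code above.

# Hypothetical usage sketch (not part of the commit): `storage` is assumed
# to be an already-constructed S3Storage instance.
def start_and_report(storage, expected_hash: str, expected_size: int) -> dict:
    # Create the multipart session and record the size used for progress math.
    session = storage.initiate_resumable_upload(expected_hash=expected_hash)
    storage.set_upload_expected_size(session["upload_id"], expected_size)

    # ... parts would be uploaded here through the session-based API ...

    # Poll progress; get_upload_progress returns None for unknown upload IDs.
    progress = storage.get_upload_progress(session["upload_id"])
    return progress or {}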