Add large file upload enhancements and tests (#43)
- Add upload duration/throughput metrics (duration_ms, throughput_mbps) to response
- Add upload progress logging for large files (hash computation and multipart upload)
- Add client disconnect handling during uploads with proper cleanup
- Add upload progress tracking endpoint GET /upload/{upload_id}/progress
- Add large file upload tests (10MB, 100MB, 1GB)
- Add upload cancellation and timeout handling tests
- Add API documentation for upload endpoints with curl, Python, JavaScript examples
@@ -82,6 +82,7 @@ from .schemas import (
    ResumableUploadCompleteRequest,
    ResumableUploadCompleteResponse,
    ResumableUploadStatusResponse,
    UploadProgressResponse,
    GlobalSearchResponse,
    SearchResultProject,
    SearchResultPackage,
@@ -2283,10 +2284,56 @@ def upload_artifact(
    """
    Upload an artifact to a package.

    **Size Limits:**
    - Minimum: 1 byte (empty files rejected)
    - Maximum: 10GB (configurable via ORCHARD_MAX_FILE_SIZE)
    - Files > 100MB automatically use S3 multipart upload

    **Headers:**
    - `X-Checksum-SHA256`: Optional SHA256 hash for server-side verification
    - `Content-Length`: File size (required for early rejection of oversized files)
    - `Authorization`: Bearer <api-key> for authentication

    **Deduplication:**
    Content-addressable storage automatically deduplicates identical files.
    If the same content is uploaded multiple times, only one copy is stored.

    **Response Metrics:**
    - `duration_ms`: Upload duration in milliseconds
    - `throughput_mbps`: Upload throughput in MB/s
    - `deduplicated`: True if content already existed

    **Example (curl):**
    ```bash
    curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload" \\
        -H "Authorization: Bearer <api-key>" \\
        -F "file=@myfile.tar.gz" \\
        -F "tag=v1.0.0"
    ```

    **Example (Python requests):**
    ```python
    import requests
    with open('myfile.tar.gz', 'rb') as f:
        response = requests.post(
            'http://localhost:8080/api/v1/project/myproject/mypackage/upload',
            files={'file': f},
            data={'tag': 'v1.0.0'},
            headers={'Authorization': 'Bearer <api-key>'}
        )
    ```

    **Example (JavaScript fetch):**
    ```javascript
    const formData = new FormData();
    formData.append('file', fileInput.files[0]);
    formData.append('tag', 'v1.0.0');
    const response = await fetch('/api/v1/project/myproject/mypackage/upload', {
        method: 'POST',
        headers: { 'Authorization': 'Bearer <api-key>' },
        body: formData
    });
    ```
    """
    start_time = time.time()
    settings = get_settings()
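The `X-Checksum-SHA256` header documented above is optional; here is a minimal client-side sketch of computing and sending it, assuming the same example URL, tag, and key used in the docstring (the helper name is ours, not part of the commit):

```python
import hashlib
import requests

URL = "http://localhost:8080/api/v1/project/myproject/mypackage/upload"  # assumed

def upload_with_checksum(path: str) -> requests.Response:
    # Stream the file once to compute the digest the server will verify.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(block)
    with open(path, "rb") as f:
        return requests.post(
            URL,
            files={"file": f},
            data={"tag": "v1.0.0"},
            headers={
                "Authorization": "Bearer <api-key>",
                "X-Checksum-SHA256": digest.hexdigest(),
            },
        )
```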
@@ -2388,6 +2435,30 @@ def upload_artifact(
    except StorageError as e:
        logger.error(f"Storage error during upload: {e}")
        raise HTTPException(status_code=500, detail="Internal storage error")
    except (ConnectionResetError, BrokenPipeError) as e:
        # Client disconnected during upload
        logger.warning(
            f"Client disconnected during upload: project={project_name} "
            f"package={package_name} filename={file.filename} error={e}"
        )
        raise HTTPException(
            status_code=499,  # Client Closed Request (nginx convention)
            detail="Client disconnected during upload",
        )
    except Exception as e:
        # Catch-all for unexpected errors, including client disconnects
        error_str = str(e).lower()
        if "connection" in error_str or "broken pipe" in error_str or "reset" in error_str:
            logger.warning(
                f"Client connection error during upload: project={project_name} "
                f"package={package_name} filename={file.filename} error={e}"
            )
            raise HTTPException(
                status_code=499,
                detail="Client connection error during upload",
            )
        logger.error(f"Unexpected error during upload: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during upload")

    # Verify client-provided checksum if present
    checksum_verified = True
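The commit message mentions disconnect and cancellation tests, but they are not part of this excerpt. A hypothetical pytest sketch of how the 499 branch above could be exercised; the `orchard.api` import path, the `app` object, and the stub storage are all assumptions, and a real test would also need project/package fixtures:

```python
from fastapi.testclient import TestClient

from orchard.api import app, get_storage  # hypothetical import path

class DisconnectingStorage:
    """Stub whose every method raises as if the client dropped the socket."""
    def __getattr__(self, name):
        def raiser(*args, **kwargs):
            raise ConnectionResetError("simulated client disconnect")
        return raiser

def test_upload_client_disconnect_maps_to_499():
    app.dependency_overrides[get_storage] = lambda: DisconnectingStorage()
    try:
        client = TestClient(app, raise_server_exceptions=False)
        resp = client.post(
            "/api/v1/project/myproject/mypackage/upload",
            files={"file": ("f.bin", b"x" * 1024)},
            headers={"Authorization": "Bearer <api-key>"},
        )
        assert resp.status_code == 499
    finally:
        app.dependency_overrides.clear()
```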
@@ -2580,6 +2651,12 @@ def upload_artifact(
            detail="Failed to save upload record. Please retry.",
        )

    # Calculate throughput
    throughput_mbps = None
    if duration_ms > 0:
        duration_seconds = duration_ms / 1000.0
        throughput_mbps = round((storage_result.size / (1024 * 1024)) / duration_seconds, 2)

    return UploadResponse(
        artifact_id=storage_result.sha256,
        sha256=storage_result.sha256,
@@ -2599,6 +2676,8 @@ def upload_artifact(
        content_type=artifact.content_type,
        original_name=artifact.original_name,
        created_at=artifact.created_at,
        duration_ms=duration_ms,
        throughput_mbps=throughput_mbps,
    )

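As a worked instance of the formula above (size in MiB divided by elapsed seconds), with assumed inputs:

```python
# Worked example of the throughput calculation; inputs are illustrative.
size_bytes = 100 * 1024 * 1024  # storage_result.size for a 100 MiB artifact
duration_ms = 8000              # measured upload duration

duration_seconds = duration_ms / 1000.0
throughput_mbps = round((size_bytes / (1024 * 1024)) / duration_seconds, 2)
print(throughput_mbps)  # 12.5
```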
@@ -2616,8 +2695,46 @@ def init_resumable_upload(
    storage: S3Storage = Depends(get_storage),
):
    """
    Initialize a resumable upload session for large files.

    Resumable uploads allow uploading large files in chunks, with the ability
    to resume after interruption. The client must compute the SHA256 hash
    of the entire file before starting.

    **Workflow:**
    1. POST /upload/init - Initialize upload session (this endpoint)
    2. PUT /upload/{upload_id}/part/{part_number} - Upload each part
    3. GET /upload/{upload_id}/progress - Check upload progress (optional)
    4. POST /upload/{upload_id}/complete - Finalize upload
    5. DELETE /upload/{upload_id} - Abort upload (if needed)

    **Chunk Size:**
    Use the `chunk_size` returned in the response (10MB default).
    Each part except the last must be exactly this size.

    **Deduplication:**
    If the expected_hash already exists in storage, the response will include
    `already_exists: true` and no upload session is created.

    **Example (curl):**
    ```bash
    # Step 1: Initialize
    curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/init" \\
        -H "Authorization: Bearer <api-key>" \\
        -H "Content-Type: application/json" \\
        -d '{"expected_hash": "<sha256>", "filename": "large.tar.gz", "size": 104857600}'

    # Step 2: Upload parts
    curl -X PUT "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/part/1" \\
        -H "Authorization: Bearer <api-key>" \\
        --data-binary @part1.bin

    # Step 3: Complete
    curl -X POST "http://localhost:8080/api/v1/project/myproject/mypackage/upload/<upload_id>/complete" \\
        -H "Authorization: Bearer <api-key>" \\
        -H "Content-Type: application/json" \\
        -d '{"tag": "v1.0.0"}'
    ```
    """
    user_id = get_user_id(request)

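A hedged end-to-end client sketch of the five-step workflow documented above, relying only on the request/response fields named in the docstring (`expected_hash`, `size`, `chunk_size`, `already_exists`, `upload_id`); the base URL, key, and helper name are illustrative assumptions:

```python
import hashlib
import requests

BASE = "http://localhost:8080/api/v1/project/myproject/mypackage"  # assumed
HEADERS = {"Authorization": "Bearer <api-key>"}

def resumable_upload(path: str, tag: str) -> None:
    # Step 0: hash the whole file up front, as the endpoint requires.
    sha256 = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(1024 * 1024), b""):
            sha256.update(block)
            size += len(block)

    # Step 1: initialize the session.
    init = requests.post(
        f"{BASE}/upload/init",
        json={"expected_hash": sha256.hexdigest(), "filename": path, "size": size},
        headers=HEADERS,
    ).json()
    if init.get("already_exists"):
        return  # deduplicated: the server already has this content
    upload_id = init["upload_id"]
    chunk_size = init["chunk_size"]  # 10MB default per the docstring

    # Step 2: upload fixed-size parts (only the last part may be smaller).
    with open(path, "rb") as f:
        part_number = 1
        while chunk := f.read(chunk_size):
            requests.put(
                f"{BASE}/upload/{upload_id}/part/{part_number}",
                data=chunk,
                headers=HEADERS,
            ).raise_for_status()
            part_number += 1

    # Step 3: finalize the upload.
    requests.post(
        f"{BASE}/upload/{upload_id}/complete",
        json={"tag": tag},
        headers=HEADERS,
    ).raise_for_status()
```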
@@ -2711,6 +2828,10 @@ def init_resumable_upload(
    # Initialize resumable upload
    session = storage.initiate_resumable_upload(init_request.expected_hash)

    # Set expected size for progress tracking
    if session["upload_id"] and init_request.size:
        storage.set_upload_expected_size(session["upload_id"], init_request.size)

    return ResumableUploadInitResponse(
        upload_id=session["upload_id"],
        already_exists=False,
@@ -2777,6 +2898,64 @@ def upload_part(
        raise HTTPException(status_code=404, detail=str(e))

@router.get(
    "/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/progress",
    response_model=UploadProgressResponse,
)
def get_upload_progress(
    project_name: str,
    package_name: str,
    upload_id: str,
    db: Session = Depends(get_db),
    storage: S3Storage = Depends(get_storage),
):
    """
    Get progress information for an in-flight resumable upload.

    Returns progress metrics including bytes uploaded, percent complete,
    elapsed time, and throughput.
    """
    # Validate that the project and package exist
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    progress = storage.get_upload_progress(upload_id)
    if not progress:
        # Return a not_found status instead of 404 to allow polling
        return UploadProgressResponse(
            upload_id=upload_id,
            status="not_found",
            bytes_uploaded=0,
        )

    from datetime import datetime, timezone

    started_at_dt = None
    if progress.get("started_at"):
        started_at_dt = datetime.fromtimestamp(progress["started_at"], tz=timezone.utc)

    return UploadProgressResponse(
        upload_id=upload_id,
        status=progress.get("status", "in_progress"),
        bytes_uploaded=progress.get("bytes_uploaded", 0),
        bytes_total=progress.get("bytes_total"),
        percent_complete=progress.get("percent_complete"),
        parts_uploaded=progress.get("parts_uploaded", 0),
        parts_total=progress.get("parts_total"),
        started_at=started_at_dt,
        elapsed_seconds=progress.get("elapsed_seconds"),
        throughput_mbps=progress.get("throughput_mbps"),
    )

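A small companion sketch for polling this endpoint while an upload runs elsewhere; it leans on the not-found-instead-of-404 behavior noted in the code above, and the helper name and arguments are ours:

```python
import time
import requests

def wait_for_upload(base_url: str, upload_id: str, api_key: str) -> None:
    """Poll the progress endpoint until the upload leaves in_progress."""
    while True:
        info = requests.get(
            f"{base_url}/upload/{upload_id}/progress",
            headers={"Authorization": f"Bearer {api_key}"},
        ).json()
        if info["status"] != "in_progress":
            break  # "not_found" is a normal polling answer here, not an error
        print(f"{info.get('percent_complete') or 0:.1f}% "
              f"at {info.get('throughput_mbps') or 0} MB/s")
        time.sleep(2)
```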
@router.post(
    "/api/v1/project/{project_name}/{package_name}/upload/{upload_id}/complete"
)