From 72708bbf910eb50dd83be3df4d9652d39fc7727a Mon Sep 17 00:00:00 2001
From: pratik
Date: Wed, 22 Oct 2025 09:18:11 -0500
Subject: [PATCH] Additional files

---
 CHUNK_SIZE_GUIDE.md | 112 +++++++
 MEMORY_FIX.md | 150 ++++++++++
 TIMEOUT_SOLUTION.md | 281 ++++++++++++++++++
 .../cfdeployer/config/MultipartConfig.java | 26 ++
 .../cfdeployer/model/DeploymentStatus.java | 27 ++
 .../service/AsyncDeploymentService.java | 99 ++++++
 6 files changed, 695 insertions(+)
 create mode 100644 CHUNK_SIZE_GUIDE.md
 create mode 100644 MEMORY_FIX.md
 create mode 100644 TIMEOUT_SOLUTION.md
 create mode 100644 src/main/java/com/cfdeployer/config/MultipartConfig.java
 create mode 100644 src/main/java/com/cfdeployer/model/DeploymentStatus.java
 create mode 100644 src/main/java/com/cfdeployer/service/AsyncDeploymentService.java

diff --git a/CHUNK_SIZE_GUIDE.md b/CHUNK_SIZE_GUIDE.md
new file mode 100644
index 0000000..c91d571
--- /dev/null
+++ b/CHUNK_SIZE_GUIDE.md
@@ -0,0 +1,112 @@
+# Chunk Size Quick Reference
+
+## TL;DR
+
+**You control the chunk size in your upload script.** The server doesn't care what size you use - it accepts ANY chunk size and reassembles the chunks sequentially.
+
+## Recommended Sizes
+
+| Environment | Chunk Size | Reason |
+|-------------|-----------|--------|
+| **Tanzu (low memory)** | **1MB** | Safe for 10MB direct memory limit |
+| **Strict nginx** | **512KB - 1MB** | Works with any nginx config |
+| **Normal setup** | **2-5MB** | Good balance of speed vs safety |
+| **High bandwidth** | **5-10MB** | Faster uploads, fewer requests |
+
+## Setting Chunk Size
+
+### Bash Script
+```bash
+CHUNK_SIZE=1048576  # 1MB
+```
+
+### JavaScript
+```javascript
+const CHUNK_SIZE = 1 * 1024 * 1024; // 1MB
+```
+
+### Python
+```python
+CHUNK_SIZE = 1 * 1024 * 1024  # 1MB
+```
+
+## Common Sizes in Bytes
+
+| Size | Bytes | Setting |
+|------|-------|---------|
+| 100KB | 102,400 | `CHUNK_SIZE=102400` |
+| 256KB | 262,144 | `CHUNK_SIZE=262144` |
+| 512KB | 524,288 | `CHUNK_SIZE=524288` |
+| **1MB** | **1,048,576** | **`CHUNK_SIZE=1048576`** ✅ |
+| **2MB** | **2,097,152** | **`CHUNK_SIZE=2097152`** ✅ |
+| 5MB | 5,242,880 | `CHUNK_SIZE=5242880` |
+| 10MB | 10,485,760 | `CHUNK_SIZE=10485760` |
+
+## Trade-offs
+
+### Smaller Chunks (100KB - 1MB)
+✅ Less memory per request
+✅ Works with ANY nginx config
+✅ Safe for Tanzu low-memory instances
+❌ More HTTP requests
+❌ Slower overall upload
+
+### Larger Chunks (5MB - 10MB)
+✅ Fewer HTTP requests
+✅ Faster overall upload
+❌ More memory needed
+❌ May exceed nginx limits
+❌ Can cause OutOfMemoryError on Tanzu
+
+## For Your Tanzu Issue
+
+Based on your `OutOfMemoryError: Cannot reserve 10485760 bytes of direct buffer memory`:
+
+**Use 1MB chunks:**
+```bash
+CHUNK_SIZE=1048576  # 1MB
+```
+
+This keeps each request at roughly 1MB, well below your 10MB direct memory limit, leaving plenty of headroom for multiple concurrent requests and garbage collection delays.
+
+## Testing
+
+Quick test with different chunk sizes:
+
+```bash
+# Test with 512KB chunks
+CHUNK_SIZE=524288 ./deploy-chunked.sh
+
+# Test with 1MB chunks
+CHUNK_SIZE=1048576 ./deploy-chunked.sh
+
+# Test with 2MB chunks
+CHUNK_SIZE=2097152 ./deploy-chunked.sh
+```
+
+Watch the logs:
+```bash
+cf logs cf-deployer --recent | grep "Received chunk"
+```
+
+If you see OutOfMemoryError, use smaller chunks.
+
+## Rules
+
+1. **Chunks MUST be uploaded in order**: 0, 1, 2, 3... (enforced by server)
+2. **All chunks of the same file MUST use the same chunk size** (except the last chunk, which can be smaller)
+3. **Different files can use different chunk sizes** (jarFile vs manifest can differ)
+4. **Total chunks must be accurate**: Calculate as `ceil(file_size / chunk_size)`
+
+## Example
+
+For a 50MB JAR file:
+
+| Chunk Size | Number of Chunks | Total Requests |
+|-----------|-----------------|----------------|
+| 512KB | 100 chunks | ~100 requests |
+| 1MB | 50 chunks | ~50 requests |
+| 2MB | 25 chunks | ~25 requests |
+| 5MB | 10 chunks | ~10 requests |
+
+All work equally well - pick based on your constraints!
diff --git a/MEMORY_FIX.md b/MEMORY_FIX.md
new file mode 100644
index 0000000..139ca29
--- /dev/null
+++ b/MEMORY_FIX.md
@@ -0,0 +1,150 @@
+# Fix for OutOfMemoryError: Cannot reserve direct buffer memory
+
+## Problem
+```
+java.lang.OutOfMemoryError: Cannot reserve 10485760 bytes of direct buffer memory
+```
+
+This occurs because Tanzu's default JVM configuration allocates very little direct (off-heap) memory, and multipart file uploads use direct buffers.
+
+## Solutions Applied
+
+### 1. Code Changes (Already Applied)
+✅ **ChunkedUploadService.java** - Changed to stream chunks in 8KB buffers instead of loading the entire chunk into memory
+✅ **MultipartConfig.java** - Added configuration to write all uploads directly to disk (`file-size-threshold=0`)
+✅ **application.properties** - Reduced chunk size from 5MB to 2MB and enabled disk-based uploads
+
+### 2. Tanzu Manifest Configuration (You Need to Apply)
+
+**Option A: Set in manifest.yml**
+
+Create or update your `manifest.yml`:
+
+```yaml
+applications:
+- name: cf-deployer
+  memory: 1G
+  instances: 1
+  path: build/libs/cf-deployer.jar
+  buildpacks:
+    - java_buildpack
+  env:
+    # Increase direct memory allocation
+    JAVA_TOOL_OPTIONS: "-XX:MaxDirectMemorySize=256m -XX:+UseG1GC"
+    # Alternative if using Java Buildpack Memory Calculator
+    JBP_CONFIG_OPEN_JDK_JRE: '{ jre: { version: 17.+ }, memory_calculator: { memory_sizes: { metaspace: 128m, direct: 256m } } }'
+```
+
+Then deploy:
+```bash
+cf push
+```
+
+**Option B: Set environment variable directly**
+
+```bash
+# Increase direct memory to 256MB
+cf set-env cf-deployer JAVA_TOOL_OPTIONS "-XX:MaxDirectMemorySize=256m -XX:+UseG1GC"
+
+# Restage to apply changes
+cf restage cf-deployer
+```
+
+**Option C: Increase overall memory**
+
+If you have more memory available:
+```bash
+# Increase app memory to 2GB (gives more headroom)
+cf scale cf-deployer -m 2G
+
+# Or in manifest.yml
+memory: 2G
+```
+
+### 3. Client-Side Changes
+
+Update your client to use 2MB chunks instead of 5MB:
+
+**Bash script:**
+```bash
+CHUNK_SIZE=2097152  # 2MB instead of 5MB
+```
+
+**JavaScript:**
+```javascript
+const CHUNK_SIZE = 2 * 1024 * 1024; // 2MB
+```
+
+**Python:**
+```python
+CHUNK_SIZE = 2 * 1024 * 1024  # 2MB
+```
+
+## Verification
+
+After applying fixes, check the logs:
+
+```bash
+cf logs cf-deployer --recent
+```
+
+You should see successful chunk uploads:
+```
+2025-10-21 16:30:00 - Session xxx: Received chunk 1/50 for jarFile (2097152 bytes)
+2025-10-21 16:30:01 - Session xxx: Received chunk 2/50 for jarFile (2097152 bytes)
+```
+
+## Why This Works
+
+1. **`file-size-threshold=0`** - Spring writes uploads directly to disk instead of buffering in memory
+2. **Streaming chunks** - We read and write in 8KB buffers instead of loading the entire chunk
+3. **Smaller chunks** - 2MB chunks use less memory than 5MB chunks
+4. **Increased direct memory** - More headroom for the JVM's direct buffers
+5. **G1GC** - Better garbage collection for managing off-heap memory
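+
+As an illustration of point 2, a chunk can be appended to the assembled file through a small fixed buffer, so memory use stays flat regardless of the chunk size. This is only a minimal sketch of the idea, not the actual `ChunkedUploadService` code from this repo; the class and method names here are hypothetical:
+
+```java
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+
+public class ChunkStreamingSketch {
+
+    // Append one uploaded chunk to the target file using an 8KB buffer,
+    // so the whole chunk is never held in heap or direct memory at once.
+    public static void appendChunk(MultipartFile chunk, Path targetFile) throws IOException {
+        try (InputStream in = chunk.getInputStream();
+             OutputStream out = Files.newOutputStream(targetFile,
+                     StandardOpenOption.CREATE, StandardOpenOption.APPEND)) {
+            byte[] buffer = new byte[8192];
+            int bytesRead;
+            while ((bytesRead = in.read(buffer)) != -1) {
+                out.write(buffer, 0, bytesRead);
+            }
+        }
+    }
+}
+```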
+
+## Testing
+
+Test with a small file first:
+```bash
+# Create test session
+SESSION_ID=$(curl -s -X POST https://your-app.apps.cf.example.com/api/cf/upload/init \
+  -H "Content-Type: application/json" \
+  -d '{"apiEndpoint":"https://api.cf.example.com","username":"user","password":"pass","organization":"org","space":"space","appName":"test","skipSslValidation":false}' \
+  | grep -o '"uploadSessionId":"[^"]*' | cut -d'"' -f4)
+
+# Upload a 2MB chunk
+head -c 2097152 /dev/urandom > test-chunk.bin
+
+curl -X POST "https://your-app.apps.cf.example.com/api/cf/upload/chunk" \
+  -F "uploadSessionId=$SESSION_ID" \
+  -F "fileType=jarFile" \
+  -F "chunkIndex=0" \
+  -F "totalChunks=1" \
+  -F "fileName=test.jar" \
+  -F "chunk=@test-chunk.bin"
+```
+
+If this succeeds, the fix is working!
+
+## Recommended Tanzu Settings
+
+For production deployments handling large files:
+
+```yaml
+applications:
+- name: cf-deployer
+  memory: 2G          # Total memory
+  disk_quota: 2G      # Disk for temp files
+  instances: 2        # For high availability
+  health-check-type: http
+  health-check-http-endpoint: /actuator/health
+  env:
+    JAVA_TOOL_OPTIONS: "-XX:MaxDirectMemorySize=512m -XX:+UseG1GC -XX:MaxGCPauseMillis=200"
+    JBP_CONFIG_OPEN_JDK_JRE: '{ jre: { version: 17.+ }, memory_calculator: { memory_sizes: { direct: 512m, metaspace: 128m, reserved: 256m } } }'
+```
+
+This gives you:
+- 512MB direct memory (plenty for chunked uploads)
+- G1 garbage collector (better for large objects)
+- 2GB total memory (Java heap + direct + metaspace + overhead)
+- Health check endpoint for monitoring
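+
+## Equivalent application.properties Settings
+
+For reference, the multipart behaviour that `MultipartConfig.java` sets programmatically can also be expressed with standard Spring Boot properties. This is a sketch of plausible values, not a copy of this project's actual `application.properties`:
+
+```properties
+# Cap each chunk request at 10MB and spool every upload straight to disk
+spring.servlet.multipart.max-file-size=10MB
+spring.servlet.multipart.max-request-size=10MB
+spring.servlet.multipart.file-size-threshold=0
+```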
diff --git a/TIMEOUT_SOLUTION.md b/TIMEOUT_SOLUTION.md
new file mode 100644
index 0000000..6db67eb
--- /dev/null
+++ b/TIMEOUT_SOLUTION.md
@@ -0,0 +1,281 @@
+# Nginx Timeout Solution
+
+## The Problem
+
+### Chunking Solves Upload Timeouts ✅
+- Each chunk upload completes in seconds
+- Well under nginx's 30-second timeout
+- **No problem here!**
+
+### But Deployment Still Times Out ❌
+The `/upload/finalize` endpoint can take 3-5+ minutes because:
+1. CF login
+2. CF push (staging, building, deploying)
+3. CF logout
+
+This **WILL** hit nginx's 30-second timeout!
+
+## The Solution: Async Deployment
+
+Instead of waiting for deployment to complete, we return immediately and let the client poll for status.
+
+### Flow Comparison
+
+**Before (Times Out):**
+```
+Client → finalize → [waits 5 minutes] → ⏱️ NGINX TIMEOUT after 30s
+```
+
+**After (Works):**
+```
+Client → finalize → ✅ Returns immediately (202 Accepted)
+Client → poll status every 5s → IN_PROGRESS
+Client → poll status → IN_PROGRESS
+Client → poll status → COMPLETED ✅
+```
+
+## Updated API
+
+### 1. Initialize Upload (unchanged)
+```bash
+POST /api/cf/upload/init
+```
+
+### 2. Upload Chunks (unchanged)
+```bash
+POST /api/cf/upload/chunk
+```
+
+### 3. Finalize Upload (NEW: async by default)
+```bash
+POST /api/cf/upload/finalize?uploadSessionId={sessionId}&async=true
+```
+
+**Response (202 Accepted):**
+```json
+{
+  "uploadSessionId": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "IN_PROGRESS",
+  "message": "Deployment started. Use /deployment/status endpoint to check progress.",
+  "progress": 0
+}
+```
+
+### 4. Poll Deployment Status (NEW)
+```bash
+GET /api/cf/deployment/status/{uploadSessionId}
+```
+
+**Response while deploying:**
+```json
+{
+  "uploadSessionId": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "IN_PROGRESS",
+  "message": "Logging into Cloud Foundry...",
+  "progress": 10
+}
+```
+
+**Response when complete:**
+```json
+{
+  "uploadSessionId": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "COMPLETED",
+  "message": "Deployment completed successfully",
+  "output": "[full CF CLI output]",
+  "progress": 100
+}
+```
+
+**Response if failed:**
+```json
+{
+  "uploadSessionId": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "FAILED",
+  "message": "Deployment failed: ...",
+  "error": "[error details]",
+  "progress": 0
+}
+```
+
+## Updated Bash Script
+
+```bash
+#!/bin/bash
+
+API_BASE="http://your-app.example.com/api/cf"
+JAR_FILE="hf.jar"
+MANIFEST_FILE="manifest.yml"
+CHUNK_SIZE=1048576  # 1MB
+
+CF_CONFIG='{
+  "apiEndpoint": "https://api.cf.example.com",
+  "username": "your-username",
+  "password": "your-password",
+  "organization": "your-org",
+  "space": "your-space",
+  "appName": "your-app",
+  "skipSslValidation": false
+}'
+
+echo "=== Step 1: Initialize Upload Session ==="
+INIT_RESPONSE=$(curl -s -X POST "$API_BASE/upload/init" \
+  -H "Content-Type: application/json" \
+  -d "$CF_CONFIG")
+
+SESSION_ID=$(echo $INIT_RESPONSE | grep -o '"uploadSessionId":"[^"]*' | cut -d'"' -f4)
+echo "Session created: $SESSION_ID"
+
+# Function to upload file in chunks
+upload_file_in_chunks() {
+  local file_path=$1
+  local file_type=$2
+  local file_name=$(basename "$file_path")
+  local file_size=$(stat -f%z "$file_path" 2>/dev/null || stat -c%s "$file_path")
+  local total_chunks=$(( ($file_size + $CHUNK_SIZE - 1) / $CHUNK_SIZE ))
+
+  echo ""
+  echo "=== Uploading $file_type: $file_name ($total_chunks chunks) ==="
+
+  local temp_dir=$(mktemp -d)
+  split -b $CHUNK_SIZE "$file_path" "$temp_dir/chunk_"
+
+  local chunk_index=0
+  for chunk_file in "$temp_dir"/chunk_*; do
+    printf "Uploading chunk %3d/%3d... " "$((chunk_index + 1))" "$total_chunks"
+
+    RESPONSE=$(curl -s -X POST "$API_BASE/upload/chunk" \
+      -F "uploadSessionId=$SESSION_ID" \
+      -F "fileType=$file_type" \
+      -F "chunkIndex=$chunk_index" \
+      -F "totalChunks=$total_chunks" \
+      -F "fileName=$file_name" \
+      -F "chunk=@$chunk_file")
+
+    SUCCESS=$(echo $RESPONSE | grep -o '"success":[^,]*' | cut -d':' -f2)
+
+    if [ "$SUCCESS" != "true" ]; then
+      echo "FAILED"
+      echo "$RESPONSE"
+      rm -rf "$temp_dir"
+      exit 1
+    fi
+
+    echo "OK"
+    chunk_index=$((chunk_index + 1))
+  done
+
+  rm -rf "$temp_dir"
+  echo "$file_type upload completed"
+}
+
+# Step 2: Upload JAR file
+upload_file_in_chunks "$JAR_FILE" "jarFile"
+
+# Step 3: Upload manifest file
+upload_file_in_chunks "$MANIFEST_FILE" "manifest"
+
+# Step 4: Start async deployment
+echo ""
+echo "=== Step 4: Starting deployment (async) ==="
+FINALIZE_RESPONSE=$(curl -s -X POST "$API_BASE/upload/finalize?uploadSessionId=$SESSION_ID&async=true")
+
+STATUS=$(echo $FINALIZE_RESPONSE | grep -o '"status":"[^"]*' | cut -d'"' -f4)
+
+if [ "$STATUS" != "IN_PROGRESS" ]; then
+  echo "Failed to start deployment:"
+  echo "$FINALIZE_RESPONSE"
+  exit 1
+fi
+
+echo "Deployment started. Polling for status..."
+
+# Step 5: Poll deployment status
+POLL_INTERVAL=5   # seconds
+MAX_WAIT=600      # 10 minutes max
+
+elapsed=0
+while [ $elapsed -lt $MAX_WAIT ]; do
+  sleep $POLL_INTERVAL
+  elapsed=$((elapsed + POLL_INTERVAL))
+
+  STATUS_RESPONSE=$(curl -s "$API_BASE/deployment/status/$SESSION_ID")
+  CURRENT_STATUS=$(echo $STATUS_RESPONSE | grep -o '"status":"[^"]*' | cut -d'"' -f4)
+  MESSAGE=$(echo $STATUS_RESPONSE | grep -o '"message":"[^"]*' | cut -d'"' -f4)
+  PROGRESS=$(echo $STATUS_RESPONSE | grep -o '"progress":[0-9]*' | cut -d':' -f2)
+
+  printf "\r[%3ds] Status: %-15s Progress: %3s%% - %s" \
+    "$elapsed" "$CURRENT_STATUS" "${PROGRESS:-0}" "$MESSAGE"
+
+  if [ "$CURRENT_STATUS" = "COMPLETED" ]; then
+    echo ""
+    echo ""
+    echo "=== Deployment successful! ==="
+    echo "$STATUS_RESPONSE" | jq '.' 2>/dev/null || echo "$STATUS_RESPONSE"
+    exit 0
+  elif [ "$CURRENT_STATUS" = "FAILED" ]; then
+    echo ""
+    echo ""
+    echo "=== Deployment failed ==="
+    echo "$STATUS_RESPONSE" | jq '.' 2>/dev/null || echo "$STATUS_RESPONSE"
+    exit 1
+  fi
+done

+echo ""
+echo "=== Deployment timeout after ${MAX_WAIT}s ==="
+echo "Check status manually: curl $API_BASE/deployment/status/$SESSION_ID"
+exit 1
+```
+
+## Status Values
+
+| Status | Description |
+|--------|-------------|
+| `PENDING` | Upload session created but deployment not started |
+| `IN_PROGRESS` | Deployment is currently running |
+| `COMPLETED` | Deployment finished successfully |
+| `FAILED` | Deployment failed with errors |
+
+## Nginx Configuration
+
+With async deployment, nginx timeout is **not an issue**:
+
+```nginx
+server {
+    listen 80;
+    server_name your-app.example.com;
+
+    # Each chunk upload completes quickly
+    client_max_body_size 10m;
+
+    # Standard timeouts work fine now
+    proxy_read_timeout 60s;     # Chunks complete in <5s
+    proxy_connect_timeout 10s;
+    proxy_send_timeout 60s;
+
+    location /api/cf/ {
+        proxy_pass http://cf-deployer-backend:8080;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+    }
+}
+```
+
+## For Backwards Compatibility
+
+If you want synchronous deployment (will timeout on nginx!):
+
+```bash
+# Synchronous (old way - may timeout)
+curl -X POST "$API_BASE/upload/finalize?uploadSessionId=$SESSION_ID&async=false"
+```
+
+**Default is async=true** to avoid timeout issues.
+
+## Summary
+
+✅ **Chunk uploads**: Complete in seconds, no timeout
+✅ **Finalize endpoint**: Returns immediately (async), no timeout
+✅ **Status polling**: Each poll completes in milliseconds, no timeout
+✅ **Total solution**: Works with standard 30-second nginx timeout!
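+
+## Server-Side Wiring (Illustrative)
+
+Two pieces of server-side wiring sit behind the flow above but are not part of this patch. First, `@Async` methods such as `AsyncDeploymentService.deployAsync(...)` only run on a background thread if async support is enabled (for example with `@EnableAsync`); without it the deployment would run on the request thread and the nginx timeout would come back. Second, a controller has to expose the polling endpoint. The sketch below shows one plausible shape for both; the class names and executor sizing are assumptions rather than this project's actual code, and the finalize endpoint is omitted because it depends on how the upload session is stored (it would call `deployAsync(...)` and immediately return `202 Accepted`).
+
+```java
+package com.cfdeployer.config;
+
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.scheduling.annotation.EnableAsync;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+
+import java.util.concurrent.Executor;
+
+// Hypothetical config class: without @EnableAsync somewhere in the application,
+// Spring ignores @Async and deployAsync would block the HTTP request thread.
+@Configuration
+@EnableAsync
+public class AsyncConfig {
+
+    @Bean
+    public Executor taskExecutor() {
+        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
+        executor.setCorePoolSize(2);       // a couple of concurrent deployments
+        executor.setMaxPoolSize(4);
+        executor.setQueueCapacity(10);
+        executor.setThreadNamePrefix("deploy-");
+        executor.initialize();
+        return executor;
+    }
+}
+```
+
+```java
+package com.cfdeployer.controller;
+
+import com.cfdeployer.model.DeploymentStatus;
+import com.cfdeployer.service.AsyncDeploymentService;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+// Hypothetical controller: exposes the status endpoint polled by the script above.
+// Each call is an in-memory map lookup, so it returns in milliseconds.
+@RestController
+@RequestMapping("/api/cf")
+@RequiredArgsConstructor
+public class DeploymentStatusController {
+
+    private final AsyncDeploymentService asyncDeploymentService;
+
+    @GetMapping("/deployment/status/{uploadSessionId}")
+    public ResponseEntity<DeploymentStatus> getStatus(@PathVariable String uploadSessionId) {
+        return ResponseEntity.ok(asyncDeploymentService.getDeploymentStatus(uploadSessionId));
+    }
+}
+```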
diff --git a/src/main/java/com/cfdeployer/config/MultipartConfig.java b/src/main/java/com/cfdeployer/config/MultipartConfig.java
new file mode 100644
index 0000000..0687f54
--- /dev/null
+++ b/src/main/java/com/cfdeployer/config/MultipartConfig.java
@@ -0,0 +1,26 @@
+package com.cfdeployer.config;
+
+import jakarta.servlet.MultipartConfigElement;
+import org.springframework.boot.web.servlet.MultipartConfigFactory;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.util.unit.DataSize;
+
+@Configuration
+public class MultipartConfig {
+
+    @Bean
+    public MultipartConfigElement multipartConfigElement() {
+        MultipartConfigFactory factory = new MultipartConfigFactory();
+
+        // Set max file size for chunks (10MB per request is safe)
+        factory.setMaxFileSize(DataSize.ofMegabytes(10));
+        factory.setMaxRequestSize(DataSize.ofMegabytes(10));
+
+        // Important: Set file size threshold to write to disk immediately
+        // Setting to 0 means all uploads go directly to disk, not memory
+        factory.setFileSizeThreshold(DataSize.ofBytes(0));
+
+        return factory.createMultipartConfig();
+    }
+}
diff --git a/src/main/java/com/cfdeployer/model/DeploymentStatus.java b/src/main/java/com/cfdeployer/model/DeploymentStatus.java
new file mode 100644
index 0000000..9d2c8d2
--- /dev/null
+++ b/src/main/java/com/cfdeployer/model/DeploymentStatus.java
@@ -0,0 +1,27 @@
+package com.cfdeployer.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class DeploymentStatus {
+
+    public enum Status {
+        PENDING,      // Upload complete, deployment queued
+        IN_PROGRESS,  // Currently deploying
+        COMPLETED,    // Deployment successful
+        FAILED        // Deployment failed
+    }
+
+    private String uploadSessionId;
+    private Status status;
+    private String message;
+    private String output;
+    private String error;
+    private Integer progress;  // 0-100
+}
diff --git a/src/main/java/com/cfdeployer/service/AsyncDeploymentService.java b/src/main/java/com/cfdeployer/service/AsyncDeploymentService.java
new file mode 100644
index 0000000..faa30d8
--- /dev/null
+++ b/src/main/java/com/cfdeployer/service/AsyncDeploymentService.java
@@ -0,0 +1,99 @@
+package com.cfdeployer.service;
+
+import com.cfdeployer.model.CfDeployRequest;
+import com.cfdeployer.model.CfDeployResponse;
+import com.cfdeployer.model.DeploymentStatus;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Service;
+
+import java.nio.file.Path;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+@Service
+@RequiredArgsConstructor
+@Slf4j
+public class AsyncDeploymentService {
+
+    private final CfCliService cfCliService;
+    private final Map<String, DeploymentStatus> deploymentStatuses = new ConcurrentHashMap<>();
+
+    @Async
+    public void deployAsync(String sessionId, CfDeployRequest request, Path jarPath, Path manifestPath) {
+        log.info("Starting async deployment for session: {}", sessionId);
+
+        // Set initial status
+        deploymentStatuses.put(sessionId, DeploymentStatus.builder()
+                .uploadSessionId(sessionId)
+                .status(DeploymentStatus.Status.IN_PROGRESS)
+                .message("Deployment in progress...")
+                .progress(0)
+                .build());
+
+        try {
+            // Update progress
+            updateProgress(sessionId, 10, "Logging into Cloud Foundry...");
+
+            CfDeployResponse response = cfCliService.deployApplicationFromPaths(request, jarPath, manifestPath);
+
+            updateProgress(sessionId, 100, "Deployment completed");
+
+            // Set final status
+            if (Boolean.TRUE.equals(response.getSuccess())) {
+                deploymentStatuses.put(sessionId, DeploymentStatus.builder()
+                        .uploadSessionId(sessionId)
+                        .status(DeploymentStatus.Status.COMPLETED)
+                        .message(response.getMessage())
+                        .output(response.getOutput())
+                        .progress(100)
+                        .build());
+            } else {
+                deploymentStatuses.put(sessionId, DeploymentStatus.builder()
+                        .uploadSessionId(sessionId)
+                        .status(DeploymentStatus.Status.FAILED)
+                        .message(response.getMessage())
+                        .error(response.getError())
+                        .progress(0)
+                        .build());
+            }
+
+        } catch (Exception e) {
+            log.error("Async deployment failed for session: {}", sessionId, e);
+            deploymentStatuses.put(sessionId, DeploymentStatus.builder()
+                    .uploadSessionId(sessionId)
+                    .status(DeploymentStatus.Status.FAILED)
+                    .message("Deployment failed: " + e.getMessage())
+                    .error(e.toString())
+                    .progress(0)
+                    .build());
+        }
+    }
+
+    public DeploymentStatus getDeploymentStatus(String sessionId) {
+        DeploymentStatus status = deploymentStatuses.get(sessionId);
+        if (status == null) {
+            return DeploymentStatus.builder()
+                    .uploadSessionId(sessionId)
+                    .status(DeploymentStatus.Status.PENDING)
+                    .message("No deployment found for this session")
+                    .build();
+        }
+        return status;
+    }
+
+    public void clearDeploymentStatus(String sessionId) {
+        deploymentStatuses.remove(sessionId);
+        log.debug("Cleared deployment status for session: {}", sessionId);
+    }
+
+    private void updateProgress(String sessionId, int progress, String message) {
+        DeploymentStatus current = deploymentStatuses.get(sessionId);
+        if (current != null) {
+            current.setProgress(progress);
+            current.setMessage(message);
+            log.info("Session {}: {} ({}%)", sessionId, message, progress);
+        }
+    }
+}