Fix PyPI proxy timeout by streaming from S3 instead of loading into memory

Large packages like TensorFlow (~600MB) caused read timeouts because the
entire file was loaded into memory before responding to the client. Now
the file is stored to S3 first, then streamed back using StreamingResponse.
Mondo Diaz
2026-02-03 16:42:30 -06:00
parent 9a2b323fd8
commit da3fd7a601
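
The new code path depends on a storage.get_stream(s3_key) helper that is not part of this diff. A minimal sketch of what such a helper could look like on top of boto3 follows; the bucket name, client setup, and exact return shape are assumptions rather than code from this repository. A botocore StreamingBody already provides the iter_chunks() and close() methods the generator in the diff relies on:

import boto3

s3 = boto3.client("s3")
BUCKET = "package-cache"  # assumed bucket name, illustrative only

def get_stream(s3_key: str):
    """Fetch an object and return (body, content_length, content_type).

    body is a botocore StreamingBody: it supports iter_chunks() and
    close(), which is how the streaming response consumes it.
    """
    obj = s3.get_object(Bucket=BUCKET, Key=s3_key)
    return obj["Body"], obj["ContentLength"], obj.get("ContentType")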

@@ -578,10 +578,7 @@ async def pypi_download_file(
                 result = storage.store(f)
                 sha256 = result.sha256
                 size = result.size
+                s3_key = result.s3_key
-            # Read content for response
-            with open(tmp_path, 'rb') as f:
-                content = f.read()
             logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")
         finally:
@@ -677,17 +674,31 @@ async def pypi_download_file(
         db.commit()
-        # Return the file
-        return Response(
-            content=content,
-            media_type=content_type,
-            headers={
-                "Content-Disposition": f'attachment; filename="{filename}"',
-                "Content-Length": str(size),
-                "X-Checksum-SHA256": sha256,
-                "X-Cache": "MISS",
-            }
-        )
+        # Stream the file from S3 (don't load into memory)
+        try:
+            stream, content_length, _ = storage.get_stream(s3_key)
+
+            def stream_content():
+                """Generator that yields chunks from the S3 stream."""
+                try:
+                    for chunk in stream.iter_chunks():
+                        yield chunk
+                finally:
+                    stream.close()
+
+            return StreamingResponse(
+                stream_content(),
+                media_type=content_type,
+                headers={
+                    "Content-Disposition": f'attachment; filename="{filename}"',
+                    "Content-Length": str(size),
+                    "X-Checksum-SHA256": sha256,
+                    "X-Cache": "MISS",
+                }
+            )
+        except Exception as e:
+            logger.error(f"PyPI proxy: error streaming from S3: {e}")
+            raise HTTPException(status_code=500, detail=f"Error streaming file: {e}")
     except httpx.ConnectError as e:
         raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
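
One way to sanity-check the change is to pull a large wheel through the proxy with a streaming HTTP client and compare the computed digest against the X-Checksum-SHA256 header set above. The URL and filename below are hypothetical placeholders for a real deployment:

import hashlib

import httpx

# Hypothetical proxy URL and package path; substitute the real deployment.
url = "http://localhost:8000/pypi/packages/tensorflow/tensorflow-2.15.0-cp311-cp311-manylinux_2_17_x86_64.whl"

sha = hashlib.sha256()
with httpx.stream("GET", url, timeout=httpx.Timeout(10.0, read=120.0)) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_bytes():  # chunks arrive incrementally; nothing is buffered whole
        sha.update(chunk)
    expected = resp.headers.get("X-Checksum-SHA256")

print("computed:", sha.hexdigest())
print("expected:", expected)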