Fix PyPI proxy timeout by streaming from S3 instead of loading into memory
Large packages like TensorFlow (~600MB) caused read timeouts because the entire file was loaded into memory before responding to the client. Now the file is stored to S3 first, then streamed back using StreamingResponse.
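For context, the pattern below is a minimal standalone sketch of the same idea: stream an S3 object through FastAPI's StreamingResponse instead of reading it into memory first. It uses boto3 directly rather than this project's storage.get_stream() helper, and the bucket name, key layout, and route are illustrative placeholders, not taken from this codebase.

# Minimal sketch of the streaming pattern, assuming boto3 + FastAPI.
# Bucket, key prefix, and route path are placeholders (not from this repo).
import boto3
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()
s3 = boto3.client("s3")

@app.get("/pypi/files/{filename}")
def download_file(filename: str):
    # get_object returns a StreamingBody; the payload is not read into memory here.
    obj = s3.get_object(Bucket="package-cache", Key=f"pypi/{filename}")
    body = obj["Body"]

    def iter_file():
        # Yield fixed-size chunks so a ~600 MB wheel never has to fit in RAM at once.
        try:
            for chunk in body.iter_chunks(chunk_size=1024 * 1024):
                yield chunk
        finally:
            body.close()

    return StreamingResponse(
        iter_file(),
        media_type="application/octet-stream",
        headers={
            "Content-Length": str(obj["ContentLength"]),
            "Content-Disposition": f'attachment; filename="{filename}"',
        },
    )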
@@ -578,10 +578,7 @@ async def pypi_download_file(
             result = storage.store(f)
             sha256 = result.sha256
             size = result.size
-
-            # Read content for response
-            with open(tmp_path, 'rb') as f:
-                content = f.read()
+            s3_key = result.s3_key
 
             logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")
         finally:
@@ -677,9 +674,20 @@ async def pypi_download_file(
 
         db.commit()
 
-        # Return the file
-        return Response(
-            content=content,
+        # Stream the file from S3 (don't load into memory)
+        try:
+            stream, content_length, _ = storage.get_stream(s3_key)
+
+            def stream_content():
+                """Generator that yields chunks from the S3 stream."""
+                try:
+                    for chunk in stream.iter_chunks():
+                        yield chunk
+                finally:
+                    stream.close()
+
+            return StreamingResponse(
+                stream_content(),
             media_type=content_type,
             headers={
                 "Content-Disposition": f'attachment; filename="{filename}"',
@@ -688,6 +696,9 @@ async def pypi_download_file(
                 "X-Cache": "MISS",
             }
         )
+        except Exception as e:
+            logger.error(f"PyPI proxy: error streaming from S3: {e}")
+            raise HTTPException(status_code=500, detail=f"Error streaming file: {e}")
 
     except httpx.ConnectError as e:
         raise HTTPException(status_code=502, detail=f"Connection failed: {e}")