Convert PyPI proxy from sync to async httpx to prevent event loop blocking

The pypi_download_file, pypi_simple_index, and pypi_package_versions endpoints were using a synchronous httpx.Client inside async functions. When upstream PyPI servers responded slowly, this blocked the entire FastAPI event loop, preventing health checks from responding. Kubernetes would then kill the pod after the liveness probe timed out.

Changes:
- httpx.Client → httpx.AsyncClient
- client.get() → await client.get()
- response.iter_bytes() → response.aiter_bytes()

This ensures the event loop remains responsive during slow upstream downloads, allowing health checks to succeed even when downloads take 20+ seconds.
2026-02-02 15:26:24 -06:00
parent 361210a2bc
commit 1329d380a4
1 changed file with 11 additions and 10 deletions
--- a/backend/app/pypi_proxy.py
+++ b/backend/app/pypi_proxy.py
@@ -365,8 +365,8 @@ async def pypi_simple_index(

            timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)

-            with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-                response = client.get(
+            async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+                response = await client.get(
                    simple_url,
                    headers=headers,
                    auth=auth,
@@ -377,7 +377,7 @@ async def pypi_simple_index(
                    redirect_url = response.headers.get('location')
                    if redirect_url:
                        # Follow the redirect once
-                        response = client.get(
+                        response = await client.get(
                            redirect_url,
                            headers=headers,
                            auth=auth,
@@ -455,8 +455,8 @@ async def pypi_package_versions(

            timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)

-            with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-                response = client.get(
+            async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+                response = await client.get(
                    package_url,
                    headers=headers,
                    auth=auth,
@@ -475,7 +475,7 @@ async def pypi_package_versions(

                    final_url = redirect_url  # Update final URL

-                    response = client.get(
+                    response = await client.get(
                        redirect_url,
                        headers=headers,
                        auth=auth,
@@ -598,8 +598,8 @@ async def pypi_download_file(
        # Fetch the file
        logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")

-        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-            response = client.get(
+        async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+            response = await client.get(
                upstream_url,
                headers=headers,
                auth=auth,
@@ -624,7 +624,7 @@ async def pypi_download_file(
                    redirect_headers.update(headers)
                    redirect_auth = auth

-                response = client.get(
+                response = await client.get(
                    redirect_url,
                    headers=redirect_headers,
                    auth=redirect_auth,
@@ -642,11 +642,12 @@ async def pypi_download_file(

            # Stream to temp file to avoid loading large packages into memory
            # This keeps memory usage constant regardless of package size
+            # Using async iteration to avoid blocking the event loop
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file:
                    tmp_path = tmp_file.name
-                    for chunk in response.iter_bytes(chunk_size=65536):  # 64KB chunks
+                    async for chunk in response.aiter_bytes(chunk_size=65536):  # 64KB chunks
                        tmp_file.write(chunk)

                # Store in S3 from temp file (computes hash and deduplicates automatically)