From 1329d380a4e3140ebc31a0f59b7db7badb50d3a5 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Mon, 2 Feb 2026 15:26:24 -0600 Subject: [PATCH] Convert PyPI proxy from sync to async httpx to prevent event loop blocking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pypi_download_file, pypi_simple_index, and pypi_package_versions endpoints were using synchronous httpx.Client inside async functions. When upstream PyPI servers respond slowly, this blocked the entire FastAPI event loop, preventing health checks from responding. Kubernetes would then kill the pod after the liveness probe timed out. Changes: - httpx.Client → httpx.AsyncClient - client.get() → await client.get() - response.iter_bytes() → response.aiter_bytes() This ensures the event loop remains responsive during slow upstream downloads, allowing health checks to succeed even when downloads take 20+ seconds. --- backend/app/pypi_proxy.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index 6832c28..5ee1f49 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -365,8 +365,8 @@ async def pypi_simple_index( timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT) - with httpx.Client(timeout=timeout, follow_redirects=False) as client: - response = client.get( + async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client: + response = await client.get( simple_url, headers=headers, auth=auth, @@ -377,7 +377,7 @@ async def pypi_simple_index( redirect_url = response.headers.get('location') if redirect_url: # Follow the redirect once - response = client.get( + response = await client.get( redirect_url, headers=headers, auth=auth, @@ -455,8 +455,8 @@ async def pypi_package_versions( timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT) - with httpx.Client(timeout=timeout, follow_redirects=False) as client: - response = client.get( + async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client: + response = await client.get( package_url, headers=headers, auth=auth, @@ -475,7 +475,7 @@ async def pypi_package_versions( final_url = redirect_url # Update final URL - response = client.get( + response = await client.get( redirect_url, headers=headers, auth=auth, @@ -598,8 +598,8 @@ async def pypi_download_file( # Fetch the file logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}") - with httpx.Client(timeout=timeout, follow_redirects=False) as client: - response = client.get( + async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client: + response = await client.get( upstream_url, headers=headers, auth=auth, @@ -624,7 +624,7 @@ async def pypi_download_file( redirect_headers.update(headers) redirect_auth = auth - response = client.get( + response = await client.get( redirect_url, headers=redirect_headers, auth=redirect_auth, @@ -642,11 +642,12 @@ async def pypi_download_file( # Stream to temp file to avoid loading large packages into memory # This keeps memory usage constant regardless of package size + # Using async iteration to avoid blocking the event loop tmp_path = None try: with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file: tmp_path = tmp_file.name - for chunk in response.iter_bytes(chunk_size=65536): # 64KB chunks + async for chunk in response.aiter_bytes(chunk_size=65536): # 64KB chunks tmp_file.write(chunk) # Store in S3 from temp file (computes hash and deduplicates automatically)