Convert PyPI proxy from sync to async httpx to prevent event loop blocking

The pypi_download_file, pypi_simple_index, and pypi_package_versions endpoints
were using synchronous httpx.Client inside async functions. When upstream PyPI
servers respond slowly, this blocked the entire FastAPI event loop, preventing
health checks from responding. Kubernetes would then kill the pod after the
liveness probe timed out.

Changes:
- with httpx.Client(...) → async with httpx.AsyncClient(...)
- client.get() → await client.get()
- for chunk in response.iter_bytes() → async for chunk in response.aiter_bytes()

This ensures the event loop remains responsive during slow upstream downloads,
allowing health checks to succeed even when downloads take 20+ seconds.
This commit is contained in:
Mondo Diaz
2026-02-02 15:26:24 -06:00
parent 361210a2bc
commit 1329d380a4

View File

@@ -365,8 +365,8 @@ async def pypi_simple_index(
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
response = await client.get(
simple_url,
headers=headers,
auth=auth,
@@ -377,7 +377,7 @@ async def pypi_simple_index(
redirect_url = response.headers.get('location')
if redirect_url:
# Follow the redirect once
response = client.get(
response = await client.get(
redirect_url,
headers=headers,
auth=auth,
@@ -455,8 +455,8 @@ async def pypi_package_versions(
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
response = await client.get(
package_url,
headers=headers,
auth=auth,
@@ -475,7 +475,7 @@ async def pypi_package_versions(
final_url = redirect_url # Update final URL
response = client.get(
response = await client.get(
redirect_url,
headers=headers,
auth=auth,
@@ -598,8 +598,8 @@ async def pypi_download_file(
# Fetch the file
logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
with httpx.Client(timeout=timeout, follow_redirects=False) as client:
response = client.get(
async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
response = await client.get(
upstream_url,
headers=headers,
auth=auth,
@@ -624,7 +624,7 @@ async def pypi_download_file(
redirect_headers.update(headers)
redirect_auth = auth
response = client.get(
response = await client.get(
redirect_url,
headers=redirect_headers,
auth=redirect_auth,
@@ -642,11 +642,12 @@ async def pypi_download_file(
# Stream to temp file to avoid loading large packages into memory
# This keeps memory usage constant regardless of package size
# Using async iteration to avoid blocking the event loop
tmp_path = None
try:
with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file:
tmp_path = tmp_file.name
for chunk in response.iter_bytes(chunk_size=65536): # 64KB chunks
async for chunk in response.aiter_bytes(chunk_size=65536): # 64KB chunks
tmp_file.write(chunk)
# Store in S3 from temp file (computes hash and deduplicates automatically)