Convert PyPI proxy from sync to async httpx to prevent event loop blocking

The pypi_download_file, pypi_simple_index, and pypi_package_versions endpoints
used the synchronous httpx.Client inside async functions. When an upstream PyPI
server responded slowly, the blocking call stalled the entire FastAPI event
loop and health checks stopped responding; Kubernetes would then kill the pod
after the liveness probe timed out.
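
A minimal sketch of the failure mode (app structure and URL are
illustrative, not the actual proxy code):

    import httpx
    from fastapi import FastAPI, Response

    app = FastAPI()

    @app.get("/pypi/simple/{package}/")
    async def simple_index(package: str):
        # Blocking call: while this request is in flight, no other
        # coroutine on the loop can run -- including /health below.
        with httpx.Client(timeout=30.0) as client:
            upstream = client.get(f"https://pypi.org/simple/{package}/")
        return Response(content=upstream.content)

    @app.get("/health")
    async def health():
        # Starved for the full duration of any slow upstream request.
        return {"status": "ok"}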

Changes:
- httpx.Client → httpx.AsyncClient
- client.get() → await client.get()
- response.iter_bytes() → response.aiter_bytes()
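
Taken together, the converted pattern looks like this (a minimal sketch;
variable names mirror the diff below rather than the full handlers):

    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
        # Awaiting suspends this coroutine instead of blocking the
        # loop, so health checks keep getting scheduled in between.
        response = await client.get(upstream_url, headers=headers, auth=auth)
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            # aiter_bytes() also yields control between chunks.
            async for chunk in response.aiter_bytes(chunk_size=65536):
                tmp_file.write(chunk)

tmp_file.write() itself stays synchronous, but a 64KB write to local disk
is assumed fast enough not to stall the loop noticeably.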

This ensures the event loop remains responsive during slow upstream downloads,
allowing health checks to succeed even when downloads take 20+ seconds.
Author: Mondo Diaz
Date:   2026-02-02 15:26:24 -06:00
Parent: 0a6dad9af0
Commit: ff25677b16

@@ -365,8 +365,8 @@ async def pypi_simple_index(
     timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
-    with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-        response = client.get(
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+        response = await client.get(
             simple_url,
             headers=headers,
             auth=auth,
@@ -377,7 +377,7 @@ async def pypi_simple_index(
         redirect_url = response.headers.get('location')
         if redirect_url:
             # Follow the redirect once
-            response = client.get(
+            response = await client.get(
                 redirect_url,
                 headers=headers,
                 auth=auth,
@@ -455,8 +455,8 @@ async def pypi_package_versions(
     timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
-    with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-        response = client.get(
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+        response = await client.get(
             package_url,
             headers=headers,
             auth=auth,
@@ -475,7 +475,7 @@ async def pypi_package_versions(
             final_url = redirect_url  # Update final URL
-            response = client.get(
+            response = await client.get(
                 redirect_url,
                 headers=headers,
                 auth=auth,
@@ -598,8 +598,8 @@ async def pypi_download_file(
     # Fetch the file
     logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
-    with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-        response = client.get(
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+        response = await client.get(
             upstream_url,
             headers=headers,
             auth=auth,
@@ -624,7 +624,7 @@ async def pypi_download_file(
            redirect_headers.update(headers)
            redirect_auth = auth
-            response = client.get(
+            response = await client.get(
                redirect_url,
                headers=redirect_headers,
                auth=redirect_auth,
@@ -642,11 +642,12 @@ async def pypi_download_file(
         # Stream to temp file to avoid loading large packages into memory
         # This keeps memory usage constant regardless of package size
+        # Using async iteration to avoid blocking the event loop
         tmp_path = None
         try:
             with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file:
                 tmp_path = tmp_file.name
-                for chunk in response.iter_bytes(chunk_size=65536):  # 64KB chunks
+                async for chunk in response.aiter_bytes(chunk_size=65536):  # 64KB chunks
                     tmp_file.write(chunk)
             # Store in S3 from temp file (computes hash and deduplicates automatically)