Convert PyPI proxy from sync to async httpx to prevent event loop blocking
The pypi_download_file, pypi_simple_index, and pypi_package_versions endpoints were using a synchronous httpx.Client inside async functions. When upstream PyPI servers respond slowly, this blocked the entire FastAPI event loop and prevented health checks from responding; Kubernetes would then kill the pod after the liveness probe timed out.

Changes:
- httpx.Client → httpx.AsyncClient
- client.get() → await client.get()
- response.iter_bytes() → response.aiter_bytes()

This ensures the event loop remains responsive during slow upstream downloads, allowing health checks to succeed even when downloads take 20+ seconds.
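For context, a minimal sketch of the difference between the two patterns. This is illustrative only: the routes, URL, and timeout value below are assumptions for the example, not the actual endpoints in this service.

import httpx
from fastapi import FastAPI

app = FastAPI()

@app.get("/broken")
async def broken() -> int:
    # httpx.Client.get() is a blocking call: while it waits on a slow
    # upstream, no other coroutine (including the health check) can run.
    with httpx.Client(timeout=30.0) as client:
        return client.get("https://pypi.org/simple/").status_code

@app.get("/fixed")
async def fixed() -> int:
    # await suspends this coroutine during network I/O, so the event loop
    # keeps serving liveness probes while the upstream is slow.
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get("https://pypi.org/simple/")
        return response.status_code

@app.get("/healthz")
async def healthz() -> dict:
    return {"status": "ok"}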
@@ -365,8 +365,8 @@ async def pypi_simple_index(
 
     timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
 
-    with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-        response = client.get(
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+        response = await client.get(
             simple_url,
             headers=headers,
             auth=auth,
@@ -377,7 +377,7 @@ async def pypi_simple_index(
         redirect_url = response.headers.get('location')
         if redirect_url:
             # Follow the redirect once
-            response = client.get(
+            response = await client.get(
                 redirect_url,
                 headers=headers,
                 auth=auth,
@@ -455,8 +455,8 @@ async def pypi_package_versions(
 
     timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
 
-    with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-        response = client.get(
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+        response = await client.get(
             package_url,
             headers=headers,
             auth=auth,
@@ -475,7 +475,7 @@ async def pypi_package_versions(
 
             final_url = redirect_url  # Update final URL
 
-            response = client.get(
+            response = await client.get(
                 redirect_url,
                 headers=headers,
                 auth=auth,
@@ -598,8 +598,8 @@ async def pypi_download_file(
     # Fetch the file
     logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
 
-    with httpx.Client(timeout=timeout, follow_redirects=False) as client:
-        response = client.get(
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=False) as client:
+        response = await client.get(
             upstream_url,
             headers=headers,
             auth=auth,
@@ -624,7 +624,7 @@ async def pypi_download_file(
             redirect_headers.update(headers)
             redirect_auth = auth
 
-            response = client.get(
+            response = await client.get(
                 redirect_url,
                 headers=redirect_headers,
                 auth=redirect_auth,
@@ -642,11 +642,12 @@ async def pypi_download_file(
 
         # Stream to temp file to avoid loading large packages into memory
         # This keeps memory usage constant regardless of package size
+        # Using async iteration to avoid blocking the event loop
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{filename}") as tmp_file:
                tmp_path = tmp_file.name
-                for chunk in response.iter_bytes(chunk_size=65536):  # 64KB chunks
+                async for chunk in response.aiter_bytes(chunk_size=65536):  # 64KB chunks
                     tmp_file.write(chunk)
 
         # Store in S3 from temp file (computes hash and deduplicates automatically)
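A rough way to exercise the health-check claim locally — a hedged sketch, assuming the service runs on localhost:8000, exposes a /healthz liveness route, and serves downloads under an assumed path (the real routes and package name will differ):

import asyncio

import httpx


async def poll_health(base_url: str) -> None:
    # Poll the liveness route once per second for ~20 seconds.
    async with httpx.AsyncClient() as client:
        for _ in range(20):
            r = await client.get(f"{base_url}/healthz", timeout=2.0)
            print("health:", r.status_code)
            await asyncio.sleep(1)


async def slow_download(base_url: str) -> None:
    # Assumed download path, purely for illustration.
    async with httpx.AsyncClient(timeout=60.0) as client:
        r = await client.get(f"{base_url}/pypi/packages/torch-2.3.0-cp311-none.whl")
        print("download:", r.status_code, len(r.content), "bytes")


async def main() -> None:
    base_url = "http://localhost:8000"
    # With the sync client, the health polls stall until the download finishes;
    # with the async client, they keep returning promptly.
    await asyncio.gather(slow_download(base_url), poll_health(base_url))


asyncio.run(main())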