Add configurable PyPI download mode (redirect vs proxy)

Adds ORCHARD_PYPI_DOWNLOAD_MODE setting (default: "redirect"):
- "redirect": Redirect pip to an S3 presigned URL so the client downloads directly from S3, which reduces pod bandwidth
- "proxy": Stream the file through the Orchard pod, for environments where clients can't reach S3

In redirect mode, Orchard only handles metadata requests and upstream fetches.
All file downloads to the client are served directly from S3.
This commit is contained in:
Mondo Diaz
2026-02-03 17:09:05 -06:00
parent 1ffe17bf62
commit d12e4cdfc5
2 changed files with 71 additions and 43 deletions

View File

@@ -51,6 +51,7 @@ class Settings(BaseSettings):
presigned_url_expiry: int = (
3600 # Presigned URL expiry in seconds (default: 1 hour)
)
pypi_download_mode: str = "redirect" # "redirect" (to S3) or "proxy" (stream through Orchard)
# Logging settings
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL

View File

@@ -16,13 +16,13 @@ from urllib.parse import urljoin, urlparse, quote, unquote
import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, HTMLResponse
from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
from sqlalchemy.orm import Session
from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
from .storage import S3Storage, get_storage
from .config import get_env_upstream_sources
from .config import get_env_upstream_sources, get_settings
logger = logging.getLogger(__name__)
@@ -472,31 +472,44 @@ async def pypi_download_file(
artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
if artifact:
logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
settings = get_settings()
# Stream from S3
try:
stream, content_length, _ = storage.get_stream(artifact.s3_key)
if settings.pypi_download_mode == "redirect":
# Redirect to S3 presigned URL - client downloads directly from S3
presigned_url = storage.generate_presigned_url(artifact.s3_key)
return RedirectResponse(
url=presigned_url,
status_code=302,
headers={
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
else:
# Proxy mode - stream from S3 through Orchard
stream, content_length, _ = storage.get_stream(artifact.s3_key)
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
return StreamingResponse(
stream_content(),
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(content_length),
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
return StreamingResponse(
stream_content(),
media_type=artifact.content_type or "application/octet-stream",
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(content_length),
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
except Exception as e:
logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
logger.error(f"PyPI proxy: error serving cached artifact: {e}")
# Fall through to fetch from upstream
# Not cached - fetch from upstream
@@ -674,31 +687,45 @@ async def pypi_download_file(
db.commit()
# Stream the file from S3 (don't load into memory)
# Serve the file from S3
settings = get_settings()
try:
stream, content_length, _ = storage.get_stream(s3_key)
if settings.pypi_download_mode == "redirect":
# Redirect to S3 presigned URL - client downloads directly from S3
presigned_url = storage.generate_presigned_url(s3_key)
return RedirectResponse(
url=presigned_url,
status_code=302,
headers={
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
else:
# Proxy mode - stream from S3 through Orchard
stream, content_length, _ = storage.get_stream(s3_key)
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
def stream_content():
"""Generator that yields chunks from the S3 stream."""
try:
for chunk in stream.iter_chunks():
yield chunk
finally:
stream.close()
return StreamingResponse(
stream_content(),
media_type=content_type,
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(size),
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
return StreamingResponse(
stream_content(),
media_type=content_type,
headers={
"Content-Disposition": f'attachment; filename="{filename}"',
"Content-Length": str(size),
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
except Exception as e:
logger.error(f"PyPI proxy: error streaming from S3: {e}")
raise HTTPException(status_code=500, detail=f"Error streaming file: {e}")
logger.error(f"PyPI proxy: error serving from S3: {e}")
raise HTTPException(status_code=500, detail=f"Error serving file: {e}")
except httpx.ConnectError as e:
raise HTTPException(status_code=502, detail=f"Connection failed: {e}")