Add configurable PyPI download mode (redirect vs proxy)

Adds ORCHARD_PYPI_DOWNLOAD_MODE setting (default: "redirect"):
- "redirect": Redirect pip to an S3 presigned URL, which reduces pod bandwidth
- "proxy": Stream the file through the Orchard pod, for environments where clients can't reach S3

In redirect mode, Orchard only handles metadata requests and upstream fetches.
All file transfers to the client go directly from S3.
This commit is contained in:
Mondo Diaz
2026-02-03 17:09:05 -06:00
parent dc9c217d8a
commit 36c05230ff
2 changed files with 71 additions and 43 deletions

View File

@@ -51,6 +51,7 @@ class Settings(BaseSettings):
presigned_url_expiry: int = ( presigned_url_expiry: int = (
3600 # Presigned URL expiry in seconds (default: 1 hour) 3600 # Presigned URL expiry in seconds (default: 1 hour)
) )
pypi_download_mode: str = "redirect" # "redirect" (to S3) or "proxy" (stream through Orchard)
# Logging settings # Logging settings
log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL

View File

@@ -16,13 +16,13 @@ from urllib.parse import urljoin, urlparse, quote, unquote
import httpx import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, Response from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi.responses import StreamingResponse, HTMLResponse from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from .database import get_db from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
from .storage import S3Storage, get_storage from .storage import S3Storage, get_storage
from .config import get_env_upstream_sources from .config import get_env_upstream_sources, get_settings
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -472,9 +472,22 @@ async def pypi_download_file(
artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first() artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
if artifact: if artifact:
logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})") logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
settings = get_settings()
# Stream from S3
try: try:
if settings.pypi_download_mode == "redirect":
# Redirect to S3 presigned URL - client downloads directly from S3
presigned_url = storage.generate_presigned_url(artifact.s3_key)
return RedirectResponse(
url=presigned_url,
status_code=302,
headers={
"X-Checksum-SHA256": artifact.id,
"X-Cache": "HIT",
}
)
else:
# Proxy mode - stream from S3 through Orchard
stream, content_length, _ = storage.get_stream(artifact.s3_key) stream, content_length, _ = storage.get_stream(artifact.s3_key)
def stream_content(): def stream_content():
@@ -496,7 +509,7 @@ async def pypi_download_file(
} }
) )
except Exception as e: except Exception as e:
logger.error(f"PyPI proxy: error streaming cached artifact: {e}") logger.error(f"PyPI proxy: error serving cached artifact: {e}")
# Fall through to fetch from upstream # Fall through to fetch from upstream
# Not cached - fetch from upstream # Not cached - fetch from upstream
@@ -674,8 +687,22 @@ async def pypi_download_file(
db.commit() db.commit()
# Stream the file from S3 (don't load into memory) # Serve the file from S3
settings = get_settings()
try: try:
if settings.pypi_download_mode == "redirect":
# Redirect to S3 presigned URL - client downloads directly from S3
presigned_url = storage.generate_presigned_url(s3_key)
return RedirectResponse(
url=presigned_url,
status_code=302,
headers={
"X-Checksum-SHA256": sha256,
"X-Cache": "MISS",
}
)
else:
# Proxy mode - stream from S3 through Orchard
stream, content_length, _ = storage.get_stream(s3_key) stream, content_length, _ = storage.get_stream(s3_key)
def stream_content(): def stream_content():
@@ -697,8 +724,8 @@ async def pypi_download_file(
} }
) )
except Exception as e: except Exception as e:
logger.error(f"PyPI proxy: error streaming from S3: {e}") logger.error(f"PyPI proxy: error serving from S3: {e}")
raise HTTPException(status_code=500, detail=f"Error streaming file: {e}") raise HTTPException(status_code=500, detail=f"Error serving file: {e}")
except httpx.ConnectError as e: except httpx.ConnectError as e:
raise HTTPException(status_code=502, detail=f"Connection failed: {e}") raise HTTPException(status_code=502, detail=f"Connection failed: {e}")