From d12e4cdfc5ce86b04947665cc068757506ec3515 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Tue, 3 Feb 2026 17:09:05 -0600 Subject: [PATCH] Add configurable PyPI download mode (redirect vs proxy) Adds ORCHARD_PYPI_DOWNLOAD_MODE setting (default: "redirect"): - "redirect": Redirect pip to S3 presigned URL - reduces pod bandwidth - "proxy": Stream through Orchard pod - for environments where clients can't reach S3 In redirect mode, Orchard only handles metadata requests and upstream fetches. All file transfers go directly from S3 to the client. --- backend/app/config.py | 1 + backend/app/pypi_proxy.py | 113 +++++++++++++++++++++++--------------- 2 files changed, 71 insertions(+), 43 deletions(-) diff --git a/backend/app/config.py b/backend/app/config.py index be8b20b..484c38b 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -51,6 +51,7 @@ class Settings(BaseSettings): presigned_url_expiry: int = ( 3600 # Presigned URL expiry in seconds (default: 1 hour) ) + pypi_download_mode: str = "redirect" # "redirect" (to S3) or "proxy" (stream through Orchard) # Logging settings log_level: str = "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index aff7c24..2dc56a5 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -16,13 +16,13 @@ from urllib.parse import urljoin, urlparse, quote, unquote import httpx from fastapi import APIRouter, Depends, HTTPException, Request, Response -from fastapi.responses import StreamingResponse, HTMLResponse +from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse from sqlalchemy.orm import Session from .database import get_db from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion from .storage import S3Storage, get_storage -from .config import get_env_upstream_sources +from .config import get_env_upstream_sources, get_settings logger = logging.getLogger(__name__) @@ -472,31 +472,44 @@ async def pypi_download_file( artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first() if artifact: logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})") + settings = get_settings() - # Stream from S3 try: - stream, content_length, _ = storage.get_stream(artifact.s3_key) + if settings.pypi_download_mode == "redirect": + # Redirect to S3 presigned URL - client downloads directly from S3 + presigned_url = storage.generate_presigned_url(artifact.s3_key) + return RedirectResponse( + url=presigned_url, + status_code=302, + headers={ + "X-Checksum-SHA256": artifact.id, + "X-Cache": "HIT", + } + ) + else: + # Proxy mode - stream from S3 through Orchard + stream, content_length, _ = storage.get_stream(artifact.s3_key) - def stream_content(): - """Generator that yields chunks from the S3 stream.""" - try: - for chunk in stream.iter_chunks(): - yield chunk - finally: - stream.close() + def stream_content(): + """Generator that yields chunks from the S3 stream.""" + try: + for chunk in stream.iter_chunks(): + yield chunk + finally: + stream.close() - return StreamingResponse( - stream_content(), - media_type=artifact.content_type or "application/octet-stream", - headers={ - "Content-Disposition": f'attachment; filename="{filename}"', - "Content-Length": str(content_length), - "X-Checksum-SHA256": artifact.id, - "X-Cache": "HIT", - } - ) + return StreamingResponse( + stream_content(), + media_type=artifact.content_type or "application/octet-stream", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + "Content-Length": str(content_length), + "X-Checksum-SHA256": artifact.id, + "X-Cache": "HIT", + } + ) except Exception as e: - logger.error(f"PyPI proxy: error streaming cached artifact: {e}") + logger.error(f"PyPI proxy: error serving cached artifact: {e}") # Fall through to fetch from upstream # Not cached - fetch from upstream @@ -674,31 +687,45 @@ async def pypi_download_file( db.commit() - # Stream the file from S3 (don't load into memory) + # Serve the file from S3 + settings = get_settings() try: - stream, content_length, _ = storage.get_stream(s3_key) + if settings.pypi_download_mode == "redirect": + # Redirect to S3 presigned URL - client downloads directly from S3 + presigned_url = storage.generate_presigned_url(s3_key) + return RedirectResponse( + url=presigned_url, + status_code=302, + headers={ + "X-Checksum-SHA256": sha256, + "X-Cache": "MISS", + } + ) + else: + # Proxy mode - stream from S3 through Orchard + stream, content_length, _ = storage.get_stream(s3_key) - def stream_content(): - """Generator that yields chunks from the S3 stream.""" - try: - for chunk in stream.iter_chunks(): - yield chunk - finally: - stream.close() + def stream_content(): + """Generator that yields chunks from the S3 stream.""" + try: + for chunk in stream.iter_chunks(): + yield chunk + finally: + stream.close() - return StreamingResponse( - stream_content(), - media_type=content_type, - headers={ - "Content-Disposition": f'attachment; filename="{filename}"', - "Content-Length": str(size), - "X-Checksum-SHA256": sha256, - "X-Cache": "MISS", - } - ) + return StreamingResponse( + stream_content(), + media_type=content_type, + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + "Content-Length": str(size), + "X-Checksum-SHA256": sha256, + "X-Cache": "MISS", + } + ) except Exception as e: - logger.error(f"PyPI proxy: error streaming from S3: {e}") - raise HTTPException(status_code=500, detail=f"Error streaming file: {e}") + logger.error(f"PyPI proxy: error serving from S3: {e}") + raise HTTPException(status_code=500, detail=f"Error serving file: {e}") except httpx.ConnectError as e: raise HTTPException(status_code=502, detail=f"Connection failed: {e}")