Add configurable PyPI download mode (redirect vs proxy)

Adds the ORCHARD_PYPI_DOWNLOAD_MODE setting (default: "redirect"), which accepts two values: "redirect" redirects pip to an S3 presigned URL, which reduces pod bandwidth; "proxy" streams the file through the Orchard pod, for environments where clients can't reach S3. In redirect mode, Orchard only handles metadata requests and upstream fetches. All file transfers go directly from S3 to the client.
2026-02-03 17:09:05 -06:00
parent 1ffe17bf62
commit d12e4cdfc5
2 changed files with 71 additions and 43 deletions
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -51,6 +51,7 @@ class Settings(BaseSettings):
    presigned_url_expiry: int = (
        3600  # Presigned URL expiry in seconds (default: 1 hour)
    )
+    pypi_download_mode: str = "redirect"  # "redirect" (to S3) or "proxy" (stream through Orchard)

    # Logging settings
    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
--- a/backend/app/pypi_proxy.py
+++ b/backend/app/pypi_proxy.py
@@ -16,13 +16,13 @@ from urllib.parse import urljoin, urlparse, quote, unquote

 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request, Response
-from fastapi.responses import StreamingResponse, HTMLResponse
+from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
 from sqlalchemy.orm import Session

 from .database import get_db
 from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
 from .storage import S3Storage, get_storage
-from .config import get_env_upstream_sources
+from .config import get_env_upstream_sources, get_settings

 logger = logging.getLogger(__name__)

@@ -472,31 +472,44 @@ async def pypi_download_file(
        artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
        if artifact:
            logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
+            settings = get_settings()

-            # Stream from S3
            try:
-                stream, content_length, _ = storage.get_stream(artifact.s3_key)
+                if settings.pypi_download_mode == "redirect":
+                    # Redirect to S3 presigned URL - client downloads directly from S3
+                    presigned_url = storage.generate_presigned_url(artifact.s3_key)
+                    return RedirectResponse(
+                        url=presigned_url,
+                        status_code=302,
+                        headers={
+                            "X-Checksum-SHA256": artifact.id,
+                            "X-Cache": "HIT",
+                        }
+                    )
+                else:
+                    # Proxy mode - stream from S3 through Orchard
+                    stream, content_length, _ = storage.get_stream(artifact.s3_key)

-                def stream_content():
-                    """Generator that yields chunks from the S3 stream."""
-                    try:
-                        for chunk in stream.iter_chunks():
-                            yield chunk
-                    finally:
-                        stream.close()
+                    def stream_content():
+                        """Generator that yields chunks from the S3 stream."""
+                        try:
+                            for chunk in stream.iter_chunks():
+                                yield chunk
+                        finally:
+                            stream.close()

-                return StreamingResponse(
-                    stream_content(),
-                    media_type=artifact.content_type or "application/octet-stream",
-                    headers={
-                        "Content-Disposition": f'attachment; filename="{filename}"',
-                        "Content-Length": str(content_length),
-                        "X-Checksum-SHA256": artifact.id,
-                        "X-Cache": "HIT",
-                    }
-                )
+                    return StreamingResponse(
+                        stream_content(),
+                        media_type=artifact.content_type or "application/octet-stream",
+                        headers={
+                            "Content-Disposition": f'attachment; filename="{filename}"',
+                            "Content-Length": str(content_length),
+                            "X-Checksum-SHA256": artifact.id,
+                            "X-Cache": "HIT",
+                        }
+                    )
            except Exception as e:
-                logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
+                logger.error(f"PyPI proxy: error serving cached artifact: {e}")
                # Fall through to fetch from upstream

    # Not cached - fetch from upstream
@@ -674,31 +687,45 @@ async def pypi_download_file(

        db.commit()

-        # Stream the file from S3 (don't load into memory)
+        # Serve the file from S3
+        settings = get_settings()
        try:
-            stream, content_length, _ = storage.get_stream(s3_key)
+            if settings.pypi_download_mode == "redirect":
+                # Redirect to S3 presigned URL - client downloads directly from S3
+                presigned_url = storage.generate_presigned_url(s3_key)
+                return RedirectResponse(
+                    url=presigned_url,
+                    status_code=302,
+                    headers={
+                        "X-Checksum-SHA256": sha256,
+                        "X-Cache": "MISS",
+                    }
+                )
+            else:
+                # Proxy mode - stream from S3 through Orchard
+                stream, content_length, _ = storage.get_stream(s3_key)

-            def stream_content():
-                """Generator that yields chunks from the S3 stream."""
-                try:
-                    for chunk in stream.iter_chunks():
-                        yield chunk
-                finally:
-                    stream.close()
+                def stream_content():
+                    """Generator that yields chunks from the S3 stream."""
+                    try:
+                        for chunk in stream.iter_chunks():
+                            yield chunk
+                    finally:
+                        stream.close()

-            return StreamingResponse(
-                stream_content(),
-                media_type=content_type,
-                headers={
-                    "Content-Disposition": f'attachment; filename="{filename}"',
-                    "Content-Length": str(size),
-                    "X-Checksum-SHA256": sha256,
-                    "X-Cache": "MISS",
-                }
-            )
+                return StreamingResponse(
+                    stream_content(),
+                    media_type=content_type,
+                    headers={
+                        "Content-Disposition": f'attachment; filename="{filename}"',
+                        "Content-Length": str(size),
+                        "X-Checksum-SHA256": sha256,
+                        "X-Cache": "MISS",
+                    }
+                )
        except Exception as e:
-            logger.error(f"PyPI proxy: error streaming from S3: {e}")
-            raise HTTPException(status_code=500, detail=f"Error streaming file: {e}")
+            logger.error(f"PyPI proxy: error serving from S3: {e}")
+            raise HTTPException(status_code=500, detail=f"Error serving file: {e}")

    except httpx.ConnectError as e:
        raise HTTPException(status_code=502, detail=f"Connection failed: {e}")