Add configurable PyPI download mode (redirect vs proxy)
Adds the ORCHARD_PYPI_DOWNLOAD_MODE setting (default: "redirect"):
- "redirect": Redirect pip to an S3 presigned URL, which reduces pod bandwidth.
- "proxy": Stream through the Orchard pod, for environments where clients can't reach S3.

In redirect mode, Orchard only handles metadata requests and upstream fetches. All file transfers go directly from S3 to the client.
This commit is contained in:
@@ -16,13 +16,13 @@ from urllib.parse import urljoin, urlparse, quote, unquote
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response
|
||||
from fastapi.responses import StreamingResponse, HTMLResponse
|
||||
from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .database import get_db
|
||||
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
|
||||
from .storage import S3Storage, get_storage
|
||||
from .config import get_env_upstream_sources
|
||||
from .config import get_env_upstream_sources, get_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -472,31 +472,44 @@ async def pypi_download_file(
|
||||
artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
|
||||
if artifact:
|
||||
logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
|
||||
settings = get_settings()
|
||||
|
||||
# Stream from S3
|
||||
try:
|
||||
stream, content_length, _ = storage.get_stream(artifact.s3_key)
|
||||
if settings.pypi_download_mode == "redirect":
|
||||
# Redirect to S3 presigned URL - client downloads directly from S3
|
||||
presigned_url = storage.generate_presigned_url(artifact.s3_key)
|
||||
return RedirectResponse(
|
||||
url=presigned_url,
|
||||
status_code=302,
|
||||
headers={
|
||||
"X-Checksum-SHA256": artifact.id,
|
||||
"X-Cache": "HIT",
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Proxy mode - stream from S3 through Orchard
|
||||
stream, content_length, _ = storage.get_stream(artifact.s3_key)
|
||||
|
||||
def stream_content():
|
||||
"""Generator that yields chunks from the S3 stream."""
|
||||
try:
|
||||
for chunk in stream.iter_chunks():
|
||||
yield chunk
|
||||
finally:
|
||||
stream.close()
|
||||
def stream_content():
|
||||
"""Generator that yields chunks from the S3 stream."""
|
||||
try:
|
||||
for chunk in stream.iter_chunks():
|
||||
yield chunk
|
||||
finally:
|
||||
stream.close()
|
||||
|
||||
return StreamingResponse(
|
||||
stream_content(),
|
||||
media_type=artifact.content_type or "application/octet-stream",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{filename}"',
|
||||
"Content-Length": str(content_length),
|
||||
"X-Checksum-SHA256": artifact.id,
|
||||
"X-Cache": "HIT",
|
||||
}
|
||||
)
|
||||
return StreamingResponse(
|
||||
stream_content(),
|
||||
media_type=artifact.content_type or "application/octet-stream",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{filename}"',
|
||||
"Content-Length": str(content_length),
|
||||
"X-Checksum-SHA256": artifact.id,
|
||||
"X-Cache": "HIT",
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
|
||||
logger.error(f"PyPI proxy: error serving cached artifact: {e}")
|
||||
# Fall through to fetch from upstream
|
||||
|
||||
# Not cached - fetch from upstream
|
||||
@@ -674,31 +687,45 @@ async def pypi_download_file(
|
||||
|
||||
db.commit()
|
||||
|
||||
# Stream the file from S3 (don't load into memory)
|
||||
# Serve the file from S3
|
||||
settings = get_settings()
|
||||
try:
|
||||
stream, content_length, _ = storage.get_stream(s3_key)
|
||||
if settings.pypi_download_mode == "redirect":
|
||||
# Redirect to S3 presigned URL - client downloads directly from S3
|
||||
presigned_url = storage.generate_presigned_url(s3_key)
|
||||
return RedirectResponse(
|
||||
url=presigned_url,
|
||||
status_code=302,
|
||||
headers={
|
||||
"X-Checksum-SHA256": sha256,
|
||||
"X-Cache": "MISS",
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Proxy mode - stream from S3 through Orchard
|
||||
stream, content_length, _ = storage.get_stream(s3_key)
|
||||
|
||||
def stream_content():
|
||||
"""Generator that yields chunks from the S3 stream."""
|
||||
try:
|
||||
for chunk in stream.iter_chunks():
|
||||
yield chunk
|
||||
finally:
|
||||
stream.close()
|
||||
def stream_content():
|
||||
"""Generator that yields chunks from the S3 stream."""
|
||||
try:
|
||||
for chunk in stream.iter_chunks():
|
||||
yield chunk
|
||||
finally:
|
||||
stream.close()
|
||||
|
||||
return StreamingResponse(
|
||||
stream_content(),
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{filename}"',
|
||||
"Content-Length": str(size),
|
||||
"X-Checksum-SHA256": sha256,
|
||||
"X-Cache": "MISS",
|
||||
}
|
||||
)
|
||||
return StreamingResponse(
|
||||
stream_content(),
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{filename}"',
|
||||
"Content-Length": str(size),
|
||||
"X-Checksum-SHA256": sha256,
|
||||
"X-Cache": "MISS",
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"PyPI proxy: error streaming from S3: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Error streaming file: {e}")
|
||||
logger.error(f"PyPI proxy: error serving from S3: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"Error serving file: {e}")
|
||||
|
||||
except httpx.ConnectError as e:
|
||||
raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
|
||||
|
||||
Reference in New Issue
Block a user