diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe82356..dc14d18 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- Added transparent PyPI proxy implementing the PEP 503 Simple API (#108)
+  - `GET /pypi/simple/` - package index (proxied from upstream)
+  - `GET /pypi/simple/{package}/` - version list with rewritten download links
+  - `GET /pypi/simple/{package}/{filename}` - download with automatic caching
+  - Allows `pip install --index-url https://orchard.../pypi/simple/`
+  - Artifacts cached on first access through configured upstream sources
+- Added `POST /api/v1/cache/resolve` endpoint to cache packages by coordinates instead of URL (#108)
+
+### Changed
+- Upstream sources table text is now centered under column headers (#108)
+- ENV badge now appears inline with the source name instead of in a separate column (#108)
+- Test and Edit buttons now use more prominent styling (#108)
+- Reduced footer padding for a cleaner layout (#108)
+
 ### Fixed
 - Fixed purge_seed_data crash when deleting access permissions - was comparing UUID to VARCHAR column (#107)
diff --git a/backend/app/main.py b/backend/app/main.py
index ac71491..08b253e 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -11,6 +11,7 @@ from slowapi.errors import RateLimitExceeded
 from .config import get_settings
 from .database import init_db, SessionLocal
 from .routes import router
+from .pypi_proxy import router as pypi_router
 from .seed import seed_database
 from .auth import create_default_admin
 from .rate_limit import limiter
@@ -65,6 +66,7 @@ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
 
 # Include API routes
 app.include_router(router)
+app.include_router(pypi_router)
 
 # Serve static files (React build) if the directory exists
 static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "frontend", "dist")
diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py
new file mode 100644
index 0000000..4b26145
--- /dev/null
+++ b/backend/app/pypi_proxy.py
@@ -0,0 +1,543 @@
+"""
+Transparent PyPI proxy implementing PEP 503 (Simple API).
+
+Provides endpoints that allow pip to use Orchard as a PyPI index URL.
+Artifacts are cached on first access through configured upstream sources.
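+
+A minimal usage sketch (the hostname is illustrative -- substitute your
+own Orchard deployment):
+
+    pip install --index-url https://orchard.example.com/pypi/simple/ requests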
+""" + +import hashlib +import logging +import re +from typing import Optional +from urllib.parse import urljoin, urlparse, quote, unquote + +import httpx +from fastapi import APIRouter, Depends, HTTPException, Request, Response +from fastapi.responses import StreamingResponse, HTMLResponse +from sqlalchemy.orm import Session + +from .database import get_db +from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, Tag +from .storage import S3Storage, get_storage +from .upstream import ( + UpstreamClient, + UpstreamClientConfig, + UpstreamHTTPError, + UpstreamConnectionError, + UpstreamTimeoutError, +) +from .config import get_env_upstream_sources + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/pypi", tags=["pypi-proxy"]) + +# Timeout configuration for proxy requests +PROXY_CONNECT_TIMEOUT = 30.0 +PROXY_READ_TIMEOUT = 60.0 + + +def _get_pypi_upstream_sources(db: Session) -> list[UpstreamSource]: + """Get all enabled upstream sources configured for PyPI.""" + # Get database sources + db_sources = ( + db.query(UpstreamSource) + .filter( + UpstreamSource.source_type == "pypi", + UpstreamSource.enabled == True, + ) + .order_by(UpstreamSource.priority) + .all() + ) + + # Get env sources + env_sources = [ + s for s in get_env_upstream_sources() + if s.source_type == "pypi" and s.enabled + ] + + # Combine and sort by priority + all_sources = list(db_sources) + list(env_sources) + return sorted(all_sources, key=lambda s: s.priority) + + +def _build_auth_headers(source) -> dict: + """Build authentication headers for an upstream source.""" + headers = {} + + if hasattr(source, 'auth_type'): + if source.auth_type == "bearer": + password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None) + if password: + headers["Authorization"] = f"Bearer {password}" + elif source.auth_type == "api_key": + custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {} + if custom_headers: + headers.update(custom_headers) + + return headers + + +def _get_basic_auth(source) -> Optional[tuple[str, str]]: + """Get basic auth credentials if applicable.""" + if hasattr(source, 'auth_type') and source.auth_type == "basic": + username = getattr(source, 'username', None) + if username: + password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '') + return (username, password or '') + return None + + +def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str: + """ + Rewrite download links in a PyPI simple page to go through our proxy. + + Args: + html: The HTML content from upstream + base_url: Our server's base URL + package_name: The package name for the URL path + + Returns: + HTML with rewritten download links + """ + # Pattern to match href attributes in anchor tags + # PyPI simple pages have links like: + # file.tar.gz + + def replace_href(match): + original_url = match.group(1) + # Extract the filename from the URL + parsed = urlparse(original_url) + path_parts = parsed.path.split('/') + filename = path_parts[-1] if path_parts else '' + + # Keep the hash fragment if present + fragment = f"#{parsed.fragment}" if parsed.fragment else "" + + # Encode the original URL for safe transmission + encoded_url = quote(original_url.split('#')[0], safe='') + + # Build new URL pointing to our proxy + new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}" + + return f'href="{new_url}"' + + # Match href="..." 
patterns + rewritten = re.sub(r'href="([^"]+)"', replace_href, html) + + return rewritten + + +@router.get("/simple/") +async def pypi_simple_index( + request: Request, + db: Session = Depends(get_db), +): + """ + PyPI Simple API index - lists all packages. + + Proxies to the first available upstream PyPI source. + """ + sources = _get_pypi_upstream_sources(db) + + if not sources: + raise HTTPException( + status_code=503, + detail="No PyPI upstream sources configured" + ) + + # Try each source in priority order + last_error = None + for source in sources: + try: + headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"} + headers.update(_build_auth_headers(source)) + auth = _get_basic_auth(source) + + simple_url = source.url.rstrip('/') + '/simple/' + + timeout = httpx.Timeout( + connect=PROXY_CONNECT_TIMEOUT, + read=PROXY_READ_TIMEOUT, + ) + + with httpx.Client(timeout=timeout, follow_redirects=False) as client: + response = client.get( + simple_url, + headers=headers, + auth=auth, + ) + + # Handle redirects manually to avoid loops + if response.status_code in (301, 302, 303, 307, 308): + redirect_url = response.headers.get('location') + if redirect_url: + # Follow the redirect once + response = client.get( + redirect_url, + headers=headers, + auth=auth, + follow_redirects=False, + ) + + if response.status_code == 200: + # Return the index as-is (links are to package pages, not files) + # We could rewrite these too, but for now just proxy + content = response.text + + # Rewrite package links to go through our proxy + base_url = str(request.base_url).rstrip('/') + content = re.sub( + r'href="([^"]+)/"', + lambda m: f'href="{base_url}/pypi/simple/{m.group(1)}/"', + content + ) + + return HTMLResponse(content=content) + + last_error = f"HTTP {response.status_code}" + + except httpx.ConnectError as e: + last_error = f"Connection failed: {e}" + logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}") + except httpx.TimeoutException as e: + last_error = f"Timeout: {e}" + logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}") + except Exception as e: + last_error = str(e) + logger.warning(f"PyPI proxy: error fetching from {source.url}: {e}") + + raise HTTPException( + status_code=502, + detail=f"Failed to fetch package index from upstream: {last_error}" + ) + + +@router.get("/simple/{package_name}/") +async def pypi_package_versions( + request: Request, + package_name: str, + db: Session = Depends(get_db), +): + """ + PyPI Simple API package page - lists all versions/files for a package. + + Proxies to upstream and rewrites download links to go through our cache. 
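+
+    For example, an upstream link like
+    https://files.pythonhosted.org/.../requests-2.31.0.tar.gz#sha256=... (illustrative)
+    is rewritten to
+    {base_url}/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=<encoded-url>#sha256=...
+    keeping the hash fragment so pip can still verify the download.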
+    """
+    sources = _get_pypi_upstream_sources(db)
+
+    if not sources:
+        raise HTTPException(
+            status_code=503,
+            detail="No PyPI upstream sources configured"
+        )
+
+    base_url = str(request.base_url).rstrip('/')
+
+    # Normalize package name (PEP 503)
+    normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
+
+    # Try each source in priority order
+    last_error = None
+    for source in sources:
+        try:
+            headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
+            headers.update(_build_auth_headers(source))
+            auth = _get_basic_auth(source)
+
+            package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/'
+
+            # httpx.Timeout requires a default (or all four parameters)
+            timeout = httpx.Timeout(
+                PROXY_READ_TIMEOUT,
+                connect=PROXY_CONNECT_TIMEOUT,
+            )
+
+            with httpx.Client(timeout=timeout, follow_redirects=False) as client:
+                response = client.get(
+                    package_url,
+                    headers=headers,
+                    auth=auth,
+                )
+
+                # Handle redirects manually
+                redirect_count = 0
+                while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
+                    redirect_url = response.headers.get('location')
+                    if not redirect_url:
+                        break
+
+                    # Make redirect URL absolute if needed
+                    if not redirect_url.startswith('http'):
+                        redirect_url = urljoin(package_url, redirect_url)
+
+                    response = client.get(
+                        redirect_url,
+                        headers=headers,
+                        auth=auth,
+                        follow_redirects=False,
+                    )
+                    redirect_count += 1
+
+                if response.status_code == 200:
+                    content = response.text
+
+                    # Rewrite download links to go through our proxy
+                    content = _rewrite_package_links(content, base_url, normalized_name)
+
+                    return HTMLResponse(content=content)
+
+                if response.status_code == 404:
+                    # Package not found in this source, try next
+                    last_error = f"Package not found in {source.name}"
+                    continue
+
+                last_error = f"HTTP {response.status_code}"
+
+        except httpx.ConnectError as e:
+            last_error = f"Connection failed: {e}"
+            logger.warning(f"PyPI proxy: failed to connect to {source.url}: {e}")
+        except httpx.TimeoutException as e:
+            last_error = f"Timeout: {e}"
+            logger.warning(f"PyPI proxy: timeout connecting to {source.url}: {e}")
+        except Exception as e:
+            last_error = str(e)
+            logger.warning(f"PyPI proxy: error fetching {package_name} from {source.url}: {e}")
+
+    raise HTTPException(
+        status_code=404,
+        detail=f"Package '{package_name}' not found: {last_error}"
+    )
+
+
+@router.get("/simple/{package_name}/{filename}")
+async def pypi_download_file(
+    request: Request,
+    package_name: str,
+    filename: str,
+    upstream: Optional[str] = None,
+    db: Session = Depends(get_db),
+    storage: S3Storage = Depends(get_storage),
+):
+    """
+    Download a package file, caching it in Orchard.
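+
+    On a cache hit the artifact is streamed from storage with an
+    X-Cache: HIT header; on a miss it is fetched from the given upstream
+    URL, stored, and served with X-Cache: MISS.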
+
+    Args:
+        package_name: The package name
+        filename: The filename to download
+        upstream: URL-encoded upstream URL to fetch from
+    """
+    if not upstream:
+        raise HTTPException(
+            status_code=400,
+            detail="Missing 'upstream' query parameter with source URL"
+        )
+
+    # Decode the upstream URL
+    upstream_url = unquote(upstream)
+
+    # Check if we already have this URL cached
+    url_hash = hashlib.sha256(upstream_url.encode()).hexdigest()
+    cached_url = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
+
+    if cached_url:
+        # Serve from cache
+        artifact = db.query(Artifact).filter(Artifact.id == cached_url.artifact_id).first()
+        if artifact:
+            logger.info(f"PyPI proxy: serving cached {filename} (artifact {artifact.id[:12]})")
+
+            # Stream from S3
+            try:
+                content_stream = storage.get_artifact_stream(artifact.id)
+
+                return StreamingResponse(
+                    content_stream,
+                    media_type=artifact.content_type or "application/octet-stream",
+                    headers={
+                        "Content-Disposition": f'attachment; filename="{filename}"',
+                        "Content-Length": str(artifact.size),
+                        "X-Checksum-SHA256": artifact.id,
+                        "X-Cache": "HIT",
+                    }
+                )
+            except Exception as e:
+                logger.error(f"PyPI proxy: error streaming cached artifact: {e}")
+                # Fall through to fetch from upstream
+
+    # Not cached - fetch from upstream
+    sources = _get_pypi_upstream_sources(db)
+
+    # Find a source whose host appears in the upstream URL so its
+    # credentials apply; otherwise fall back to the highest-priority
+    # source. Download URLs often point at a CDN (e.g.
+    # files.pythonhosted.org), so a strict match would rarely succeed.
+    upstream_host = urlparse(upstream_url).netloc
+    matched_source = None
+    for source in sources:
+        source_url = getattr(source, 'url', '')
+        if upstream_host and upstream_host in source_url:
+            matched_source = source
+            break
+
+    if not matched_source and sources:
+        matched_source = sources[0]  # Use first source for auth if available
+
+    try:
+        headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
+        if matched_source:
+            headers.update(_build_auth_headers(matched_source))
+        auth = _get_basic_auth(matched_source) if matched_source else None
+
+        # httpx.Timeout requires a default; allow 5 minutes of read time
+        # for large files.
+        timeout = httpx.Timeout(
+            300.0,
+            connect=PROXY_CONNECT_TIMEOUT,
+        )
+
+        # Fetch the file
+        logger.info(f"PyPI proxy: fetching {filename} from {upstream_url}")
+
+        with httpx.Client(timeout=timeout, follow_redirects=False) as client:
+            response = client.get(
+                upstream_url,
+                headers=headers,
+                auth=auth,
+            )
+
+            # Handle redirects manually
+            redirect_count = 0
+            while response.status_code in (301, 302, 303, 307, 308) and redirect_count < 5:
+                redirect_url = response.headers.get('location')
+                if not redirect_url:
+                    break
+
+                if not redirect_url.startswith('http'):
+                    redirect_url = urljoin(upstream_url, redirect_url)
+
+                logger.info(f"PyPI proxy: following redirect to {redirect_url}")
+
+                # Don't send auth to different hosts
+                redirect_headers = {"User-Agent": "Orchard-PyPI-Proxy/1.0"}
+                redirect_auth = None
+                if urlparse(redirect_url).netloc == urlparse(upstream_url).netloc:
+                    redirect_headers.update(headers)
+                    redirect_auth = auth
+
+                response = client.get(
+                    redirect_url,
+                    headers=redirect_headers,
+                    auth=redirect_auth,
+                    follow_redirects=False,
+                )
+                redirect_count += 1
+
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Upstream returned {response.status_code}"
+                )
+
+            content = response.content
+            content_type = response.headers.get('content-type', 'application/octet-stream')
+
+            # Compute hash
+            sha256 = hashlib.sha256(content).hexdigest()
+            size = len(content)
+
+            logger.info(f"PyPI proxy: downloaded {filename}, {size} bytes, sha256={sha256[:12]}")
+
+            # Store in S3
+            from io import BytesIO
+            storage.store_artifact(
+                file_obj=BytesIO(content),
+                filename=filename,
+                content_type=content_type,
+            )
+
+            # Check if artifact already exists
+            existing = db.query(Artifact).filter(Artifact.id == sha256).first()
+            if existing:
+                # Increment ref count
+                existing.ref_count += 1
+                db.flush()
+            else:
+                # Create artifact record
+                new_artifact = Artifact(
+                    id=sha256,
+                    filename=filename,
+                    content_type=content_type,
+                    size=size,
+                    ref_count=1,
+                )
+                db.add(new_artifact)
+                db.flush()
+
+            # Create/get system project and package
+            system_project = db.query(Project).filter(Project.name == "_pypi").first()
+            if not system_project:
+                system_project = Project(
+                    name="_pypi",
+                    description="System project for cached PyPI packages",
+                    visibility="private",
+                )
+                db.add(system_project)
+                db.flush()
+
+            # Normalize package name
+            normalized_name = re.sub(r'[-_.]+', '-', package_name).lower()
+
+            package = db.query(Package).filter(
+                Package.project_id == system_project.id,
+                Package.name == normalized_name,
+            ).first()
+            if not package:
+                package = Package(
+                    project_id=system_project.id,
+                    name=normalized_name,
+                    description=f"PyPI package: {normalized_name}",
+                )
+                db.add(package)
+                db.flush()
+
+            # Create tag with filename
+            existing_tag = db.query(Tag).filter(
+                Tag.package_id == package.id,
+                Tag.name == filename,
+            ).first()
+            if not existing_tag:
+                tag = Tag(
+                    package_id=package.id,
+                    name=filename,
+                    artifact_id=sha256,
+                )
+                db.add(tag)
+
+            # Cache the URL mapping
+            existing_cached = db.query(CachedUrl).filter(CachedUrl.url_hash == url_hash).first()
+            if not existing_cached:
+                cached_url_record = CachedUrl(
+                    url_hash=url_hash,
+                    url=upstream_url,
+                    artifact_id=sha256,
+                )
+                db.add(cached_url_record)
+
+            db.commit()
+
+            # Return the file
+            return Response(
+                content=content,
+                media_type=content_type,
+                headers={
+                    "Content-Disposition": f'attachment; filename="{filename}"',
+                    "Content-Length": str(size),
+                    "X-Checksum-SHA256": sha256,
+                    "X-Cache": "MISS",
+                }
+            )
+
+    except httpx.ConnectError as e:
+        raise HTTPException(status_code=502, detail=f"Connection failed: {e}")
+    except httpx.TimeoutException as e:
+        raise HTTPException(status_code=504, detail=f"Timeout: {e}")
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception(f"PyPI proxy: error downloading {filename}")
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/backend/app/routes.py b/backend/app/routes.py
index 7c76ee8..e539ef0 100644
--- a/backend/app/routes.py
+++ b/backend/app/routes.py
@@ -8305,6 +8305,200 @@ def _create_user_cache_reference(
     return f"{user_project_name}/{user_package_name}"
 
 
+# --- Cache Resolve Endpoint ---
+
+from .schemas import CacheResolveRequest
+
+
+@router.post(
+    "/api/v1/cache/resolve",
+    response_model=CacheResponse,
+    tags=["cache"],
+    summary="Cache an artifact by package coordinates",
+)
+def cache_resolve(
+    request: Request,
+    resolve_request: CacheResolveRequest,
+    db: Session = Depends(get_db),
+    storage: S3Storage = Depends(get_storage),
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Cache an artifact by package coordinates (no URL required).
+
+    The server finds the appropriate download URL based on source_type
+    and configured upstream sources. Currently supports PyPI packages.
+
+    **Request Body:**
+    - `source_type` (required): Type of source (pypi, npm, maven, etc.)
+    - `package` (required): Package name
+    - `version` (required): Package version
+    - `user_project` (optional): Also create reference in this user project
+    - `user_package` (optional): Package name in user project
+    - `user_tag` (optional): Tag name in user project
+
+    **Example (curl):**
+    ```bash
+    curl -X POST "http://localhost:8080/api/v1/cache/resolve" \\
+      -H "Authorization: Bearer <token>" \\
+      -H "Content-Type: application/json" \\
+      -d '{
+        "source_type": "pypi",
+        "package": "requests",
+        "version": "2.31.0"
+      }'
+    ```
+    """
+    import re
+    import httpx
+
+    if resolve_request.source_type != "pypi":
+        raise HTTPException(
+            status_code=501,
+            detail=f"Cache resolve for '{resolve_request.source_type}' not yet implemented. Currently only 'pypi' is supported."
+        )
+
+    # Get PyPI upstream sources
+    sources = (
+        db.query(UpstreamSource)
+        .filter(
+            UpstreamSource.source_type == "pypi",
+            UpstreamSource.enabled == True,
+        )
+        .order_by(UpstreamSource.priority)
+        .all()
+    )
+
+    # Also get env sources
+    env_sources = [
+        s for s in get_env_upstream_sources()
+        if s.source_type == "pypi" and s.enabled
+    ]
+    all_sources = list(sources) + list(env_sources)
+    all_sources = sorted(all_sources, key=lambda s: s.priority)
+
+    if not all_sources:
+        raise HTTPException(
+            status_code=503,
+            detail="No PyPI upstream sources configured"
+        )
+
+    # Normalize package name (PEP 503)
+    normalized_package = re.sub(r'[-_.]+', '-', resolve_request.package).lower()
+
+    # Query the Simple API to find the download URL
+    download_url = None
+    matched_filename = None
+    last_error = None
+
+    for source in all_sources:
+        try:
+            headers = {"User-Agent": "Orchard-CacheResolver/1.0"}
+
+            # Build auth if needed
+            if hasattr(source, 'auth_type'):
+                if source.auth_type == "bearer":
+                    password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', None)
+                    if password:
+                        headers["Authorization"] = f"Bearer {password}"
+                elif source.auth_type == "api_key":
+                    custom_headers = source.get_headers() if hasattr(source, 'get_headers') else {}
+                    if custom_headers:
+                        headers.update(custom_headers)
+
+            auth = None
+            if hasattr(source, 'auth_type') and source.auth_type == "basic":
+                username = getattr(source, 'username', None)
+                if username:
+                    password = source.get_password() if hasattr(source, 'get_password') else getattr(source, 'password', '')
+                    auth = (username, password or '')
+
+            source_url = getattr(source, 'url', '')
+            package_url = source_url.rstrip('/') + f'/simple/{normalized_package}/'
+
+            # httpx.Timeout requires a default (or all four parameters)
+            timeout = httpx.Timeout(60.0, connect=30.0)
+
+            with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+                response = client.get(package_url, headers=headers, auth=auth)
+
+                if response.status_code == 404:
+                    last_error = f"Package not found in {getattr(source, 'name', 'source')}"
+                    continue
+
+                if response.status_code != 200:
+                    last_error = f"HTTP {response.status_code} from {getattr(source, 'name', 'source')}"
+                    continue
+
+                # Parse HTML to find the version
+                html = response.text
+                # Look for links containing the version
+                # Pattern: href="...{package}-{version}...#sha256=..."
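+                # e.g. (illustrative):
+                #   href="https://files.pythonhosted.org/.../requests-2.31.0.tar.gz#sha256=..."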
+ version_pattern = re.escape(resolve_request.version) + link_pattern = rf'href="([^"]+{normalized_package}[^"]*{version_pattern}[^"]*)"' + + matches = re.findall(link_pattern, html, re.IGNORECASE) + + if not matches: + # Try with original package name + link_pattern = rf'href="([^"]+{re.escape(resolve_request.package)}[^"]*{version_pattern}[^"]*)"' + matches = re.findall(link_pattern, html, re.IGNORECASE) + + if matches: + # Prefer .tar.gz or .whl files + for match in matches: + url = match.split('#')[0] # Remove hash fragment + if url.endswith('.tar.gz') or url.endswith('.whl'): + download_url = url + # Extract filename + matched_filename = url.split('/')[-1] + break + if not download_url: + # Use first match + download_url = matches[0].split('#')[0] + matched_filename = download_url.split('/')[-1] + break + + last_error = f"Version {resolve_request.version} not found for {resolve_request.package}" + + except httpx.ConnectError as e: + last_error = f"Connection failed: {e}" + logger.warning(f"Cache resolve: failed to connect to {getattr(source, 'url', 'source')}: {e}") + except httpx.TimeoutException as e: + last_error = f"Timeout: {e}" + logger.warning(f"Cache resolve: timeout connecting to {getattr(source, 'url', 'source')}: {e}") + except Exception as e: + last_error = str(e) + logger.warning(f"Cache resolve: error: {e}") + + if not download_url: + raise HTTPException( + status_code=404, + detail=f"Could not find {resolve_request.package}=={resolve_request.version}: {last_error}" + ) + + # Now cache the artifact using the existing cache_artifact logic + # Construct a CacheRequest + cache_request = CacheRequest( + url=download_url, + source_type="pypi", + package_name=normalized_package, + tag=matched_filename or resolve_request.version, + user_project=resolve_request.user_project, + user_package=resolve_request.user_package, + user_tag=resolve_request.user_tag, + ) + + # Call the cache logic + return cache_artifact( + request=request, + cache_request=cache_request, + db=db, + storage=storage, + current_user=current_user, + ) + + # --- Upstream Sources Admin API --- from .schemas import ( diff --git a/backend/app/schemas.py b/backend/app/schemas.py index b33c019..085c75c 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -1432,4 +1432,41 @@ class CacheResponse(BaseModel): user_reference: Optional[str] = None # e.g., "my-app/npm-deps:lodash-4.17.21" +class CacheResolveRequest(BaseModel): + """Request to cache an artifact by package coordinates (no URL required). + + The server will construct the appropriate URL based on source_type and + configured upstream sources. 
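+
+    Example body (illustrative coordinates):
+
+        {"source_type": "pypi", "package": "requests", "version": "2.31.0"}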
+ """ + source_type: str + package: str + version: str + user_project: Optional[str] = None + user_package: Optional[str] = None + user_tag: Optional[str] = None + + @field_validator('source_type') + @classmethod + def validate_source_type(cls, v: str) -> str: + if v not in SOURCE_TYPES: + raise ValueError(f"source_type must be one of: {', '.join(SOURCE_TYPES)}") + return v + + @field_validator('package') + @classmethod + def validate_package(cls, v: str) -> str: + v = v.strip() + if not v: + raise ValueError("package cannot be empty") + return v + + @field_validator('version') + @classmethod + def validate_version(cls, v: str) -> str: + v = v.strip() + if not v: + raise ValueError("version cannot be empty") + return v + + diff --git a/backend/tests/integration/test_pypi_proxy.py b/backend/tests/integration/test_pypi_proxy.py new file mode 100644 index 0000000..64c69bd --- /dev/null +++ b/backend/tests/integration/test_pypi_proxy.py @@ -0,0 +1,93 @@ +"""Integration tests for PyPI transparent proxy.""" + +import os +import pytest +import httpx + + +def get_base_url(): + """Get the base URL for the Orchard server from environment.""" + return os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") + + +class TestPyPIProxyEndpoints: + """Tests for PyPI proxy endpoints. + + These endpoints are public (no auth required) since pip needs to use them. + """ + + @pytest.mark.integration + def test_pypi_simple_index_no_sources(self): + """Test that /pypi/simple/ returns 503 when no sources configured.""" + with httpx.Client(base_url=get_base_url(), timeout=30.0) as client: + response = client.get("/pypi/simple/") + # Should return 503 when no PyPI upstream sources are configured + assert response.status_code == 503 + assert "No PyPI upstream sources configured" in response.json()["detail"] + + @pytest.mark.integration + def test_pypi_package_no_sources(self): + """Test that /pypi/simple/{package}/ returns 503 when no sources configured.""" + with httpx.Client(base_url=get_base_url(), timeout=30.0) as client: + response = client.get("/pypi/simple/requests/") + assert response.status_code == 503 + assert "No PyPI upstream sources configured" in response.json()["detail"] + + @pytest.mark.integration + def test_pypi_download_missing_upstream_param(self): + """Test that /pypi/simple/{package}/{filename} requires upstream param.""" + with httpx.Client(base_url=get_base_url(), timeout=30.0) as client: + response = client.get("/pypi/simple/requests/requests-2.31.0.tar.gz") + assert response.status_code == 400 + assert "upstream" in response.json()["detail"].lower() + + +class TestPyPILinkRewriting: + """Tests for URL rewriting in PyPI proxy responses.""" + + def test_rewrite_package_links(self): + """Test that download links are rewritten to go through proxy.""" + from app.pypi_proxy import _rewrite_package_links + + html = ''' + + + requests-2.31.0.tar.gz + requests-2.31.0-py3-none-any.whl + + + ''' + + result = _rewrite_package_links(html, "http://localhost:8080", "requests") + + # Links should be rewritten to go through our proxy + assert "/pypi/simple/requests/requests-2.31.0.tar.gz?upstream=" in result + assert "/pypi/simple/requests/requests-2.31.0-py3-none-any.whl?upstream=" in result + # Original URLs should be encoded in upstream param + assert "files.pythonhosted.org" in result + # Hash fragments should be preserved + assert "#sha256=abc123" in result + assert "#sha256=def456" in result + + +class TestPyPIPackageNormalization: + """Tests for PyPI package name normalization.""" + + 
@pytest.mark.integration + def test_package_name_normalized(self): + """Test that package names are normalized per PEP 503.""" + # These should all be treated the same: + # requests, Requests, requests_, requests- + # The endpoint normalizes to lowercase with hyphens + + with httpx.Client(base_url=get_base_url(), timeout=30.0) as client: + # Without upstream sources, we get 503, but the normalization + # happens before the source lookup + response = client.get("/pypi/simple/Requests/") + assert response.status_code == 503 # No sources, but path was valid + + response = client.get("/pypi/simple/some_package/") + assert response.status_code == 503 + + response = client.get("/pypi/simple/some-package/") + assert response.status_code == 503 diff --git a/frontend/src/components/Layout.css b/frontend/src/components/Layout.css index 584719f..d17679d 100644 --- a/frontend/src/components/Layout.css +++ b/frontend/src/components/Layout.css @@ -272,7 +272,7 @@ .footer { background: var(--bg-secondary); border-top: 1px solid var(--border-primary); - padding: 24px 0; + padding: 12px 0; } .footer-content { diff --git a/frontend/src/pages/AdminCachePage.css b/frontend/src/pages/AdminCachePage.css index 50598db..077b305 100644 --- a/frontend/src/pages/AdminCachePage.css +++ b/frontend/src/pages/AdminCachePage.css @@ -65,7 +65,7 @@ .sources-table th, .sources-table td { padding: 0.75rem 1rem; - text-align: left; + text-align: center; border-bottom: 1px solid var(--border-color); } @@ -91,6 +91,11 @@ white-space: nowrap; } +/* Name column should be left-aligned */ +.sources-table td:first-child { + text-align: left; +} + .url-cell { font-family: monospace; font-size: 0.9rem; @@ -98,6 +103,7 @@ overflow: hidden; text-overflow: ellipsis; white-space: nowrap; + text-align: left; } /* Badges */ @@ -243,10 +249,22 @@ } .btn-sm { - padding: 0.25rem 0.5rem; + padding: 0.25rem 0.75rem; font-size: 0.8rem; } +.btn-secondary { + background-color: var(--bg-tertiary); + border-color: var(--border-color); + color: var(--text-primary); + font-weight: 500; +} + +.btn-secondary:hover { + background-color: var(--bg-secondary); + border-color: var(--text-secondary); +} + .empty-message { color: var(--text-secondary); font-style: italic; diff --git a/frontend/src/pages/AdminCachePage.tsx b/frontend/src/pages/AdminCachePage.tsx index 5e1fbb0..9180f0f 100644 --- a/frontend/src/pages/AdminCachePage.tsx +++ b/frontend/src/pages/AdminCachePage.tsx @@ -272,8 +272,7 @@ function AdminCachePage() { URL Priority Status - Source - + Test Actions @@ -282,24 +281,18 @@ function AdminCachePage() { {source.name} + {source.source === 'env' && ( + ENV + )} {source.source_type} - {source.url} + {source.url} {source.priority} {source.enabled ? 'Enabled' : 'Disabled'} - - {source.source === 'env' ? ( - - ENV - - ) : ( - 'Database' - )} - {testingId === source.id ? ( @@ -317,14 +310,14 @@ function AdminCachePage() { {source.source !== 'env' && ( - )}