From 64bfd3902ff0adabd33e4314fc9a52f354ef024a Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Thu, 29 Jan 2026 18:01:19 -0600 Subject: [PATCH] Fix relative URL handling in PyPI proxy Artifactory and other registries may return relative URLs in their Simple API responses (e.g., ../../packages/...). The proxy now resolves these to absolute URLs using urljoin() before encoding them in the upstream parameter. This fixes package downloads failing when the upstream registry uses relative URLs in its package index. --- backend/app/pypi_proxy.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index aba1613..942bfdb 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -81,7 +81,7 @@ def _get_basic_auth(source) -> Optional[tuple[str, str]]: return None -def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str: +def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str: """ Rewrite download links in a PyPI simple page to go through our proxy. @@ -89,6 +89,7 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str: html: The HTML content from upstream base_url: Our server's base URL package_name: The package name for the URL path + upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs) Returns: HTML with rewritten download links @@ -96,19 +97,31 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str: # Pattern to match href attributes in anchor tags # PyPI simple pages have links like: # file.tar.gz + # Or relative URLs from Artifactory like: + # def replace_href(match): original_url = match.group(1) + + # Resolve relative URLs to absolute using the upstream base URL + if not original_url.startswith(('http://', 'https://')): + # Split off fragment before resolving + url_without_fragment = original_url.split('#')[0] + fragment_part = original_url[len(url_without_fragment):] + absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part + else: + absolute_url = original_url + # Extract the filename from the URL - parsed = urlparse(original_url) + parsed = urlparse(absolute_url) path_parts = parsed.path.split('/') filename = path_parts[-1] if path_parts else '' # Keep the hash fragment if present fragment = f"#{parsed.fragment}" if parsed.fragment else "" - # Encode the original URL for safe transmission - encoded_url = quote(original_url.split('#')[0], safe='') + # Encode the absolute URL (without fragment) for safe transmission + encoded_url = quote(absolute_url.split('#')[0], safe='') # Build new URL pointing to our proxy new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}" @@ -236,6 +249,7 @@ async def pypi_package_versions( auth = _get_basic_auth(source) package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/' + final_url = package_url # Track final URL after redirects timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT) @@ -255,7 +269,9 @@ async def pypi_package_versions( # Make redirect URL absolute if needed if not redirect_url.startswith('http'): - redirect_url = urljoin(package_url, redirect_url) + redirect_url = urljoin(final_url, redirect_url) + + final_url = redirect_url # Update final URL response = client.get( redirect_url, @@ -269,7 +285,8 @@ async def pypi_package_versions( content = response.text # Rewrite download links to go through our proxy - content = _rewrite_package_links(content, base_url, normalized_name) + # Pass final_url so relative URLs can be resolved correctly + content = _rewrite_package_links(content, base_url, normalized_name, final_url) return HTMLResponse(content=content)