diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py
index aba1613..942bfdb 100644
--- a/backend/app/pypi_proxy.py
+++ b/backend/app/pypi_proxy.py
@@ -81,7 +81,7 @@ def _get_basic_auth(source) -> Optional[tuple[str, str]]:
return None
-def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
+def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str:
"""
Rewrite download links in a PyPI simple page to go through our proxy.
@@ -89,6 +89,7 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
html: The HTML content from upstream
base_url: Our server's base URL
package_name: The package name for the URL path
+ upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs)
Returns:
HTML with rewritten download links
@@ -96,19 +97,31 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
# Pattern to match href attributes in anchor tags
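# PyPI simple pages have links like:
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
+ # Or relative URLs from Artifactory like:
+ # <a href="../../packages/.../file.tar.gz#sha256=...">file.tar.gz</a>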
def replace_href(match):
original_url = match.group(1)
+
+ # Resolve relative URLs to absolute using the upstream base URL
+ if not original_url.startswith(('http://', 'https://')):
+ # Split off fragment before resolving
+ url_without_fragment = original_url.split('#')[0]
+ fragment_part = original_url[len(url_without_fragment):]
+ absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part
+ else:
+ absolute_url = original_url
+
# Extract the filename from the URL
- parsed = urlparse(original_url)
+ parsed = urlparse(absolute_url)
path_parts = parsed.path.split('/')
filename = path_parts[-1] if path_parts else ''
# Keep the hash fragment if present
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
- # Encode the original URL for safe transmission
- encoded_url = quote(original_url.split('#')[0], safe='')
+ # Encode the absolute URL (without fragment) for safe transmission
+ encoded_url = quote(absolute_url.split('#')[0], safe='')
# Build new URL pointing to our proxy
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
@@ -236,6 +249,7 @@ async def pypi_package_versions(
auth = _get_basic_auth(source)
package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/'
+ final_url = package_url # Track final URL after redirects
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
@@ -255,7 +269,9 @@ async def pypi_package_versions(
# Make redirect URL absolute if needed
if not redirect_url.startswith('http'):
- redirect_url = urljoin(package_url, redirect_url)
+ redirect_url = urljoin(final_url, redirect_url)
+
+ final_url = redirect_url # Update final URL
response = client.get(
redirect_url,
@@ -269,7 +285,8 @@ async def pypi_package_versions(
content = response.text
# Rewrite download links to go through our proxy
- content = _rewrite_package_links(content, base_url, normalized_name)
+ # Pass final_url so relative URLs can be resolved correctly
+ content = _rewrite_package_links(content, base_url, normalized_name, final_url)
return HTMLResponse(content=content)