Fix relative URL handling in PyPI proxy
Artifactory and other registries may return relative URLs in their Simple API responses (e.g., ../../packages/...). The proxy now resolves these to absolute URLs with urljoin(), using the final upstream page URL (after any redirects) as the base, before encoding them in the upstream parameter. This fixes package download failures that occurred when the upstream registry used relative URLs in its package index.
This commit is contained in:
@@ -81,7 +81,7 @@ def _get_basic_auth(source) -> Optional[tuple[str, str]]:
|
||||
return None
|
||||
|
||||
|
||||
def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
|
||||
def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str:
|
||||
"""
|
||||
Rewrite download links in a PyPI simple page to go through our proxy.
|
||||
|
||||
@@ -89,6 +89,7 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
|
||||
html: The HTML content from upstream
|
||||
base_url: Our server's base URL
|
||||
package_name: The package name for the URL path
|
||||
upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs)
|
||||
|
||||
Returns:
|
||||
HTML with rewritten download links
|
||||
@@ -96,19 +97,31 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
|
||||
# Pattern to match href attributes in anchor tags
|
||||
# PyPI simple pages have links like:
|
||||
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
|
||||
# Or relative URLs from Artifactory like:
|
||||
# <a href="../../packages/packages/62/35/.../requests-0.10.0.tar.gz#sha256=...">
|
||||
|
||||
def replace_href(match):
|
||||
original_url = match.group(1)
|
||||
|
||||
# Resolve relative URLs to absolute using the upstream base URL
|
||||
if not original_url.startswith(('http://', 'https://')):
|
||||
# Split off fragment before resolving
|
||||
url_without_fragment = original_url.split('#')[0]
|
||||
fragment_part = original_url[len(url_without_fragment):]
|
||||
absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part
|
||||
else:
|
||||
absolute_url = original_url
|
||||
|
||||
# Extract the filename from the URL
|
||||
parsed = urlparse(original_url)
|
||||
parsed = urlparse(absolute_url)
|
||||
path_parts = parsed.path.split('/')
|
||||
filename = path_parts[-1] if path_parts else ''
|
||||
|
||||
# Keep the hash fragment if present
|
||||
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
|
||||
|
||||
# Encode the original URL for safe transmission
|
||||
encoded_url = quote(original_url.split('#')[0], safe='')
|
||||
# Encode the absolute URL (without fragment) for safe transmission
|
||||
encoded_url = quote(absolute_url.split('#')[0], safe='')
|
||||
|
||||
# Build new URL pointing to our proxy
|
||||
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
|
||||
@@ -236,6 +249,7 @@ async def pypi_package_versions(
|
||||
auth = _get_basic_auth(source)
|
||||
|
||||
package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/'
|
||||
final_url = package_url # Track final URL after redirects
|
||||
|
||||
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
|
||||
|
||||
@@ -255,7 +269,9 @@ async def pypi_package_versions(
|
||||
|
||||
# Make redirect URL absolute if needed
|
||||
if not redirect_url.startswith('http'):
|
||||
redirect_url = urljoin(package_url, redirect_url)
|
||||
redirect_url = urljoin(final_url, redirect_url)
|
||||
|
||||
final_url = redirect_url # Update final URL
|
||||
|
||||
response = client.get(
|
||||
redirect_url,
|
||||
@@ -269,7 +285,8 @@ async def pypi_package_versions(
|
||||
content = response.text
|
||||
|
||||
# Rewrite download links to go through our proxy
|
||||
content = _rewrite_package_links(content, base_url, normalized_name)
|
||||
# Pass final_url so relative URLs can be resolved correctly
|
||||
content = _rewrite_package_links(content, base_url, normalized_name, final_url)
|
||||
|
||||
return HTMLResponse(content=content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user