Fix relative URL handling in PyPI proxy
Artifactory and other registries may return relative URLs in their Simple API responses (e.g., ../../packages/...). The proxy now resolves these to absolute URLs with urljoin(), using the final upstream URL (after any redirects have been followed) as the base, before encoding them in the upstream parameter. This fixes package downloads that failed when the upstream registry used relative URLs in its package index.
This commit is contained in:
@@ -81,7 +81,7 @@ def _get_basic_auth(source) -> Optional[tuple[str, str]]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
|
def _rewrite_package_links(html: str, base_url: str, package_name: str, upstream_base_url: str) -> str:
|
||||||
"""
|
"""
|
||||||
Rewrite download links in a PyPI simple page to go through our proxy.
|
Rewrite download links in a PyPI simple page to go through our proxy.
|
||||||
|
|
||||||
@@ -89,6 +89,7 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
|
|||||||
html: The HTML content from upstream
|
html: The HTML content from upstream
|
||||||
base_url: Our server's base URL
|
base_url: Our server's base URL
|
||||||
package_name: The package name for the URL path
|
package_name: The package name for the URL path
|
||||||
|
upstream_base_url: The upstream URL used to fetch this page (for resolving relative URLs)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
HTML with rewritten download links
|
HTML with rewritten download links
|
||||||
@@ -96,19 +97,31 @@ def _rewrite_package_links(html: str, base_url: str, package_name: str) -> str:
|
|||||||
# Pattern to match href attributes in anchor tags
|
# Pattern to match href attributes in anchor tags
|
||||||
# PyPI simple pages have links like:
|
# PyPI simple pages have links like:
|
||||||
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
|
# <a href="https://files.pythonhosted.org/packages/.../file.tar.gz#sha256=...">file.tar.gz</a>
|
||||||
|
# Or relative URLs from Artifactory like:
|
||||||
|
# <a href="../../packages/packages/62/35/.../requests-0.10.0.tar.gz#sha256=...">
|
||||||
|
|
||||||
def replace_href(match):
|
def replace_href(match):
|
||||||
original_url = match.group(1)
|
original_url = match.group(1)
|
||||||
|
|
||||||
|
# Resolve relative URLs to absolute using the upstream base URL
|
||||||
|
if not original_url.startswith(('http://', 'https://')):
|
||||||
|
# Split off fragment before resolving
|
||||||
|
url_without_fragment = original_url.split('#')[0]
|
||||||
|
fragment_part = original_url[len(url_without_fragment):]
|
||||||
|
absolute_url = urljoin(upstream_base_url, url_without_fragment) + fragment_part
|
||||||
|
else:
|
||||||
|
absolute_url = original_url
|
||||||
|
|
||||||
# Extract the filename from the URL
|
# Extract the filename from the URL
|
||||||
parsed = urlparse(original_url)
|
parsed = urlparse(absolute_url)
|
||||||
path_parts = parsed.path.split('/')
|
path_parts = parsed.path.split('/')
|
||||||
filename = path_parts[-1] if path_parts else ''
|
filename = path_parts[-1] if path_parts else ''
|
||||||
|
|
||||||
# Keep the hash fragment if present
|
# Keep the hash fragment if present
|
||||||
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
|
fragment = f"#{parsed.fragment}" if parsed.fragment else ""
|
||||||
|
|
||||||
# Encode the original URL for safe transmission
|
# Encode the absolute URL (without fragment) for safe transmission
|
||||||
encoded_url = quote(original_url.split('#')[0], safe='')
|
encoded_url = quote(absolute_url.split('#')[0], safe='')
|
||||||
|
|
||||||
# Build new URL pointing to our proxy
|
# Build new URL pointing to our proxy
|
||||||
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
|
new_url = f"{base_url}/pypi/simple/{package_name}/{filename}?upstream={encoded_url}{fragment}"
|
||||||
@@ -236,6 +249,7 @@ async def pypi_package_versions(
|
|||||||
auth = _get_basic_auth(source)
|
auth = _get_basic_auth(source)
|
||||||
|
|
||||||
package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/'
|
package_url = source.url.rstrip('/') + f'/simple/{normalized_name}/'
|
||||||
|
final_url = package_url # Track final URL after redirects
|
||||||
|
|
||||||
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
|
timeout = httpx.Timeout(PROXY_READ_TIMEOUT, connect=PROXY_CONNECT_TIMEOUT)
|
||||||
|
|
||||||
@@ -255,7 +269,9 @@ async def pypi_package_versions(
|
|||||||
|
|
||||||
# Make redirect URL absolute if needed
|
# Make redirect URL absolute if needed
|
||||||
if not redirect_url.startswith('http'):
|
if not redirect_url.startswith('http'):
|
||||||
redirect_url = urljoin(package_url, redirect_url)
|
redirect_url = urljoin(final_url, redirect_url)
|
||||||
|
|
||||||
|
final_url = redirect_url # Update final URL
|
||||||
|
|
||||||
response = client.get(
|
response = client.get(
|
||||||
redirect_url,
|
redirect_url,
|
||||||
@@ -269,7 +285,8 @@ async def pypi_package_versions(
|
|||||||
content = response.text
|
content = response.text
|
||||||
|
|
||||||
# Rewrite download links to go through our proxy
|
# Rewrite download links to go through our proxy
|
||||||
content = _rewrite_package_links(content, base_url, normalized_name)
|
# Pass final_url so relative URLs can be resolved correctly
|
||||||
|
content = _rewrite_package_links(content, base_url, normalized_name, final_url)
|
||||||
|
|
||||||
return HTMLResponse(content=content)
|
return HTMLResponse(content=content)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user