Compare commits
2 Commits
62c77dc16d
...
f992fc540e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f992fc540e | ||
|
|
044a6c1d27 |
@@ -10,11 +10,8 @@ Handles:
|
||||
- Conflict detection
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from typing import List, Dict, Any, Optional, Set, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_
|
||||
@@ -435,8 +432,6 @@ def _resolve_dependency_to_artifact(
|
||||
if not package:
|
||||
return None
|
||||
|
||||
logger.info(f"_resolve_dependency_to_artifact: package_id={package.id}, version={version!r}, tag={tag!r}")
|
||||
|
||||
if version:
|
||||
# Check if this is a version constraint (>=, <, etc.) or exact version
|
||||
if _is_version_constraint(version):
|
||||
@@ -454,7 +449,6 @@ def _resolve_dependency_to_artifact(
|
||||
Artifact.id == pkg_version.artifact_id
|
||||
).first()
|
||||
if artifact:
|
||||
logger.info(f"_resolve_dependency_to_artifact: found by version {version}")
|
||||
return (artifact.id, version, artifact.size)
|
||||
|
||||
# Also check if there's a tag with this exact name
|
||||
@@ -462,7 +456,6 @@ def _resolve_dependency_to_artifact(
|
||||
Tag.package_id == package.id,
|
||||
Tag.name == version,
|
||||
).first()
|
||||
logger.info(f"_resolve_dependency_to_artifact: tag lookup by version name, found={tag_record is not None}")
|
||||
if tag_record:
|
||||
artifact = db.query(Artifact).filter(
|
||||
Artifact.id == tag_record.artifact_id
|
||||
@@ -476,7 +469,6 @@ def _resolve_dependency_to_artifact(
|
||||
Tag.package_id == package.id,
|
||||
Tag.name == tag,
|
||||
).first()
|
||||
logger.info(f"_resolve_dependency_to_artifact: tag lookup by tag param, found={tag_record is not None}")
|
||||
if tag_record:
|
||||
artifact = db.query(Artifact).filter(
|
||||
Artifact.id == tag_record.artifact_id
|
||||
@@ -484,9 +476,6 @@ def _resolve_dependency_to_artifact(
|
||||
if artifact:
|
||||
return (artifact.id, tag, artifact.size)
|
||||
|
||||
# Debug: list actual tags
|
||||
sample_tags = db.query(Tag).filter(Tag.package_id == package.id).limit(3).all()
|
||||
logger.info(f"_resolve_dependency_to_artifact: NOT FOUND. Sample tags: {[t.name for t in sample_tags]}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -670,12 +659,9 @@ def resolve_dependencies(
|
||||
CircularDependencyError: If circular dependencies are detected
|
||||
DependencyConflictError: If conflicting versions are required
|
||||
"""
|
||||
logger.info(f"resolve_dependencies: project={project_name}, package={package_name}, ref={ref!r}")
|
||||
|
||||
# Resolve the initial artifact
|
||||
project = db.query(Project).filter(Project.name == project_name).first()
|
||||
if not project:
|
||||
logger.warning(f"resolve_dependencies: project '{project_name}' not found")
|
||||
raise DependencyNotFoundError(project_name, package_name, ref)
|
||||
|
||||
package = db.query(Package).filter(
|
||||
@@ -683,11 +669,8 @@ def resolve_dependencies(
|
||||
Package.name == package_name,
|
||||
).first()
|
||||
if not package:
|
||||
logger.warning(f"resolve_dependencies: package '{package_name}' not found in project '{project_name}'")
|
||||
raise DependencyNotFoundError(project_name, package_name, ref)
|
||||
|
||||
logger.info(f"resolve_dependencies: found package id={package.id}")
|
||||
|
||||
# Try to find artifact by tag or version
|
||||
resolved = _resolve_dependency_to_artifact(
|
||||
db, project_name, package_name, ref, ref
|
||||
|
||||
@@ -9,13 +9,14 @@ import hashlib
|
||||
import logging
|
||||
import re
|
||||
import tarfile
|
||||
import threading
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
from typing import Optional, List, Tuple
|
||||
from urllib.parse import urljoin, urlparse, quote, unquote
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Response
|
||||
from fastapi.responses import StreamingResponse, HTMLResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -502,6 +503,84 @@ async def pypi_package_versions(
|
||||
)
|
||||
|
||||
|
||||
def _cache_dependency_background(
|
||||
base_url: str,
|
||||
dep_name: str,
|
||||
dep_version: Optional[str],
|
||||
depth: int = 0,
|
||||
max_depth: int = 10,
|
||||
):
|
||||
"""
|
||||
Background task to proactively cache a dependency.
|
||||
|
||||
Fetches the dependency from upstream via our own proxy, which will
|
||||
recursively cache its dependencies as well.
|
||||
"""
|
||||
if depth >= max_depth:
|
||||
logger.warning(f"PyPI proxy: max depth {max_depth} reached caching {dep_name}")
|
||||
return
|
||||
|
||||
try:
|
||||
# Normalize package name for URL (PEP 503)
|
||||
normalized_name = re.sub(r'[-_.]+', '-', dep_name).lower()
|
||||
|
||||
# First, get the simple index page to find available versions
|
||||
simple_url = f"{base_url}/pypi/simple/{normalized_name}/"
|
||||
logger.info(f"PyPI proxy: proactively caching {dep_name} (depth={depth})")
|
||||
|
||||
with httpx.Client(timeout=30.0) as client:
|
||||
response = client.get(simple_url)
|
||||
if response.status_code != 200:
|
||||
logger.warning(f"PyPI proxy: failed to get index for {dep_name}: {response.status_code}")
|
||||
return
|
||||
|
||||
# Parse the HTML to find wheel files
|
||||
html = response.text
|
||||
# Look for wheel files (.whl) - prefer them over sdist
|
||||
wheel_pattern = rf'href="([^"]*{normalized_name}[^"]*\.whl[^"]*)"'
|
||||
matches = re.findall(wheel_pattern, html, re.IGNORECASE)
|
||||
|
||||
if not matches:
|
||||
# Try sdist
|
||||
sdist_pattern = rf'href="([^"]*{normalized_name}[^"]*\.tar\.gz[^"]*)"'
|
||||
matches = re.findall(sdist_pattern, html, re.IGNORECASE)
|
||||
|
||||
if not matches:
|
||||
logger.warning(f"PyPI proxy: no downloadable files found for {dep_name}")
|
||||
return
|
||||
|
||||
# Get the last match (usually the latest version)
|
||||
# The URL might be relative or absolute
|
||||
download_url = matches[-1]
|
||||
if download_url.startswith('/'):
|
||||
download_url = f"{base_url}{download_url}"
|
||||
elif not download_url.startswith('http'):
|
||||
download_url = f"{base_url}/pypi/simple/{normalized_name}/{download_url}"
|
||||
|
||||
# Download the file through our proxy (this will cache it)
|
||||
logger.info(f"PyPI proxy: downloading dependency {dep_name} from {download_url}")
|
||||
response = client.get(download_url)
|
||||
if response.status_code == 200:
|
||||
logger.info(f"PyPI proxy: successfully cached {dep_name}")
|
||||
else:
|
||||
logger.warning(f"PyPI proxy: failed to cache {dep_name}: {response.status_code}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"PyPI proxy: error caching dependency {dep_name}: {e}")
|
||||
|
||||
|
||||
def _start_background_dependency_caching(base_url: str, dependencies: List[Tuple[str, Optional[str]]]):
|
||||
"""Start background threads to cache dependencies."""
|
||||
for dep_name, dep_version in dependencies:
|
||||
# Use a thread to avoid blocking
|
||||
thread = threading.Thread(
|
||||
target=_cache_dependency_background,
|
||||
args=(base_url, dep_name, dep_version),
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
|
||||
|
||||
@router.get("/simple/{package_name}/{filename}")
|
||||
async def pypi_download_file(
|
||||
request: Request,
|
||||
@@ -736,9 +815,17 @@ async def pypi_download_file(
|
||||
|
||||
# Extract and store dependencies
|
||||
dependencies = _extract_dependencies(content, filename)
|
||||
unique_deps = []
|
||||
if dependencies:
|
||||
logger.info(f"PyPI proxy: extracted {len(dependencies)} dependencies from {filename}")
|
||||
# Deduplicate dependencies by package name (keep first occurrence)
|
||||
seen_packages = set()
|
||||
for dep_name, dep_version in dependencies:
|
||||
if dep_name not in seen_packages:
|
||||
seen_packages.add(dep_name)
|
||||
unique_deps.append((dep_name, dep_version))
|
||||
|
||||
logger.info(f"PyPI proxy: extracted {len(unique_deps)} dependencies from {filename} (deduped from {len(dependencies)})")
|
||||
for dep_name, dep_version in unique_deps:
|
||||
# Check if this dependency already exists for this artifact
|
||||
existing_dep = db.query(ArtifactDependency).filter(
|
||||
ArtifactDependency.artifact_id == sha256,
|
||||
@@ -757,6 +844,11 @@ async def pypi_download_file(
|
||||
|
||||
db.commit()
|
||||
|
||||
# Proactively cache dependencies in the background
|
||||
if unique_deps:
|
||||
base_url = str(request.base_url).rstrip("/")
|
||||
_start_background_dependency_caching(base_url, unique_deps)
|
||||
|
||||
# Return the file
|
||||
return Response(
|
||||
content=content,
|
||||
|
||||
Reference in New Issue
Block a user