Compare commits

2 Commits: 62c77dc16d ... f992fc540e

Commits: f992fc540e, 044a6c1d27
```diff
@@ -10,11 +10,8 @@ Handles:
 - Conflict detection
 """
 
-import logging
 import re
 import yaml
-
-logger = logging.getLogger(__name__)
 from typing import List, Dict, Any, Optional, Set, Tuple
 from sqlalchemy.orm import Session
 from sqlalchemy import and_
```
```diff
@@ -435,8 +432,6 @@ def _resolve_dependency_to_artifact(
     if not package:
         return None
 
-    logger.info(f"_resolve_dependency_to_artifact: package_id={package.id}, version={version!r}, tag={tag!r}")
-
     if version:
         # Check if this is a version constraint (>=, <, etc.) or exact version
         if _is_version_constraint(version):
```
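For orientation: `_is_version_constraint` itself is not part of this diff. Going only by the comment's examples (`>=`, `<`, etc.), a plausible sketch of such a predicate might look like this — hypothetical, the real helper may differ:

```python
import re

# Hypothetical sketch -- the actual _is_version_constraint is not in this diff.
_CONSTRAINT_RE = re.compile(r"^\s*(>=|<=|==|!=|~=|>|<)")

def _is_version_constraint(version: str) -> bool:
    # Treat anything starting with a comparison operator as a constraint
    # (e.g. ">=1.2"), anything else as an exact version string (e.g. "1.2.3").
    return bool(_CONSTRAINT_RE.match(version))
```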
```diff
@@ -454,7 +449,6 @@ def _resolve_dependency_to_artifact(
                 Artifact.id == pkg_version.artifact_id
             ).first()
             if artifact:
-                logger.info(f"_resolve_dependency_to_artifact: found by version {version}")
                 return (artifact.id, version, artifact.size)
 
         # Also check if there's a tag with this exact name
```
```diff
@@ -462,7 +456,6 @@ def _resolve_dependency_to_artifact(
             Tag.package_id == package.id,
             Tag.name == version,
         ).first()
-        logger.info(f"_resolve_dependency_to_artifact: tag lookup by version name, found={tag_record is not None}")
         if tag_record:
             artifact = db.query(Artifact).filter(
                 Artifact.id == tag_record.artifact_id
```
```diff
@@ -476,7 +469,6 @@ def _resolve_dependency_to_artifact(
             Tag.package_id == package.id,
             Tag.name == tag,
         ).first()
-        logger.info(f"_resolve_dependency_to_artifact: tag lookup by tag param, found={tag_record is not None}")
         if tag_record:
             artifact = db.query(Artifact).filter(
                 Artifact.id == tag_record.artifact_id
```
```diff
@@ -484,9 +476,6 @@ def _resolve_dependency_to_artifact(
             if artifact:
                 return (artifact.id, tag, artifact.size)
 
-    # Debug: list actual tags
-    sample_tags = db.query(Tag).filter(Tag.package_id == package.id).limit(3).all()
-    logger.info(f"_resolve_dependency_to_artifact: NOT FOUND. Sample tags: {[t.name for t in sample_tags]}")
     return None
 
 
```
```diff
@@ -670,12 +659,9 @@ def resolve_dependencies(
         CircularDependencyError: If circular dependencies are detected
         DependencyConflictError: If conflicting versions are required
     """
-    logger.info(f"resolve_dependencies: project={project_name}, package={package_name}, ref={ref!r}")
-
     # Resolve the initial artifact
     project = db.query(Project).filter(Project.name == project_name).first()
     if not project:
-        logger.warning(f"resolve_dependencies: project '{project_name}' not found")
         raise DependencyNotFoundError(project_name, package_name, ref)
 
     package = db.query(Package).filter(
```
```diff
@@ -683,11 +669,8 @@ def resolve_dependencies(
         Package.name == package_name,
     ).first()
     if not package:
-        logger.warning(f"resolve_dependencies: package '{package_name}' not found in project '{project_name}'")
         raise DependencyNotFoundError(project_name, package_name, ref)
 
-    logger.info(f"resolve_dependencies: found package id={package.id}")
-
     # Try to find artifact by tag or version
     resolved = _resolve_dependency_to_artifact(
         db, project_name, package_name, ref, ref
```
The hunks above are all in the dependency-resolution module. The remaining hunks are in a second file, the PyPI proxy router.
```diff
@@ -9,13 +9,14 @@ import hashlib
 import logging
 import re
 import tarfile
+import threading
 import zipfile
 from io import BytesIO
 from typing import Optional, List, Tuple
 from urllib.parse import urljoin, urlparse, quote, unquote
 
 import httpx
-from fastapi import APIRouter, Depends, HTTPException, Request, Response
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Response
 from fastapi.responses import StreamingResponse, HTMLResponse
 from sqlalchemy.orm import Session
 
```
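One detail worth noting: the changed import line pulls in FastAPI's `BackgroundTasks` alongside the new `import threading`, yet the helpers added in the next hunk spawn daemon threads directly. For comparison, a `BackgroundTasks`-based wiring would look roughly like this — a hypothetical sketch, not what this diff implements; the base URL and route body are placeholders:

```python
from typing import Optional
from fastapi import APIRouter, BackgroundTasks

router = APIRouter()

def _cache_dependency_background(base_url: str, dep_name: str, dep_version: Optional[str]) -> None:
    ...  # stand-in for the helper defined in the next hunk

@router.get("/simple/{package_name}/{filename}")
async def download(package_name: str, filename: str, background_tasks: BackgroundTasks):
    # FastAPI runs queued tasks after the response has been sent, so the
    # caching work would not delay the client's download.
    background_tasks.add_task(_cache_dependency_background, "http://proxy.example", package_name, None)
    return {"cached": filename}
```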
```diff
@@ -502,6 +503,84 @@ async def pypi_package_versions(
     )
 
 
+def _cache_dependency_background(
+    base_url: str,
+    dep_name: str,
+    dep_version: Optional[str],
+    depth: int = 0,
+    max_depth: int = 10,
+):
+    """
+    Background task to proactively cache a dependency.
+
+    Fetches the dependency from upstream via our own proxy, which will
+    recursively cache its dependencies as well.
+    """
+    if depth >= max_depth:
+        logger.warning(f"PyPI proxy: max depth {max_depth} reached caching {dep_name}")
+        return
+
+    try:
+        # Normalize package name for URL (PEP 503)
+        normalized_name = re.sub(r'[-_.]+', '-', dep_name).lower()
+
+        # First, get the simple index page to find available versions
+        simple_url = f"{base_url}/pypi/simple/{normalized_name}/"
+        logger.info(f"PyPI proxy: proactively caching {dep_name} (depth={depth})")
+
+        with httpx.Client(timeout=30.0) as client:
+            response = client.get(simple_url)
+            if response.status_code != 200:
+                logger.warning(f"PyPI proxy: failed to get index for {dep_name}: {response.status_code}")
+                return
+
+            # Parse the HTML to find wheel files
+            html = response.text
+            # Look for wheel files (.whl) - prefer them over sdist
+            wheel_pattern = rf'href="([^"]*{normalized_name}[^"]*\.whl[^"]*)"'
+            matches = re.findall(wheel_pattern, html, re.IGNORECASE)
+
+            if not matches:
+                # Try sdist
+                sdist_pattern = rf'href="([^"]*{normalized_name}[^"]*\.tar\.gz[^"]*)"'
+                matches = re.findall(sdist_pattern, html, re.IGNORECASE)
+
+            if not matches:
+                logger.warning(f"PyPI proxy: no downloadable files found for {dep_name}")
+                return
+
+            # Get the last match (usually the latest version)
+            # The URL might be relative or absolute
+            download_url = matches[-1]
+            if download_url.startswith('/'):
+                download_url = f"{base_url}{download_url}"
+            elif not download_url.startswith('http'):
+                download_url = f"{base_url}/pypi/simple/{normalized_name}/{download_url}"
+
+            # Download the file through our proxy (this will cache it)
+            logger.info(f"PyPI proxy: downloading dependency {dep_name} from {download_url}")
+            response = client.get(download_url)
+            if response.status_code == 200:
+                logger.info(f"PyPI proxy: successfully cached {dep_name}")
+            else:
+                logger.warning(f"PyPI proxy: failed to cache {dep_name}: {response.status_code}")
+
+    except Exception as e:
+        logger.warning(f"PyPI proxy: error caching dependency {dep_name}: {e}")
+
+
+def _start_background_dependency_caching(base_url: str, dependencies: List[Tuple[str, Optional[str]]]):
+    """Start background threads to cache dependencies."""
+    for dep_name, dep_version in dependencies:
+        # Use a thread to avoid blocking
+        thread = threading.Thread(
+            target=_cache_dependency_background,
+            args=(base_url, dep_name, dep_version),
+            daemon=True,
+        )
+        thread.start()
+
+
 @router.get("/simple/{package_name}/{filename}")
 async def pypi_download_file(
     request: Request,
```
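The name normalization above is the PEP 503 rule: runs of `-`, `_`, and `.` collapse to a single hyphen and the result is lowercased. A standalone check of the same regular expression used in the diff:

```python
import re

def normalize(name: str) -> str:
    # PEP 503 normalization, same expression as in the diff above:
    # collapse runs of '-', '_', '.' into one hyphen, then lowercase.
    return re.sub(r"[-_.]+", "-", name).lower()

assert normalize("My_Package..Name") == "my-package-name"
assert normalize("typing_extensions") == "typing-extensions"
```

Note also that `download_url = matches[-1]` takes the last link on the index page, which, as the code's own comment concedes, is only usually the latest version — a heuristic rather than a guarantee.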
```diff
@@ -736,9 +815,17 @@ async def pypi_download_file(
 
         # Extract and store dependencies
         dependencies = _extract_dependencies(content, filename)
+        unique_deps = []
         if dependencies:
-            logger.info(f"PyPI proxy: extracted {len(dependencies)} dependencies from {filename}")
+            # Deduplicate dependencies by package name (keep first occurrence)
+            seen_packages = set()
             for dep_name, dep_version in dependencies:
+                if dep_name not in seen_packages:
+                    seen_packages.add(dep_name)
+                    unique_deps.append((dep_name, dep_version))
+
+            logger.info(f"PyPI proxy: extracted {len(unique_deps)} dependencies from {filename} (deduped from {len(dependencies)})")
+            for dep_name, dep_version in unique_deps:
                 # Check if this dependency already exists for this artifact
                 existing_dep = db.query(ArtifactDependency).filter(
                     ArtifactDependency.artifact_id == sha256,
```
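The deduplication keeps the first `(name, version)` pair seen for each package name. An equivalent formulation — a sketch for illustration, not the commit's code — leans on dict insertion order:

```python
from typing import List, Optional, Tuple

def dedupe_first(deps: List[Tuple[str, Optional[str]]]) -> List[Tuple[str, Optional[str]]]:
    # setdefault keeps the first pair per name; later duplicates are ignored.
    seen: dict = {}
    for name, version in deps:
        seen.setdefault(name, (name, version))
    return list(seen.values())

assert dedupe_first([("a", ">=1"), ("b", None), ("a", "==2")]) == [("a", ">=1"), ("b", None)]
```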
```diff
@@ -757,6 +844,11 @@ async def pypi_download_file(
 
         db.commit()
 
+        # Proactively cache dependencies in the background
+        if unique_deps:
+            base_url = str(request.base_url).rstrip("/")
+            _start_background_dependency_caching(base_url, unique_deps)
+
         # Return the file
         return Response(
             content=content,
```
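Because the caching threads are started with `daemon=True`, any in-flight downloads are simply dropped when the server process exits rather than blocking shutdown. A minimal standalone illustration of that semantics (not from the diff):

```python
import threading
import time

def work() -> None:
    time.sleep(60)  # stands in for a long-running cache download

t = threading.Thread(target=work, daemon=True)
t.start()
# The main thread exits immediately; the daemon thread dies with the
# process, so the in-flight "download" is abandoned.
```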