perf: use batch dependency storage in pypi_proxy

This commit is contained in:
Mondo Diaz
2026-02-04 09:52:16 -06:00
parent 8fdb73901e
commit 7ad5a15ef4

View File

@@ -23,7 +23,7 @@ from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
from sqlalchemy.orm import Session
from .database import get_db
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion, ArtifactDependency
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
from .storage import S3Storage, get_storage
from .config import get_env_upstream_sources, get_settings
from .http_client import HttpClientManager
@@ -895,7 +895,7 @@ async def pypi_download_file(
)
db.add(cached_url_record)
# Store extracted dependencies (deduplicate first - METADATA can list same dep under multiple extras)
# Store extracted dependencies using batch operation
if extracted_deps:
# Deduplicate: keep first version constraint seen for each package name
seen_deps: dict[str, str] = {}
@@ -903,22 +903,17 @@ async def pypi_download_file(
if dep_name not in seen_deps:
seen_deps[dep_name] = dep_version if dep_version else "*"
for dep_name, dep_version in seen_deps.items():
# Check if this dependency already exists for this artifact
existing_dep = db.query(ArtifactDependency).filter(
ArtifactDependency.artifact_id == sha256,
ArtifactDependency.dependency_project == "_pypi",
ArtifactDependency.dependency_package == dep_name,
).first()
# Convert to list of tuples for batch insert
deps_to_store = [
("_pypi", dep_name, dep_version)
for dep_name, dep_version in seen_deps.items()
]
if not existing_dep:
dep = ArtifactDependency(
artifact_id=sha256,
dependency_project="_pypi",
dependency_package=dep_name,
version_constraint=dep_version,
)
db.add(dep)
# Batch upsert - handles duplicates with ON CONFLICT DO NOTHING
repo = ArtifactRepository(db)
inserted = repo.batch_upsert_dependencies(sha256, deps_to_store)
if inserted > 0:
logger.debug(f"Stored {inserted} dependencies for {sha256[:12]}...")
db.commit()