From 7ad5a15ef4a786d814e8d960fbf814ff396f1061 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Wed, 4 Feb 2026 09:52:16 -0600 Subject: [PATCH] perf: use batch dependency storage in pypi_proxy --- backend/app/pypi_proxy.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index 4d263f8..f84f124 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -23,7 +23,7 @@ from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse from sqlalchemy.orm import Session from .database import get_db -from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion, ArtifactDependency +from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion from .storage import S3Storage, get_storage from .config import get_env_upstream_sources, get_settings from .http_client import HttpClientManager @@ -895,7 +895,7 @@ async def pypi_download_file( ) db.add(cached_url_record) - # Store extracted dependencies (deduplicate first - METADATA can list same dep under multiple extras) + # Store extracted dependencies using batch operation if extracted_deps: # Deduplicate: keep first version constraint seen for each package name seen_deps: dict[str, str] = {} @@ -903,22 +903,17 @@ async def pypi_download_file( if dep_name not in seen_deps: seen_deps[dep_name] = dep_version if dep_version else "*" - for dep_name, dep_version in seen_deps.items(): - # Check if this dependency already exists for this artifact - existing_dep = db.query(ArtifactDependency).filter( - ArtifactDependency.artifact_id == sha256, - ArtifactDependency.dependency_project == "_pypi", - ArtifactDependency.dependency_package == dep_name, - ).first() + # Convert to list of tuples for batch insert + deps_to_store = [ + ("_pypi", dep_name, dep_version) + for dep_name, dep_version in seen_deps.items() + ] - if not existing_dep: - dep = ArtifactDependency( - artifact_id=sha256, - dependency_project="_pypi", - dependency_package=dep_name, - version_constraint=dep_version, - ) - db.add(dep) + # Batch upsert - handles duplicates with ON CONFLICT DO NOTHING + repo = ArtifactRepository(db) + inserted = repo.batch_upsert_dependencies(sha256, deps_to_store) + if inserted > 0: + logger.debug(f"Stored {inserted} dependencies for {sha256[:12]}...") db.commit()