perf: use batch dependency storage in pypi_proxy
This commit is contained in:
@@ -23,7 +23,7 @@ from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .database import get_db
|
||||
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion, ArtifactDependency
|
||||
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
|
||||
from .storage import S3Storage, get_storage
|
||||
from .config import get_env_upstream_sources, get_settings
|
||||
from .http_client import HttpClientManager
|
||||
@@ -895,7 +895,7 @@ async def pypi_download_file(
|
||||
)
|
||||
db.add(cached_url_record)
|
||||
|
||||
# Store extracted dependencies (deduplicate first - METADATA can list same dep under multiple extras)
|
||||
# Store extracted dependencies using batch operation
|
||||
if extracted_deps:
|
||||
# Deduplicate: keep first version constraint seen for each package name
|
||||
seen_deps: dict[str, str] = {}
|
||||
@@ -903,22 +903,17 @@ async def pypi_download_file(
|
||||
if dep_name not in seen_deps:
|
||||
seen_deps[dep_name] = dep_version if dep_version else "*"
|
||||
|
||||
for dep_name, dep_version in seen_deps.items():
|
||||
# Check if this dependency already exists for this artifact
|
||||
existing_dep = db.query(ArtifactDependency).filter(
|
||||
ArtifactDependency.artifact_id == sha256,
|
||||
ArtifactDependency.dependency_project == "_pypi",
|
||||
ArtifactDependency.dependency_package == dep_name,
|
||||
).first()
|
||||
# Convert to list of tuples for batch insert
|
||||
deps_to_store = [
|
||||
("_pypi", dep_name, dep_version)
|
||||
for dep_name, dep_version in seen_deps.items()
|
||||
]
|
||||
|
||||
if not existing_dep:
|
||||
dep = ArtifactDependency(
|
||||
artifact_id=sha256,
|
||||
dependency_project="_pypi",
|
||||
dependency_package=dep_name,
|
||||
version_constraint=dep_version,
|
||||
)
|
||||
db.add(dep)
|
||||
# Batch upsert - handles duplicates with ON CONFLICT DO NOTHING
|
||||
repo = ArtifactRepository(db)
|
||||
inserted = repo.batch_upsert_dependencies(sha256, deps_to_store)
|
||||
if inserted > 0:
|
||||
logger.debug(f"Stored {inserted} dependencies for {sha256[:12]}...")
|
||||
|
||||
db.commit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user