perf: use batch dependency storage in pypi_proxy
This commit is contained in:
@@ -23,7 +23,7 @@ from fastapi.responses import StreamingResponse, HTMLResponse, RedirectResponse
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from .database import get_db
|
from .database import get_db
|
||||||
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion, ArtifactDependency
|
from .models import UpstreamSource, CachedUrl, Artifact, Project, Package, PackageVersion
|
||||||
from .storage import S3Storage, get_storage
|
from .storage import S3Storage, get_storage
|
||||||
from .config import get_env_upstream_sources, get_settings
|
from .config import get_env_upstream_sources, get_settings
|
||||||
from .http_client import HttpClientManager
|
from .http_client import HttpClientManager
|
||||||
@@ -895,7 +895,7 @@ async def pypi_download_file(
|
|||||||
)
|
)
|
||||||
db.add(cached_url_record)
|
db.add(cached_url_record)
|
||||||
|
|
||||||
# Store extracted dependencies (deduplicate first - METADATA can list same dep under multiple extras)
|
# Store extracted dependencies in one batch operation (deduplicate first - METADATA can list same dep under multiple extras)
|
||||||
if extracted_deps:
|
if extracted_deps:
|
||||||
# Deduplicate: keep first version constraint seen for each package name
|
# Deduplicate: keep first version constraint seen for each package name
|
||||||
seen_deps: dict[str, str] = {}
|
seen_deps: dict[str, str] = {}
|
||||||
@@ -903,22 +903,17 @@ async def pypi_download_file(
|
|||||||
if dep_name not in seen_deps:
|
if dep_name not in seen_deps:
|
||||||
seen_deps[dep_name] = dep_version if dep_version else "*"
|
seen_deps[dep_name] = dep_version if dep_version else "*"
|
||||||
|
|
||||||
for dep_name, dep_version in seen_deps.items():
|
# Convert to list of tuples for batch insert
|
||||||
# Check if this dependency already exists for this artifact
|
deps_to_store = [
|
||||||
existing_dep = db.query(ArtifactDependency).filter(
|
("_pypi", dep_name, dep_version)
|
||||||
ArtifactDependency.artifact_id == sha256,
|
for dep_name, dep_version in seen_deps.items()
|
||||||
ArtifactDependency.dependency_project == "_pypi",
|
]
|
||||||
ArtifactDependency.dependency_package == dep_name,
|
|
||||||
).first()
|
|
||||||
|
|
||||||
if not existing_dep:
|
# Batch insert - rows that already exist are skipped (ON CONFLICT DO NOTHING), not updated
|
||||||
dep = ArtifactDependency(
|
repo = ArtifactRepository(db)
|
||||||
artifact_id=sha256,
|
inserted = repo.batch_upsert_dependencies(sha256, deps_to_store)
|
||||||
dependency_project="_pypi",
|
if inserted > 0:
|
||||||
dependency_package=dep_name,
|
logger.debug(f"Stored {inserted} dependencies for {sha256[:12]}...")
|
||||||
version_constraint=dep_version,
|
|
||||||
)
|
|
||||||
db.add(dep)
|
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user