From 19e034ef569c9e9eca444fb3a364897125db69b5 Mon Sep 17 00:00:00 2001 From: Mondo Diaz Date: Tue, 3 Feb 2026 17:43:38 -0600 Subject: [PATCH] Fix duplicate dependency extraction from PyPI wheel METADATA Wheel METADATA files can list the same dependency multiple times under different extras (e.g., bokeh appears under [docs] and [bokeh-tests]). This caused unique constraint violations when storing dependencies. Fix by deduplicating extracted deps before DB insertion. --- backend/app/pypi_proxy.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/backend/app/pypi_proxy.py b/backend/app/pypi_proxy.py index e5c37e1..1de988b 100644 --- a/backend/app/pypi_proxy.py +++ b/backend/app/pypi_proxy.py @@ -821,9 +821,15 @@ async def pypi_download_file( ) db.add(cached_url_record) - # Store extracted dependencies + # Store extracted dependencies (deduplicate first - METADATA can list same dep under multiple extras) if extracted_deps: + # Deduplicate: keep first version constraint seen for each package name + seen_deps: dict[str, str] = {} for dep_name, dep_version in extracted_deps: + if dep_name not in seen_deps: + seen_deps[dep_name] = dep_version if dep_version else "*" + + for dep_name, dep_version in seen_deps.items(): # Check if this dependency already exists for this artifact existing_dep = db.query(ArtifactDependency).filter( ArtifactDependency.artifact_id == sha256, @@ -836,7 +842,7 @@ async def pypi_download_file( artifact_id=sha256, dependency_project="_pypi", dependency_package=dep_name, - version_constraint=dep_version if dep_version else "*", + version_constraint=dep_version, ) db.add(dep)