Fix duplicate dependency extraction from PyPI wheel METADATA
Wheel METADATA files can list the same dependency multiple times under different extras (e.g., bokeh appears under both [docs] and [bokeh-tests]). This caused unique-constraint violations when storing dependencies. Fix by deduplicating the extracted dependencies by package name, keeping the first version constraint seen for each package, before DB insertion.
This commit is contained in:
@@ -821,9 +821,15 @@ async def pypi_download_file(
|
||||
)
|
||||
db.add(cached_url_record)
|
||||
|
||||
# Store extracted dependencies
|
||||
# Store extracted dependencies (deduplicate first - METADATA can list same dep under multiple extras)
|
||||
if extracted_deps:
|
||||
# Deduplicate: keep first version constraint seen for each package name
|
||||
seen_deps: dict[str, str] = {}
|
||||
for dep_name, dep_version in extracted_deps:
|
||||
if dep_name not in seen_deps:
|
||||
seen_deps[dep_name] = dep_version if dep_version else "*"
|
||||
|
||||
for dep_name, dep_version in seen_deps.items():
|
||||
# Check if this dependency already exists for this artifact
|
||||
existing_dep = db.query(ArtifactDependency).filter(
|
||||
ArtifactDependency.artifact_id == sha256,
|
||||
@@ -836,7 +842,7 @@ async def pypi_download_file(
|
||||
artifact_id=sha256,
|
||||
dependency_project="_pypi",
|
||||
dependency_package=dep_name,
|
||||
version_constraint=dep_version if dep_version else "*",
|
||||
version_constraint=dep_version,
|
||||
)
|
||||
db.add(dep)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user