Remove proactive PyPI dependency caching feature

The background task queue that proactively cached package dependencies was
causing server instability and unnecessary growth from caching packages that
no one had requested. The PyPI proxy now caches packages only on demand, when
users request them.

Removed:
- PyPI cache worker (background task queue and worker pool)
- PyPICacheTask model and related database schema
- Cache management API endpoints (/pypi/cache/*)
- Background Jobs admin dashboard
- Dependency extraction and queueing logic

Kept:
- On-demand package caching (still works when users request packages)
- Async httpx for non-blocking downloads (prevents health check failures)
- URL-based cache lookups for deduplication
This commit is contained in:
Mondo Diaz
2026-02-02 16:17:33 -06:00
parent cf7bdccb3a
commit 081cc6df83
11 changed files with 4 additions and 2392 deletions

View File

@@ -803,70 +803,3 @@ class CachedUrl(Base):
return hashlib.sha256(url.encode("utf-8")).hexdigest()
class PyPICacheTask(Base):
    """Task for caching a PyPI package and its dependencies.

    Tracks the status of background caching operations with retry support.
    Used by the PyPI proxy to ensure reliable dependency caching.

    Rows live in the ``pypi_cache_tasks`` table. A task may point at a parent
    task in the same table (``parent_task_id``) and at up to two rows in
    ``artifacts`` (``triggered_by_artifact`` and ``cached_artifact_id``); all
    three foreign keys are set to NULL when the referenced row is deleted.

    ``status`` is restricted by a check constraint to one of ``'pending'``,
    ``'in_progress'``, ``'completed'`` or ``'failed'``.
    """

    __tablename__ = "pypi_cache_tasks"

    # Primary key; a random UUID is generated client-side when not supplied.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # What to cache
    package_name = Column(String(255), nullable=False)
    version_constraint = Column(String(255))  # nullable

    # Origin tracking
    parent_task_id = Column(
        UUID(as_uuid=True),
        ForeignKey("pypi_cache_tasks.id", ondelete="SET NULL"),
    )
    # Constrained to be >= 0 (see check_depth_non_negative below).
    depth = Column(Integer, nullable=False, default=0)
    triggered_by_artifact = Column(
        String(64),
        ForeignKey("artifacts.id", ondelete="SET NULL"),
    )

    # Status
    status = Column(String(20), nullable=False, default="pending")
    # Constrained to be >= 0 (see check_attempts_non_negative below).
    attempts = Column(Integer, nullable=False, default=0)
    max_attempts = Column(Integer, nullable=False, default=3)

    # Results
    cached_artifact_id = Column(
        String(64),
        ForeignKey("artifacts.id", ondelete="SET NULL"),
    )
    error_message = Column(Text)

    # Timing
    # The column is timezone-aware, so the default must be an *aware*
    # datetime. `datetime.utcnow` returns a naive value, which the database
    # driver may interpret in the session's time zone rather than UTC (and it
    # is deprecated since Python 3.12). `datetime.now().astimezone()` yields
    # the same instant with an explicit UTC offset attached and needs only the
    # `datetime` class this block already uses.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now().astimezone(),
    )
    started_at = Column(DateTime(timezone=True))
    completed_at = Column(DateTime(timezone=True))
    next_retry_at = Column(DateTime(timezone=True))

    # Relationships
    # Self-referential link: `remote_side=[id]` marks this table's `id` as the
    # parent end; the backref exposes the reverse collection as `child_tasks`.
    parent_task = relationship(
        "PyPICacheTask",
        remote_side=[id],
        backref="child_tasks",
    )

    __table_args__ = (
        Index("idx_pypi_cache_tasks_status_retry", "status", "next_retry_at"),
        Index("idx_pypi_cache_tasks_package_status", "package_name", "status"),
        Index("idx_pypi_cache_tasks_parent", "parent_task_id"),
        Index("idx_pypi_cache_tasks_triggered_by", "triggered_by_artifact"),
        Index("idx_pypi_cache_tasks_cached_artifact", "cached_artifact_id"),
        Index("idx_pypi_cache_tasks_depth_created", "depth", "created_at"),
        CheckConstraint(
            "status IN ('pending', 'in_progress', 'completed', 'failed')",
            name="check_task_status",
        ),
        CheckConstraint("depth >= 0", name="check_depth_non_negative"),
        CheckConstraint("attempts >= 0", name="check_attempts_non_negative"),
    )