Add robust PyPI dependency caching with task queue
Replace unbounded thread spawning with a managed worker pool (sketched below):
- New pypi_cache_tasks table tracks caching jobs
- Thread pool with 5 workers (configurable via ORCHARD_PYPI_CACHE_WORKERS)
- Automatic retries with exponential backoff (30s, 60s, then fail)
- Deduplication to prevent duplicate caching attempts
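
A minimal sketch of that queue behaviour, assuming a Python backend; the names (CacheTaskQueue, enqueue, cache_one_package) are illustrative and not the actual Orchard code, which persists task state in the pypi_cache_tasks table added below rather than in memory.

# Illustrative sketch only, not the actual Orchard implementation: a
# bounded worker pool with in-memory deduplication and exponential
# backoff (30s, 60s, then fail).
import os
import time
from concurrent.futures import ThreadPoolExecutor

WORKERS = int(os.environ.get("ORCHARD_PYPI_CACHE_WORKERS", "5"))
BACKOFF_SECONDS = [30, 60]   # waits between attempts; after the last one, give up
MAX_ATTEMPTS = 3

class CacheTaskQueue:
    def __init__(self, cache_one_package):
        self._pool = ThreadPoolExecutor(max_workers=WORKERS)
        self._cache_one_package = cache_one_package
        self._queued = set()  # package names pending or in progress

    def enqueue(self, package_name):
        # Deduplication: ignore packages that are already queued or running.
        if package_name in self._queued:
            return
        self._queued.add(package_name)
        self._pool.submit(self._run, package_name)

    def _run(self, package_name):
        for attempt in range(MAX_ATTEMPTS):
            try:
                self._cache_one_package(package_name)
                break
            except Exception:
                if attempt + 1 >= MAX_ATTEMPTS:
                    break  # attempts exhausted; the task stays failed
                time.sleep(BACKOFF_SECONDS[attempt])
        self._queued.discard(package_name)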
New API endpoints for visibility and control (usage example below):
- GET /pypi/cache/status - queue health summary
- GET /pypi/cache/failed - list failed tasks with errors
- POST /pypi/cache/retry/{package} - retry single package
- POST /pypi/cache/retry-all - retry all failed packages
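
Hypothetical client-side usage of these endpoints; the base URL, the example package name, and the response shapes are assumptions, not taken from the Orchard API docs.

# Hypothetical usage of the new endpoints with the requests library.
import requests

BASE = "http://localhost:8080"   # assumed host/port

print(requests.get(f"{BASE}/pypi/cache/status").json())   # queue health summary
print(requests.get(f"{BASE}/pypi/cache/failed").json())   # failed tasks with errors

requests.post(f"{BASE}/pypi/cache/retry/numpy")            # retry a single package
requests.post(f"{BASE}/pypi/cache/retry-all")              # retry all failed packages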
This fixes silent failures in background dependency caching where
packages would fail to cache without any tracking or retry mechanism.
migrations/011_pypi_cache_tasks.sql (new file, 55 lines)
@@ -0,0 +1,55 @@
-- Migration: 011_pypi_cache_tasks
-- Description: Add table for tracking PyPI dependency caching tasks
-- Date: 2026-02-02

-- Table for tracking PyPI cache tasks with retry support
CREATE TABLE pypi_cache_tasks (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- What to cache
    package_name VARCHAR(255) NOT NULL,
    version_constraint VARCHAR(255),

    -- Origin tracking
    parent_task_id UUID REFERENCES pypi_cache_tasks(id) ON DELETE SET NULL,
    depth INTEGER NOT NULL DEFAULT 0,
    triggered_by_artifact VARCHAR(64) REFERENCES artifacts(id) ON DELETE SET NULL,

    -- Status
    status VARCHAR(20) NOT NULL DEFAULT 'pending',
    attempts INTEGER NOT NULL DEFAULT 0,
    max_attempts INTEGER NOT NULL DEFAULT 3,

    -- Results
    cached_artifact_id VARCHAR(64) REFERENCES artifacts(id) ON DELETE SET NULL,
    error_message TEXT,

    -- Timing
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    started_at TIMESTAMP WITH TIME ZONE,
    completed_at TIMESTAMP WITH TIME ZONE,
    next_retry_at TIMESTAMP WITH TIME ZONE,

    -- Constraints
    CONSTRAINT check_task_status CHECK (status IN ('pending', 'in_progress', 'completed', 'failed')),
    CONSTRAINT check_depth_non_negative CHECK (depth >= 0),
    CONSTRAINT check_attempts_non_negative CHECK (attempts >= 0)
);

-- Index for finding tasks ready to process (pending with retry time passed)
CREATE INDEX idx_pypi_cache_tasks_status_retry ON pypi_cache_tasks(status, next_retry_at);

-- Index for deduplication check (is this package already queued?)
CREATE INDEX idx_pypi_cache_tasks_package_status ON pypi_cache_tasks(package_name, status);

-- Index for tracing dependency chains
CREATE INDEX idx_pypi_cache_tasks_parent ON pypi_cache_tasks(parent_task_id);

-- Index for finding tasks by artifact that triggered them
CREATE INDEX idx_pypi_cache_tasks_triggered_by ON pypi_cache_tasks(triggered_by_artifact);

-- Index for finding tasks by cached artifact
CREATE INDEX idx_pypi_cache_tasks_cached_artifact ON pypi_cache_tasks(cached_artifact_id);

-- Index for sorting by depth and creation time (processing order)
CREATE INDEX idx_pypi_cache_tasks_depth_created ON pypi_cache_tasks(depth, created_at);
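
For illustration, a sketch of the queries a worker might run against this schema to claim ready tasks and schedule retries; the SQL, the connection string, and the psycopg2 wiring are assumptions derived from the migration above, not the actual Orchard worker code.

# Assumed worker-side queries against pypi_cache_tasks (PostgreSQL + psycopg2).
import psycopg2

CLAIM_NEXT_TASK = """
UPDATE pypi_cache_tasks
   SET status = 'in_progress', started_at = NOW(), attempts = attempts + 1
 WHERE id = (SELECT id
               FROM pypi_cache_tasks
              WHERE status = 'pending'
                AND (next_retry_at IS NULL OR next_retry_at <= NOW())
              ORDER BY depth, created_at   -- processing order, per idx_pypi_cache_tasks_depth_created
              LIMIT 1
              FOR UPDATE SKIP LOCKED)
RETURNING id, package_name, version_constraint, attempts;
"""

SCHEDULE_RETRY_OR_FAIL = """
UPDATE pypi_cache_tasks
   SET status = CASE WHEN attempts >= max_attempts THEN 'failed' ELSE 'pending' END,
       error_message = %s,
       next_retry_at = NOW() + make_interval(secs => 30 * power(2, attempts - 1))
 WHERE id = %s;
"""

def claim_next_task(conn):
    # Atomically marks one ready task in_progress and returns it, or None.
    with conn.cursor() as cur:
        cur.execute(CLAIM_NEXT_TASK)
        row = cur.fetchone()
    conn.commit()
    return row

if __name__ == "__main__":
    # Connection string is an assumption for illustration.
    with psycopg2.connect("dbname=orchard") as conn:
        print(claim_next_task(conn))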