Fix httpx.Timeout configuration in PyPI proxy
262  backend/app/cache_service.py  Normal file
@@ -0,0 +1,262 @@
"""
Redis-backed caching service with category-aware TTL and invalidation.

Provides:
- Immutable caching for artifact data (hermetic builds)
- TTL-based caching for discovery data
- Event-driven invalidation for config changes
- Graceful fallback when Redis unavailable
"""

import logging
from enum import Enum
from typing import Optional

from .config import Settings

logger = logging.getLogger(__name__)


class CacheCategory(Enum):
    """
    Cache categories with different TTL and invalidation rules.

    Immutable (cache forever):
    - ARTIFACT_METADATA: Artifact info by SHA256
    - ARTIFACT_DEPENDENCIES: Extracted deps by SHA256
    - DEPENDENCY_RESOLUTION: Resolution results by input hash

    Mutable (TTL + event invalidation):
    - UPSTREAM_SOURCES: Upstream config, invalidate on DB change
    - PACKAGE_INDEX: PyPI/npm index pages, TTL only
    - PACKAGE_VERSIONS: Version listings, TTL only
    """

    # Immutable - cache forever (hermetic builds)
    ARTIFACT_METADATA = "artifact"
    ARTIFACT_DEPENDENCIES = "deps"
    DEPENDENCY_RESOLUTION = "resolve"

    # Mutable - TTL + event invalidation
    UPSTREAM_SOURCES = "upstream"
    PACKAGE_INDEX = "index"
    PACKAGE_VERSIONS = "versions"


def get_category_ttl(category: CacheCategory, settings: Settings) -> Optional[int]:
    """
    Get TTL for a cache category.

    Returns:
        TTL in seconds, or None for no expiry (immutable).
    """
    ttl_map = {
        # Immutable - no TTL
        CacheCategory.ARTIFACT_METADATA: None,
        CacheCategory.ARTIFACT_DEPENDENCIES: None,
        CacheCategory.DEPENDENCY_RESOLUTION: None,
        # Mutable - configurable TTL
        CacheCategory.UPSTREAM_SOURCES: settings.cache_ttl_upstream,
        CacheCategory.PACKAGE_INDEX: settings.cache_ttl_index,
        CacheCategory.PACKAGE_VERSIONS: settings.cache_ttl_versions,
    }
    return ttl_map.get(category)


class CacheService:
    """
    Redis-backed caching with category-aware TTL.

    Key format: orchard:{category}:{protocol}:{identifier}
    Example: orchard:deps:pypi:abc123def456

    When Redis is disabled or unavailable, operations gracefully
    return None/no-op to allow the application to function without caching.
    """

    def __init__(self, settings: Settings):
        self._settings = settings
        self._enabled = settings.redis_enabled
        self._redis: Optional["redis.asyncio.Redis"] = None
        self._started = False

    async def startup(self) -> None:
        """Initialize Redis connection. Called by FastAPI lifespan."""
        if self._started:
            return

        if not self._enabled:
            logger.info("CacheService disabled (redis_enabled=False)")
            self._started = True
            return

        try:
            import redis.asyncio as redis

            logger.info(
                f"Connecting to Redis at {self._settings.redis_host}:"
                f"{self._settings.redis_port}/{self._settings.redis_db}"
            )

            self._redis = redis.Redis(
                host=self._settings.redis_host,
                port=self._settings.redis_port,
                db=self._settings.redis_db,
                password=self._settings.redis_password,
                decode_responses=False,  # We handle bytes
            )

            # Test connection
            await self._redis.ping()
            logger.info("CacheService connected to Redis")

        except ImportError:
            logger.warning("redis package not installed, caching disabled")
            self._enabled = False
        except Exception as e:
            logger.warning(f"Redis connection failed, caching disabled: {e}")
            self._enabled = False
            self._redis = None

        self._started = True

    async def shutdown(self) -> None:
        """Close Redis connection. Called by FastAPI lifespan."""
        if not self._started:
            return

        if self._redis:
            await self._redis.aclose()
            self._redis = None

        self._started = False
        logger.info("CacheService shutdown complete")

    @staticmethod
    def _make_key(category: CacheCategory, protocol: str, identifier: str) -> str:
        """Build namespaced cache key."""
        return f"orchard:{category.value}:{protocol}:{identifier}"

    async def get(
        self,
        category: CacheCategory,
        key: str,
        protocol: str = "default",
    ) -> Optional[bytes]:
        """
        Get cached value.

        Args:
            category: Cache category for TTL rules
            key: Unique identifier within category
            protocol: Protocol namespace (pypi, npm, etc.)

        Returns:
            Cached bytes or None if not found/disabled.
        """
        if not self._enabled or not self._redis:
            return None

        try:
            full_key = self._make_key(category, protocol, key)
            return await self._redis.get(full_key)
        except Exception as e:
            logger.warning(f"Cache get failed for {key}: {e}")
            return None

    async def set(
        self,
        category: CacheCategory,
        key: str,
        value: bytes,
        protocol: str = "default",
    ) -> None:
        """
        Set cached value with category-appropriate TTL.

        Args:
            category: Cache category for TTL rules
            key: Unique identifier within category
            value: Bytes to cache
            protocol: Protocol namespace (pypi, npm, etc.)
        """
        if not self._enabled or not self._redis:
            return

        try:
            full_key = self._make_key(category, protocol, key)
            ttl = get_category_ttl(category, self._settings)

            if ttl is None:
                await self._redis.set(full_key, value)
            else:
                await self._redis.setex(full_key, ttl, value)

        except Exception as e:
            logger.warning(f"Cache set failed for {key}: {e}")

    async def delete(
        self,
        category: CacheCategory,
        key: str,
        protocol: str = "default",
    ) -> None:
        """Delete a specific cache entry."""
        if not self._enabled or not self._redis:
            return

        try:
            full_key = self._make_key(category, protocol, key)
            await self._redis.delete(full_key)
        except Exception as e:
            logger.warning(f"Cache delete failed for {key}: {e}")

    async def invalidate_pattern(
        self,
        category: CacheCategory,
        pattern: str = "*",
        protocol: str = "default",
    ) -> int:
        """
        Invalidate all entries matching pattern.

        Args:
            category: Cache category
            pattern: Glob pattern for keys (default "*" = all in category)
            protocol: Protocol namespace

        Returns:
            Number of keys deleted.
        """
        if not self._enabled or not self._redis:
            return 0

        try:
            full_pattern = self._make_key(category, protocol, pattern)
            keys = []
            async for key in self._redis.scan_iter(match=full_pattern):
                keys.append(key)

            if keys:
                return await self._redis.delete(*keys)
            return 0

        except Exception as e:
            logger.warning(f"Cache invalidate failed for pattern {pattern}: {e}")
            return 0

    async def ping(self) -> bool:
        """Check if Redis is connected and responding."""
        if not self._enabled or not self._redis:
            return False

        try:
            await self._redis.ping()
            return True
        except Exception:
            return False

    @property
    def enabled(self) -> bool:
        """Check if caching is enabled."""
        return self._enabled
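The category picked by the caller decides both the key prefix and whether a TTL is applied, so call sites only choose a category, a key, and a protocol namespace. A minimal usage sketch, assuming a configured Settings instance and a reachable Redis; the import paths and values are illustrative, not part of this commit:

import asyncio

from app.config import Settings            # assumed import path
from app.cache_service import CacheCategory, CacheService


async def demo() -> None:
    cache = CacheService(Settings())
    await cache.startup()

    # Immutable category: stored without expiry, keyed by content hash.
    await cache.set(
        CacheCategory.ARTIFACT_DEPENDENCIES, "abc123def456", b'{"deps": []}', protocol="pypi"
    )

    # Mutable category: stored with cache_ttl_index seconds of TTL.
    await cache.set(CacheCategory.PACKAGE_INDEX, "requests", b"<html>...</html>", protocol="pypi")

    cached = await cache.get(CacheCategory.ARTIFACT_DEPENDENCIES, "abc123def456", protocol="pypi")
    print(cached)  # b'{"deps": []}', or None when Redis is disabled or unreachable

    # Config change: drop every cached upstream-source entry for pypi.
    await cache.invalidate_pattern(CacheCategory.UPSTREAM_SOURCES, protocol="pypi")

    await cache.shutdown()


asyncio.run(demo())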
@@ -22,8 +22,8 @@ class Settings(BaseSettings):
    database_sslmode: str = "disable"

    # Database connection pool settings
    database_pool_size: int = 5  # Number of connections to keep open
    database_max_overflow: int = 10  # Max additional connections beyond pool_size
    database_pool_size: int = 20  # Number of connections to keep open
    database_max_overflow: int = 30  # Max additional connections beyond pool_size
    database_pool_timeout: int = 30  # Seconds to wait for a connection from pool
    database_pool_recycle: int = (
        1800  # Recycle connections after this many seconds (30 min)
@@ -51,6 +51,26 @@ class Settings(BaseSettings):
    presigned_url_expiry: int = (
        3600  # Presigned URL expiry in seconds (default: 1 hour)
    )
    pypi_download_mode: str = "redirect"  # "redirect" (to S3) or "proxy" (stream through Orchard)

    # HTTP Client pool settings
    http_max_connections: int = 100  # Max connections per pool
    http_max_keepalive: int = 20  # Keep-alive connections
    http_connect_timeout: float = 30.0  # Connection timeout seconds
    http_read_timeout: float = 60.0  # Read timeout seconds
    http_worker_threads: int = 32  # Thread pool for blocking ops

    # Redis cache settings
    redis_host: str = "localhost"
    redis_port: int = 6379
    redis_db: int = 0
    redis_password: Optional[str] = None
    redis_enabled: bool = True  # Set False to disable caching

    # Cache TTL settings (seconds, 0 = no expiry)
    cache_ttl_index: int = 300  # Package index pages: 5 min
    cache_ttl_versions: int = 300  # Version listings: 5 min
    cache_ttl_upstream: int = 3600  # Upstream source config: 1 hour

    # Logging settings
    log_level: str = "INFO"  # DEBUG, INFO, WARNING, ERROR, CRITICAL
@@ -64,6 +84,15 @@ class Settings(BaseSettings):
    # Global cache settings override (None = use DB value, True/False = override DB)
    cache_auto_create_system_projects: Optional[bool] = None  # Override auto_create_system_projects

    # PyPI Cache Worker settings
    pypi_cache_workers: int = 5  # Number of concurrent cache workers
    pypi_cache_max_depth: int = 10  # Maximum recursion depth for dependency caching
    pypi_cache_max_attempts: int = 3  # Maximum retry attempts for failed cache tasks

    # Auto-fetch configuration for dependency resolution
    auto_fetch_dependencies: bool = False  # Server default for auto_fetch parameter
    auto_fetch_timeout: int = 300  # Total timeout for auto-fetch resolution in seconds

    # JWT Authentication settings (optional, for external identity providers)
    jwt_enabled: bool = False  # Enable JWT token validation
    jwt_secret: str = ""  # Secret key for HS256, or leave empty for RS256 with JWKS
@@ -88,6 +117,24 @@ class Settings(BaseSettings):
    def is_production(self) -> bool:
        return self.env.lower() == "production"

    @property
    def PORT(self) -> int:
        """Alias for server_port for compatibility."""
        return self.server_port

    # Uppercase aliases for PyPI cache settings (for backward compatibility)
    @property
    def PYPI_CACHE_WORKERS(self) -> int:
        return self.pypi_cache_workers

    @property
    def PYPI_CACHE_MAX_DEPTH(self) -> int:
        return self.pypi_cache_max_depth

    @property
    def PYPI_CACHE_MAX_ATTEMPTS(self) -> int:
        return self.pypi_cache_max_attempts

    class Config:
        env_prefix = "ORCHARD_"
        case_sensitive = False
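Because the nested Config class sets env_prefix = "ORCHARD_" and case_sensitive = False, every field above can be overridden from the environment through pydantic's BaseSettings machinery. A small sketch of what that looks like in practice; the module path and the values chosen are examples only:

import os

# Read by BaseSettings at instantiation time thanks to env_prefix="ORCHARD_".
os.environ["ORCHARD_HTTP_READ_TIMEOUT"] = "120.0"   # e.g. slower upstream mirrors
os.environ["ORCHARD_REDIS_ENABLED"] = "false"       # run without the cache layer
os.environ["ORCHARD_DATABASE_POOL_SIZE"] = "40"     # larger worker fleet

from app.config import Settings  # assumed import path

settings = Settings()
assert settings.http_read_timeout == 120.0
assert settings.redis_enabled is False
assert settings.database_pool_size == 40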
@@ -220,17 +220,7 @@ def _run_migrations():
|
||||
CREATE UNIQUE INDEX idx_packages_project_name ON packages(project_id, name);
|
||||
END IF;
|
||||
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_name'
|
||||
) THEN
|
||||
CREATE UNIQUE INDEX idx_tags_package_name ON tags(package_id, name);
|
||||
END IF;
|
||||
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_created_at'
|
||||
) THEN
|
||||
CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at);
|
||||
END IF;
|
||||
-- Tag indexes removed: tags table no longer exists (removed in tag system removal)
|
||||
END $$;
|
||||
""",
|
||||
),
|
||||
@@ -287,27 +277,8 @@ def _run_migrations():
|
||||
Migration(
|
||||
name="008_create_tags_ref_count_triggers",
|
||||
sql="""
|
||||
DO $$
|
||||
BEGIN
|
||||
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
|
||||
CREATE TRIGGER tags_ref_count_insert_trigger
|
||||
AFTER INSERT ON tags
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION increment_artifact_ref_count();
|
||||
|
||||
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
|
||||
CREATE TRIGGER tags_ref_count_delete_trigger
|
||||
AFTER DELETE ON tags
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION decrement_artifact_ref_count();
|
||||
|
||||
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
|
||||
CREATE TRIGGER tags_ref_count_update_trigger
|
||||
AFTER UPDATE ON tags
|
||||
FOR EACH ROW
|
||||
WHEN (OLD.artifact_id IS DISTINCT FROM NEW.artifact_id)
|
||||
EXECUTE FUNCTION update_artifact_ref_count();
|
||||
END $$;
|
||||
-- Tags table removed: triggers no longer needed (tag system removed)
|
||||
DO $$ BEGIN NULL; END $$;
|
||||
""",
|
||||
),
|
||||
Migration(
|
||||
@@ -354,9 +325,11 @@ def _run_migrations():
|
||||
Migration(
|
||||
name="011_migrate_semver_tags_to_versions",
|
||||
sql=r"""
|
||||
-- Migrate semver tags to versions (only if both tables exist - for existing databases)
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions') THEN
|
||||
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'package_versions')
|
||||
AND EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'tags') THEN
|
||||
INSERT INTO package_versions (id, package_id, artifact_id, version, version_source, created_by, created_at)
|
||||
SELECT
|
||||
gen_random_uuid(),
|
||||
@@ -565,6 +538,62 @@ def _run_migrations():
|
||||
WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
|
||||
""",
|
||||
),
|
||||
Migration(
|
||||
name="024_remove_tags",
|
||||
sql="""
|
||||
-- Remove tag system, keeping only versions for artifact references
|
||||
DO $$
|
||||
BEGIN
|
||||
-- Drop triggers on tags table (if they exist)
|
||||
DROP TRIGGER IF EXISTS tags_ref_count_insert_trigger ON tags;
|
||||
DROP TRIGGER IF EXISTS tags_ref_count_delete_trigger ON tags;
|
||||
DROP TRIGGER IF EXISTS tags_ref_count_update_trigger ON tags;
|
||||
DROP TRIGGER IF EXISTS tags_updated_at_trigger ON tags;
|
||||
DROP TRIGGER IF EXISTS tag_changes_trigger ON tags;
|
||||
|
||||
-- Drop the tag change tracking function
|
||||
DROP FUNCTION IF EXISTS track_tag_changes();
|
||||
|
||||
-- Remove tag_constraint from artifact_dependencies
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM information_schema.table_constraints
|
||||
WHERE constraint_name = 'check_constraint_type'
|
||||
AND table_name = 'artifact_dependencies'
|
||||
) THEN
|
||||
ALTER TABLE artifact_dependencies DROP CONSTRAINT check_constraint_type;
|
||||
END IF;
|
||||
|
||||
-- Remove the tag_constraint column if it exists
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'artifact_dependencies' AND column_name = 'tag_constraint'
|
||||
) THEN
|
||||
ALTER TABLE artifact_dependencies DROP COLUMN tag_constraint;
|
||||
END IF;
|
||||
|
||||
-- Make version_constraint NOT NULL
|
||||
UPDATE artifact_dependencies SET version_constraint = '*' WHERE version_constraint IS NULL;
|
||||
ALTER TABLE artifact_dependencies ALTER COLUMN version_constraint SET NOT NULL;
|
||||
|
||||
-- Drop tag_history table first (depends on tags)
|
||||
DROP TABLE IF EXISTS tag_history;
|
||||
|
||||
-- Drop tags table
|
||||
DROP TABLE IF EXISTS tags;
|
||||
|
||||
-- Rename uploads.tag_name to version if it exists and version doesn't
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'uploads' AND column_name = 'tag_name'
|
||||
) AND NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'uploads' AND column_name = 'version'
|
||||
) THEN
|
||||
ALTER TABLE uploads RENAME COLUMN tag_name TO version;
|
||||
END IF;
|
||||
END $$;
|
||||
""",
|
||||
),
|
||||
]
|
||||
|
||||
with engine.connect() as conn:
|
||||
|
||||
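The migration entries above are plain Migration(name=..., sql=...) records, each written to be idempotent on its own. The project's actual _run_migrations body is not shown past the opening "with engine.connect() as conn:", so the following is only a sketch of the pattern such a list implies: a tracking table (name assumed here) plus one execution per unapplied entry.

from sqlalchemy import text


def apply_migrations(engine, migrations) -> None:
    """Sketch: run each Migration.sql once, recording applied names."""
    with engine.connect() as conn:
        conn.execute(text(
            "CREATE TABLE IF NOT EXISTS schema_migrations ("
            "name VARCHAR(255) PRIMARY KEY, applied_at TIMESTAMPTZ DEFAULT now())"
        ))
        applied = {row[0] for row in conn.execute(text("SELECT name FROM schema_migrations"))}
        for migration in migrations:
            if migration.name in applied:
                continue  # already recorded; the SQL itself is also guarded with IF EXISTS checks
            conn.execute(text(migration.sql))
            conn.execute(
                text("INSERT INTO schema_migrations (name) VALUES (:name)"),
                {"name": migration.name},
            )
        conn.commit()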
175  backend/app/db_utils.py  Normal file
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
Database utilities for optimized artifact operations.
|
||||
|
||||
Provides batch operations to eliminate N+1 queries.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from .models import Artifact, ArtifactDependency, CachedUrl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ArtifactRepository:
|
||||
"""
|
||||
Optimized database operations for artifact storage.
|
||||
|
||||
Key optimizations:
|
||||
- Atomic upserts using ON CONFLICT
|
||||
- Batch inserts for dependencies
|
||||
- Joined queries to avoid N+1
|
||||
"""
|
||||
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
|
||||
@staticmethod
|
||||
def _format_dependency_values(
|
||||
artifact_id: str,
|
||||
dependencies: list[tuple[str, str, str]],
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Format dependencies for batch insert.
|
||||
|
||||
Args:
|
||||
artifact_id: SHA256 of the artifact
|
||||
dependencies: List of (project, package, version_constraint)
|
||||
|
||||
Returns:
|
||||
List of dicts ready for bulk insert.
|
||||
"""
|
||||
return [
|
||||
{
|
||||
"artifact_id": artifact_id,
|
||||
"dependency_project": proj,
|
||||
"dependency_package": pkg,
|
||||
"version_constraint": ver,
|
||||
}
|
||||
for proj, pkg, ver in dependencies
|
||||
]
|
||||
|
||||
def get_or_create_artifact(
|
||||
self,
|
||||
sha256: str,
|
||||
size: int,
|
||||
filename: str,
|
||||
content_type: Optional[str] = None,
|
||||
created_by: str = "system",
|
||||
s3_key: Optional[str] = None,
|
||||
) -> tuple[Artifact, bool]:
|
||||
"""
|
||||
Get existing artifact or create new one atomically.
|
||||
|
||||
Uses INSERT ... ON CONFLICT DO UPDATE to handle races.
|
||||
If artifact exists, increments ref_count.
|
||||
|
||||
Args:
|
||||
sha256: Content hash (primary key)
|
||||
size: File size in bytes
|
||||
filename: Original filename
|
||||
content_type: MIME type
|
||||
created_by: User who created the artifact
|
||||
s3_key: S3 storage key (defaults to standard path)
|
||||
|
||||
Returns:
|
||||
(artifact, created) tuple where created is True for new artifacts.
|
||||
"""
|
||||
if s3_key is None:
|
||||
s3_key = f"fruits/{sha256[:2]}/{sha256[2:4]}/{sha256}"
|
||||
|
||||
stmt = pg_insert(Artifact).values(
|
||||
id=sha256,
|
||||
size=size,
|
||||
original_name=filename,
|
||||
content_type=content_type,
|
||||
ref_count=1,
|
||||
created_by=created_by,
|
||||
s3_key=s3_key,
|
||||
).on_conflict_do_update(
|
||||
index_elements=['id'],
|
||||
set_={'ref_count': Artifact.ref_count + 1}
|
||||
).returning(Artifact)
|
||||
|
||||
result = self.db.execute(stmt)
|
||||
artifact = result.scalar_one()
|
||||
|
||||
# Check if this was an insert or update by comparing ref_count
|
||||
# ref_count=1 means new, >1 means existing
|
||||
created = artifact.ref_count == 1
|
||||
|
||||
return artifact, created
|
||||
|
||||
def batch_upsert_dependencies(
|
||||
self,
|
||||
artifact_id: str,
|
||||
dependencies: list[tuple[str, str, str]],
|
||||
) -> int:
|
||||
"""
|
||||
Insert dependencies in a single batch operation.
|
||||
|
||||
Uses ON CONFLICT DO NOTHING to skip duplicates.
|
||||
|
||||
Args:
|
||||
artifact_id: SHA256 of the artifact
|
||||
dependencies: List of (project, package, version_constraint)
|
||||
|
||||
Returns:
|
||||
Number of dependencies inserted.
|
||||
"""
|
||||
if not dependencies:
|
||||
return 0
|
||||
|
||||
values = self._format_dependency_values(artifact_id, dependencies)
|
||||
|
||||
stmt = pg_insert(ArtifactDependency).values(values)
|
||||
stmt = stmt.on_conflict_do_nothing(
|
||||
index_elements=['artifact_id', 'dependency_project', 'dependency_package']
|
||||
)
|
||||
|
||||
result = self.db.execute(stmt)
|
||||
return result.rowcount
|
||||
|
||||
def get_cached_url_with_artifact(
|
||||
self,
|
||||
url_hash: str,
|
||||
) -> Optional[tuple[CachedUrl, Artifact]]:
|
||||
"""
|
||||
Get cached URL and its artifact in a single query.
|
||||
|
||||
Args:
|
||||
url_hash: SHA256 of the URL
|
||||
|
||||
Returns:
|
||||
(CachedUrl, Artifact) tuple or None if not found.
|
||||
"""
|
||||
result = (
|
||||
self.db.query(CachedUrl, Artifact)
|
||||
.join(Artifact, CachedUrl.artifact_id == Artifact.id)
|
||||
.filter(CachedUrl.url_hash == url_hash)
|
||||
.first()
|
||||
)
|
||||
return result
|
||||
|
||||
def get_artifact_dependencies(
|
||||
self,
|
||||
artifact_id: str,
|
||||
) -> list[ArtifactDependency]:
|
||||
"""
|
||||
Get all dependencies for an artifact in a single query.
|
||||
|
||||
Args:
|
||||
artifact_id: SHA256 of the artifact
|
||||
|
||||
Returns:
|
||||
List of ArtifactDependency objects.
|
||||
"""
|
||||
return (
|
||||
self.db.query(ArtifactDependency)
|
||||
.filter(ArtifactDependency.artifact_id == artifact_id)
|
||||
.all()
|
||||
)
|
||||
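Taken together, ArtifactRepository turns what used to be several round trips per upload into one upsert for the artifact and one batch insert for its dependencies. A hedged usage sketch; the import path, filenames, and constraints below are illustrative only:

from sqlalchemy.orm import Session

from app.db_utils import ArtifactRepository  # assumed import path


def store_upload(db: Session, sha256: str, payload: bytes) -> None:
    repo = ArtifactRepository(db)

    # One statement: inserts the artifact or bumps ref_count if it already exists.
    artifact, created = repo.get_or_create_artifact(
        sha256=sha256,
        size=len(payload),
        filename="example-1.0.0-py3-none-any.whl",
        content_type="application/zip",
        created_by="ci-bot",
    )

    # One batch insert for every declared dependency; duplicates are skipped.
    repo.batch_upsert_dependencies(
        artifact.id,
        [("pypi", "requests", ">=2.31"), ("pypi", "urllib3", "<3")],
    )

    db.commit()
    if created:
        print(f"stored new artifact {artifact.id[:12]}")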
File diff suppressed because it is too large
179  backend/app/http_client.py  Normal file
@@ -0,0 +1,179 @@
"""
HTTP client manager with connection pooling and lifecycle management.

Provides:
- Shared connection pools for upstream requests
- Per-upstream client isolation when needed
- Thread pool for blocking I/O operations
- FastAPI lifespan integration
"""

import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Optional

import httpx

from .config import Settings

logger = logging.getLogger(__name__)


class HttpClientManager:
    """
    Manages httpx.AsyncClient pools with FastAPI lifespan integration.

    Features:
    - Default shared pool for general requests
    - Per-upstream pools for sources needing specific config/auth
    - Dedicated thread pool for blocking operations
    - Graceful shutdown
    """

    def __init__(self, settings: Settings):
        self.max_connections = settings.http_max_connections
        self.max_keepalive = settings.http_max_keepalive
        self.connect_timeout = settings.http_connect_timeout
        self.read_timeout = settings.http_read_timeout
        self.worker_threads = settings.http_worker_threads

        self._default_client: Optional[httpx.AsyncClient] = None
        self._upstream_clients: dict[str, httpx.AsyncClient] = {}
        self._executor: Optional[ThreadPoolExecutor] = None
        self._started = False

    async def startup(self) -> None:
        """Initialize clients and thread pool. Called by FastAPI lifespan."""
        if self._started:
            return

        logger.info(
            f"Starting HttpClientManager: max_connections={self.max_connections}, "
            f"worker_threads={self.worker_threads}"
        )

        # Create connection limits
        limits = httpx.Limits(
            max_connections=self.max_connections,
            max_keepalive_connections=self.max_keepalive,
        )

        # Create timeout config
        timeout = httpx.Timeout(
            connect=self.connect_timeout,
            read=self.read_timeout,
            write=self.read_timeout,
            pool=self.connect_timeout,
        )

        # Create default client
        self._default_client = httpx.AsyncClient(
            limits=limits,
            timeout=timeout,
            follow_redirects=False,  # Handle redirects manually for auth
        )

        # Create thread pool for blocking operations
        self._executor = ThreadPoolExecutor(
            max_workers=self.worker_threads,
            thread_name_prefix="orchard-blocking-",
        )

        self._started = True
        logger.info("HttpClientManager started")

    async def shutdown(self) -> None:
        """Close all clients and thread pool. Called by FastAPI lifespan."""
        if not self._started:
            return

        logger.info("Shutting down HttpClientManager")

        # Close default client
        if self._default_client:
            await self._default_client.aclose()
            self._default_client = None

        # Close upstream-specific clients
        for name, client in self._upstream_clients.items():
            logger.debug(f"Closing upstream client: {name}")
            await client.aclose()
        self._upstream_clients.clear()

        # Shutdown thread pool
        if self._executor:
            self._executor.shutdown(wait=True)
            self._executor = None

        self._started = False
        logger.info("HttpClientManager shutdown complete")

    def get_client(self, upstream_name: Optional[str] = None) -> httpx.AsyncClient:
        """
        Get HTTP client for making requests.

        Args:
            upstream_name: Optional upstream source name for dedicated pool.
                If None, returns the default shared client.

        Returns:
            httpx.AsyncClient configured for the request.

        Raises:
            RuntimeError: If manager not started.
        """
        if not self._started or not self._default_client:
            raise RuntimeError("HttpClientManager not started. Call startup() first.")

        if upstream_name and upstream_name in self._upstream_clients:
            return self._upstream_clients[upstream_name]

        return self._default_client

    async def run_blocking(self, func: Callable[..., Any], *args: Any) -> Any:
        """
        Run a blocking function in the thread pool.

        Use this for:
        - File I/O operations
        - Archive extraction (zipfile, tarfile)
        - Hash computation on large data

        Args:
            func: Synchronous function to execute
            *args: Arguments to pass to the function

        Returns:
            The function's return value.
        """
        if not self._executor:
            raise RuntimeError("HttpClientManager not started. Call startup() first.")

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, func, *args)

    @property
    def active_connections(self) -> int:
        """Get approximate number of active connections (for health checks)."""
        if not self._default_client:
            return 0
        # httpx doesn't expose this directly, return pool size as approximation
        return self.max_connections

    @property
    def pool_size(self) -> int:
        """Get configured pool size."""
        return self.max_connections

    @property
    def executor_active(self) -> int:
        """Get number of active thread pool workers."""
        if not self._executor:
            return 0
        return len(self._executor._threads)

    @property
    def executor_max(self) -> int:
        """Get max thread pool workers."""
        return self.worker_threads
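The commit title refers to how httpx.Timeout is built: the constructor accepts either a single default value that covers all four phases (connect, read, write, pool), optionally with individual overrides, or all four keyword arguments given explicitly; supplying only some of them without a default raises a ValueError when the timeout is constructed. Both valid styles used in this changeset are sketched below, with values mirroring the defaults above:

import httpx

# Style used by HttpClientManager above: all four phases set explicitly.
pooled_timeout = httpx.Timeout(
    connect=30.0,
    read=60.0,
    write=60.0,
    pool=30.0,
)

# Style used by the registry client later in this commit: a default for every
# phase, with only the connect phase overridden.
json_api_timeout = httpx.Timeout(60.0, connect=30.0)

# The misconfiguration this commit avoids: partial keyword arguments with no
# default raise ValueError at construction time.
try:
    httpx.Timeout(connect=30.0, read=60.0)
except ValueError as exc:
    print(exc)

client = httpx.AsyncClient(timeout=pooled_timeout)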
@@ -15,6 +15,8 @@ from .pypi_proxy import router as pypi_router
from .seed import seed_database
from .auth import create_default_admin
from .rate_limit import limiter
from .http_client import HttpClientManager
from .cache_service import CacheService

settings = get_settings()
logging.basicConfig(level=logging.INFO)
@@ -38,6 +40,17 @@ async def lifespan(app: FastAPI):
    finally:
        db.close()

    # Initialize infrastructure services
    logger.info("Initializing infrastructure services...")

    app.state.http_client = HttpClientManager(settings)
    await app.state.http_client.startup()

    app.state.cache = CacheService(settings)
    await app.state.cache.startup()

    logger.info("Infrastructure services ready")

    # Seed test data in development mode
    if settings.is_development:
        logger.info(f"Running in {settings.env} mode - checking for seed data")
@@ -50,7 +63,12 @@ async def lifespan(app: FastAPI):
        logger.info(f"Running in {settings.env} mode - skipping seed data")

    yield
    # Shutdown: cleanup if needed

    # Shutdown infrastructure services
    logger.info("Shutting down infrastructure services...")
    await app.state.http_client.shutdown()
    await app.state.cache.shutdown()
    logger.info("Shutdown complete")


app = FastAPI(
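Once the lifespan has stored both managers on app.state, route handlers can reach them through the request object rather than importing singletons. A minimal sketch; the endpoint path and router name are illustrative, not part of this commit:

from fastapi import APIRouter, Request

router = APIRouter()


@router.get("/health/infrastructure")
async def infrastructure_health(request: Request) -> dict:
    http_client = request.app.state.http_client
    cache = request.app.state.cache

    return {
        "redis": await cache.ping(),
        "http_pool_size": http_client.pool_size,
        "worker_threads": http_client.executor_max,
    }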
@@ -71,7 +71,6 @@ class Package(Base):
|
||||
)
|
||||
|
||||
project = relationship("Project", back_populates="packages")
|
||||
tags = relationship("Tag", back_populates="package", cascade="all, delete-orphan")
|
||||
uploads = relationship(
|
||||
"Upload", back_populates="package", cascade="all, delete-orphan"
|
||||
)
|
||||
@@ -120,7 +119,6 @@ class Artifact(Base):
|
||||
ref_count = Column(Integer, default=1)
|
||||
s3_key = Column(String(1024), nullable=False)
|
||||
|
||||
tags = relationship("Tag", back_populates="artifact")
|
||||
uploads = relationship("Upload", back_populates="artifact")
|
||||
versions = relationship("PackageVersion", back_populates="artifact")
|
||||
dependencies = relationship(
|
||||
@@ -151,65 +149,6 @@ class Artifact(Base):
|
||||
)
|
||||
|
||||
|
||||
class Tag(Base):
|
||||
__tablename__ = "tags"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
package_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("packages.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
)
|
||||
name = Column(String(255), nullable=False)
|
||||
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow
|
||||
)
|
||||
created_by = Column(String(255), nullable=False)
|
||||
|
||||
package = relationship("Package", back_populates="tags")
|
||||
artifact = relationship("Artifact", back_populates="tags")
|
||||
history = relationship(
|
||||
"TagHistory", back_populates="tag", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_tags_package_id", "package_id"),
|
||||
Index("idx_tags_artifact_id", "artifact_id"),
|
||||
Index(
|
||||
"idx_tags_package_name", "package_id", "name", unique=True
|
||||
), # Composite unique index
|
||||
Index(
|
||||
"idx_tags_package_created_at", "package_id", "created_at"
|
||||
), # For recent tags queries
|
||||
)
|
||||
|
||||
|
||||
class TagHistory(Base):
|
||||
__tablename__ = "tag_history"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
tag_id = Column(
|
||||
UUID(as_uuid=True), ForeignKey("tags.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
old_artifact_id = Column(String(64), ForeignKey("artifacts.id"))
|
||||
new_artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
|
||||
change_type = Column(String(20), nullable=False, default="update")
|
||||
changed_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
changed_by = Column(String(255), nullable=False)
|
||||
|
||||
tag = relationship("Tag", back_populates="history")
|
||||
|
||||
__table_args__ = (
|
||||
Index("idx_tag_history_tag_id", "tag_id"),
|
||||
Index("idx_tag_history_changed_at", "changed_at"),
|
||||
CheckConstraint(
|
||||
"change_type IN ('create', 'update', 'delete')", name="check_change_type"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class PackageVersion(Base):
|
||||
"""Immutable version record for a package-artifact relationship.
|
||||
|
||||
@@ -249,7 +188,7 @@ class Upload(Base):
|
||||
artifact_id = Column(String(64), ForeignKey("artifacts.id"), nullable=False)
|
||||
package_id = Column(UUID(as_uuid=True), ForeignKey("packages.id"), nullable=False)
|
||||
original_name = Column(String(1024))
|
||||
tag_name = Column(String(255)) # Tag assigned during upload
|
||||
version = Column(String(255)) # Version assigned during upload
|
||||
user_agent = Column(String(512)) # Client identification
|
||||
duration_ms = Column(Integer) # Upload timing in milliseconds
|
||||
deduplicated = Column(Boolean, default=False) # Whether artifact was deduplicated
|
||||
@@ -524,8 +463,8 @@ class PackageHistory(Base):
|
||||
class ArtifactDependency(Base):
|
||||
"""Dependency declared by an artifact on another package.
|
||||
|
||||
Each artifact can declare dependencies on other packages, specifying either
|
||||
an exact version or a tag. This enables recursive dependency resolution.
|
||||
Each artifact can declare dependencies on other packages, specifying a version.
|
||||
This enables recursive dependency resolution.
|
||||
"""
|
||||
|
||||
__tablename__ = "artifact_dependencies"
|
||||
@@ -538,20 +477,13 @@ class ArtifactDependency(Base):
|
||||
)
|
||||
dependency_project = Column(String(255), nullable=False)
|
||||
dependency_package = Column(String(255), nullable=False)
|
||||
version_constraint = Column(String(255), nullable=True)
|
||||
tag_constraint = Column(String(255), nullable=True)
|
||||
version_constraint = Column(String(255), nullable=False)
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
|
||||
# Relationship to the artifact that declares this dependency
|
||||
artifact = relationship("Artifact", back_populates="dependencies")
|
||||
|
||||
__table_args__ = (
|
||||
# Exactly one of version_constraint or tag_constraint must be set
|
||||
CheckConstraint(
|
||||
"(version_constraint IS NOT NULL AND tag_constraint IS NULL) OR "
|
||||
"(version_constraint IS NULL AND tag_constraint IS NOT NULL)",
|
||||
name="check_constraint_type",
|
||||
),
|
||||
# Each artifact can only depend on a specific project/package once
|
||||
Index(
|
||||
"idx_artifact_dependencies_artifact_id",
|
||||
|
||||
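After this change a dependency row always carries a version constraint: tag_constraint is gone, the either/or check constraint is dropped, and version_constraint is NOT NULL with "*" standing in for "any version". A hedged sketch of declaring a dependency under the new model; the import path and values are illustrative:

from sqlalchemy.orm import Session

from app.models import ArtifactDependency  # assumed import path


def declare_dependency(db: Session, artifact_sha256: str) -> None:
    dep = ArtifactDependency(
        artifact_id=artifact_sha256,          # 64-char content hash of the depending artifact
        dependency_project="pypi",
        dependency_package="requests",
        version_constraint=">=2.31,<3",       # required now; use "*" for any version
    )
    db.add(dep)
    db.commit()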
@@ -12,7 +12,6 @@ from .models import (
|
||||
Project,
|
||||
Package,
|
||||
Artifact,
|
||||
Tag,
|
||||
Upload,
|
||||
PackageVersion,
|
||||
ArtifactDependency,
|
||||
@@ -60,7 +59,6 @@ def purge_seed_data(db: Session) -> dict:
|
||||
|
||||
results = {
|
||||
"dependencies_deleted": 0,
|
||||
"tags_deleted": 0,
|
||||
"versions_deleted": 0,
|
||||
"uploads_deleted": 0,
|
||||
"artifacts_deleted": 0,
|
||||
@@ -103,15 +101,7 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["dependencies_deleted"] = count
|
||||
logger.info(f"Deleted {count} artifact dependencies")
|
||||
|
||||
# 2. Delete tags
|
||||
if seed_package_ids:
|
||||
count = db.query(Tag).filter(Tag.package_id.in_(seed_package_ids)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
results["tags_deleted"] = count
|
||||
logger.info(f"Deleted {count} tags")
|
||||
|
||||
# 3. Delete package versions
|
||||
# 2. Delete package versions
|
||||
if seed_package_ids:
|
||||
count = db.query(PackageVersion).filter(
|
||||
PackageVersion.package_id.in_(seed_package_ids)
|
||||
@@ -119,7 +109,7 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["versions_deleted"] = count
|
||||
logger.info(f"Deleted {count} package versions")
|
||||
|
||||
# 4. Delete uploads
|
||||
# 3. Delete uploads
|
||||
if seed_package_ids:
|
||||
count = db.query(Upload).filter(Upload.package_id.in_(seed_package_ids)).delete(
|
||||
synchronize_session=False
|
||||
@@ -127,7 +117,7 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["uploads_deleted"] = count
|
||||
logger.info(f"Deleted {count} uploads")
|
||||
|
||||
# 5. Delete S3 objects for seed artifacts
|
||||
# 4. Delete S3 objects for seed artifacts
|
||||
if seed_artifact_ids:
|
||||
seed_artifacts = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).all()
|
||||
for artifact in seed_artifacts:
|
||||
@@ -139,8 +129,8 @@ def purge_seed_data(db: Session) -> dict:
|
||||
logger.warning(f"Failed to delete S3 object {artifact.s3_key}: {e}")
|
||||
logger.info(f"Deleted {results['s3_objects_deleted']} S3 objects")
|
||||
|
||||
# 6. Delete artifacts (only those with ref_count that would be 0 after our deletions)
|
||||
# Since we deleted all tags/versions pointing to these artifacts, we can delete them
|
||||
# 5. Delete artifacts (only those with ref_count that would be 0 after our deletions)
|
||||
# Since we deleted all versions pointing to these artifacts, we can delete them
|
||||
if seed_artifact_ids:
|
||||
count = db.query(Artifact).filter(Artifact.id.in_(seed_artifact_ids)).delete(
|
||||
synchronize_session=False
|
||||
@@ -148,7 +138,7 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["artifacts_deleted"] = count
|
||||
logger.info(f"Deleted {count} artifacts")
|
||||
|
||||
# 7. Delete packages
|
||||
# 6. Delete packages
|
||||
if seed_package_ids:
|
||||
count = db.query(Package).filter(Package.id.in_(seed_package_ids)).delete(
|
||||
synchronize_session=False
|
||||
@@ -156,7 +146,7 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["packages_deleted"] = count
|
||||
logger.info(f"Deleted {count} packages")
|
||||
|
||||
# 8. Delete access permissions for seed projects
|
||||
# 7. Delete access permissions for seed projects
|
||||
if seed_project_ids:
|
||||
count = db.query(AccessPermission).filter(
|
||||
AccessPermission.project_id.in_(seed_project_ids)
|
||||
@@ -164,14 +154,14 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["permissions_deleted"] = count
|
||||
logger.info(f"Deleted {count} access permissions")
|
||||
|
||||
# 9. Delete seed projects
|
||||
# 8. Delete seed projects
|
||||
count = db.query(Project).filter(Project.name.in_(SEED_PROJECT_NAMES)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
results["projects_deleted"] = count
|
||||
logger.info(f"Deleted {count} projects")
|
||||
|
||||
# 10. Find and delete seed team
|
||||
# 9. Find and delete seed team
|
||||
seed_team = db.query(Team).filter(Team.slug == SEED_TEAM_SLUG).first()
|
||||
if seed_team:
|
||||
# Delete team memberships first
|
||||
@@ -186,7 +176,7 @@ def purge_seed_data(db: Session) -> dict:
|
||||
results["teams_deleted"] = 1
|
||||
logger.info(f"Deleted team: {SEED_TEAM_SLUG}")
|
||||
|
||||
# 11. Delete seed users (but NOT admin)
|
||||
# 10. Delete seed users (but NOT admin)
|
||||
seed_users = db.query(User).filter(User.username.in_(SEED_USERNAMES)).all()
|
||||
for user in seed_users:
|
||||
# Delete any remaining team memberships for this user
|
||||
|
||||
File diff suppressed because it is too large
426  backend/app/registry_client.py  Normal file
@@ -0,0 +1,426 @@
|
||||
"""
|
||||
Registry client abstraction for upstream package registries.
|
||||
|
||||
Provides a pluggable interface for fetching packages from upstream registries
|
||||
(PyPI, npm, Maven, etc.) during dependency resolution with auto-fetch enabled.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, TYPE_CHECKING
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import httpx
|
||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||
from packaging.version import Version, InvalidVersion
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .storage import S3Storage
|
||||
from .http_client import HttpClientManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class VersionInfo:
|
||||
"""Information about a package version from an upstream registry."""
|
||||
|
||||
version: str
|
||||
download_url: str
|
||||
filename: str
|
||||
sha256: Optional[str] = None
|
||||
size: Optional[int] = None
|
||||
content_type: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class FetchResult:
|
||||
"""Result of fetching a package from upstream."""
|
||||
|
||||
artifact_id: str # SHA256 hash
|
||||
size: int
|
||||
version: str
|
||||
filename: str
|
||||
already_cached: bool = False
|
||||
|
||||
|
||||
class RegistryClient(ABC):
|
||||
"""Abstract base class for upstream registry clients."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def source_type(self) -> str:
|
||||
"""Return the source type this client handles (e.g., 'pypi', 'npm')."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_available_versions(self, package_name: str) -> List[str]:
|
||||
"""
|
||||
Get all available versions of a package from upstream.
|
||||
|
||||
Args:
|
||||
package_name: The normalized package name
|
||||
|
||||
Returns:
|
||||
List of version strings, sorted from oldest to newest
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def resolve_constraint(
|
||||
self, package_name: str, constraint: str
|
||||
) -> Optional[VersionInfo]:
|
||||
"""
|
||||
Find the best version matching a constraint.
|
||||
|
||||
Args:
|
||||
package_name: The normalized package name
|
||||
constraint: Version constraint (e.g., '>=1.9', '<2.0,>=1.5', '*')
|
||||
|
||||
Returns:
|
||||
VersionInfo with download URL, or None if no matching version found
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def fetch_package(
|
||||
self,
|
||||
package_name: str,
|
||||
version_info: VersionInfo,
|
||||
db: Session,
|
||||
storage: "S3Storage",
|
||||
) -> Optional[FetchResult]:
|
||||
"""
|
||||
Fetch and cache a package from upstream.
|
||||
|
||||
Args:
|
||||
package_name: The normalized package name
|
||||
version_info: Version details including download URL
|
||||
db: Database session for creating records
|
||||
storage: S3 storage for caching the artifact
|
||||
|
||||
Returns:
|
||||
FetchResult with artifact_id, or None if fetch failed
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class PyPIRegistryClient(RegistryClient):
|
||||
"""PyPI registry client using the JSON API."""
|
||||
|
||||
# Timeout configuration for PyPI requests
|
||||
CONNECT_TIMEOUT = 30.0
|
||||
READ_TIMEOUT = 60.0
|
||||
DOWNLOAD_TIMEOUT = 300.0 # Longer timeout for file downloads
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
http_client: httpx.AsyncClient,
|
||||
upstream_sources: List,
|
||||
pypi_api_url: str = "https://pypi.org/pypi",
|
||||
):
|
||||
"""
|
||||
Initialize PyPI registry client.
|
||||
|
||||
Args:
|
||||
http_client: Shared async HTTP client
|
||||
upstream_sources: List of configured upstream sources for auth
|
||||
pypi_api_url: Base URL for PyPI JSON API
|
||||
"""
|
||||
self.client = http_client
|
||||
self.sources = upstream_sources
|
||||
self.api_url = pypi_api_url
|
||||
|
||||
@property
|
||||
def source_type(self) -> str:
|
||||
return "pypi"
|
||||
|
||||
def _normalize_package_name(self, name: str) -> str:
|
||||
"""Normalize a PyPI package name per PEP 503."""
|
||||
return re.sub(r"[-_.]+", "-", name).lower()
|
||||
|
||||
def _get_auth_headers(self) -> dict:
|
||||
"""Get authentication headers from configured sources."""
|
||||
headers = {"User-Agent": "Orchard-Registry-Client/1.0"}
|
||||
if self.sources:
|
||||
source = self.sources[0]
|
||||
if hasattr(source, "auth_type"):
|
||||
if source.auth_type == "bearer":
|
||||
password = (
|
||||
source.get_password()
|
||||
if hasattr(source, "get_password")
|
||||
else getattr(source, "password", None)
|
||||
)
|
||||
if password:
|
||||
headers["Authorization"] = f"Bearer {password}"
|
||||
elif source.auth_type == "api_key":
|
||||
custom_headers = (
|
||||
source.get_headers()
|
||||
if hasattr(source, "get_headers")
|
||||
else {}
|
||||
)
|
||||
if custom_headers:
|
||||
headers.update(custom_headers)
|
||||
return headers
|
||||
|
||||
def _get_basic_auth(self) -> Optional[tuple]:
|
||||
"""Get basic auth credentials if configured."""
|
||||
if self.sources:
|
||||
source = self.sources[0]
|
||||
if hasattr(source, "auth_type") and source.auth_type == "basic":
|
||||
username = getattr(source, "username", None)
|
||||
if username:
|
||||
password = (
|
||||
source.get_password()
|
||||
if hasattr(source, "get_password")
|
||||
else getattr(source, "password", "")
|
||||
)
|
||||
return (username, password or "")
|
||||
return None
|
||||
|
||||
async def get_available_versions(self, package_name: str) -> List[str]:
|
||||
"""Get all available versions from PyPI JSON API."""
|
||||
normalized = self._normalize_package_name(package_name)
|
||||
url = f"{self.api_url}/{normalized}/json"
|
||||
|
||||
headers = self._get_auth_headers()
|
||||
auth = self._get_basic_auth()
|
||||
timeout = httpx.Timeout(self.READ_TIMEOUT, connect=self.CONNECT_TIMEOUT)
|
||||
|
||||
try:
|
||||
response = await self.client.get(
|
||||
url, headers=headers, auth=auth, timeout=timeout
|
||||
)
|
||||
|
||||
if response.status_code == 404:
|
||||
logger.debug(f"Package {normalized} not found on PyPI")
|
||||
return []
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
f"PyPI API returned {response.status_code} for {normalized}"
|
||||
)
|
||||
return []
|
||||
|
||||
data = response.json()
|
||||
releases = data.get("releases", {})
|
||||
|
||||
# Filter to valid versions and sort
|
||||
versions = []
|
||||
for v in releases.keys():
|
||||
try:
|
||||
Version(v)
|
||||
versions.append(v)
|
||||
except InvalidVersion:
|
||||
continue
|
||||
|
||||
versions.sort(key=lambda x: Version(x))
|
||||
return versions
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.warning(f"Failed to query PyPI for {normalized}: {e}")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.warning(f"Error parsing PyPI response for {normalized}: {e}")
|
||||
return []
|
||||
|
||||
async def resolve_constraint(
|
||||
self, package_name: str, constraint: str
|
||||
) -> Optional[VersionInfo]:
|
||||
"""Find best version matching constraint from PyPI."""
|
||||
normalized = self._normalize_package_name(package_name)
|
||||
url = f"{self.api_url}/{normalized}/json"
|
||||
|
||||
headers = self._get_auth_headers()
|
||||
auth = self._get_basic_auth()
|
||||
timeout = httpx.Timeout(self.READ_TIMEOUT, connect=self.CONNECT_TIMEOUT)
|
||||
|
||||
try:
|
||||
response = await self.client.get(
|
||||
url, headers=headers, auth=auth, timeout=timeout
|
||||
)
|
||||
|
||||
if response.status_code == 404:
|
||||
logger.debug(f"Package {normalized} not found on PyPI")
|
||||
return None
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
f"PyPI API returned {response.status_code} for {normalized}"
|
||||
)
|
||||
return None
|
||||
|
||||
data = response.json()
|
||||
releases = data.get("releases", {})
|
||||
|
||||
# Handle wildcard - return latest version
|
||||
if constraint == "*":
|
||||
latest_version = data.get("info", {}).get("version")
|
||||
if latest_version and latest_version in releases:
|
||||
return self._get_version_info(
|
||||
normalized, latest_version, releases[latest_version]
|
||||
)
|
||||
return None
|
||||
|
||||
# Parse constraint
|
||||
# If constraint looks like a bare version (no operator), treat as exact match
|
||||
# e.g., "2025.10.5" -> "==2025.10.5"
|
||||
effective_constraint = constraint
|
||||
if constraint and constraint[0].isdigit():
|
||||
effective_constraint = f"=={constraint}"
|
||||
logger.debug(
|
||||
f"Bare version '{constraint}' for {normalized}, "
|
||||
f"treating as exact match '{effective_constraint}'"
|
||||
)
|
||||
|
||||
try:
|
||||
specifier = SpecifierSet(effective_constraint)
|
||||
except InvalidSpecifier:
|
||||
# Invalid constraint - treat as wildcard
|
||||
logger.warning(
|
||||
f"Invalid version constraint '{constraint}' for {normalized}, "
|
||||
"treating as wildcard"
|
||||
)
|
||||
latest_version = data.get("info", {}).get("version")
|
||||
if latest_version and latest_version in releases:
|
||||
return self._get_version_info(
|
||||
normalized, latest_version, releases[latest_version]
|
||||
)
|
||||
return None
|
||||
|
||||
# Find matching versions
|
||||
matching = []
|
||||
for v_str, files in releases.items():
|
||||
if not files: # Skip versions with no files
|
||||
continue
|
||||
try:
|
||||
v = Version(v_str)
|
||||
if v in specifier:
|
||||
matching.append((v_str, v, files))
|
||||
except InvalidVersion:
|
||||
continue
|
||||
|
||||
if not matching:
|
||||
logger.debug(
|
||||
f"No versions of {normalized} match constraint '{constraint}'"
|
||||
)
|
||||
return None
|
||||
|
||||
# Sort by version and return highest match
|
||||
matching.sort(key=lambda x: x[1], reverse=True)
|
||||
best_version, _, best_files = matching[0]
|
||||
|
||||
return self._get_version_info(normalized, best_version, best_files)
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.warning(f"Failed to query PyPI for {normalized}: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"Error resolving {normalized}@{constraint}: {e}")
|
||||
return None
|
||||
|
||||
def _get_version_info(
|
||||
self, package_name: str, version: str, files: List[dict]
|
||||
) -> Optional[VersionInfo]:
|
||||
"""Extract download info from PyPI release files."""
|
||||
if not files:
|
||||
return None
|
||||
|
||||
# Prefer wheel over sdist
|
||||
wheel_file = None
|
||||
sdist_file = None
|
||||
|
||||
for f in files:
|
||||
filename = f.get("filename", "")
|
||||
if filename.endswith(".whl"):
|
||||
# Prefer platform-agnostic wheels
|
||||
if "py3-none-any" in filename or wheel_file is None:
|
||||
wheel_file = f
|
||||
elif filename.endswith(".tar.gz") and sdist_file is None:
|
||||
sdist_file = f
|
||||
|
||||
selected = wheel_file or sdist_file
|
||||
if not selected:
|
||||
# Fall back to first available file
|
||||
selected = files[0]
|
||||
|
||||
return VersionInfo(
|
||||
version=version,
|
||||
download_url=selected.get("url", ""),
|
||||
filename=selected.get("filename", ""),
|
||||
sha256=selected.get("digests", {}).get("sha256"),
|
||||
size=selected.get("size"),
|
||||
content_type="application/zip"
|
||||
if selected.get("filename", "").endswith(".whl")
|
||||
else "application/gzip",
|
||||
)
|
||||
|
||||
async def fetch_package(
|
||||
self,
|
||||
package_name: str,
|
||||
version_info: VersionInfo,
|
||||
db: Session,
|
||||
storage: "S3Storage",
|
||||
) -> Optional[FetchResult]:
|
||||
"""Fetch and cache a PyPI package."""
|
||||
# Import here to avoid circular imports
|
||||
from .pypi_proxy import fetch_and_cache_pypi_package
|
||||
|
||||
normalized = self._normalize_package_name(package_name)
|
||||
|
||||
logger.info(
|
||||
f"Fetching {normalized}=={version_info.version} from upstream PyPI"
|
||||
)
|
||||
|
||||
result = await fetch_and_cache_pypi_package(
|
||||
db=db,
|
||||
storage=storage,
|
||||
http_client=self.client,
|
||||
package_name=normalized,
|
||||
filename=version_info.filename,
|
||||
download_url=version_info.download_url,
|
||||
expected_sha256=version_info.sha256,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
return None
|
||||
|
||||
return FetchResult(
|
||||
artifact_id=result["artifact_id"],
|
||||
size=result["size"],
|
||||
version=version_info.version,
|
||||
filename=version_info.filename,
|
||||
already_cached=result.get("already_cached", False),
|
||||
)
|
||||
|
||||
|
||||
def get_registry_client(
|
||||
source_type: str,
|
||||
http_client: httpx.AsyncClient,
|
||||
upstream_sources: List,
|
||||
) -> Optional[RegistryClient]:
|
||||
"""
|
||||
Factory function to get a registry client for a source type.
|
||||
|
||||
Args:
|
||||
source_type: The registry type ('pypi', 'npm', etc.)
|
||||
http_client: Shared async HTTP client
|
||||
upstream_sources: List of configured upstream sources
|
||||
|
||||
Returns:
|
||||
RegistryClient for the source type, or None if not supported
|
||||
"""
|
||||
if source_type == "pypi":
|
||||
# Filter to PyPI sources
|
||||
pypi_sources = [s for s in upstream_sources if getattr(s, "source_type", "") == "pypi"]
|
||||
return PyPIRegistryClient(http_client, pypi_sources)
|
||||
|
||||
# Future: Add npm, maven, etc.
|
||||
logger.debug(f"No registry client available for source type: {source_type}")
|
||||
return None
|
||||
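resolve_constraint leans entirely on the packaging library: bare versions such as "2025.10.5" are rewritten to an exact "==" pin, invalid specifiers fall back to the latest release, and candidates are compared as Version objects so the highest match wins. The core matching step in isolation looks roughly like this (a sketch, not the method itself):

from typing import List, Optional

from packaging.specifiers import InvalidSpecifier, SpecifierSet
from packaging.version import InvalidVersion, Version


def best_match(constraint: str, available: List[str]) -> Optional[str]:
    """Pick the highest available version satisfying the constraint."""
    if constraint and constraint[0].isdigit():
        constraint = f"=={constraint}"  # bare version means an exact pin

    try:
        specifier = SpecifierSet(constraint)
    except InvalidSpecifier:
        return available[-1] if available else None  # mirror the wildcard fallback

    matching = []
    for candidate in available:
        try:
            version = Version(candidate)
        except InvalidVersion:
            continue  # skip non-PEP 440 versions, as get_available_versions does
        if version in specifier:
            matching.append(version)

    return str(max(matching)) if matching else None


print(best_match(">=1.5,<2.0", ["1.4.0", "1.9.2", "2.1.0"]))  # 1.9.2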
@@ -9,7 +9,6 @@ from .base import BaseRepository
from .project import ProjectRepository
from .package import PackageRepository
from .artifact import ArtifactRepository
from .tag import TagRepository
from .upload import UploadRepository

__all__ = [
@@ -17,6 +16,5 @@ __all__ = [
    "ProjectRepository",
    "PackageRepository",
    "ArtifactRepository",
    "TagRepository",
    "UploadRepository",
]
@@ -8,7 +8,7 @@ from sqlalchemy import func, or_
|
||||
from uuid import UUID
|
||||
|
||||
from .base import BaseRepository
|
||||
from ..models import Artifact, Tag, Upload, Package, Project
|
||||
from ..models import Artifact, PackageVersion, Upload, Package, Project
|
||||
|
||||
|
||||
class ArtifactRepository(BaseRepository[Artifact]):
|
||||
@@ -77,14 +77,14 @@ class ArtifactRepository(BaseRepository[Artifact]):
|
||||
.all()
|
||||
)
|
||||
|
||||
def get_artifacts_without_tags(self, limit: int = 100) -> List[Artifact]:
|
||||
"""Get artifacts that have no tags pointing to them."""
|
||||
# Subquery to find artifact IDs that have tags
|
||||
tagged_artifacts = self.db.query(Tag.artifact_id).distinct().subquery()
|
||||
def get_artifacts_without_versions(self, limit: int = 100) -> List[Artifact]:
|
||||
"""Get artifacts that have no versions pointing to them."""
|
||||
# Subquery to find artifact IDs that have versions
|
||||
versioned_artifacts = self.db.query(PackageVersion.artifact_id).distinct().subquery()
|
||||
|
||||
return (
|
||||
self.db.query(Artifact)
|
||||
.filter(~Artifact.id.in_(tagged_artifacts))
|
||||
.filter(~Artifact.id.in_(versioned_artifacts))
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
@@ -115,34 +115,34 @@ class ArtifactRepository(BaseRepository[Artifact]):

return artifacts, total

def get_referencing_tags(self, artifact_id: str) -> List[Tuple[Tag, Package, Project]]:
"""Get all tags referencing this artifact with package and project info."""
def get_referencing_versions(self, artifact_id: str) -> List[Tuple[PackageVersion, Package, Project]]:
"""Get all versions referencing this artifact with package and project info."""
return (
self.db.query(Tag, Package, Project)
.join(Package, Tag.package_id == Package.id)
self.db.query(PackageVersion, Package, Project)
.join(Package, PackageVersion.package_id == Package.id)
.join(Project, Package.project_id == Project.id)
.filter(Tag.artifact_id == artifact_id)
.filter(PackageVersion.artifact_id == artifact_id)
.all()
)

def search(self, query_str: str, limit: int = 10) -> List[Tuple[Tag, Artifact, str, str]]:
def search(self, query_str: str, limit: int = 10) -> List[Tuple[PackageVersion, Artifact, str, str]]:
"""
Search artifacts by tag name or original filename.
Returns (tag, artifact, package_name, project_name) tuples.
Search artifacts by version or original filename.
Returns (version, artifact, package_name, project_name) tuples.
"""
search_lower = query_str.lower()
return (
self.db.query(Tag, Artifact, Package.name, Project.name)
.join(Artifact, Tag.artifact_id == Artifact.id)
.join(Package, Tag.package_id == Package.id)
self.db.query(PackageVersion, Artifact, Package.name, Project.name)
.join(Artifact, PackageVersion.artifact_id == Artifact.id)
.join(Package, PackageVersion.package_id == Package.id)
.join(Project, Package.project_id == Project.id)
.filter(
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(PackageVersion.version).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)
.order_by(Tag.name)
.order_by(PackageVersion.version)
.limit(limit)
.all()
)

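# --- Illustrative sketch (not part of this commit) ---------------------------
# Unpacking the new search() result shape: (PackageVersion, Artifact,
# package_name, project_name) tuples instead of Tag-based tuples. The calling
# code below is hypothetical.
def format_search_results(artifact_repo, query: str) -> list[str]:
    rows = artifact_repo.search(query_str=query, limit=10)
    return [
        f"{project}/{package}/{version.version} -> {artifact.id[:12]}"
        for version, artifact, package, project in rows
    ]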
@@ -8,7 +8,7 @@ from sqlalchemy import func, or_, asc, desc
from uuid import UUID

from .base import BaseRepository
from ..models import Package, Project, Tag, Upload, Artifact
from ..models import Package, Project, PackageVersion, Upload, Artifact


class PackageRepository(BaseRepository[Package]):
@@ -136,10 +136,10 @@ class PackageRepository(BaseRepository[Package]):
return self.update(package, **updates)

def get_stats(self, package_id: UUID) -> dict:
"""Get package statistics (tag count, artifact count, total size)."""
tag_count = (
self.db.query(func.count(Tag.id))
.filter(Tag.package_id == package_id)
"""Get package statistics (version count, artifact count, total size)."""
version_count = (
self.db.query(func.count(PackageVersion.id))
.filter(PackageVersion.package_id == package_id)
.scalar() or 0
)

@@ -154,7 +154,7 @@ class PackageRepository(BaseRepository[Package]):
)

return {
"tag_count": tag_count,
"version_count": version_count,
"artifact_count": artifact_stats[0] if artifact_stats else 0,
"total_size": artifact_stats[1] if artifact_stats else 0,
}

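# --- Illustrative sketch (not part of this commit) ---------------------------
# The stats payload now carries version_count where it previously carried
# tag_count; a hypothetical caller might render it like this.
def summarize_package(package_repo, package_id) -> str:
    stats = package_repo.get_stats(package_id)
    return (
        f"{stats['version_count']} versions, "
        f"{stats['artifact_count']} artifacts, "
        f"{stats['total_size']} bytes"
    )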
@@ -1,168 +0,0 @@
"""
Tag repository for data access operations.
"""

from typing import Optional, List, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import func, or_, asc, desc
from uuid import UUID

from .base import BaseRepository
from ..models import Tag, TagHistory, Artifact, Package, Project


class TagRepository(BaseRepository[Tag]):
"""Repository for Tag entity operations."""

model = Tag

def get_by_name(self, package_id: UUID, name: str) -> Optional[Tag]:
"""Get tag by name within a package."""
return (
self.db.query(Tag)
.filter(Tag.package_id == package_id, Tag.name == name)
.first()
)

def get_with_artifact(self, package_id: UUID, name: str) -> Optional[Tuple[Tag, Artifact]]:
"""Get tag with its artifact."""
return (
self.db.query(Tag, Artifact)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == package_id, Tag.name == name)
.first()
)

def exists_by_name(self, package_id: UUID, name: str) -> bool:
"""Check if tag with name exists in package."""
return self.db.query(
self.db.query(Tag)
.filter(Tag.package_id == package_id, Tag.name == name)
.exists()
).scalar()

def list_by_package(
self,
package_id: UUID,
page: int = 1,
limit: int = 20,
search: Optional[str] = None,
sort: str = "name",
order: str = "asc",
) -> Tuple[List[Tuple[Tag, Artifact]], int]:
"""
List tags in a package with artifact metadata.

Returns tuple of ((tag, artifact) tuples, total_count).
"""
query = (
self.db.query(Tag, Artifact)
.join(Artifact, Tag.artifact_id == Artifact.id)
.filter(Tag.package_id == package_id)
)

# Apply search filter (tag name or artifact original filename)
if search:
search_lower = search.lower()
query = query.filter(
or_(
func.lower(Tag.name).contains(search_lower),
func.lower(Artifact.original_name).contains(search_lower)
)
)

# Get total count
total = query.count()

# Apply sorting
sort_columns = {
"name": Tag.name,
"created_at": Tag.created_at,
}
sort_column = sort_columns.get(sort, Tag.name)
if order == "desc":
query = query.order_by(desc(sort_column))
else:
query = query.order_by(asc(sort_column))

# Apply pagination
offset = (page - 1) * limit
results = query.offset(offset).limit(limit).all()

return results, total

def create_tag(
self,
package_id: UUID,
name: str,
artifact_id: str,
created_by: str,
) -> Tag:
"""Create a new tag."""
return self.create(
package_id=package_id,
name=name,
artifact_id=artifact_id,
created_by=created_by,
)

def update_artifact(
self,
tag: Tag,
new_artifact_id: str,
changed_by: str,
record_history: bool = True,
) -> Tag:
"""
Update tag to point to a different artifact.
Optionally records change in tag history.
"""
old_artifact_id = tag.artifact_id

if record_history and old_artifact_id != new_artifact_id:
history = TagHistory(
tag_id=tag.id,
old_artifact_id=old_artifact_id,
new_artifact_id=new_artifact_id,
changed_by=changed_by,
)
self.db.add(history)

tag.artifact_id = new_artifact_id
tag.created_by = changed_by
self.db.flush()
return tag

def get_history(self, tag_id: UUID) -> List[TagHistory]:
"""Get tag change history."""
return (
self.db.query(TagHistory)
.filter(TagHistory.tag_id == tag_id)
.order_by(TagHistory.changed_at.desc())
.all()
)

def get_latest_in_package(self, package_id: UUID) -> Optional[Tag]:
"""Get the most recently created/updated tag in a package."""
return (
self.db.query(Tag)
.filter(Tag.package_id == package_id)
.order_by(Tag.created_at.desc())
.first()
)

def get_by_artifact(self, artifact_id: str) -> List[Tag]:
"""Get all tags pointing to an artifact."""
return (
self.db.query(Tag)
.filter(Tag.artifact_id == artifact_id)
.all()
)

def count_by_artifact(self, artifact_id: str) -> int:
"""Count tags pointing to an artifact."""
return (
self.db.query(func.count(Tag.id))
.filter(Tag.artifact_id == artifact_id)
.scalar() or 0
)
File diff suppressed because it is too large
@@ -33,6 +33,7 @@ class ProjectResponse(BaseModel):
name: str
description: Optional[str]
is_public: bool
is_system: bool = False
created_at: datetime
updated_at: datetime
created_by: str
@@ -113,14 +114,6 @@ class PackageUpdate(BaseModel):
platform: Optional[str] = None


class TagSummary(BaseModel):
"""Lightweight tag info for embedding in package responses"""

name: str
artifact_id: str
created_at: datetime


class PackageDetailResponse(BaseModel):
"""Package with aggregated metadata"""

@@ -133,13 +126,9 @@ class PackageDetailResponse(BaseModel):
created_at: datetime
updated_at: datetime
# Aggregated fields
tag_count: int = 0
artifact_count: int = 0
total_size: int = 0
latest_tag: Optional[str] = None
latest_upload_at: Optional[datetime] = None
# Recent tags (limit 5)
recent_tags: List[TagSummary] = []

class Config:
from_attributes = True
@@ -164,79 +153,6 @@ class ArtifactResponse(BaseModel):
from_attributes = True


# Tag schemas
class TagCreate(BaseModel):
name: str
artifact_id: str


class TagResponse(BaseModel):
id: UUID
package_id: UUID
name: str
artifact_id: str
created_at: datetime
created_by: str
version: Optional[str] = None # Version of the artifact this tag points to

class Config:
from_attributes = True


class TagDetailResponse(BaseModel):
"""Tag with embedded artifact metadata"""

id: UUID
package_id: UUID
name: str
artifact_id: str
created_at: datetime
created_by: str
version: Optional[str] = None # Version of the artifact this tag points to
# Artifact metadata
artifact_size: int
artifact_content_type: Optional[str]
artifact_original_name: Optional[str]
artifact_created_at: datetime
artifact_format_metadata: Optional[Dict[str, Any]] = None

class Config:
from_attributes = True


class TagHistoryResponse(BaseModel):
"""History entry for tag changes"""

id: UUID
tag_id: UUID
old_artifact_id: Optional[str]
new_artifact_id: str
changed_at: datetime
changed_by: str

class Config:
from_attributes = True


class TagHistoryDetailResponse(BaseModel):
"""Tag history with artifact metadata for each version"""

id: UUID
tag_id: UUID
tag_name: str
old_artifact_id: Optional[str]
new_artifact_id: str
changed_at: datetime
changed_by: str
# Artifact metadata for new artifact
artifact_size: int
artifact_original_name: Optional[str]
artifact_content_type: Optional[str]

class Config:
from_attributes = True


# Audit log schemas
class AuditLogResponse(BaseModel):
"""Audit log entry response"""
@@ -263,7 +179,7 @@ class UploadHistoryResponse(BaseModel):
package_name: str
project_name: str
original_name: Optional[str]
tag_name: Optional[str]
version: Optional[str]
uploaded_at: datetime
uploaded_by: str
source_ip: Optional[str]
@@ -294,10 +210,10 @@ class ArtifactProvenanceResponse(BaseModel):
# Usage statistics
upload_count: int
# References
packages: List[Dict[str, Any]] # List of {project_name, package_name, tag_names}
tags: List[
packages: List[Dict[str, Any]] # List of {project_name, package_name, versions}
versions: List[
Dict[str, Any]
] # List of {project_name, package_name, tag_name, created_at}
] # List of {project_name, package_name, version, created_at}
# Upload history
uploads: List[Dict[str, Any]] # List of upload events

@@ -305,18 +221,8 @@ class ArtifactProvenanceResponse(BaseModel):
from_attributes = True


class ArtifactTagInfo(BaseModel):
"""Tag info for embedding in artifact responses"""

id: UUID
name: str
package_id: UUID
package_name: str
project_name: str


class ArtifactDetailResponse(BaseModel):
"""Artifact with list of tags/packages referencing it"""
"""Artifact with metadata"""

id: str
sha256: str # Explicit SHA256 field (same as id)
@@ -330,14 +236,14 @@ class ArtifactDetailResponse(BaseModel):
created_by: str
ref_count: int
format_metadata: Optional[Dict[str, Any]] = None
tags: List[ArtifactTagInfo] = []
versions: List[Dict[str, Any]] = [] # List of {version, package_name, project_name}

class Config:
from_attributes = True


class PackageArtifactResponse(BaseModel):
"""Artifact with tags for package artifact listing"""
"""Artifact for package artifact listing"""

id: str
sha256: str # Explicit SHA256 field (same as id)
@@ -350,7 +256,7 @@ class PackageArtifactResponse(BaseModel):
created_at: datetime
created_by: str
format_metadata: Optional[Dict[str, Any]] = None
tags: List[str] = [] # Tag names pointing to this artifact
version: Optional[str] = None # Version from PackageVersion if exists

class Config:
from_attributes = True
@@ -368,28 +274,9 @@ class GlobalArtifactResponse(BaseModel):
created_by: str
format_metadata: Optional[Dict[str, Any]] = None
ref_count: int = 0
# Context from tags/packages
# Context from versions/packages
projects: List[str] = [] # List of project names containing this artifact
packages: List[str] = [] # List of "project/package" paths
tags: List[str] = [] # List of "project/package:tag" references

class Config:
from_attributes = True


class GlobalTagResponse(BaseModel):
"""Tag with project/package context for global listing"""

id: UUID
name: str
artifact_id: str
created_at: datetime
created_by: str
project_name: str
package_name: str
artifact_size: Optional[int] = None
artifact_content_type: Optional[str] = None
version: Optional[str] = None # Version of the artifact this tag points to

class Config:
from_attributes = True
@@ -402,7 +289,6 @@ class UploadResponse(BaseModel):
size: int
project: str
package: str
tag: Optional[str]
version: Optional[str] = None # Version assigned to this artifact
version_source: Optional[str] = None # How version was determined: 'explicit', 'filename', 'metadata'
checksum_md5: Optional[str] = None
@@ -429,7 +315,6 @@ class ResumableUploadInitRequest(BaseModel):
filename: str
content_type: Optional[str] = None
size: int
tag: Optional[str] = None
version: Optional[str] = None # Explicit version (auto-detected if not provided)

@field_validator("expected_hash")
@@ -464,7 +349,7 @@ class ResumableUploadPartResponse(BaseModel):
class ResumableUploadCompleteRequest(BaseModel):
"""Request to complete a resumable upload"""

tag: Optional[str] = None
pass


class ResumableUploadCompleteResponse(BaseModel):
@@ -474,7 +359,6 @@ class ResumableUploadCompleteResponse(BaseModel):
size: int
project: str
package: str
tag: Optional[str]


class ResumableUploadStatusResponse(BaseModel):
@@ -527,7 +411,6 @@ class PackageVersionResponse(BaseModel):
size: Optional[int] = None
content_type: Optional[str] = None
original_name: Optional[str] = None
tags: List[str] = [] # Tag names pointing to this artifact

class Config:
from_attributes = True
@@ -569,11 +452,10 @@ class SearchResultPackage(BaseModel):


class SearchResultArtifact(BaseModel):
"""Artifact/tag result for global search"""
"""Artifact result for global search"""

tag_id: UUID
tag_name: str
artifact_id: str
version: Optional[str]
package_id: UUID
package_name: str
project_name: str
@@ -611,6 +493,8 @@ class HealthResponse(BaseModel):
version: str = "1.0.0"
storage_healthy: Optional[bool] = None
database_healthy: Optional[bool] = None
http_pool: Optional[Dict[str, Any]] = None
cache: Optional[Dict[str, Any]] = None


# Garbage collection schemas
@@ -686,7 +570,7 @@ class ProjectStatsResponse(BaseModel):
project_id: str
project_name: str
package_count: int
tag_count: int
version_count: int
artifact_count: int
total_size_bytes: int
upload_count: int
@@ -701,7 +585,7 @@ class PackageStatsResponse(BaseModel):
package_id: str
package_name: str
project_name: str
tag_count: int
version_count: int
artifact_count: int
total_size_bytes: int
upload_count: int
@@ -718,9 +602,9 @@ class ArtifactStatsResponse(BaseModel):
size: int
ref_count: int
storage_savings: int # (ref_count - 1) * size
tags: List[Dict[str, Any]] # Tags referencing this artifact
projects: List[str] # Projects using this artifact
packages: List[str] # Packages using this artifact
versions: List[Dict[str, Any]] = [] # List of {version, package_name, project_name}
first_uploaded: Optional[datetime] = None
last_referenced: Optional[datetime] = None

@@ -929,20 +813,7 @@ class DependencyCreate(BaseModel):
"""Schema for creating a dependency"""
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None

@field_validator('version', 'tag')
@classmethod
def validate_constraint(cls, v, info):
return v

def model_post_init(self, __context):
"""Validate that exactly one of version or tag is set"""
if self.version is None and self.tag is None:
raise ValueError("Either 'version' or 'tag' must be specified")
if self.version is not None and self.tag is not None:
raise ValueError("Cannot specify both 'version' and 'tag'")
version: str


class DependencyResponse(BaseModel):
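# --- Illustrative sketch (not part of this commit) ---------------------------
# DependencyCreate now takes a plain required version string; the old
# either-version-or-tag validation is gone. Import path and payloads assumed.
from pydantic import ValidationError
from app.schemas import DependencyCreate  # path assumed

dep = DependencyCreate(project="frontend-libs", package="design-tokens", version="1.0.0")

try:
    DependencyCreate(project="frontend-libs", package="design-tokens")
except ValidationError:
    pass  # 'version' is now a required field rather than an optional constraint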
@@ -951,8 +822,7 @@ class DependencyResponse(BaseModel):
artifact_id: str
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None
version: str
created_at: datetime

class Config:
@@ -967,7 +837,6 @@ class DependencyResponse(BaseModel):
project=dep.dependency_project,
package=dep.dependency_package,
version=dep.version_constraint,
tag=dep.tag_constraint,
created_at=dep.created_at,
)

@@ -984,7 +853,6 @@ class DependentInfo(BaseModel):
project: str
package: str
version: Optional[str] = None
constraint_type: str # 'version' or 'tag'
constraint_value: str


@@ -1000,20 +868,7 @@ class EnsureFileDependency(BaseModel):
"""Dependency entry from orchard.ensure file"""
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None

@field_validator('version', 'tag')
@classmethod
def validate_constraint(cls, v, info):
return v

def model_post_init(self, __context):
"""Validate that exactly one of version or tag is set"""
if self.version is None and self.tag is None:
raise ValueError("Either 'version' or 'tag' must be specified")
if self.version is not None and self.tag is not None:
raise ValueError("Cannot specify both 'version' and 'tag'")
version: str


class EnsureFileContent(BaseModel):
@@ -1027,15 +882,26 @@ class ResolvedArtifact(BaseModel):
project: str
package: str
version: Optional[str] = None
tag: Optional[str] = None
size: int
download_url: str


class MissingDependency(BaseModel):
"""A dependency that could not be resolved (not cached on server)"""
project: str
package: str
constraint: Optional[str] = None
required_by: Optional[str] = None
fetch_attempted: bool = False # True if auto-fetch was attempted
fetch_error: Optional[str] = None # Error message if fetch failed


class DependencyResolutionResponse(BaseModel):
"""Response from dependency resolution endpoint"""
requested: Dict[str, str] # project, package, ref
resolved: List[ResolvedArtifact]
missing: List[MissingDependency] = []
fetched: List[ResolvedArtifact] = [] # Artifacts fetched from upstream during resolution
total_size: int
artifact_count: int

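# --- Illustrative sketch (not part of this commit) ---------------------------
# Reading a DependencyResolutionResponse under the version-only model. Field
# names come from the schema above; the reporting helper itself is hypothetical.
def report_resolution(resp) -> str:
    lines = [
        f"resolved {item.project}/{item.package} {item.version or '?'} ({item.size} bytes)"
        for item in resp.resolved
    ]
    for miss in resp.missing:
        detail = miss.fetch_error or ("fetch attempted" if miss.fetch_attempted else "not fetched")
        lines.append(f"missing {miss.project}/{miss.package} [{miss.constraint}] - {detail}")
    lines.append(f"total: {resp.artifact_count} artifacts, {resp.total_size} bytes")
    return "\n".join(lines)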
@@ -1044,7 +910,7 @@ class DependencyConflict(BaseModel):
"""Details about a dependency conflict"""
project: str
package: str
requirements: List[Dict[str, Any]] # version/tag and required_by info
requirements: List[Dict[str, Any]] # version and required_by info


class DependencyConflictError(BaseModel):
@@ -1378,10 +1244,10 @@ class CacheRequest(BaseModel):
url: str
source_type: str
package_name: Optional[str] = None # Auto-derived from URL if not provided
tag: Optional[str] = None # Auto-derived from URL if not provided
version: Optional[str] = None # Auto-derived from URL if not provided
user_project: Optional[str] = None # Cross-reference to user project
user_package: Optional[str] = None
user_tag: Optional[str] = None
user_version: Optional[str] = None
expected_hash: Optional[str] = None # Verify downloaded content

@field_validator('url')
@@ -1428,8 +1294,8 @@ class CacheResponse(BaseModel):
source_name: Optional[str]
system_project: str
system_package: str
system_tag: Optional[str]
user_reference: Optional[str] = None # e.g., "my-app/npm-deps:lodash-4.17.21"
system_version: Optional[str]
user_reference: Optional[str] = None # e.g., "my-app/npm-deps/+/4.17.21"


class CacheResolveRequest(BaseModel):
@@ -1443,7 +1309,7 @@ class CacheResolveRequest(BaseModel):
version: str
user_project: Optional[str] = None
user_package: Optional[str] = None
user_tag: Optional[str] = None
user_version: Optional[str] = None

@field_validator('source_type')
@classmethod

@@ -5,7 +5,7 @@ import hashlib
import logging
from sqlalchemy.orm import Session

from .models import Project, Package, Artifact, Tag, Upload, PackageVersion, ArtifactDependency, Team, TeamMembership, User
from .models import Project, Package, Artifact, Upload, PackageVersion, ArtifactDependency, Team, TeamMembership, User
from .storage import get_storage
from .auth import hash_password

@@ -125,14 +125,14 @@ TEST_ARTIFACTS = [
]

# Dependencies to create (source artifact -> dependency)
# Format: (source_project, source_package, source_version, dep_project, dep_package, version_constraint, tag_constraint)
# Format: (source_project, source_package, source_version, dep_project, dep_package, version_constraint)
TEST_DEPENDENCIES = [
# ui-components v1.1.0 depends on design-tokens v1.0.0
("frontend-libs", "ui-components", "1.1.0", "frontend-libs", "design-tokens", "1.0.0", None),
("frontend-libs", "ui-components", "1.1.0", "frontend-libs", "design-tokens", "1.0.0"),
# auth-lib v1.0.0 depends on common-utils v2.0.0
("backend-services", "auth-lib", "1.0.0", "backend-services", "common-utils", "2.0.0", None),
# auth-lib v1.0.0 also depends on design-tokens (stable tag)
("backend-services", "auth-lib", "1.0.0", "frontend-libs", "design-tokens", None, "latest"),
("backend-services", "auth-lib", "1.0.0", "backend-services", "common-utils", "2.0.0"),
# auth-lib v1.0.0 also depends on design-tokens v1.0.0
("backend-services", "auth-lib", "1.0.0", "frontend-libs", "design-tokens", "1.0.0"),
]


@@ -252,9 +252,8 @@ def seed_database(db: Session) -> None:

logger.info(f"Created {len(project_map)} projects and {len(package_map)} packages (assigned to {demo_team.slug})")

# Create artifacts, tags, and versions
# Create artifacts and versions
artifact_count = 0
tag_count = 0
version_count = 0

for artifact_data in TEST_ARTIFACTS:
@@ -316,23 +315,12 @@ def seed_database(db: Session) -> None:
db.add(version)
version_count += 1

# Create tags
for tag_name in artifact_data["tags"]:
tag = Tag(
package_id=package.id,
name=tag_name,
artifact_id=sha256_hash,
created_by=team_owner_username,
)
db.add(tag)
tag_count += 1

db.flush()

# Create dependencies
dependency_count = 0
for dep_data in TEST_DEPENDENCIES:
src_project, src_package, src_version, dep_project, dep_package, version_constraint, tag_constraint = dep_data
src_project, src_package, src_version, dep_project, dep_package, version_constraint = dep_data

# Find the source artifact by looking up its version
src_pkg = package_map.get((src_project, src_package))
@@ -356,11 +344,10 @@ def seed_database(db: Session) -> None:
dependency_project=dep_project,
dependency_package=dep_package,
version_constraint=version_constraint,
tag_constraint=tag_constraint,
)
db.add(dependency)
dependency_count += 1

db.commit()
logger.info(f"Created {artifact_count} artifacts, {tag_count} tags, {version_count} versions, and {dependency_count} dependencies")
logger.info(f"Created {artifact_count} artifacts, {version_count} versions, and {dependency_count} dependencies")
logger.info("Database seeding complete")

@@ -6,9 +6,8 @@ from typing import List, Optional, Tuple
from sqlalchemy.orm import Session
import logging

from ..models import Artifact, Tag
from ..models import Artifact, PackageVersion
from ..repositories.artifact import ArtifactRepository
from ..repositories.tag import TagRepository
from ..storage import S3Storage

logger = logging.getLogger(__name__)
@@ -21,8 +20,8 @@ class ArtifactCleanupService:
Reference counting rules:
- ref_count starts at 1 when artifact is first uploaded
- ref_count increments when the same artifact is uploaded again (deduplication)
- ref_count decrements when a tag is deleted or updated to point elsewhere
- ref_count decrements when a package is deleted (for each tag pointing to artifact)
- ref_count decrements when a version is deleted or updated to point elsewhere
- ref_count decrements when a package is deleted (for each version pointing to artifact)
- When ref_count reaches 0, artifact is a candidate for deletion from S3
"""

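# --- Illustrative sketch (not part of this commit) ---------------------------
# A walkthrough of the reference-counting rules above using the renamed hooks.
# The service wiring and the placeholder hashes are assumptions.
def refcount_lifecycle_example(cleanup, artifact_sha: str, other_sha: str):
    # An upload sets ref_count to 1; re-uploading the same bytes increments it.
    # Deleting a version decrements the count on the artifact it pointed to:
    artifact = cleanup.on_version_deleted(artifact_sha)
    # Repointing a version moves one reference from the old artifact to the new:
    old_artifact, new_artifact = cleanup.on_version_updated(artifact_sha, other_sha)
    # Once ref_count hits 0 the artifact becomes a candidate for S3 deletion.
    return artifact, old_artifact, new_artifact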
@@ -30,12 +29,11 @@ class ArtifactCleanupService:
self.db = db
self.storage = storage
self.artifact_repo = ArtifactRepository(db)
self.tag_repo = TagRepository(db)

def on_tag_deleted(self, artifact_id: str) -> Artifact:
def on_version_deleted(self, artifact_id: str) -> Artifact:
"""
Called when a tag is deleted.
Decrements ref_count for the artifact the tag was pointing to.
Called when a version is deleted.
Decrements ref_count for the artifact the version was pointing to.
"""
artifact = self.artifact_repo.get_by_sha256(artifact_id)
if artifact:
@@ -45,11 +43,11 @@ class ArtifactCleanupService:
)
return artifact

def on_tag_updated(
def on_version_updated(
self, old_artifact_id: str, new_artifact_id: str
) -> Tuple[Optional[Artifact], Optional[Artifact]]:
"""
Called when a tag is updated to point to a different artifact.
Called when a version is updated to point to a different artifact.
Decrements ref_count for old artifact, increments for new (if different).

Returns (old_artifact, new_artifact) tuple.
@@ -79,21 +77,21 @@ class ArtifactCleanupService:
def on_package_deleted(self, package_id) -> List[str]:
"""
Called when a package is deleted.
Decrements ref_count for all artifacts that had tags in the package.
Decrements ref_count for all artifacts that had versions in the package.

Returns list of artifact IDs that were affected.
"""
# Get all tags in the package before deletion
tags = self.db.query(Tag).filter(Tag.package_id == package_id).all()
# Get all versions in the package before deletion
versions = self.db.query(PackageVersion).filter(PackageVersion.package_id == package_id).all()

affected_artifacts = []
for tag in tags:
artifact = self.artifact_repo.get_by_sha256(tag.artifact_id)
for version in versions:
artifact = self.artifact_repo.get_by_sha256(version.artifact_id)
if artifact:
self.artifact_repo.decrement_ref_count(artifact)
affected_artifacts.append(tag.artifact_id)
affected_artifacts.append(version.artifact_id)
logger.info(
f"Decremented ref_count for artifact {tag.artifact_id} (package delete)"
f"Decremented ref_count for artifact {version.artifact_id} (package delete)"
)

return affected_artifacts
@@ -152,7 +150,7 @@ class ArtifactCleanupService:

def verify_ref_counts(self, fix: bool = False) -> List[dict]:
"""
Verify that ref_counts match actual tag references.
Verify that ref_counts match actual version references.

Args:
fix: If True, fix any mismatched ref_counts
@@ -162,28 +160,28 @@ class ArtifactCleanupService:
"""
from sqlalchemy import func

# Get actual tag counts per artifact
tag_counts = (
self.db.query(Tag.artifact_id, func.count(Tag.id).label("tag_count"))
.group_by(Tag.artifact_id)
# Get actual version counts per artifact
version_counts = (
self.db.query(PackageVersion.artifact_id, func.count(PackageVersion.id).label("version_count"))
.group_by(PackageVersion.artifact_id)
.all()
)
tag_count_map = {artifact_id: count for artifact_id, count in tag_counts}
version_count_map = {artifact_id: count for artifact_id, count in version_counts}

# Check all artifacts
artifacts = self.db.query(Artifact).all()
mismatches = []

for artifact in artifacts:
actual_count = tag_count_map.get(artifact.id, 0)
actual_count = version_count_map.get(artifact.id, 0)
# ref_count should be at least 1 (initial upload) + additional uploads
# But tags are the primary reference, so we check against tag count
# But versions are the primary reference, so we check against version count

if artifact.ref_count < actual_count:
mismatch = {
"artifact_id": artifact.id,
"stored_ref_count": artifact.ref_count,
"actual_tag_count": actual_count,
"actual_version_count": actual_count,
}
mismatches.append(mismatch)

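# --- Illustrative sketch (not part of this commit) ---------------------------
# A hypothetical maintenance routine built on verify_ref_counts() after the
# tag -> version rename; the reporting below is not part of the diff.
def audit_ref_counts(cleanup, repair: bool = False) -> int:
    mismatches = cleanup.verify_ref_counts(fix=repair)
    for m in mismatches:
        print(
            f"{m['artifact_id']}: stored={m['stored_ref_count']} "
            f"actual={m['actual_version_count']}"
        )
    return len(mismatches)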