Compare commits
1 Commits
8999552949
...
feature/da
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ec6b3f0ed8 |
83
backend/alembic.ini
Normal file
83
backend/alembic.ini
Normal file
@@ -0,0 +1,83 @@
|
||||
# Alembic Configuration File
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts
|
||||
script_location = alembic
|
||||
|
||||
# template used to generate migration files
|
||||
# file_template = %%(rev)s_%%(slug)s
|
||||
|
||||
# sys.path path, will be prepended to sys.path if present.
|
||||
prepend_sys_path = .
|
||||
|
||||
# timezone to use when rendering the date within the migration file
|
||||
# as well as the filename.
|
||||
# If specified, requires the python-dateutil library
|
||||
# timezone =
|
||||
|
||||
# max length of characters to apply to the "slug" field
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during the 'revision' command,
|
||||
# regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without a source .py file
|
||||
# to be detected as revisions in the versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification
|
||||
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
|
||||
|
||||
# version path separator
|
||||
# version_path_separator = :
|
||||
|
||||
# set to 'true' to search source files recursively
|
||||
# in each "version_locations" directory
|
||||
# recursive_version_locations = false
|
||||
|
||||
# the output encoding used when revision files are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
# Database URL - will be overridden by env.py
|
||||
sqlalchemy.url = driver://user:pass@localhost/dbname
|
||||
|
||||
|
||||
[post_write_hooks]
|
||||
# post_write_hooks defines scripts or Python functions that are run
|
||||
# on newly generated revision scripts.
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
27
backend/alembic/README
Normal file
27
backend/alembic/README
Normal file
@@ -0,0 +1,27 @@
|
||||
Alembic Migrations for Orchard
|
||||
|
||||
This directory contains database migration scripts managed by Alembic.
|
||||
|
||||
Common Commands:
|
||||
# Generate a new migration (autogenerate from model changes)
|
||||
alembic revision --autogenerate -m "description of changes"
|
||||
|
||||
# Apply all pending migrations
|
||||
alembic upgrade head
|
||||
|
||||
# Rollback one migration
|
||||
alembic downgrade -1
|
||||
|
||||
# Show current migration status
|
||||
alembic current
|
||||
|
||||
# Show migration history
|
||||
alembic history
|
||||
|
||||
# Generate SQL without applying (for review)
|
||||
alembic upgrade head --sql
|
||||
|
||||
Notes:
|
||||
- Always review autogenerated migrations before applying
|
||||
- Test migrations in development before applying to production
|
||||
- Migrations are stored in the versions/ directory
|
||||
95
backend/alembic/env.py
Normal file
95
backend/alembic/env.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
Alembic migration environment configuration.
|
||||
"""
|
||||
|
||||
from logging.config import fileConfig
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import engine_from_config
|
||||
from sqlalchemy import pool
|
||||
|
||||
from alembic import context
|
||||
|
||||
# Add the app directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from app.config import get_settings
|
||||
from app.models import Base
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Get database URL from settings
|
||||
settings = get_settings()
|
||||
config.set_main_option("sqlalchemy.url", settings.database_url)
|
||||
|
||||
# Interpret the config file for Python logging.
|
||||
# This line sets up loggers basically.
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
# add your model's MetaData object here
|
||||
# for 'autogenerate' support
|
||||
target_metadata = Base.metadata
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""Run migrations in 'offline' mode.
|
||||
|
||||
This configures the context with just a URL
|
||||
and not an Engine, though an Engine is acceptable
|
||||
here as well. By skipping the Engine creation
|
||||
we don't even need a DBAPI to be available.
|
||||
|
||||
Calls to context.execute() here emit the given string to the
|
||||
script output.
|
||||
|
||||
"""
|
||||
url = config.get_main_option("sqlalchemy.url")
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata,
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
"""Run migrations in 'online' mode.
|
||||
|
||||
In this scenario we need to create an Engine
|
||||
and associate a connection with the context.
|
||||
|
||||
"""
|
||||
connectable = engine_from_config(
|
||||
config.get_section(config.config_ini_section, {}),
|
||||
prefix="sqlalchemy.",
|
||||
poolclass=pool.NullPool,
|
||||
)
|
||||
|
||||
with connectable.connect() as connection:
|
||||
context.configure(
|
||||
connection=connection,
|
||||
target_metadata=target_metadata,
|
||||
compare_type=True, # Detect column type changes
|
||||
compare_server_default=True, # Detect default value changes
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
26
backend/alembic/script.py.mako
Normal file
26
backend/alembic/script.py.mako
Normal file
@@ -0,0 +1,26 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = ${repr(up_revision)}
|
||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -18,6 +18,12 @@ class Settings(BaseSettings):
|
||||
database_dbname: str = "orchard"
|
||||
database_sslmode: str = "disable"
|
||||
|
||||
# Database connection pool settings
|
||||
database_pool_size: int = 5 # Number of connections to keep open
|
||||
database_max_overflow: int = 10 # Max additional connections beyond pool_size
|
||||
database_pool_timeout: int = 30 # Seconds to wait for a connection from pool
|
||||
database_pool_recycle: int = 1800 # Recycle connections after this many seconds (30 min)
|
||||
|
||||
# S3
|
||||
s3_endpoint: str = ""
|
||||
s3_region: str = "us-east-1"
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy import create_engine, text, event
|
||||
from sqlalchemy.orm import sessionmaker, Session
|
||||
from sqlalchemy.pool import QueuePool
|
||||
from typing import Generator
|
||||
from contextlib import contextmanager
|
||||
import logging
|
||||
import time
|
||||
|
||||
from .config import get_settings
|
||||
from .models import Base
|
||||
@@ -9,10 +12,44 @@ from .models import Base
|
||||
settings = get_settings()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
engine = create_engine(settings.database_url, pool_pre_ping=True)
|
||||
# Create engine with connection pool configuration
|
||||
engine = create_engine(
|
||||
settings.database_url,
|
||||
pool_pre_ping=True, # Check connection health before using
|
||||
poolclass=QueuePool,
|
||||
pool_size=settings.database_pool_size,
|
||||
max_overflow=settings.database_max_overflow,
|
||||
pool_timeout=settings.database_pool_timeout,
|
||||
pool_recycle=settings.database_pool_recycle,
|
||||
)
|
||||
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
# Connection pool monitoring
|
||||
@event.listens_for(engine, "checkout")
|
||||
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
|
||||
"""Log when a connection is checked out from the pool"""
|
||||
logger.debug(f"Connection checked out from pool: {id(dbapi_connection)}")
|
||||
|
||||
|
||||
@event.listens_for(engine, "checkin")
|
||||
def receive_checkin(dbapi_connection, connection_record):
|
||||
"""Log when a connection is returned to the pool"""
|
||||
logger.debug(f"Connection returned to pool: {id(dbapi_connection)}")
|
||||
|
||||
|
||||
def get_pool_status() -> dict:
|
||||
"""Get current connection pool status for monitoring"""
|
||||
pool = engine.pool
|
||||
return {
|
||||
"pool_size": pool.size(),
|
||||
"checked_out": pool.checkedout(),
|
||||
"overflow": pool.overflow(),
|
||||
"checked_in": pool.checkedin(),
|
||||
}
|
||||
|
||||
|
||||
def init_db():
|
||||
"""Create all tables and run migrations"""
|
||||
Base.metadata.create_all(bind=engine)
|
||||
@@ -62,6 +99,51 @@ def _run_migrations():
|
||||
END IF;
|
||||
END $$;
|
||||
""",
|
||||
# Add ref_count index and constraints for artifacts
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
-- Add ref_count index
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_artifacts_ref_count'
|
||||
) THEN
|
||||
CREATE INDEX idx_artifacts_ref_count ON artifacts(ref_count);
|
||||
END IF;
|
||||
|
||||
-- Add ref_count >= 0 constraint
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_constraint WHERE conname = 'check_ref_count_non_negative'
|
||||
) THEN
|
||||
ALTER TABLE artifacts ADD CONSTRAINT check_ref_count_non_negative CHECK (ref_count >= 0);
|
||||
END IF;
|
||||
END $$;
|
||||
""",
|
||||
# Add composite indexes for packages and tags
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
-- Composite index for package lookup by project and name
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_packages_project_name'
|
||||
) THEN
|
||||
CREATE UNIQUE INDEX idx_packages_project_name ON packages(project_id, name);
|
||||
END IF;
|
||||
|
||||
-- Composite index for tag lookup by package and name
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_name'
|
||||
) THEN
|
||||
CREATE UNIQUE INDEX idx_tags_package_name ON tags(package_id, name);
|
||||
END IF;
|
||||
|
||||
-- Composite index for recent tags queries
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_indexes WHERE indexname = 'idx_tags_package_created_at'
|
||||
) THEN
|
||||
CREATE INDEX idx_tags_package_created_at ON tags(package_id, created_at);
|
||||
END IF;
|
||||
END $$;
|
||||
""",
|
||||
]
|
||||
|
||||
with engine.connect() as conn:
|
||||
@@ -80,3 +162,75 @@ def get_db() -> Generator[Session, None, None]:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@contextmanager
|
||||
def transaction(db: Session):
|
||||
"""
|
||||
Context manager for explicit transaction management with savepoint support.
|
||||
|
||||
Usage:
|
||||
with transaction(db):
|
||||
# operations here
|
||||
# automatically commits on success, rolls back on exception
|
||||
"""
|
||||
try:
|
||||
yield db
|
||||
db.commit()
|
||||
except Exception:
|
||||
db.rollback()
|
||||
raise
|
||||
|
||||
|
||||
@contextmanager
|
||||
def savepoint(db: Session, name: str = None):
|
||||
"""
|
||||
Create a savepoint for partial rollback support.
|
||||
|
||||
Usage:
|
||||
with savepoint(db, "my_savepoint"):
|
||||
# operations here
|
||||
# rolls back to savepoint on exception, but doesn't rollback whole transaction
|
||||
"""
|
||||
savepoint_obj = db.begin_nested()
|
||||
try:
|
||||
yield savepoint_obj
|
||||
savepoint_obj.commit()
|
||||
except Exception:
|
||||
savepoint_obj.rollback()
|
||||
raise
|
||||
|
||||
|
||||
def retry_on_deadlock(func, max_retries: int = 3, delay: float = 0.1):
|
||||
"""
|
||||
Decorator/wrapper to retry operations on deadlock detection.
|
||||
|
||||
Usage:
|
||||
@retry_on_deadlock
|
||||
def my_operation(db):
|
||||
...
|
||||
|
||||
Or:
|
||||
retry_on_deadlock(lambda: my_operation(db))()
|
||||
"""
|
||||
import functools
|
||||
from sqlalchemy.exc import OperationalError
|
||||
|
||||
@functools.wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
last_exception = None
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except OperationalError as e:
|
||||
# Check for deadlock error codes (PostgreSQL: 40P01, MySQL: 1213)
|
||||
error_str = str(e).lower()
|
||||
if "deadlock" in error_str or "40p01" in error_str:
|
||||
last_exception = e
|
||||
logger.warning(f"Deadlock detected, retrying (attempt {attempt + 1}/{max_retries})")
|
||||
time.sleep(delay * (attempt + 1)) # Exponential backoff
|
||||
else:
|
||||
raise
|
||||
raise last_exception
|
||||
|
||||
return wrapper
|
||||
|
||||
@@ -53,6 +53,7 @@ class Package(Base):
|
||||
Index("idx_packages_name", "name"),
|
||||
Index("idx_packages_format", "format"),
|
||||
Index("idx_packages_platform", "platform"),
|
||||
Index("idx_packages_project_name", "project_id", "name", unique=True), # Composite unique index
|
||||
CheckConstraint(
|
||||
"format IN ('generic', 'npm', 'pypi', 'docker', 'deb', 'rpm', 'maven', 'nuget', 'helm')",
|
||||
name="check_package_format"
|
||||
@@ -84,6 +85,9 @@ class Artifact(Base):
|
||||
__table_args__ = (
|
||||
Index("idx_artifacts_created_at", "created_at"),
|
||||
Index("idx_artifacts_created_by", "created_by"),
|
||||
Index("idx_artifacts_ref_count", "ref_count"), # For cleanup queries
|
||||
CheckConstraint("ref_count >= 0", name="check_ref_count_non_negative"),
|
||||
CheckConstraint("size > 0", name="check_size_positive"),
|
||||
)
|
||||
|
||||
|
||||
@@ -104,6 +108,8 @@ class Tag(Base):
|
||||
__table_args__ = (
|
||||
Index("idx_tags_package_id", "package_id"),
|
||||
Index("idx_tags_artifact_id", "artifact_id"),
|
||||
Index("idx_tags_package_name", "package_id", "name", unique=True), # Composite unique index
|
||||
Index("idx_tags_package_created_at", "package_id", "created_at"), # For recent tags queries
|
||||
)
|
||||
|
||||
|
||||
|
||||
22
backend/app/repositories/__init__.py
Normal file
22
backend/app/repositories/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
Repository pattern implementation for data access layer.
|
||||
|
||||
Repositories abstract database operations from business logic,
|
||||
providing clean interfaces for CRUD operations on each entity.
|
||||
"""
|
||||
|
||||
from .base import BaseRepository
|
||||
from .project import ProjectRepository
|
||||
from .package import PackageRepository
|
||||
from .artifact import ArtifactRepository
|
||||
from .tag import TagRepository
|
||||
from .upload import UploadRepository
|
||||
|
||||
__all__ = [
|
||||
"BaseRepository",
|
||||
"ProjectRepository",
|
||||
"PackageRepository",
|
||||
"ArtifactRepository",
|
||||
"TagRepository",
|
||||
"UploadRepository",
|
||||
]
|
||||
157
backend/app/repositories/artifact.py
Normal file
157
backend/app/repositories/artifact.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
Artifact repository for data access operations.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, or_
|
||||
from uuid import UUID
|
||||
|
||||
from .base import BaseRepository
|
||||
from ..models import Artifact, Tag, Upload, Package, Project
|
||||
|
||||
|
||||
class ArtifactRepository(BaseRepository[Artifact]):
|
||||
"""Repository for Artifact entity operations."""
|
||||
|
||||
model = Artifact
|
||||
|
||||
def get_by_sha256(self, sha256: str) -> Optional[Artifact]:
|
||||
"""Get artifact by SHA256 hash (primary key)."""
|
||||
return self.db.query(Artifact).filter(Artifact.id == sha256).first()
|
||||
|
||||
def exists_by_sha256(self, sha256: str) -> bool:
|
||||
"""Check if artifact with SHA256 exists."""
|
||||
return self.db.query(
|
||||
self.db.query(Artifact).filter(Artifact.id == sha256).exists()
|
||||
).scalar()
|
||||
|
||||
def create_artifact(
|
||||
self,
|
||||
sha256: str,
|
||||
size: int,
|
||||
s3_key: str,
|
||||
created_by: str,
|
||||
content_type: Optional[str] = None,
|
||||
original_name: Optional[str] = None,
|
||||
format_metadata: Optional[dict] = None,
|
||||
) -> Artifact:
|
||||
"""Create a new artifact."""
|
||||
artifact = Artifact(
|
||||
id=sha256,
|
||||
size=size,
|
||||
s3_key=s3_key,
|
||||
created_by=created_by,
|
||||
content_type=content_type,
|
||||
original_name=original_name,
|
||||
format_metadata=format_metadata or {},
|
||||
ref_count=1,
|
||||
)
|
||||
self.db.add(artifact)
|
||||
self.db.flush()
|
||||
return artifact
|
||||
|
||||
def increment_ref_count(self, artifact: Artifact) -> Artifact:
|
||||
"""Increment artifact reference count."""
|
||||
artifact.ref_count += 1
|
||||
self.db.flush()
|
||||
return artifact
|
||||
|
||||
def decrement_ref_count(self, artifact: Artifact) -> Artifact:
|
||||
"""
|
||||
Decrement artifact reference count.
|
||||
Returns the artifact with updated count.
|
||||
Does not delete the artifact even if ref_count reaches 0.
|
||||
"""
|
||||
if artifact.ref_count > 0:
|
||||
artifact.ref_count -= 1
|
||||
self.db.flush()
|
||||
return artifact
|
||||
|
||||
def get_orphaned_artifacts(self, limit: int = 100) -> List[Artifact]:
|
||||
"""Get artifacts with ref_count = 0 (candidates for cleanup)."""
|
||||
return (
|
||||
self.db.query(Artifact)
|
||||
.filter(Artifact.ref_count == 0)
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
|
||||
def get_artifacts_without_tags(self, limit: int = 100) -> List[Artifact]:
|
||||
"""Get artifacts that have no tags pointing to them."""
|
||||
# Subquery to find artifact IDs that have tags
|
||||
tagged_artifacts = self.db.query(Tag.artifact_id).distinct().subquery()
|
||||
|
||||
return (
|
||||
self.db.query(Artifact)
|
||||
.filter(~Artifact.id.in_(tagged_artifacts))
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
|
||||
def find_by_package(
|
||||
self,
|
||||
package_id: UUID,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
content_type: Optional[str] = None,
|
||||
) -> Tuple[List[Artifact], int]:
|
||||
"""Find artifacts uploaded to a package."""
|
||||
# Get distinct artifact IDs from uploads
|
||||
artifact_ids_subquery = (
|
||||
self.db.query(func.distinct(Upload.artifact_id))
|
||||
.filter(Upload.package_id == package_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
query = self.db.query(Artifact).filter(Artifact.id.in_(artifact_ids_subquery))
|
||||
|
||||
if content_type:
|
||||
query = query.filter(Artifact.content_type == content_type)
|
||||
|
||||
total = query.count()
|
||||
offset = (page - 1) * limit
|
||||
artifacts = query.order_by(Artifact.created_at.desc()).offset(offset).limit(limit).all()
|
||||
|
||||
return artifacts, total
|
||||
|
||||
def get_referencing_tags(self, artifact_id: str) -> List[Tuple[Tag, Package, Project]]:
|
||||
"""Get all tags referencing this artifact with package and project info."""
|
||||
return (
|
||||
self.db.query(Tag, Package, Project)
|
||||
.join(Package, Tag.package_id == Package.id)
|
||||
.join(Project, Package.project_id == Project.id)
|
||||
.filter(Tag.artifact_id == artifact_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
def search(self, query_str: str, limit: int = 10) -> List[Tuple[Tag, Artifact, str, str]]:
|
||||
"""
|
||||
Search artifacts by tag name or original filename.
|
||||
Returns (tag, artifact, package_name, project_name) tuples.
|
||||
"""
|
||||
search_lower = query_str.lower()
|
||||
return (
|
||||
self.db.query(Tag, Artifact, Package.name, Project.name)
|
||||
.join(Artifact, Tag.artifact_id == Artifact.id)
|
||||
.join(Package, Tag.package_id == Package.id)
|
||||
.join(Project, Package.project_id == Project.id)
|
||||
.filter(
|
||||
or_(
|
||||
func.lower(Tag.name).contains(search_lower),
|
||||
func.lower(Artifact.original_name).contains(search_lower)
|
||||
)
|
||||
)
|
||||
.order_by(Tag.name)
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
|
||||
def update_metadata(self, artifact: Artifact, metadata: dict) -> Artifact:
|
||||
"""Update or merge format metadata."""
|
||||
if artifact.format_metadata:
|
||||
artifact.format_metadata = {**artifact.format_metadata, **metadata}
|
||||
else:
|
||||
artifact.format_metadata = metadata
|
||||
self.db.flush()
|
||||
return artifact
|
||||
96
backend/app/repositories/base.py
Normal file
96
backend/app/repositories/base.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Base repository class with common CRUD operations.
|
||||
"""
|
||||
|
||||
from typing import TypeVar, Generic, Type, Optional, List, Any, Dict
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, asc, desc
|
||||
from uuid import UUID
|
||||
|
||||
from ..models import Base
|
||||
|
||||
T = TypeVar("T", bound=Base)
|
||||
|
||||
|
||||
class BaseRepository(Generic[T]):
|
||||
"""
|
||||
Base repository providing common CRUD operations.
|
||||
|
||||
Subclasses should set `model` class attribute to the SQLAlchemy model.
|
||||
"""
|
||||
|
||||
model: Type[T]
|
||||
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
|
||||
def get_by_id(self, id: Any) -> Optional[T]:
|
||||
"""Get entity by primary key."""
|
||||
return self.db.query(self.model).filter(self.model.id == id).first()
|
||||
|
||||
def get_all(
|
||||
self,
|
||||
skip: int = 0,
|
||||
limit: int = 100,
|
||||
order_by: str = None,
|
||||
order_desc: bool = False,
|
||||
) -> List[T]:
|
||||
"""Get all entities with pagination and optional ordering."""
|
||||
query = self.db.query(self.model)
|
||||
|
||||
if order_by and hasattr(self.model, order_by):
|
||||
column = getattr(self.model, order_by)
|
||||
query = query.order_by(desc(column) if order_desc else asc(column))
|
||||
|
||||
return query.offset(skip).limit(limit).all()
|
||||
|
||||
def count(self) -> int:
|
||||
"""Count total entities."""
|
||||
return self.db.query(func.count(self.model.id)).scalar() or 0
|
||||
|
||||
def create(self, **kwargs) -> T:
|
||||
"""Create a new entity."""
|
||||
entity = self.model(**kwargs)
|
||||
self.db.add(entity)
|
||||
self.db.flush() # Flush to get ID without committing
|
||||
return entity
|
||||
|
||||
def update(self, entity: T, **kwargs) -> T:
|
||||
"""Update an existing entity."""
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(entity, key):
|
||||
setattr(entity, key, value)
|
||||
self.db.flush()
|
||||
return entity
|
||||
|
||||
def delete(self, entity: T) -> None:
|
||||
"""Delete an entity."""
|
||||
self.db.delete(entity)
|
||||
self.db.flush()
|
||||
|
||||
def delete_by_id(self, id: Any) -> bool:
|
||||
"""Delete entity by ID. Returns True if deleted, False if not found."""
|
||||
entity = self.get_by_id(id)
|
||||
if entity:
|
||||
self.delete(entity)
|
||||
return True
|
||||
return False
|
||||
|
||||
def exists(self, id: Any) -> bool:
|
||||
"""Check if entity exists by ID."""
|
||||
return self.db.query(
|
||||
self.db.query(self.model).filter(self.model.id == id).exists()
|
||||
).scalar()
|
||||
|
||||
def commit(self) -> None:
|
||||
"""Commit the current transaction."""
|
||||
self.db.commit()
|
||||
|
||||
def rollback(self) -> None:
|
||||
"""Rollback the current transaction."""
|
||||
self.db.rollback()
|
||||
|
||||
def refresh(self, entity: T) -> T:
|
||||
"""Refresh entity from database."""
|
||||
self.db.refresh(entity)
|
||||
return entity
|
||||
177
backend/app/repositories/package.py
Normal file
177
backend/app/repositories/package.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""
|
||||
Package repository for data access operations.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, or_, asc, desc
|
||||
from uuid import UUID
|
||||
|
||||
from .base import BaseRepository
|
||||
from ..models import Package, Project, Tag, Upload, Artifact
|
||||
|
||||
|
||||
class PackageRepository(BaseRepository[Package]):
|
||||
"""Repository for Package entity operations."""
|
||||
|
||||
model = Package
|
||||
|
||||
def get_by_name(self, project_id: UUID, name: str) -> Optional[Package]:
|
||||
"""Get package by name within a project."""
|
||||
return (
|
||||
self.db.query(Package)
|
||||
.filter(Package.project_id == project_id, Package.name == name)
|
||||
.first()
|
||||
)
|
||||
|
||||
def get_by_project_and_name(self, project_name: str, package_name: str) -> Optional[Package]:
|
||||
"""Get package by project name and package name."""
|
||||
return (
|
||||
self.db.query(Package)
|
||||
.join(Project, Package.project_id == Project.id)
|
||||
.filter(Project.name == project_name, Package.name == package_name)
|
||||
.first()
|
||||
)
|
||||
|
||||
def exists_by_name(self, project_id: UUID, name: str) -> bool:
|
||||
"""Check if package with name exists in project."""
|
||||
return self.db.query(
|
||||
self.db.query(Package)
|
||||
.filter(Package.project_id == project_id, Package.name == name)
|
||||
.exists()
|
||||
).scalar()
|
||||
|
||||
def list_by_project(
|
||||
self,
|
||||
project_id: UUID,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
search: Optional[str] = None,
|
||||
format: Optional[str] = None,
|
||||
platform: Optional[str] = None,
|
||||
sort: str = "name",
|
||||
order: str = "asc",
|
||||
) -> Tuple[List[Package], int]:
|
||||
"""
|
||||
List packages in a project with filtering and pagination.
|
||||
|
||||
Returns tuple of (packages, total_count).
|
||||
"""
|
||||
query = self.db.query(Package).filter(Package.project_id == project_id)
|
||||
|
||||
# Apply search filter
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
query = query.filter(
|
||||
or_(
|
||||
func.lower(Package.name).contains(search_lower),
|
||||
func.lower(Package.description).contains(search_lower)
|
||||
)
|
||||
)
|
||||
|
||||
# Apply format filter
|
||||
if format:
|
||||
query = query.filter(Package.format == format)
|
||||
|
||||
# Apply platform filter
|
||||
if platform:
|
||||
query = query.filter(Package.platform == platform)
|
||||
|
||||
# Get total count
|
||||
total = query.count()
|
||||
|
||||
# Apply sorting
|
||||
sort_columns = {
|
||||
"name": Package.name,
|
||||
"created_at": Package.created_at,
|
||||
"updated_at": Package.updated_at,
|
||||
}
|
||||
sort_column = sort_columns.get(sort, Package.name)
|
||||
if order == "desc":
|
||||
query = query.order_by(desc(sort_column))
|
||||
else:
|
||||
query = query.order_by(asc(sort_column))
|
||||
|
||||
# Apply pagination
|
||||
offset = (page - 1) * limit
|
||||
packages = query.offset(offset).limit(limit).all()
|
||||
|
||||
return packages, total
|
||||
|
||||
def create_package(
|
||||
self,
|
||||
project_id: UUID,
|
||||
name: str,
|
||||
description: Optional[str] = None,
|
||||
format: str = "generic",
|
||||
platform: str = "any",
|
||||
) -> Package:
|
||||
"""Create a new package."""
|
||||
return self.create(
|
||||
project_id=project_id,
|
||||
name=name,
|
||||
description=description,
|
||||
format=format,
|
||||
platform=platform,
|
||||
)
|
||||
|
||||
def update_package(
|
||||
self,
|
||||
package: Package,
|
||||
name: Optional[str] = None,
|
||||
description: Optional[str] = None,
|
||||
format: Optional[str] = None,
|
||||
platform: Optional[str] = None,
|
||||
) -> Package:
|
||||
"""Update package fields."""
|
||||
updates = {}
|
||||
if name is not None:
|
||||
updates["name"] = name
|
||||
if description is not None:
|
||||
updates["description"] = description
|
||||
if format is not None:
|
||||
updates["format"] = format
|
||||
if platform is not None:
|
||||
updates["platform"] = platform
|
||||
return self.update(package, **updates)
|
||||
|
||||
def get_stats(self, package_id: UUID) -> dict:
|
||||
"""Get package statistics (tag count, artifact count, total size)."""
|
||||
tag_count = (
|
||||
self.db.query(func.count(Tag.id))
|
||||
.filter(Tag.package_id == package_id)
|
||||
.scalar() or 0
|
||||
)
|
||||
|
||||
artifact_stats = (
|
||||
self.db.query(
|
||||
func.count(func.distinct(Upload.artifact_id)),
|
||||
func.coalesce(func.sum(Artifact.size), 0)
|
||||
)
|
||||
.join(Artifact, Upload.artifact_id == Artifact.id)
|
||||
.filter(Upload.package_id == package_id)
|
||||
.first()
|
||||
)
|
||||
|
||||
return {
|
||||
"tag_count": tag_count,
|
||||
"artifact_count": artifact_stats[0] if artifact_stats else 0,
|
||||
"total_size": artifact_stats[1] if artifact_stats else 0,
|
||||
}
|
||||
|
||||
def search(self, query_str: str, limit: int = 10) -> List[Tuple[Package, str]]:
|
||||
"""Search packages by name or description. Returns (package, project_name) tuples."""
|
||||
search_lower = query_str.lower()
|
||||
return (
|
||||
self.db.query(Package, Project.name)
|
||||
.join(Project, Package.project_id == Project.id)
|
||||
.filter(
|
||||
or_(
|
||||
func.lower(Package.name).contains(search_lower),
|
||||
func.lower(Package.description).contains(search_lower)
|
||||
)
|
||||
)
|
||||
.order_by(Package.name)
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
132
backend/app/repositories/project.py
Normal file
132
backend/app/repositories/project.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Project repository for data access operations.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, or_, asc, desc
|
||||
from uuid import UUID
|
||||
|
||||
from .base import BaseRepository
|
||||
from ..models import Project
|
||||
|
||||
|
||||
class ProjectRepository(BaseRepository[Project]):
|
||||
"""Repository for Project entity operations."""
|
||||
|
||||
model = Project
|
||||
|
||||
def get_by_name(self, name: str) -> Optional[Project]:
|
||||
"""Get project by unique name."""
|
||||
return self.db.query(Project).filter(Project.name == name).first()
|
||||
|
||||
def exists_by_name(self, name: str) -> bool:
|
||||
"""Check if project with name exists."""
|
||||
return self.db.query(
|
||||
self.db.query(Project).filter(Project.name == name).exists()
|
||||
).scalar()
|
||||
|
||||
def list_accessible(
|
||||
self,
|
||||
user_id: str,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
search: Optional[str] = None,
|
||||
visibility: Optional[str] = None,
|
||||
sort: str = "name",
|
||||
order: str = "asc",
|
||||
) -> Tuple[List[Project], int]:
|
||||
"""
|
||||
List projects accessible to user with filtering and pagination.
|
||||
|
||||
Returns tuple of (projects, total_count).
|
||||
"""
|
||||
# Base query - filter by access
|
||||
query = self.db.query(Project).filter(
|
||||
or_(Project.is_public == True, Project.created_by == user_id)
|
||||
)
|
||||
|
||||
# Apply visibility filter
|
||||
if visibility == "public":
|
||||
query = query.filter(Project.is_public == True)
|
||||
elif visibility == "private":
|
||||
query = query.filter(Project.is_public == False, Project.created_by == user_id)
|
||||
|
||||
# Apply search filter
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
query = query.filter(
|
||||
or_(
|
||||
func.lower(Project.name).contains(search_lower),
|
||||
func.lower(Project.description).contains(search_lower)
|
||||
)
|
||||
)
|
||||
|
||||
# Get total count before pagination
|
||||
total = query.count()
|
||||
|
||||
# Apply sorting
|
||||
sort_columns = {
|
||||
"name": Project.name,
|
||||
"created_at": Project.created_at,
|
||||
"updated_at": Project.updated_at,
|
||||
}
|
||||
sort_column = sort_columns.get(sort, Project.name)
|
||||
if order == "desc":
|
||||
query = query.order_by(desc(sort_column))
|
||||
else:
|
||||
query = query.order_by(asc(sort_column))
|
||||
|
||||
# Apply pagination
|
||||
offset = (page - 1) * limit
|
||||
projects = query.offset(offset).limit(limit).all()
|
||||
|
||||
return projects, total
|
||||
|
||||
def create_project(
|
||||
self,
|
||||
name: str,
|
||||
created_by: str,
|
||||
description: Optional[str] = None,
|
||||
is_public: bool = True,
|
||||
) -> Project:
|
||||
"""Create a new project."""
|
||||
return self.create(
|
||||
name=name,
|
||||
description=description,
|
||||
is_public=is_public,
|
||||
created_by=created_by,
|
||||
)
|
||||
|
||||
def update_project(
|
||||
self,
|
||||
project: Project,
|
||||
name: Optional[str] = None,
|
||||
description: Optional[str] = None,
|
||||
is_public: Optional[bool] = None,
|
||||
) -> Project:
|
||||
"""Update project fields."""
|
||||
updates = {}
|
||||
if name is not None:
|
||||
updates["name"] = name
|
||||
if description is not None:
|
||||
updates["description"] = description
|
||||
if is_public is not None:
|
||||
updates["is_public"] = is_public
|
||||
return self.update(project, **updates)
|
||||
|
||||
def search(self, query_str: str, limit: int = 10) -> List[Project]:
|
||||
"""Search projects by name or description."""
|
||||
search_lower = query_str.lower()
|
||||
return (
|
||||
self.db.query(Project)
|
||||
.filter(
|
||||
or_(
|
||||
func.lower(Project.name).contains(search_lower),
|
||||
func.lower(Project.description).contains(search_lower)
|
||||
)
|
||||
)
|
||||
.order_by(Project.name)
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
168
backend/app/repositories/tag.py
Normal file
168
backend/app/repositories/tag.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
Tag repository for data access operations.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, or_, asc, desc
|
||||
from uuid import UUID
|
||||
|
||||
from .base import BaseRepository
|
||||
from ..models import Tag, TagHistory, Artifact, Package, Project
|
||||
|
||||
|
||||
class TagRepository(BaseRepository[Tag]):
|
||||
"""Repository for Tag entity operations."""
|
||||
|
||||
model = Tag
|
||||
|
||||
def get_by_name(self, package_id: UUID, name: str) -> Optional[Tag]:
|
||||
"""Get tag by name within a package."""
|
||||
return (
|
||||
self.db.query(Tag)
|
||||
.filter(Tag.package_id == package_id, Tag.name == name)
|
||||
.first()
|
||||
)
|
||||
|
||||
def get_with_artifact(self, package_id: UUID, name: str) -> Optional[Tuple[Tag, Artifact]]:
|
||||
"""Get tag with its artifact."""
|
||||
return (
|
||||
self.db.query(Tag, Artifact)
|
||||
.join(Artifact, Tag.artifact_id == Artifact.id)
|
||||
.filter(Tag.package_id == package_id, Tag.name == name)
|
||||
.first()
|
||||
)
|
||||
|
||||
def exists_by_name(self, package_id: UUID, name: str) -> bool:
|
||||
"""Check if tag with name exists in package."""
|
||||
return self.db.query(
|
||||
self.db.query(Tag)
|
||||
.filter(Tag.package_id == package_id, Tag.name == name)
|
||||
.exists()
|
||||
).scalar()
|
||||
|
||||
def list_by_package(
|
||||
self,
|
||||
package_id: UUID,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
search: Optional[str] = None,
|
||||
sort: str = "name",
|
||||
order: str = "asc",
|
||||
) -> Tuple[List[Tuple[Tag, Artifact]], int]:
|
||||
"""
|
||||
List tags in a package with artifact metadata.
|
||||
|
||||
Returns tuple of ((tag, artifact) tuples, total_count).
|
||||
"""
|
||||
query = (
|
||||
self.db.query(Tag, Artifact)
|
||||
.join(Artifact, Tag.artifact_id == Artifact.id)
|
||||
.filter(Tag.package_id == package_id)
|
||||
)
|
||||
|
||||
# Apply search filter (tag name or artifact original filename)
|
||||
if search:
|
||||
search_lower = search.lower()
|
||||
query = query.filter(
|
||||
or_(
|
||||
func.lower(Tag.name).contains(search_lower),
|
||||
func.lower(Artifact.original_name).contains(search_lower)
|
||||
)
|
||||
)
|
||||
|
||||
# Get total count
|
||||
total = query.count()
|
||||
|
||||
# Apply sorting
|
||||
sort_columns = {
|
||||
"name": Tag.name,
|
||||
"created_at": Tag.created_at,
|
||||
}
|
||||
sort_column = sort_columns.get(sort, Tag.name)
|
||||
if order == "desc":
|
||||
query = query.order_by(desc(sort_column))
|
||||
else:
|
||||
query = query.order_by(asc(sort_column))
|
||||
|
||||
# Apply pagination
|
||||
offset = (page - 1) * limit
|
||||
results = query.offset(offset).limit(limit).all()
|
||||
|
||||
return results, total
|
||||
|
||||
def create_tag(
|
||||
self,
|
||||
package_id: UUID,
|
||||
name: str,
|
||||
artifact_id: str,
|
||||
created_by: str,
|
||||
) -> Tag:
|
||||
"""Create a new tag."""
|
||||
return self.create(
|
||||
package_id=package_id,
|
||||
name=name,
|
||||
artifact_id=artifact_id,
|
||||
created_by=created_by,
|
||||
)
|
||||
|
||||
def update_artifact(
|
||||
self,
|
||||
tag: Tag,
|
||||
new_artifact_id: str,
|
||||
changed_by: str,
|
||||
record_history: bool = True,
|
||||
) -> Tag:
|
||||
"""
|
||||
Update tag to point to a different artifact.
|
||||
Optionally records change in tag history.
|
||||
"""
|
||||
old_artifact_id = tag.artifact_id
|
||||
|
||||
if record_history and old_artifact_id != new_artifact_id:
|
||||
history = TagHistory(
|
||||
tag_id=tag.id,
|
||||
old_artifact_id=old_artifact_id,
|
||||
new_artifact_id=new_artifact_id,
|
||||
changed_by=changed_by,
|
||||
)
|
||||
self.db.add(history)
|
||||
|
||||
tag.artifact_id = new_artifact_id
|
||||
tag.created_by = changed_by
|
||||
self.db.flush()
|
||||
return tag
|
||||
|
||||
def get_history(self, tag_id: UUID) -> List[TagHistory]:
|
||||
"""Get tag change history."""
|
||||
return (
|
||||
self.db.query(TagHistory)
|
||||
.filter(TagHistory.tag_id == tag_id)
|
||||
.order_by(TagHistory.changed_at.desc())
|
||||
.all()
|
||||
)
|
||||
|
||||
def get_latest_in_package(self, package_id: UUID) -> Optional[Tag]:
|
||||
"""Get the most recently created/updated tag in a package."""
|
||||
return (
|
||||
self.db.query(Tag)
|
||||
.filter(Tag.package_id == package_id)
|
||||
.order_by(Tag.created_at.desc())
|
||||
.first()
|
||||
)
|
||||
|
||||
def get_by_artifact(self, artifact_id: str) -> List[Tag]:
|
||||
"""Get all tags pointing to an artifact."""
|
||||
return (
|
||||
self.db.query(Tag)
|
||||
.filter(Tag.artifact_id == artifact_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
def count_by_artifact(self, artifact_id: str) -> int:
|
||||
"""Count tags pointing to an artifact."""
|
||||
return (
|
||||
self.db.query(func.count(Tag.id))
|
||||
.filter(Tag.artifact_id == artifact_id)
|
||||
.scalar() or 0
|
||||
)
|
||||
136
backend/app/repositories/upload.py
Normal file
136
backend/app/repositories/upload.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Upload repository for data access operations.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Tuple
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, desc
|
||||
from uuid import UUID
|
||||
|
||||
from .base import BaseRepository
|
||||
from ..models import Upload, Artifact, Package, Project
|
||||
|
||||
|
||||
class UploadRepository(BaseRepository[Upload]):
|
||||
"""Repository for Upload entity operations."""
|
||||
|
||||
model = Upload
|
||||
|
||||
def create_upload(
|
||||
self,
|
||||
artifact_id: str,
|
||||
package_id: UUID,
|
||||
uploaded_by: str,
|
||||
original_name: Optional[str] = None,
|
||||
source_ip: Optional[str] = None,
|
||||
) -> Upload:
|
||||
"""Record a new upload event."""
|
||||
return self.create(
|
||||
artifact_id=artifact_id,
|
||||
package_id=package_id,
|
||||
original_name=original_name,
|
||||
uploaded_by=uploaded_by,
|
||||
source_ip=source_ip,
|
||||
)
|
||||
|
||||
def list_by_package(
|
||||
self,
|
||||
package_id: UUID,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
) -> Tuple[List[Upload], int]:
|
||||
"""List uploads for a package with pagination."""
|
||||
query = self.db.query(Upload).filter(Upload.package_id == package_id)
|
||||
|
||||
total = query.count()
|
||||
offset = (page - 1) * limit
|
||||
uploads = query.order_by(Upload.uploaded_at.desc()).offset(offset).limit(limit).all()
|
||||
|
||||
return uploads, total
|
||||
|
||||
def list_by_artifact(self, artifact_id: str) -> List[Upload]:
|
||||
"""List all uploads of a specific artifact."""
|
||||
return (
|
||||
self.db.query(Upload)
|
||||
.filter(Upload.artifact_id == artifact_id)
|
||||
.order_by(Upload.uploaded_at.desc())
|
||||
.all()
|
||||
)
|
||||
|
||||
def get_latest_for_package(self, package_id: UUID) -> Optional[Upload]:
|
||||
"""Get the most recent upload for a package."""
|
||||
return (
|
||||
self.db.query(Upload)
|
||||
.filter(Upload.package_id == package_id)
|
||||
.order_by(Upload.uploaded_at.desc())
|
||||
.first()
|
||||
)
|
||||
|
||||
def get_latest_timestamp(self, package_id: UUID) -> Optional[datetime]:
|
||||
"""Get timestamp of most recent upload for a package."""
|
||||
result = (
|
||||
self.db.query(func.max(Upload.uploaded_at))
|
||||
.filter(Upload.package_id == package_id)
|
||||
.scalar()
|
||||
)
|
||||
return result
|
||||
|
||||
def count_by_artifact(self, artifact_id: str) -> int:
|
||||
"""Count uploads of a specific artifact."""
|
||||
return (
|
||||
self.db.query(func.count(Upload.id))
|
||||
.filter(Upload.artifact_id == artifact_id)
|
||||
.scalar() or 0
|
||||
)
|
||||
|
||||
def count_by_package(self, package_id: UUID) -> int:
|
||||
"""Count total uploads for a package."""
|
||||
return (
|
||||
self.db.query(func.count(Upload.id))
|
||||
.filter(Upload.package_id == package_id)
|
||||
.scalar() or 0
|
||||
)
|
||||
|
||||
def get_distinct_artifacts_count(self, package_id: UUID) -> int:
|
||||
"""Count distinct artifacts uploaded to a package."""
|
||||
return (
|
||||
self.db.query(func.count(func.distinct(Upload.artifact_id)))
|
||||
.filter(Upload.package_id == package_id)
|
||||
.scalar() or 0
|
||||
)
|
||||
|
||||
def get_uploads_by_user(
|
||||
self,
|
||||
user_id: str,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
) -> Tuple[List[Upload], int]:
|
||||
"""List uploads by a specific user."""
|
||||
query = self.db.query(Upload).filter(Upload.uploaded_by == user_id)
|
||||
|
||||
total = query.count()
|
||||
offset = (page - 1) * limit
|
||||
uploads = query.order_by(Upload.uploaded_at.desc()).offset(offset).limit(limit).all()
|
||||
|
||||
return uploads, total
|
||||
|
||||
def get_upload_stats(self, package_id: UUID) -> dict:
|
||||
"""Get upload statistics for a package."""
|
||||
stats = (
|
||||
self.db.query(
|
||||
func.count(Upload.id),
|
||||
func.count(func.distinct(Upload.artifact_id)),
|
||||
func.min(Upload.uploaded_at),
|
||||
func.max(Upload.uploaded_at),
|
||||
)
|
||||
.filter(Upload.package_id == package_id)
|
||||
.first()
|
||||
)
|
||||
|
||||
return {
|
||||
"total_uploads": stats[0] if stats else 0,
|
||||
"unique_artifacts": stats[1] if stats else 0,
|
||||
"first_upload": stats[2] if stats else None,
|
||||
"last_upload": stats[3] if stats else None,
|
||||
}
|
||||
9
backend/app/services/__init__.py
Normal file
9
backend/app/services/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
Service layer for business logic.
|
||||
"""
|
||||
|
||||
from .artifact_cleanup import ArtifactCleanupService
|
||||
|
||||
__all__ = [
|
||||
"ArtifactCleanupService",
|
||||
]
|
||||
181
backend/app/services/artifact_cleanup.py
Normal file
181
backend/app/services/artifact_cleanup.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
Service for artifact reference counting and cleanup.
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
import logging
|
||||
|
||||
from ..models import Artifact, Tag, Upload, Package
|
||||
from ..repositories.artifact import ArtifactRepository
|
||||
from ..repositories.tag import TagRepository
|
||||
from ..storage import S3Storage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ArtifactCleanupService:
|
||||
"""
|
||||
Service for managing artifact reference counts and cleaning up orphaned artifacts.
|
||||
|
||||
Reference counting rules:
|
||||
- ref_count starts at 1 when artifact is first uploaded
|
||||
- ref_count increments when the same artifact is uploaded again (deduplication)
|
||||
- ref_count decrements when a tag is deleted or updated to point elsewhere
|
||||
- ref_count decrements when a package is deleted (for each tag pointing to artifact)
|
||||
- When ref_count reaches 0, artifact is a candidate for deletion from S3
|
||||
"""
|
||||
|
||||
def __init__(self, db: Session, storage: Optional[S3Storage] = None):
|
||||
self.db = db
|
||||
self.storage = storage
|
||||
self.artifact_repo = ArtifactRepository(db)
|
||||
self.tag_repo = TagRepository(db)
|
||||
|
||||
def on_tag_deleted(self, artifact_id: str) -> Artifact:
|
||||
"""
|
||||
Called when a tag is deleted.
|
||||
Decrements ref_count for the artifact the tag was pointing to.
|
||||
"""
|
||||
artifact = self.artifact_repo.get_by_sha256(artifact_id)
|
||||
if artifact:
|
||||
artifact = self.artifact_repo.decrement_ref_count(artifact)
|
||||
logger.info(f"Decremented ref_count for artifact {artifact_id}: now {artifact.ref_count}")
|
||||
return artifact
|
||||
|
||||
def on_tag_updated(self, old_artifact_id: str, new_artifact_id: str) -> Tuple[Optional[Artifact], Optional[Artifact]]:
|
||||
"""
|
||||
Called when a tag is updated to point to a different artifact.
|
||||
Decrements ref_count for old artifact, increments for new (if different).
|
||||
|
||||
Returns (old_artifact, new_artifact) tuple.
|
||||
"""
|
||||
old_artifact = None
|
||||
new_artifact = None
|
||||
|
||||
if old_artifact_id != new_artifact_id:
|
||||
# Decrement old artifact ref_count
|
||||
old_artifact = self.artifact_repo.get_by_sha256(old_artifact_id)
|
||||
if old_artifact:
|
||||
old_artifact = self.artifact_repo.decrement_ref_count(old_artifact)
|
||||
logger.info(f"Decremented ref_count for old artifact {old_artifact_id}: now {old_artifact.ref_count}")
|
||||
|
||||
# Increment new artifact ref_count
|
||||
new_artifact = self.artifact_repo.get_by_sha256(new_artifact_id)
|
||||
if new_artifact:
|
||||
new_artifact = self.artifact_repo.increment_ref_count(new_artifact)
|
||||
logger.info(f"Incremented ref_count for new artifact {new_artifact_id}: now {new_artifact.ref_count}")
|
||||
|
||||
return old_artifact, new_artifact
|
||||
|
||||
def on_package_deleted(self, package_id) -> List[str]:
|
||||
"""
|
||||
Called when a package is deleted.
|
||||
Decrements ref_count for all artifacts that had tags in the package.
|
||||
|
||||
Returns list of artifact IDs that were affected.
|
||||
"""
|
||||
# Get all tags in the package before deletion
|
||||
tags = self.db.query(Tag).filter(Tag.package_id == package_id).all()
|
||||
|
||||
affected_artifacts = []
|
||||
for tag in tags:
|
||||
artifact = self.artifact_repo.get_by_sha256(tag.artifact_id)
|
||||
if artifact:
|
||||
self.artifact_repo.decrement_ref_count(artifact)
|
||||
affected_artifacts.append(tag.artifact_id)
|
||||
logger.info(f"Decremented ref_count for artifact {tag.artifact_id} (package delete)")
|
||||
|
||||
return affected_artifacts
|
||||
|
||||
def cleanup_orphaned_artifacts(self, batch_size: int = 100, dry_run: bool = False) -> List[str]:
|
||||
"""
|
||||
Find and delete artifacts with ref_count = 0.
|
||||
|
||||
Args:
|
||||
batch_size: Maximum number of artifacts to process
|
||||
dry_run: If True, only report what would be deleted without actually deleting
|
||||
|
||||
Returns:
|
||||
List of artifact IDs that were (or would be) deleted
|
||||
"""
|
||||
orphaned = self.artifact_repo.get_orphaned_artifacts(limit=batch_size)
|
||||
|
||||
deleted_ids = []
|
||||
for artifact in orphaned:
|
||||
if dry_run:
|
||||
logger.info(f"[DRY RUN] Would delete orphaned artifact: {artifact.id}")
|
||||
deleted_ids.append(artifact.id)
|
||||
else:
|
||||
try:
|
||||
# Delete from S3 first
|
||||
if self.storage:
|
||||
self.storage.delete(artifact.s3_key)
|
||||
logger.info(f"Deleted artifact from S3: {artifact.s3_key}")
|
||||
|
||||
# Then delete from database
|
||||
self.artifact_repo.delete(artifact)
|
||||
deleted_ids.append(artifact.id)
|
||||
logger.info(f"Deleted orphaned artifact from database: {artifact.id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete artifact {artifact.id}: {e}")
|
||||
|
||||
if not dry_run and deleted_ids:
|
||||
self.db.commit()
|
||||
|
||||
return deleted_ids
|
||||
|
||||
def get_orphaned_count(self) -> int:
|
||||
"""Get count of artifacts with ref_count = 0."""
|
||||
from sqlalchemy import func
|
||||
return (
|
||||
self.db.query(func.count(Artifact.id))
|
||||
.filter(Artifact.ref_count == 0)
|
||||
.scalar() or 0
|
||||
)
|
||||
|
||||
def verify_ref_counts(self, fix: bool = False) -> List[dict]:
|
||||
"""
|
||||
Verify that ref_counts match actual tag references.
|
||||
|
||||
Args:
|
||||
fix: If True, fix any mismatched ref_counts
|
||||
|
||||
Returns:
|
||||
List of artifacts with mismatched ref_counts
|
||||
"""
|
||||
from sqlalchemy import func
|
||||
|
||||
# Get actual tag counts per artifact
|
||||
tag_counts = (
|
||||
self.db.query(Tag.artifact_id, func.count(Tag.id).label("tag_count"))
|
||||
.group_by(Tag.artifact_id)
|
||||
.all()
|
||||
)
|
||||
tag_count_map = {artifact_id: count for artifact_id, count in tag_counts}
|
||||
|
||||
# Check all artifacts
|
||||
artifacts = self.db.query(Artifact).all()
|
||||
mismatches = []
|
||||
|
||||
for artifact in artifacts:
|
||||
actual_count = tag_count_map.get(artifact.id, 0)
|
||||
# ref_count should be at least 1 (initial upload) + additional uploads
|
||||
# But tags are the primary reference, so we check against tag count
|
||||
|
||||
if artifact.ref_count < actual_count:
|
||||
mismatch = {
|
||||
"artifact_id": artifact.id,
|
||||
"stored_ref_count": artifact.ref_count,
|
||||
"actual_tag_count": actual_count,
|
||||
}
|
||||
mismatches.append(mismatch)
|
||||
|
||||
if fix:
|
||||
artifact.ref_count = max(actual_count, 1)
|
||||
logger.warning(f"Fixed ref_count for artifact {artifact.id}: {mismatch['stored_ref_count']} -> {artifact.ref_count}")
|
||||
|
||||
if fix and mismatches:
|
||||
self.db.commit()
|
||||
|
||||
return mismatches
|
||||
Reference in New Issue
Block a user