Add separate version tracking for artifacts

This commit is contained in:
Mondo Diaz
2026-01-16 11:36:08 -06:00
parent a98ac154d5
commit b93d5a9c68
15 changed files with 1366 additions and 34 deletions

View File

@@ -16,7 +16,7 @@ from fastapi import (
)
from fastapi.responses import StreamingResponse, RedirectResponse
from sqlalchemy.orm import Session
from sqlalchemy import or_, func, text
from sqlalchemy import or_, and_, func, text
from typing import List, Optional, Literal
import math
import io
@@ -46,6 +46,7 @@ from .models import (
AuditLog,
User,
AccessPermission,
PackageVersion,
)
from .schemas import (
ProjectCreate,
@@ -116,6 +117,8 @@ from .schemas import (
OIDCConfigUpdate,
OIDCStatusResponse,
OIDCLoginResponse,
PackageVersionResponse,
PackageVersionDetailResponse,
)
from .metadata import extract_metadata
from .config import get_settings
@@ -237,6 +240,103 @@ def _decrement_ref_count(db: Session, artifact_id: str) -> int:
return artifact.ref_count
import re

# Regex pattern for detecting a version embedded in a filename.
# The version must follow a "-" or "_" separator (an optional "v" prefix is
# skipped) and must be followed by a known archive extension or the end of
# the name. Capture group 1 holds the version string.
# Matches: name-1.0.0, name_1.0.0, name-v1.0.0, etc.
# Supports: X.Y, X.Y.Z, X.Y.Z-alpha, X.Y.Z.beta1, X.Y.Z_rc2
# Dot-separated suffixes are limited to well-known pre/post-release labels
# (alpha, beta, rc, dev, pre, post; case-insensitive, optional number) so that
# file extensions such as ".whl" or ".tar" are never captured as part of the
# version.
VERSION_FILENAME_PATTERN = re.compile(
    r"[-_]v?"
    r"(\d+\.\d+(?:\.\d+)?"
    r"(?:[-_][a-zA-Z0-9]+|\.(?i:alpha|beta|rc|dev|pre|post)\d*)?)"
    r"(?:\.tar|\.zip|\.tgz|\.gz|\.bz2|\.xz|$)"
)
def _detect_version(
    explicit_version: Optional[str],
    metadata: dict,
    filename: str,
) -> tuple[Optional[str], Optional[str]]:
    """
    Work out which version string applies to an uploaded file.

    Candidates are tried in order and the first truthy one wins:
        1. ``explicit_version`` - the value supplied by the caller
        2. ``metadata["version"]`` - the value found in the metadata dict
        3. Capture group 1 of VERSION_FILENAME_PATTERN searched in ``filename``

    Returns:
        A ``(version, source)`` tuple. ``source`` is ``'explicit'``,
        ``'metadata'`` or ``'filename'`` depending on which candidate was
        used; both elements are ``None`` when no candidate produced a version.
    """
    # Caller-supplied value always wins
    if explicit_version:
        return explicit_version, "explicit"

    # Next preference: version found in the metadata dict
    metadata_version = metadata.get("version")
    if metadata_version:
        return metadata_version, "metadata"

    # Last resort: parse the filename
    found = VERSION_FILENAME_PATTERN.search(filename)
    return (found.group(1), "filename") if found else (None, None)
def _create_or_update_version(
    db: Session,
    package_id: str,
    artifact_id: str,
    version: str,
    version_source: str,
    user_id: str,
) -> PackageVersion:
    """
    Create a version record for a package-artifact pair.

    The call is idempotent for an identical (package, version, artifact)
    triple: if the version already exists and points at the same artifact,
    the existing record is returned instead of reporting a conflict.

    If the artifact already has a (different) version in this package, that
    existing record is returned unchanged and no new record is created, so
    the returned record's ``version`` may differ from the requested one.

    The new record is added to the session and flushed, but not committed;
    committing is left to the caller.

    Raises HTTPException 409 if the version already exists for this package
    and points at a different artifact.
    """
    # Check if version already exists
    existing = (
        db.query(PackageVersion)
        .filter(PackageVersion.package_id == package_id, PackageVersion.version == version)
        .first()
    )
    if existing:
        if existing.artifact_id == artifact_id:
            # Same version for the same artifact (e.g. a re-upload of
            # identical content): nothing to create and nothing conflicts.
            return existing
        raise HTTPException(
            status_code=409,
            detail=f"Version {version} already exists in this package",
        )

    # Check if artifact already has a version in this package
    existing_artifact_version = (
        db.query(PackageVersion)
        .filter(
            PackageVersion.package_id == package_id,
            PackageVersion.artifact_id == artifact_id,
        )
        .first()
    )
    if existing_artifact_version:
        # Artifact already has a version, return it
        return existing_artifact_version

    # Create new version record
    pkg_version = PackageVersion(
        package_id=package_id,
        artifact_id=artifact_id,
        version=version,
        version_source=version_source,
        created_by=user_id,
    )
    db.add(pkg_version)
    # Flush so the row is sent to the database before the caller continues
    db.flush()
    return pkg_version
def _create_or_update_tag(
db: Session,
package_id: str,
@@ -2147,6 +2247,7 @@ def upload_artifact(
request: Request,
file: UploadFile = File(...),
tag: Optional[str] = Form(None),
version: Optional[str] = Form(None),
db: Session = Depends(get_db),
storage: S3Storage = Depends(get_storage),
content_length: Optional[int] = Header(None, alias="Content-Length"),
@@ -2214,6 +2315,11 @@ def upload_artifact(
io.BytesIO(file_content), file.filename, file.content_type
)
# Detect version (explicit > metadata > filename)
detected_version, version_source = _detect_version(
version, file_metadata, file.filename or ""
)
# Store file (uses multipart for large files) with error handling
try:
storage_result = storage.store(file.file, content_length)
@@ -2383,6 +2489,25 @@ def upload_artifact(
if tag:
_create_or_update_tag(db, package.id, tag, storage_result.sha256, user_id)
# Create version record if version was detected
pkg_version = None
if detected_version:
try:
pkg_version = _create_or_update_version(
db, package.id, storage_result.sha256, detected_version, version_source, user_id
)
except HTTPException as e:
# Version conflict (409) - log but don't fail the upload
if e.status_code == 409:
logger.warning(
f"Version {detected_version} already exists for package {package_name}, "
f"upload continues without version assignment"
)
detected_version = None
version_source = None
else:
raise
# Log deduplication event
if deduplicated:
logger.info(
@@ -2437,6 +2562,8 @@ def upload_artifact(
project=project_name,
package=package_name,
tag=tag,
version=detected_version,
version_source=version_source,
checksum_md5=storage_result.md5,
checksum_sha1=storage_result.sha1,
s3_etag=storage_result.s3_etag,
@@ -2754,15 +2881,30 @@ def _resolve_artifact_ref(
package: Package,
db: Session,
) -> Optional[Artifact]:
"""Resolve a reference (tag name, artifact:hash, tag:name) to an artifact"""
"""Resolve a reference (tag name, version, artifact:hash, tag:name, version:X.Y.Z) to an artifact.
Resolution order for implicit refs (no prefix):
1. Version (immutable)
2. Tag (mutable)
3. Artifact ID (direct hash)
"""
artifact = None
# Check for explicit prefixes
if ref.startswith("artifact:"):
artifact_id = ref[9:]
artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
elif ref.startswith("tag:") or ref.startswith("version:"):
tag_name = ref.split(":", 1)[1]
elif ref.startswith("version:"):
version_str = ref[8:]
pkg_version = (
db.query(PackageVersion)
.filter(PackageVersion.package_id == package.id, PackageVersion.version == version_str)
.first()
)
if pkg_version:
artifact = db.query(Artifact).filter(Artifact.id == pkg_version.artifact_id).first()
elif ref.startswith("tag:"):
tag_name = ref[4:]
tag = (
db.query(Tag)
.filter(Tag.package_id == package.id, Tag.name == tag_name)
@@ -2771,15 +2913,25 @@ def _resolve_artifact_ref(
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
# Try as tag name first
tag = (
db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
# Implicit ref: try version first, then tag, then artifact ID
# Try as version first
pkg_version = (
db.query(PackageVersion)
.filter(PackageVersion.package_id == package.id, PackageVersion.version == ref)
.first()
)
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
if pkg_version:
artifact = db.query(Artifact).filter(Artifact.id == pkg_version.artifact_id).first()
else:
# Try as direct artifact ID
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
# Try as tag name
tag = (
db.query(Tag).filter(Tag.package_id == package.id, Tag.name == ref).first()
)
if tag:
artifact = db.query(Artifact).filter(Artifact.id == tag.artifact_id).first()
else:
# Try as direct artifact ID
artifact = db.query(Artifact).filter(Artifact.id == ref).first()
return artifact
@@ -3177,6 +3329,224 @@ def download_artifact_compat(
)
# Version routes
@router.get(
    "/api/v1/project/{project_name}/{package_name}/versions",
    response_model=PaginatedResponse[PackageVersionResponse],
)
def list_versions(
    project_name: str,
    package_name: str,
    page: int = Query(default=1, ge=1, description="Page number"),
    limit: int = Query(default=20, ge=1, le=100, description="Items per page"),
    search: Optional[str] = Query(default=None, description="Search by version string"),
    sort: str = Query(default="version", description="Sort field (version, created_at)"),
    order: str = Query(default="desc", description="Sort order (asc, desc)"),
    db: Session = Depends(get_db),
):
    """
    List all versions for a package.

    Each item combines the version record with size/content-type/name of its
    artifact and the names of the tags in this package that point at the same
    artifact. Versions are inner-joined to their artifact, so a version whose
    artifact row is missing is not listed.

    Raises:
        HTTPException 404: project or package not found.
        HTTPException 400: ``sort`` or ``order`` is not an accepted value.

    NOTE(review): sorting by ``version`` orders by the database column as-is,
    which is presumably plain string ordering rather than semantic-version
    ordering - confirm against the PackageVersion model.
    """
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    # Validate sort field
    valid_sort_fields = {"version": PackageVersion.version, "created_at": PackageVersion.created_at}
    if sort not in valid_sort_fields:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid sort field. Must be one of: {', '.join(valid_sort_fields.keys())}",
        )

    # Validate order
    if order not in ("asc", "desc"):
        raise HTTPException(status_code=400, detail="Invalid order. Must be 'asc' or 'desc'")

    # Base query with JOIN to artifact for metadata
    query = (
        db.query(PackageVersion, Artifact)
        .join(Artifact, PackageVersion.artifact_id == Artifact.id)
        .filter(PackageVersion.package_id == package.id)
    )

    # Apply search filter
    if search:
        query = query.filter(PackageVersion.version.ilike(f"%{search}%"))

    # Get total count before pagination
    total = query.count()

    # Apply sorting
    sort_column = valid_sort_fields[sort]
    if order == "desc":
        query = query.order_by(sort_column.desc())
    else:
        query = query.order_by(sort_column.asc())

    # Apply pagination
    offset = (page - 1) * limit
    results = query.offset(offset).limit(limit).all()

    # Load the tags for every artifact on this page with a single query
    # instead of issuing one tag query per version row.
    tags_by_artifact: dict[str, list[str]] = {}
    page_artifact_ids = list({artifact.id for _, artifact in results})
    if page_artifact_ids:
        tag_rows = (
            db.query(Tag.artifact_id, Tag.name)
            .filter(
                Tag.package_id == package.id,
                Tag.artifact_id.in_(page_artifact_ids),
            )
            .all()
        )
        for tagged_artifact_id, tag_name in tag_rows:
            tags_by_artifact.setdefault(tagged_artifact_id, []).append(tag_name)

    version_responses = [
        PackageVersionResponse(
            id=pkg_version.id,
            package_id=pkg_version.package_id,
            artifact_id=pkg_version.artifact_id,
            version=pkg_version.version,
            version_source=pkg_version.version_source,
            created_at=pkg_version.created_at,
            created_by=pkg_version.created_by,
            size=artifact.size,
            content_type=artifact.content_type,
            original_name=artifact.original_name,
            # Copy so no two response items share one list object
            tags=list(tags_by_artifact.get(artifact.id, [])),
        )
        for pkg_version, artifact in results
    ]

    # An empty result set is still reported as a single (empty) page
    total_pages = math.ceil(total / limit) if total > 0 else 1
    has_more = page < total_pages

    return PaginatedResponse(
        items=version_responses,
        pagination=PaginationMeta(
            page=page,
            limit=limit,
            total=total,
            total_pages=total_pages,
            has_more=has_more,
        ),
    )
@router.get(
    "/api/v1/project/{project_name}/{package_name}/versions/{version}",
    response_model=PackageVersionDetailResponse,
)
def get_version(
    project_name: str,
    package_name: str,
    version: str,
    db: Session = Depends(get_db),
):
    """
    Get details of a specific version.

    The response combines the version record, fields of its artifact
    (size, content type, original name, metadata, checksums) and the names
    of the tags in this package that point at the same artifact.

    Raises:
        HTTPException 404: project, package, version, or the version's
            artifact could not be found.
    """
    project = db.query(Project).filter(Project.name == project_name).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    package = (
        db.query(Package)
        .filter(Package.project_id == project.id, Package.name == package_name)
        .first()
    )
    if not package:
        raise HTTPException(status_code=404, detail="Package not found")

    pkg_version = (
        db.query(PackageVersion)
        .filter(PackageVersion.package_id == package.id, PackageVersion.version == version)
        .first()
    )
    if not pkg_version:
        raise HTTPException(status_code=404, detail="Version not found")

    artifact = db.query(Artifact).filter(Artifact.id == pkg_version.artifact_id).first()
    if not artifact:
        # A version record without its artifact row cannot be described;
        # report it as missing instead of failing on attribute access below.
        raise HTTPException(status_code=404, detail="Artifact not found")

    # Get tags pointing to this artifact
    tags = (
        db.query(Tag.name)
        .filter(Tag.package_id == package.id, Tag.artifact_id == artifact.id)
        .all()
    )
    tag_names = [t[0] for t in tags]

    return PackageVersionDetailResponse(
        id=pkg_version.id,
        package_id=pkg_version.package_id,
        artifact_id=pkg_version.artifact_id,
        version=pkg_version.version,
        version_source=pkg_version.version_source,
        created_at=pkg_version.created_at,
        created_by=pkg_version.created_by,
        size=artifact.size,
        content_type=artifact.content_type,
        original_name=artifact.original_name,
        tags=tag_names,
        format_metadata=artifact.artifact_metadata,
        checksum_md5=artifact.checksum_md5,
        checksum_sha1=artifact.checksum_sha1,
    )
@router.delete(
    "/api/v1/project/{project_name}/{package_name}/versions/{version}",
    status_code=204,
)
def delete_version(
    project_name: str,
    package_name: str,
    version: str,
    request: Request,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_admin),
):
    """
    Remove a version record from a package (requires the admin dependency).

    Only the version row is deleted here; the artifact row it points at is
    not deleted by this function. An audit entry with action
    ``version.delete`` is written and the transaction is committed.

    Raises:
        HTTPException 404: project, package or version not found.
    """
    owning_project = db.query(Project).filter_by(name=project_name).first()
    if owning_project is None:
        raise HTTPException(status_code=404, detail="Project not found")

    owning_package = (
        db.query(Package)
        .filter_by(project_id=owning_project.id, name=package_name)
        .first()
    )
    if owning_package is None:
        raise HTTPException(status_code=404, detail="Package not found")

    version_row = (
        db.query(PackageVersion)
        .filter_by(package_id=owning_package.id, version=version)
        .first()
    )
    if version_row is None:
        raise HTTPException(status_code=404, detail="Version not found")

    # Remember the artifact id for the audit entry before the row is deleted
    audit_details = {"artifact_id": version_row.artifact_id}
    client = request.client
    caller_ip = client.host if client else None

    # Delete version (triggers will decrement ref_count)
    db.delete(version_row)

    # Audit log
    _log_audit(
        db,
        action="version.delete",
        resource=f"project/{project_name}/{package_name}/version/{version}",
        user_id=current_user.username,
        source_ip=caller_ip,
        details=audit_details,
    )

    db.commit()
    return Response(status_code=204)
# Tag routes
@router.get(
"/api/v1/project/{project_name}/{package_name}/tags",
@@ -3224,10 +3594,17 @@ def list_tags(
status_code=400, detail="Invalid order. Must be 'asc' or 'desc'"
)
# Base query with JOIN to artifact for metadata
# Base query with JOIN to artifact for metadata and LEFT JOIN to version
query = (
db.query(Tag, Artifact)
db.query(Tag, Artifact, PackageVersion.version)
.join(Artifact, Tag.artifact_id == Artifact.id)
.outerjoin(
PackageVersion,
and_(
PackageVersion.package_id == Tag.package_id,
PackageVersion.artifact_id == Tag.artifact_id,
),
)
.filter(Tag.package_id == package.id)
)
@@ -3264,9 +3641,9 @@ def list_tags(
# Calculate total pages
total_pages = math.ceil(total / limit) if total > 0 else 1
# Build detailed responses with artifact metadata
# Build detailed responses with artifact metadata and version
detailed_tags = []
for tag, artifact in results:
for tag, artifact, version in results:
detailed_tags.append(
TagDetailResponse(
id=tag.id,
@@ -3280,6 +3657,7 @@ def list_tags(
artifact_original_name=artifact.original_name,
artifact_created_at=artifact.created_at,
artifact_format_metadata=artifact.format_metadata,
version=version,
)
)
@@ -3396,8 +3774,15 @@ def get_tag(
raise HTTPException(status_code=404, detail="Package not found")
result = (
db.query(Tag, Artifact)
db.query(Tag, Artifact, PackageVersion.version)
.join(Artifact, Tag.artifact_id == Artifact.id)
.outerjoin(
PackageVersion,
and_(
PackageVersion.package_id == Tag.package_id,
PackageVersion.artifact_id == Tag.artifact_id,
),
)
.filter(Tag.package_id == package.id, Tag.name == tag_name)
.first()
)
@@ -3405,7 +3790,7 @@ def get_tag(
if not result:
raise HTTPException(status_code=404, detail="Tag not found")
tag, artifact = result
tag, artifact, version = result
return TagDetailResponse(
id=tag.id,
package_id=tag.package_id,
@@ -3418,6 +3803,7 @@ def get_tag(
artifact_original_name=artifact.original_name,
artifact_created_at=artifact.created_at,
artifact_format_metadata=artifact.format_metadata,
version=version,
)
@@ -3915,10 +4301,17 @@ def list_all_tags(
List all tags globally with filtering by project, package, name, etc.
"""
query = (
db.query(Tag, Package, Project, Artifact)
db.query(Tag, Package, Project, Artifact, PackageVersion.version)
.join(Package, Tag.package_id == Package.id)
.join(Project, Package.project_id == Project.id)
.join(Artifact, Tag.artifact_id == Artifact.id)
.outerjoin(
PackageVersion,
and_(
PackageVersion.package_id == Tag.package_id,
PackageVersion.artifact_id == Tag.artifact_id,
),
)
)
# Apply filters
@@ -3982,8 +4375,9 @@ def list_all_tags(
package_name=pkg.name,
artifact_size=artifact.size,
artifact_content_type=artifact.content_type,
version=version,
)
for tag, pkg, proj, artifact in results
for tag, pkg, proj, artifact, version in results
]
return PaginatedResponse(