Add comprehensive stats endpoints and reporting features
Backend stats endpoints:

- GET /api/v1/project/:project/packages/:package/stats - per-package stats
- GET /api/v1/artifact/:id/stats - artifact reference statistics
- GET /api/v1/stats/cross-project - cross-project deduplication detection
- GET /api/v1/stats/timeline - time-based metrics (daily/weekly/monthly)
- GET /api/v1/stats/export - CSV/JSON export
- GET /api/v1/stats/report - markdown/JSON summary report generation

Enhanced existing endpoints:

- Added storage_saved_bytes and deduplication_ratio to project stats
- Added date range filtering via from_date/to_date params

New schemas:

- PackageStatsResponse
- ArtifactStatsResponse
- CrossProjectDeduplicationResponse
- TimeBasedStatsResponse
- StatsReportResponse
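For illustration, the new endpoints can be exercised from Python roughly like this (a sketch only: the base URL and the "demo"/"app" project and package names are hypothetical, and the `requests` library is assumed to be installed):

    import requests

    BASE = "http://localhost:8000"  # hypothetical local deployment

    # Per-package statistics
    print(requests.get(f"{BASE}/api/v1/project/demo/packages/app/stats").json())

    # Cross-project deduplication, top 10 shared artifacts
    print(requests.get(f"{BASE}/api/v1/stats/cross-project", params={"limit": 10}).json())

    # Weekly timeline over the default 30-day window
    print(requests.get(f"{BASE}/api/v1/stats/timeline", params={"period": "weekly"}).json())

    # Global stats as CSV
    print(requests.get(f"{BASE}/api/v1/stats/export", params={"format": "csv"}).text)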
@@ -1,3 +1,4 @@
+import json
 from datetime import datetime, timedelta, timezone
 from fastapi import (
     APIRouter,
@@ -80,6 +81,11 @@ from .schemas import (
     StorageStatsResponse,
     DeduplicationStatsResponse,
     ProjectStatsResponse,
+    PackageStatsResponse,
+    ArtifactStatsResponse,
+    CrossProjectDeduplicationResponse,
+    TimeBasedStatsResponse,
+    StatsReportResponse,
 )
 from .metadata import extract_metadata
 from .config import get_settings
@@ -2499,17 +2505,25 @@ def get_project_stats(
     artifact_count = artifact_stats[0] if artifact_stats else 0
     total_size_bytes = artifact_stats[1] if artifact_stats else 0

-    # Upload counts
+    # Upload counts and storage saved
     upload_stats = (
         db.query(
             func.count(Upload.id),
             func.count(Upload.id).filter(Upload.deduplicated == True),
+            func.coalesce(
+                func.sum(Artifact.size).filter(Upload.deduplicated == True), 0
+            ),
         )
+        .join(Artifact, Upload.artifact_id == Artifact.id)
         .filter(Upload.package_id.in_(package_ids))
         .first()
     )
     upload_count = upload_stats[0] if upload_stats else 0
     deduplicated_uploads = upload_stats[1] if upload_stats else 0
+    storage_saved_bytes = upload_stats[2] if upload_stats else 0

+    # Calculate deduplication ratio
+    deduplication_ratio = upload_count / artifact_count if artifact_count > 0 else 1.0
+
     return ProjectStatsResponse(
         project_id=str(project.id),
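The ratio introduced here divides total uploads by distinct artifacts, so 1.0 means no duplication at all. A quick worked example of the arithmetic (plain Python, numbers invented):

    upload_count, artifact_count = 150, 50
    # 150 uploads resolved to 50 distinct blobs: each blob referenced 3x on average
    deduplication_ratio = upload_count / artifact_count if artifact_count > 0 else 1.0
    assert deduplication_ratio == 3.0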
@@ -2520,4 +2534,502 @@ def get_project_stats(
         total_size_bytes=total_size_bytes,
         upload_count=upload_count,
         deduplicated_uploads=deduplicated_uploads,
+        storage_saved_bytes=storage_saved_bytes,
+        deduplication_ratio=deduplication_ratio,
+    )
+
+
+# =============================================================================
+# Package Statistics Endpoint
+# =============================================================================
+
+
+@router.get(
+    "/api/v1/project/{project_name}/packages/{package_name}/stats",
+    response_model=PackageStatsResponse,
+)
+def get_package_stats(
+    project_name: str,
+    package_name: str,
+    db: Session = Depends(get_db),
+):
+    """Get statistics for a specific package."""
+    project = db.query(Project).filter(Project.name == project_name).first()
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    package = (
+        db.query(Package)
+        .filter(Package.project_id == project.id, Package.name == package_name)
+        .first()
+    )
+    if not package:
+        raise HTTPException(status_code=404, detail="Package not found")
+
+    # Tag count
+    tag_count = (
+        db.query(func.count(Tag.id)).filter(Tag.package_id == package.id).scalar() or 0
+    )
+
+    # Artifact stats via uploads
+    artifact_stats = (
+        db.query(
+            func.count(func.distinct(Upload.artifact_id)),
+            func.coalesce(func.sum(Artifact.size), 0),
+        )
+        .join(Artifact, Upload.artifact_id == Artifact.id)
+        .filter(Upload.package_id == package.id)
+        .first()
+    )
+    artifact_count = artifact_stats[0] if artifact_stats else 0
+    total_size_bytes = artifact_stats[1] if artifact_stats else 0
+
+    # Upload stats
+    upload_stats = (
+        db.query(
+            func.count(Upload.id),
+            func.count(Upload.id).filter(Upload.deduplicated == True),
+            func.coalesce(
+                func.sum(Artifact.size).filter(Upload.deduplicated == True), 0
+            ),
+        )
+        .join(Artifact, Upload.artifact_id == Artifact.id)
+        .filter(Upload.package_id == package.id)
+        .first()
+    )
+    upload_count = upload_stats[0] if upload_stats else 0
+    deduplicated_uploads = upload_stats[1] if upload_stats else 0
+    storage_saved_bytes = upload_stats[2] if upload_stats else 0
+
+    deduplication_ratio = upload_count / artifact_count if artifact_count > 0 else 1.0
+
+    return PackageStatsResponse(
+        package_id=str(package.id),
+        package_name=package.name,
+        project_name=project.name,
+        tag_count=tag_count,
+        artifact_count=artifact_count,
+        total_size_bytes=total_size_bytes,
+        upload_count=upload_count,
+        deduplicated_uploads=deduplicated_uploads,
+        storage_saved_bytes=storage_saved_bytes,
+        deduplication_ratio=deduplication_ratio,
+    )
+
+
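Both the project and package queries lean on SQLAlchemy's aggregate `.filter()`, which emits a SQL `FILTER (WHERE ...)` clause. A minimal standalone sketch of what that construct compiles to (assuming a PostgreSQL target; the column names here are illustrative, not the app's models):

    from sqlalchemy import column, func
    from sqlalchemy.dialects import postgresql

    # count(id) FILTER (WHERE deduplicated = true) -- the construct used above
    expr = func.count(column("id")).filter(column("deduplicated") == True)
    print(expr.compile(dialect=postgresql.dialect(), compile_kwargs={"literal_binds": True}))

Note that `FILTER` on aggregates is supported by PostgreSQL 9.4+ and recent SQLite; other backends would need an equivalent CASE expression.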
+# =============================================================================
+# Artifact Statistics Endpoint
+# =============================================================================
+
+
+@router.get(
+    "/api/v1/artifact/{artifact_id}/stats", response_model=ArtifactStatsResponse
+)
+def get_artifact_stats(
+    artifact_id: str,
+    db: Session = Depends(get_db),
+):
+    """Get detailed statistics for a specific artifact."""
+    artifact = db.query(Artifact).filter(Artifact.id == artifact_id).first()
+    if not artifact:
+        raise HTTPException(status_code=404, detail="Artifact not found")
+
+    # Get all tags referencing this artifact
+    tags = (
+        db.query(Tag, Package, Project)
+        .join(Package, Tag.package_id == Package.id)
+        .join(Project, Package.project_id == Project.id)
+        .filter(Tag.artifact_id == artifact_id)
+        .all()
+    )
+
+    tag_list = [
+        {
+            "tag_name": tag.name,
+            "package_name": pkg.name,
+            "project_name": proj.name,
+            "created_at": tag.created_at.isoformat() if tag.created_at else None,
+        }
+        for tag, pkg, proj in tags
+    ]
+
+    # Get unique projects and packages
+    projects = list(set(proj.name for _, _, proj in tags))
+    packages = list(set(f"{proj.name}/{pkg.name}" for _, pkg, proj in tags))
+
+    # Get first and last upload times
+    upload_times = (
+        db.query(func.min(Upload.uploaded_at), func.max(Upload.uploaded_at))
+        .filter(Upload.artifact_id == artifact_id)
+        .first()
+    )
+
+    return ArtifactStatsResponse(
+        artifact_id=artifact.id,
+        sha256=artifact.id,
+        size=artifact.size,
+        ref_count=artifact.ref_count,
+        storage_savings=(artifact.ref_count - 1) * artifact.size
+        if artifact.ref_count > 1
+        else 0,
+        tags=tag_list,
+        projects=projects,
+        packages=packages,
+        first_uploaded=upload_times[0] if upload_times else None,
+        last_referenced=upload_times[1] if upload_times else None,
+    )
+
+
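The `storage_savings` arithmetic above is simply the bytes that duplicate copies would have cost. For example (invented numbers):

    size, ref_count = 2048, 4
    # 4 references to one 2 KiB blob: 3 redundant copies avoided
    storage_savings = (ref_count - 1) * size if ref_count > 1 else 0
    assert storage_savings == 6144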
+# =============================================================================
+# Cross-Project Deduplication Endpoint
+# =============================================================================
+
+
+@router.get(
+    "/api/v1/stats/cross-project", response_model=CrossProjectDeduplicationResponse
+)
+def get_cross_project_deduplication(
+    limit: int = Query(default=20, ge=1, le=100),
+    db: Session = Depends(get_db),
+):
+    """Get statistics about artifacts shared across multiple projects."""
+    # Find artifacts that appear in multiple projects
+    # Subquery to count distinct projects per artifact
+    project_counts = (
+        db.query(
+            Upload.artifact_id,
+            func.count(func.distinct(Package.project_id)).label("project_count"),
+        )
+        .join(Package, Upload.package_id == Package.id)
+        .group_by(Upload.artifact_id)
+        .subquery()
+    )
+
+    # Get artifacts with more than one project
+    shared_artifacts_query = (
+        db.query(Artifact, project_counts.c.project_count)
+        .join(project_counts, Artifact.id == project_counts.c.artifact_id)
+        .filter(project_counts.c.project_count > 1)
+        .order_by(project_counts.c.project_count.desc(), Artifact.size.desc())
+        .limit(limit)
+    )
+
+    shared_artifacts = []
+    total_savings = 0
+
+    for artifact, project_count in shared_artifacts_query:
+        # Calculate savings: (project_count - 1) * size
+        savings = (project_count - 1) * artifact.size
+        total_savings += savings
+
+        # Get project names
+        project_names = (
+            db.query(func.distinct(Project.name))
+            .join(Package, Package.project_id == Project.id)
+            .join(Upload, Upload.package_id == Package.id)
+            .filter(Upload.artifact_id == artifact.id)
+            .all()
+        )
+
+        shared_artifacts.append(
+            {
+                "artifact_id": artifact.id,
+                "size": artifact.size,
+                "project_count": project_count,
+                "projects": [p[0] for p in project_names],
+                "storage_savings": savings,
+            }
+        )
+
+    # Total count of shared artifacts
+    shared_count = (
+        db.query(func.count())
+        .select_from(project_counts)
+        .filter(project_counts.c.project_count > 1)
+        .scalar()
+        or 0
+    )
+
+    return CrossProjectDeduplicationResponse(
+        shared_artifacts_count=shared_count,
+        total_cross_project_savings=total_savings,
+        shared_artifacts=shared_artifacts,
+    )
+
+
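Cross-project savings accumulate the same per-artifact formula. A small worked example of how `total_cross_project_savings` adds up (hypothetical sizes and counts):

    shared = [(100 * 1024 * 1024, 3), (10 * 1024 * 1024, 2)]  # (size, project_count)
    # each shared artifact saves (project_count - 1) redundant copies
    total_savings = sum((count - 1) * size for size, count in shared)
    assert total_savings == 210 * 1024 * 1024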
+# =============================================================================
+# Time-Based Statistics Endpoint
+# =============================================================================
+
+
+@router.get("/api/v1/stats/timeline", response_model=TimeBasedStatsResponse)
+def get_time_based_stats(
+    period: str = Query(default="daily", regex="^(daily|weekly|monthly)$"),
+    from_date: Optional[datetime] = Query(default=None),
+    to_date: Optional[datetime] = Query(default=None),
+    db: Session = Depends(get_db),
+):
+    """Get deduplication statistics over time."""
+    # Default date range: last 30 days
+    if to_date is None:
+        to_date = datetime.utcnow()
+    if from_date is None:
+        from_date = to_date - timedelta(days=30)
+
+    # Determine date truncation based on period
+    if period == "daily":
+        date_trunc = func.date_trunc("day", Upload.uploaded_at)
+    elif period == "weekly":
+        date_trunc = func.date_trunc("week", Upload.uploaded_at)
+    else:  # monthly
+        date_trunc = func.date_trunc("month", Upload.uploaded_at)
+
+    # Query uploads grouped by period
+    stats = (
+        db.query(
+            date_trunc.label("period_start"),
+            func.count(Upload.id).label("total_uploads"),
+            func.count(func.distinct(Upload.artifact_id)).label("unique_artifacts"),
+            func.count(Upload.id)
+            .filter(Upload.deduplicated == True)
+            .label("duplicated"),
+            func.coalesce(
+                func.sum(Artifact.size).filter(Upload.deduplicated == True), 0
+            ).label("bytes_saved"),
+        )
+        .join(Artifact, Upload.artifact_id == Artifact.id)
+        .filter(Upload.uploaded_at >= from_date, Upload.uploaded_at <= to_date)
+        .group_by(date_trunc)
+        .order_by(date_trunc)
+        .all()
+    )
+
+    data_points = [
+        {
+            "date": row.period_start.isoformat() if row.period_start else None,
+            "total_uploads": row.total_uploads,
+            "unique_artifacts": row.unique_artifacts,
+            "duplicated_uploads": row.duplicated,
+            "bytes_saved": row.bytes_saved,
+        }
+        for row in stats
+    ]
+
+    return TimeBasedStatsResponse(
+        period=period,
+        start_date=from_date,
+        end_date=to_date,
+        data_points=data_points,
+    )
+
+
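One portability caveat worth flagging: `func.date_trunc` renders a `DATE_TRUNC(...)` call, which PostgreSQL provides but SQLite does not. If SQLite support were needed, a rough equivalent would bucket with `strftime` instead (a sketch under that assumption, not part of the commit):

    # Hypothetical SQLite fallback for the period bucketing above
    bucket_fmt = {"daily": "%Y-%m-%d", "weekly": "%Y-%W", "monthly": "%Y-%m"}[period]
    date_trunc = func.strftime(bucket_fmt, Upload.uploaded_at)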
+# =============================================================================
+# CSV Export Endpoint
+# =============================================================================
+
+
+@router.get("/api/v1/stats/export")
+def export_stats(
+    format: str = Query(default="json", regex="^(json|csv)$"),
+    db: Session = Depends(get_db),
+):
+    """Export global statistics in JSON or CSV format."""
+    from fastapi.responses import Response
+
+    # Gather all stats
+    total_artifacts = db.query(func.count(Artifact.id)).scalar() or 0
+    total_size = db.query(func.coalesce(func.sum(Artifact.size), 0)).scalar() or 0
+    total_uploads = db.query(func.count(Upload.id)).scalar() or 0
+    deduplicated_uploads = (
+        db.query(func.count(Upload.id)).filter(Upload.deduplicated == True).scalar()
+        or 0
+    )
+    unique_artifacts = (
+        db.query(func.count(Artifact.id)).filter(Artifact.ref_count > 0).scalar() or 0
+    )
+
+    storage_saved = (
+        db.query(func.coalesce(func.sum(Artifact.size), 0))
+        .join(Upload, Upload.artifact_id == Artifact.id)
+        .filter(Upload.deduplicated == True)
+        .scalar()
+        or 0
+    )
+
+    stats = {
+        "generated_at": datetime.utcnow().isoformat(),
+        "total_artifacts": total_artifacts,
+        "total_size_bytes": total_size,
+        "total_uploads": total_uploads,
+        "unique_artifacts": unique_artifacts,
+        "deduplicated_uploads": deduplicated_uploads,
+        "storage_saved_bytes": storage_saved,
+        "deduplication_ratio": total_uploads / unique_artifacts
+        if unique_artifacts > 0
+        else 1.0,
+    }
+
+    if format == "csv":
+        import csv
+        import io
+
+        output = io.StringIO()
+        writer = csv.writer(output)
+        writer.writerow(["Metric", "Value"])
+        for key, value in stats.items():
+            writer.writerow([key, value])
+
+        return Response(
+            content=output.getvalue(),
+            media_type="text/csv",
+            headers={"Content-Disposition": "attachment; filename=orchard_stats.csv"},
+        )
+
+    return stats
+
+
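The CSV body is a two-column metric/value listing, one row per key of the stats dict. With invented numbers it would look like:

    Metric,Value
    generated_at,2025-01-01T00:00:00
    total_artifacts,1200
    total_size_bytes,52428800
    total_uploads,1500
    unique_artifacts,1200
    deduplicated_uploads,300
    storage_saved_bytes,10485760
    deduplication_ratio,1.25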
+# =============================================================================
+# Summary Report Endpoint
+# =============================================================================
+
+
+@router.get("/api/v1/stats/report", response_model=StatsReportResponse)
+def generate_stats_report(
+    format: str = Query(default="markdown", regex="^(markdown|json)$"),
+    db: Session = Depends(get_db),
+):
+    """Generate a summary report of storage and deduplication statistics."""
+    # Gather stats
+    total_artifacts = db.query(func.count(Artifact.id)).scalar() or 0
+    total_size = db.query(func.coalesce(func.sum(Artifact.size), 0)).scalar() or 0
+    total_uploads = db.query(func.count(Upload.id)).scalar() or 0
+    deduplicated_uploads = (
+        db.query(func.count(Upload.id)).filter(Upload.deduplicated == True).scalar()
+        or 0
+    )
+    unique_artifacts = (
+        db.query(func.count(Artifact.id)).filter(Artifact.ref_count > 0).scalar() or 0
+    )
+    orphaned_artifacts = (
+        db.query(func.count(Artifact.id)).filter(Artifact.ref_count == 0).scalar() or 0
+    )
+
+    storage_saved = (
+        db.query(func.coalesce(func.sum(Artifact.size), 0))
+        .join(Upload, Upload.artifact_id == Artifact.id)
+        .filter(Upload.deduplicated == True)
+        .scalar()
+        or 0
+    )
+
+    project_count = db.query(func.count(Project.id)).scalar() or 0
+    package_count = db.query(func.count(Package.id)).scalar() or 0
+
+    # Top 5 most referenced artifacts
+    top_artifacts = (
+        db.query(Artifact)
+        .filter(Artifact.ref_count > 1)
+        .order_by(Artifact.ref_count.desc())
+        .limit(5)
+        .all()
+    )
+
+    def format_bytes(b):
+        for unit in ["B", "KB", "MB", "GB", "TB"]:
+            if b < 1024:
+                return f"{b:.2f} {unit}"
+            b /= 1024
+        return f"{b:.2f} PB"
+
+    generated_at = datetime.utcnow()
+
+    if format == "markdown":
+        report = f"""# Orchard Storage Report
+
+Generated: {generated_at.strftime("%Y-%m-%d %H:%M:%S UTC")}
+
+## Overview
+
+| Metric | Value |
+|--------|-------|
+| Projects | {project_count} |
+| Packages | {package_count} |
+| Total Artifacts | {total_artifacts} |
+| Unique Artifacts | {unique_artifacts} |
+| Orphaned Artifacts | {orphaned_artifacts} |
+
+## Storage
+
+| Metric | Value |
+|--------|-------|
+| Total Storage Used | {format_bytes(total_size)} |
+| Storage Saved | {format_bytes(storage_saved)} |
+| Savings Percentage | {(storage_saved / (total_size + storage_saved) * 100) if (total_size + storage_saved) > 0 else 0:.1f}% |
+
+## Uploads
+
+| Metric | Value |
+|--------|-------|
+| Total Uploads | {total_uploads} |
+| Deduplicated Uploads | {deduplicated_uploads} |
+| Deduplication Ratio | {total_uploads / unique_artifacts if unique_artifacts > 0 else 1:.2f}x |
+
+## Top Referenced Artifacts
+
+| Artifact ID | Size | References | Savings |
+|-------------|------|------------|---------|
+"""
+        for art in top_artifacts:
+            savings = (art.ref_count - 1) * art.size
+            report += f"| `{art.id[:12]}...` | {format_bytes(art.size)} | {art.ref_count} | {format_bytes(savings)} |\n"
+
+        return StatsReportResponse(
+            format="markdown",
+            generated_at=generated_at,
+            content=report,
+        )
+
+    # JSON format
+    return StatsReportResponse(
+        format="json",
+        generated_at=generated_at,
+        content=json.dumps(
+            {
+                "overview": {
+                    "projects": project_count,
+                    "packages": package_count,
+                    "total_artifacts": total_artifacts,
+                    "unique_artifacts": unique_artifacts,
+                    "orphaned_artifacts": orphaned_artifacts,
+                },
+                "storage": {
+                    "total_bytes": total_size,
+                    "saved_bytes": storage_saved,
+                    "savings_percentage": (
+                        storage_saved / (total_size + storage_saved) * 100
+                    )
+                    if (total_size + storage_saved) > 0
+                    else 0,
+                },
+                "uploads": {
+                    "total": total_uploads,
+                    "deduplicated": deduplicated_uploads,
+                    "ratio": total_uploads / unique_artifacts
+                    if unique_artifacts > 0
+                    else 1,
+                },
+                "top_artifacts": [
+                    {
+                        "id": art.id,
+                        "size": art.size,
+                        "ref_count": art.ref_count,
+                        "savings": (art.ref_count - 1) * art.size,
+                    }
+                    for art in top_artifacts
+                ],
+            },
+            indent=2,
+        ),
     )
@@ -456,3 +456,62 @@ class ProjectStatsResponse(BaseModel):
     total_size_bytes: int
     upload_count: int
     deduplicated_uploads: int
+    storage_saved_bytes: int = 0  # Bytes saved through deduplication
+    deduplication_ratio: float = 1.0  # upload_count / artifact_count
+
+
+class PackageStatsResponse(BaseModel):
+    """Per-package statistics"""
+
+    package_id: str
+    package_name: str
+    project_name: str
+    tag_count: int
+    artifact_count: int
+    total_size_bytes: int
+    upload_count: int
+    deduplicated_uploads: int
+    storage_saved_bytes: int = 0
+    deduplication_ratio: float = 1.0
+
+
+class ArtifactStatsResponse(BaseModel):
+    """Per-artifact reference statistics"""
+
+    artifact_id: str
+    sha256: str
+    size: int
+    ref_count: int
+    storage_savings: int  # (ref_count - 1) * size
+    tags: List[Dict[str, Any]]  # Tags referencing this artifact
+    projects: List[str]  # Projects using this artifact
+    packages: List[str]  # Packages using this artifact
+    first_uploaded: Optional[datetime] = None
+    last_referenced: Optional[datetime] = None
+
+
+class CrossProjectDeduplicationResponse(BaseModel):
+    """Cross-project deduplication statistics"""
+
+    shared_artifacts_count: int  # Artifacts used in multiple projects
+    total_cross_project_savings: int  # Bytes saved by cross-project sharing
+    shared_artifacts: List[Dict[str, Any]]  # Details of shared artifacts
+
+
+class TimeBasedStatsResponse(BaseModel):
+    """Time-based deduplication statistics"""
+
+    period: str  # "daily", "weekly", "monthly"
+    start_date: datetime
+    end_date: datetime
+    data_points: List[
+        Dict[str, Any]
+    ]  # List of {date, uploads, unique, duplicated, bytes_saved}
+
+
+class StatsReportResponse(BaseModel):
+    """Summary report in various formats"""
+
+    format: str  # "json", "csv", "markdown"
+    generated_at: datetime
+    content: str  # The report content