Upload workflow enhancements: S3 verification, timing, client checksum support (#19)

- Add S3 object verification after upload (size validation before DB commit)
- Add cleanup of S3 objects if DB commit fails
- Record upload duration_ms and user_agent
- Support X-Checksum-SHA256 header for client-side checksum verification
- Add already_existed flag to StorageResult for deduplication tracking
- Add status, error_message, client_checksum columns to Upload model
- Add UploadLock model for future 409 conflict detection
- Add consistency-check admin endpoint for detecting orphaned S3 objects
- Add migration 005_upload_enhancements.sql
This commit is contained in:
Mondo Diaz
2026-01-06 15:31:59 -06:00
parent 3056747f39
commit c184272cec
5 changed files with 350 additions and 4 deletions

View File

@@ -0,0 +1,83 @@
-- Migration 005: Upload Workflow Enhancements
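-- Adds status, error_message and client_checksum columns to uploads, status
-- indexes and a status CHECK constraint, plus the upload_locks table and its
-- cleanup function.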
-- ============================================
-- Add status column to uploads table
-- ============================================
-- Add uploads.status, skipping the statement if the column is already there.
-- ADD COLUMN IF NOT EXISTS keeps the migration re-runnable and checks the very
-- table the ALTER targets; a name-only lookup in information_schema.columns
-- would also match an "uploads" table in any other schema and silently skip.
-- The column is NOT NULL with default 'completed', so rows that already exist
-- are filled with 'completed' when the column is added.
ALTER TABLE uploads
    ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'completed' NOT NULL;
-- ============================================
-- Add error_message column for failed uploads
-- ============================================
-- Add the nullable uploads.error_message column, skipping the statement if the
-- column is already there. ADD COLUMN IF NOT EXISTS keeps the migration
-- re-runnable and checks the very table the ALTER targets; a name-only lookup
-- in information_schema.columns would also match an "uploads" table in any
-- other schema and silently skip.
ALTER TABLE uploads
    ADD COLUMN IF NOT EXISTS error_message TEXT;
-- ============================================
-- Add client_checksum column for verification
-- ============================================
-- Add the nullable uploads.client_checksum column (up to 64 characters),
-- skipping the statement if the column is already there. ADD COLUMN IF NOT
-- EXISTS keeps the migration re-runnable and checks the very table the ALTER
-- targets; a name-only lookup in information_schema.columns would also match
-- an "uploads" table in any other schema and silently skip.
ALTER TABLE uploads
    ADD COLUMN IF NOT EXISTS client_checksum VARCHAR(64);
-- ============================================
-- Add indexes for upload status queries
-- ============================================
-- Index on status alone.
-- NOTE(review): the composite index below also leads with status, so this
-- single-column index may be redundant -- confirm against real query plans.
CREATE INDEX IF NOT EXISTS idx_uploads_status
    ON uploads (status);

-- Composite index on (status, uploaded_at).
CREATE INDEX IF NOT EXISTS idx_uploads_status_uploaded_at
    ON uploads (status, uploaded_at);
-- ============================================
-- Add constraint to validate status values
-- ============================================
-- Restrict uploads.status to 'pending', 'completed' or 'failed'.
-- ADD CONSTRAINT has no IF NOT EXISTS form, so existence is checked by hand.
-- The lookup goes through pg_constraint and is scoped to the uploads table
-- (conrelid): matching on the constraint name alone would treat a same-named
-- constraint on any other table or schema as "already applied" and skip the
-- ALTER. 'uploads'::regclass resolves the table through the search_path, the
-- same way the ALTER TABLE below does.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'check_upload_status'
          AND conrelid = 'uploads'::regclass
    ) THEN
        ALTER TABLE uploads
            ADD CONSTRAINT check_upload_status
            CHECK (status IN ('pending', 'completed', 'failed'));
    END IF;
END $$;
-- ============================================
-- Create table for tracking in-progress uploads (for 409 conflict detection)
-- ============================================
-- One row per held upload lock; at most one row may exist for a given
-- (sha256_hash, package_id) pair.
CREATE TABLE IF NOT EXISTS upload_locks (
    -- Surrogate key, generated by the database when not supplied.
    id          UUID                     DEFAULT gen_random_uuid() PRIMARY KEY,
    -- 64 characters; presumably a hex-encoded SHA-256 digest -- TODO confirm.
    sha256_hash VARCHAR(64)              NOT NULL,
    -- Owning package; lock rows are removed when the package row is deleted.
    package_id  UUID                     NOT NULL
                REFERENCES packages(id) ON DELETE CASCADE,
    -- Defaults to the insert time; nullable.
    locked_at   TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    -- Holder of the lock; the exact identifier format is not defined here.
    locked_by   VARCHAR(255)             NOT NULL,
    -- Rows with expires_at in the past are eligible for cleanup.
    expires_at  TIMESTAMP WITH TIME ZONE NOT NULL,
    UNIQUE (sha256_hash, package_id)
);
-- Index on the expiry timestamp.
CREATE INDEX IF NOT EXISTS idx_upload_locks_expires_at
    ON upload_locks (expires_at);

-- Index on (sha256_hash, package_id).
-- NOTE(review): the table's UNIQUE (sha256_hash, package_id) constraint is
-- already backed by an index on these same columns, so this one looks
-- redundant -- confirm nothing references it by name before removing it.
CREATE INDEX IF NOT EXISTS idx_upload_locks_hash_package
    ON upload_locks (sha256_hash, package_id);
-- ============================================
-- Function to clean up expired upload locks
-- ============================================
-- Deletes every upload_locks row whose expires_at is earlier than NOW() and
-- returns the number of rows that were removed (0 when nothing had expired).
CREATE OR REPLACE FUNCTION cleanup_expired_upload_locks()
RETURNS INTEGER AS $$
DECLARE
    removed_rows INTEGER;
BEGIN
    -- Delete and count in one statement: the CTE yields one row per deleted
    -- lock, and the outer query tallies them.
    WITH purged AS (
        DELETE FROM upload_locks
        WHERE expires_at < NOW()
        RETURNING 1
    )
    SELECT COUNT(*)::INTEGER
    INTO removed_rows
    FROM purged;

    RETURN removed_rows;
END;
$$ LANGUAGE plpgsql;