""" Test configuration and fixtures for Orchard backend tests. This module provides: - Database fixtures with test isolation - Mock S3 storage using moto - Test data factories for common scenarios """ import os import pytest import hashlib from typing import Generator, BinaryIO from unittest.mock import MagicMock, patch import io # Set test environment defaults before importing app modules # Use setdefault to NOT override existing env vars (from docker-compose) os.environ.setdefault("ORCHARD_DATABASE_HOST", "localhost") os.environ.setdefault("ORCHARD_DATABASE_PORT", "5432") os.environ.setdefault("ORCHARD_DATABASE_USER", "test") os.environ.setdefault("ORCHARD_DATABASE_PASSWORD", "test") os.environ.setdefault("ORCHARD_DATABASE_DBNAME", "orchard_test") os.environ.setdefault("ORCHARD_S3_ENDPOINT", "http://localhost:9000") os.environ.setdefault("ORCHARD_S3_BUCKET", "test-bucket") os.environ.setdefault("ORCHARD_S3_ACCESS_KEY_ID", "test") os.environ.setdefault("ORCHARD_S3_SECRET_ACCESS_KEY", "test") # ============================================================================= # Test Data Factories # ============================================================================= def create_test_file(content: bytes = None, size: int = 1024) -> io.BytesIO: """ Create a test file with known content. Args: content: Specific content to use, or None to generate random-ish content size: Size of generated content if content is None Returns: BytesIO object with the content """ if content is None: content = os.urandom(size) return io.BytesIO(content) def compute_sha256(content: bytes) -> str: """Compute SHA256 hash of content as lowercase hex string.""" return hashlib.sha256(content).hexdigest() def compute_md5(content: bytes) -> str: """Compute MD5 hash of content as lowercase hex string.""" return hashlib.md5(content).hexdigest() def compute_sha1(content: bytes) -> str: """Compute SHA1 hash of content as lowercase hex string.""" return hashlib.sha1(content).hexdigest() # Known test data with pre-computed hashes TEST_CONTENT_HELLO = b"Hello, World!" TEST_HASH_HELLO = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f" TEST_MD5_HELLO = "65a8e27d8879283831b664bd8b7f0ad4" TEST_SHA1_HELLO = "0a0a9f2a6772942557ab5355d76af442f8f65e01" TEST_CONTENT_EMPTY = b"" # Note: Empty content should be rejected by the storage layer TEST_CONTENT_BINARY = bytes(range(256)) TEST_HASH_BINARY = compute_sha256(TEST_CONTENT_BINARY) # ============================================================================= # Mock Storage Fixtures # ============================================================================= class MockS3Client: """Mock S3 client for unit testing without actual S3/MinIO.""" def __init__(self): self.objects = {} # key -> content self.bucket = "test-bucket" def put_object(self, Bucket: str, Key: str, Body: bytes) -> dict: self.objects[Key] = Body return {"ETag": f'"{compute_md5(Body)}"'} def get_object(self, Bucket: str, Key: str, **kwargs) -> dict: if Key not in self.objects: raise Exception("NoSuchKey") content = self.objects[Key] return { "Body": io.BytesIO(content), "ContentLength": len(content), } def head_object(self, Bucket: str, Key: str) -> dict: if Key not in self.objects: from botocore.exceptions import ClientError error_response = {"Error": {"Code": "404", "Message": "Not Found"}} raise ClientError(error_response, "HeadObject") content = self.objects[Key] return { "ContentLength": len(content), "ETag": f'"{compute_md5(content)}"', } def delete_object(self, Bucket: str, Key: str) -> dict: if Key in self.objects: del self.objects[Key] return {} def head_bucket(self, Bucket: str) -> dict: return {} def create_multipart_upload(self, Bucket: str, Key: str) -> dict: return {"UploadId": "test-upload-id"} def upload_part( self, Bucket: str, Key: str, UploadId: str, PartNumber: int, Body: bytes ) -> dict: return {"ETag": f'"{compute_md5(Body)}"'} def complete_multipart_upload( self, Bucket: str, Key: str, UploadId: str, MultipartUpload: dict ) -> dict: return {"ETag": '"test-etag"'} def abort_multipart_upload(self, Bucket: str, Key: str, UploadId: str) -> dict: return {} def generate_presigned_url( self, ClientMethod: str, Params: dict, ExpiresIn: int ) -> str: return f"https://test-bucket.s3.amazonaws.com/{Params['Key']}?presigned=true" @pytest.fixture def mock_s3_client() -> MockS3Client: """Provide a mock S3 client for unit tests.""" return MockS3Client() @pytest.fixture def mock_storage(mock_s3_client): """ Provide a mock storage instance for unit tests. Uses the MockS3Client to avoid actual S3/MinIO calls. """ from app.storage import S3Storage storage = S3Storage.__new__(S3Storage) storage.client = mock_s3_client storage.bucket = "test-bucket" storage._active_uploads = {} return storage # ============================================================================= # Database Fixtures (for integration tests) # ============================================================================= @pytest.fixture(scope="session") def test_db_url(): """Get the test database URL.""" return ( f"postgresql://{os.environ['ORCHARD_DATABASE_USER']}:" f"{os.environ['ORCHARD_DATABASE_PASSWORD']}@" f"{os.environ['ORCHARD_DATABASE_HOST']}:" f"{os.environ['ORCHARD_DATABASE_PORT']}/" f"{os.environ['ORCHARD_DATABASE_DBNAME']}" ) # ============================================================================= # HTTP Client Fixtures (for API tests) # ============================================================================= @pytest.fixture def test_app(): """ Create a test FastAPI application. Note: This requires the database to be available for integration tests. For unit tests, use mock_storage fixture instead. """ from fastapi.testclient import TestClient from app.main import app return TestClient(app) # ============================================================================= # Integration Test Fixtures # ============================================================================= @pytest.fixture def integration_client(): """ Create a test client for integration tests. Uses the real database and MinIO from docker-compose.local.yml. """ from httpx import Client # Connect to the running orchard-server container base_url = os.environ.get("ORCHARD_TEST_URL", "http://localhost:8080") with Client(base_url=base_url, timeout=30.0) as client: yield client @pytest.fixture def unique_test_id(): """Generate a unique ID for test isolation.""" import uuid return f"test-{uuid.uuid4().hex[:8]}" @pytest.fixture def test_project(integration_client, unique_test_id): """ Create a test project and clean it up after the test. Yields the project name. """ project_name = f"test-project-{unique_test_id}" # Create project response = integration_client.post( "/api/v1/projects", json={"name": project_name, "description": "Test project", "is_public": True}, ) assert response.status_code == 200, f"Failed to create project: {response.text}" yield project_name # Cleanup: delete project try: integration_client.delete(f"/api/v1/projects/{project_name}") except Exception: pass # Ignore cleanup errors @pytest.fixture def test_package(integration_client, test_project, unique_test_id): """ Create a test package within a test project. Yields (project_name, package_name) tuple. """ package_name = f"test-package-{unique_test_id}" # Create package response = integration_client.post( f"/api/v1/project/{test_project}/packages", json={"name": package_name, "description": "Test package"}, ) assert response.status_code == 200, f"Failed to create package: {response.text}" yield (test_project, package_name) # Cleanup handled by test_project fixture (cascade delete) @pytest.fixture def test_content(): """ Generate unique test content for each test. Returns (content_bytes, expected_sha256) tuple. """ import uuid content = f"test-content-{uuid.uuid4().hex}".encode() sha256 = compute_sha256(content) return (content, sha256) def upload_test_file( client, project: str, package: str, content: bytes, filename: str = "test.bin", tag: str = None, ) -> dict: """ Helper function to upload a test file. Returns the upload response as a dict. """ files = {"file": (filename, io.BytesIO(content), "application/octet-stream")} data = {} if tag: data["tag"] = tag response = client.post( f"/api/v1/project/{project}/{package}/upload", files=files, data=data if data else None, ) assert response.status_code == 200, f"Upload failed: {response.text}" return response.json() # ============================================================================= # S3 Direct Access Helpers (for integration tests) # ============================================================================= def get_s3_client(): """ Create a boto3 S3 client for direct S3 access in integration tests. Uses environment variables for configuration (same as the app). Note: When running in container, S3 endpoint should be 'minio:9000' not 'localhost:9000'. """ import boto3 from botocore.config import Config config = Config(s3={"addressing_style": "path"}) # Use the same endpoint as the app (minio:9000 in container, localhost:9000 locally) endpoint = os.environ.get("ORCHARD_S3_ENDPOINT", "http://minio:9000") return boto3.client( "s3", endpoint_url=endpoint, region_name=os.environ.get("ORCHARD_S3_REGION", "us-east-1"), aws_access_key_id=os.environ.get("ORCHARD_S3_ACCESS_KEY_ID", "minioadmin"), aws_secret_access_key=os.environ.get( "ORCHARD_S3_SECRET_ACCESS_KEY", "minioadmin" ), config=config, ) def get_s3_bucket(): """Get the S3 bucket name from environment.""" return os.environ.get("ORCHARD_S3_BUCKET", "orchard-artifacts") def list_s3_objects_by_hash(sha256_hash: str) -> list: """ List S3 objects that match a specific SHA256 hash. Uses the fruits/{hash[:2]}/{hash[2:4]}/{hash} key pattern. Returns list of matching object keys. """ client = get_s3_client() bucket = get_s3_bucket() prefix = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}" response = client.list_objects_v2(Bucket=bucket, Prefix=prefix) if "Contents" not in response: return [] return [obj["Key"] for obj in response["Contents"]] def count_s3_objects_by_prefix(prefix: str) -> int: """ Count S3 objects with a given prefix. Useful for checking if duplicate uploads created multiple objects. """ client = get_s3_client() bucket = get_s3_bucket() response = client.list_objects_v2(Bucket=bucket, Prefix=prefix) if "Contents" not in response: return 0 return len(response["Contents"]) def s3_object_exists(sha256_hash: str) -> bool: """ Check if an S3 object exists for a given SHA256 hash. """ objects = list_s3_objects_by_hash(sha256_hash) return len(objects) > 0 def delete_s3_object_by_hash(sha256_hash: str) -> bool: """ Delete an S3 object by its SHA256 hash (for test cleanup). """ client = get_s3_client() bucket = get_s3_bucket() s3_key = f"fruits/{sha256_hash[:2]}/{sha256_hash[2:4]}/{sha256_hash}" try: client.delete_object(Bucket=bucket, Key=s3_key) return True except Exception: return False