"""Unit tests for metadata extraction functionality.""" import io import gzip import tarfile import zipfile import pytest from app.metadata import ( extract_metadata, extract_deb_metadata, extract_wheel_metadata, extract_tarball_metadata, extract_jar_metadata, parse_deb_control, ) class TestDebMetadata: """Tests for Debian package metadata extraction.""" def test_parse_deb_control_basic(self): """Test parsing a basic control file.""" control = """Package: my-package Version: 1.2.3 Architecture: amd64 Maintainer: Test Description: A test package """ result = parse_deb_control(control) assert result["package_name"] == "my-package" assert result["version"] == "1.2.3" assert result["architecture"] == "amd64" assert result["format"] == "deb" def test_parse_deb_control_with_epoch(self): """Test parsing version with epoch.""" control = """Package: another-pkg Version: 2:1.0.0-1 """ result = parse_deb_control(control) assert result["version"] == "2:1.0.0-1" assert result["package_name"] == "another-pkg" assert result["format"] == "deb" def test_extract_deb_metadata_invalid_magic(self): """Test that invalid ar magic returns empty dict.""" file = io.BytesIO(b"not an ar archive") result = extract_deb_metadata(file) assert result == {} def test_extract_deb_metadata_valid_ar_no_control(self): """Test ar archive without control.tar returns empty.""" # Create minimal ar archive with just debian-binary ar_data = b"!\n" ar_data += b"debian-binary/ 0 0 0 100644 4 `\n" ar_data += b"2.0\n" file = io.BytesIO(ar_data) result = extract_deb_metadata(file) # Should return empty since no control.tar found assert result == {} or "version" not in result class TestWheelMetadata: """Tests for Python wheel metadata extraction.""" def _create_wheel_with_metadata(self, metadata_content: str) -> io.BytesIO: """Helper to create a wheel file with given METADATA content.""" buf = io.BytesIO() with zipfile.ZipFile(buf, 'w') as zf: zf.writestr('package-1.0.0.dist-info/METADATA', metadata_content) buf.seek(0) return buf def test_extract_wheel_version(self): """Test extracting version from wheel METADATA.""" metadata = """Metadata-Version: 2.1 Name: my-package Version: 2.3.4 Summary: A test package """ file = self._create_wheel_with_metadata(metadata) result = extract_wheel_metadata(file) assert result.get("version") == "2.3.4" assert result.get("package_name") == "my-package" assert result.get("format") == "wheel" def test_extract_wheel_no_version(self): """Test wheel without version field.""" metadata = """Metadata-Version: 2.1 Name: no-version-pkg """ file = self._create_wheel_with_metadata(metadata) result = extract_wheel_metadata(file) assert "version" not in result assert result.get("package_name") == "no-version-pkg" assert result.get("format") == "wheel" def test_extract_wheel_invalid_zip(self): """Test that invalid zip returns format-only dict.""" file = io.BytesIO(b"not a zip file") result = extract_wheel_metadata(file) assert result == {"format": "wheel"} def test_extract_wheel_no_metadata_file(self): """Test wheel without METADATA file returns format-only dict.""" buf = io.BytesIO() with zipfile.ZipFile(buf, 'w') as zf: zf.writestr('some_file.py', 'print("hello")') buf.seek(0) result = extract_wheel_metadata(buf) assert result == {"format": "wheel"} class TestTarballMetadata: """Tests for tarball metadata extraction from filename.""" def test_extract_version_from_filename_standard(self): """Test standard package-version.tar.gz format.""" file = io.BytesIO(b"") # Content doesn't matter for filename extraction result = extract_tarball_metadata(file, "mypackage-1.2.3.tar.gz") assert result.get("version") == "1.2.3" assert result.get("package_name") == "mypackage" assert result.get("format") == "tarball" def test_extract_version_with_v_prefix(self): """Test version with v prefix.""" file = io.BytesIO(b"") result = extract_tarball_metadata(file, "package-v2.0.0.tar.gz") assert result.get("version") == "2.0.0" assert result.get("package_name") == "package" assert result.get("format") == "tarball" def test_extract_version_underscore_separator(self): """Test package_version format.""" file = io.BytesIO(b"") result = extract_tarball_metadata(file, "my_package_3.1.4.tar.gz") assert result.get("version") == "3.1.4" assert result.get("package_name") == "my_package" assert result.get("format") == "tarball" def test_extract_version_complex(self): """Test complex version string.""" file = io.BytesIO(b"") result = extract_tarball_metadata(file, "package-1.0.0-beta.1.tar.gz") # The regex handles versions with suffix like -beta_1 assert result.get("format") == "tarball" # May or may not extract version depending on regex match if "version" in result: assert result.get("package_name") == "package" def test_extract_no_version_in_filename(self): """Test filename without version returns format-only dict.""" file = io.BytesIO(b"") result = extract_tarball_metadata(file, "package.tar.gz") # Should return format but no version assert result.get("version") is None assert result.get("format") == "tarball" class TestJarMetadata: """Tests for JAR/Java metadata extraction.""" def _create_jar_with_manifest(self, manifest_content: str) -> io.BytesIO: """Helper to create a JAR file with given MANIFEST.MF content.""" buf = io.BytesIO() with zipfile.ZipFile(buf, 'w') as zf: zf.writestr('META-INF/MANIFEST.MF', manifest_content) buf.seek(0) return buf def test_extract_jar_version_from_manifest(self): """Test extracting version from MANIFEST.MF.""" manifest = """Manifest-Version: 1.0 Implementation-Title: my-library Implementation-Version: 4.5.6 """ file = self._create_jar_with_manifest(manifest) result = extract_jar_metadata(file) assert result.get("version") == "4.5.6" assert result.get("package_name") == "my-library" assert result.get("format") == "jar" def test_extract_jar_bundle_version(self): """Test extracting OSGi Bundle-Version.""" manifest = """Manifest-Version: 1.0 Bundle-Version: 2.1.0 Bundle-Name: Test Bundle """ file = self._create_jar_with_manifest(manifest) result = extract_jar_metadata(file) # Bundle-Version is stored in bundle_version, not version assert result.get("bundle_version") == "2.1.0" assert result.get("bundle_name") == "Test Bundle" assert result.get("format") == "jar" def test_extract_jar_invalid_zip(self): """Test that invalid JAR returns format-only dict.""" file = io.BytesIO(b"not a jar file") result = extract_jar_metadata(file) assert result == {"format": "jar"} class TestExtractMetadataDispatch: """Tests for the main extract_metadata dispatcher function.""" def test_dispatch_to_wheel(self): """Test that .whl files use wheel extractor.""" buf = io.BytesIO() with zipfile.ZipFile(buf, 'w') as zf: zf.writestr('pkg-1.0.dist-info/METADATA', 'Version: 1.0.0\nName: pkg') buf.seek(0) result = extract_metadata(buf, "package-1.0.0-py3-none-any.whl") assert result.get("version") == "1.0.0" assert result.get("package_name") == "pkg" assert result.get("format") == "wheel" def test_dispatch_to_tarball(self): """Test that .tar.gz files use tarball extractor.""" file = io.BytesIO(b"") result = extract_metadata(file, "mypackage-2.3.4.tar.gz") assert result.get("version") == "2.3.4" assert result.get("package_name") == "mypackage" assert result.get("format") == "tarball" def test_dispatch_unknown_extension(self): """Test that unknown extensions return empty dict.""" file = io.BytesIO(b"some content") result = extract_metadata(file, "unknown.xyz") assert result == {} def test_file_position_reset_after_extraction(self): """Test that file position is reset to start after extraction.""" buf = io.BytesIO() with zipfile.ZipFile(buf, 'w') as zf: zf.writestr('pkg-1.0.dist-info/METADATA', 'Version: 1.0.0\nName: pkg') buf.seek(0) extract_metadata(buf, "package.whl") # File should be back at position 0 assert buf.tell() == 0