Remove public internet features and fix upstream source UI (#107)

This commit is contained in:
Mondo Diaz
2026-01-29 13:26:28 -06:00
parent e93e7e7021
commit 82f67539bd
13 changed files with 194 additions and 292 deletions

View File

@@ -61,8 +61,7 @@ class Settings(BaseSettings):
# Cache settings
cache_encryption_key: str = "" # Fernet key for encrypting upstream credentials (auto-generated if empty)
# Global cache settings overrides (None = use DB value, True/False = override DB)
cache_allow_public_internet: Optional[bool] = None # Override allow_public_internet (air-gap mode)
# Global cache settings override (None = use DB value, True/False = override DB)
cache_auto_create_system_projects: Optional[bool] = None # Override auto_create_system_projects
# JWT Authentication settings (optional, for external identity providers)
@@ -108,7 +107,6 @@ class EnvUpstreamSource:
url: str,
source_type: str = "generic",
enabled: bool = True,
is_public: bool = True,
auth_type: str = "none",
username: Optional[str] = None,
password: Optional[str] = None,
@@ -118,7 +116,6 @@ class EnvUpstreamSource:
self.url = url
self.source_type = source_type
self.enabled = enabled
self.is_public = is_public
self.auth_type = auth_type
self.username = username
self.password = password
@@ -188,7 +185,6 @@ def parse_upstream_sources_from_env() -> list[EnvUpstreamSource]:
url=url,
source_type=data.get("TYPE", "generic").lower(),
enabled=parse_bool(data.get("ENABLED"), True),
is_public=parse_bool(data.get("IS_PUBLIC"), True),
auth_type=data.get("AUTH_TYPE", "none").lower(),
username=data.get("USERNAME"),
password=data.get("PASSWORD"),

View File

@@ -462,7 +462,6 @@ def _run_migrations():
source_type VARCHAR(50) NOT NULL DEFAULT 'generic',
url VARCHAR(2048) NOT NULL,
enabled BOOLEAN NOT NULL DEFAULT FALSE,
is_public BOOLEAN NOT NULL DEFAULT TRUE,
auth_type VARCHAR(20) NOT NULL DEFAULT 'none',
username VARCHAR(255),
password_encrypted BYTEA,
@@ -480,7 +479,6 @@ def _run_migrations():
);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_enabled ON upstream_sources(enabled);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_source_type ON upstream_sources(source_type);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_is_public ON upstream_sources(is_public);
CREATE INDEX IF NOT EXISTS idx_upstream_sources_priority ON upstream_sources(priority);
""",
),
@@ -489,14 +487,13 @@ def _run_migrations():
sql="""
CREATE TABLE IF NOT EXISTS cache_settings (
id INTEGER PRIMARY KEY DEFAULT 1,
allow_public_internet BOOLEAN NOT NULL DEFAULT TRUE,
auto_create_system_projects BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT check_cache_settings_singleton CHECK (id = 1)
);
INSERT INTO cache_settings (id, allow_public_internet, auto_create_system_projects)
VALUES (1, TRUE, TRUE)
INSERT INTO cache_settings (id, auto_create_system_projects)
VALUES (1, TRUE)
ON CONFLICT (id) DO NOTHING;
""",
),
@@ -522,13 +519,50 @@ def _run_migrations():
Migration(
name="020_seed_default_upstream_sources",
sql="""
INSERT INTO upstream_sources (id, name, source_type, url, enabled, is_public, auth_type, priority)
VALUES
(gen_random_uuid(), 'npm-public', 'npm', 'https://registry.npmjs.org', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'pypi-public', 'pypi', 'https://pypi.org/simple', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'maven-central', 'maven', 'https://repo1.maven.org/maven2', FALSE, TRUE, 'none', 100),
(gen_random_uuid(), 'docker-hub', 'docker', 'https://registry-1.docker.io', FALSE, TRUE, 'none', 100)
ON CONFLICT (name) DO NOTHING;
-- Originally seeded public sources, but these are no longer used.
-- Migration 023 deletes any previously seeded sources.
-- This migration is now a no-op for fresh installs.
SELECT 1;
""",
),
Migration(
name="021_remove_is_public_from_upstream_sources",
sql="""
DO $$
BEGIN
-- Drop the index if it exists
DROP INDEX IF EXISTS idx_upstream_sources_is_public;
-- Drop the column if it exists
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'upstream_sources' AND column_name = 'is_public'
) THEN
ALTER TABLE upstream_sources DROP COLUMN is_public;
END IF;
END $$;
""",
),
Migration(
name="022_remove_allow_public_internet_from_cache_settings",
sql="""
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'cache_settings' AND column_name = 'allow_public_internet'
) THEN
ALTER TABLE cache_settings DROP COLUMN allow_public_internet;
END IF;
END $$;
""",
),
Migration(
name="023_delete_seeded_public_sources",
sql="""
-- Delete the seeded public sources that were added by migration 020
DELETE FROM upstream_sources
WHERE name IN ('npm-public', 'pypi-public', 'maven-central', 'docker-hub');
""",
),
]

View File

@@ -667,7 +667,6 @@ class UpstreamSource(Base):
source_type = Column(String(50), default="generic", nullable=False)
url = Column(String(2048), nullable=False)
enabled = Column(Boolean, default=False, nullable=False)
is_public = Column(Boolean, default=True, nullable=False)
auth_type = Column(String(20), default="none", nullable=False)
username = Column(String(255))
password_encrypted = Column(LargeBinary)
@@ -684,7 +683,6 @@ class UpstreamSource(Base):
__table_args__ = (
Index("idx_upstream_sources_enabled", "enabled"),
Index("idx_upstream_sources_source_type", "source_type"),
Index("idx_upstream_sources_is_public", "is_public"),
Index("idx_upstream_sources_priority", "priority"),
CheckConstraint(
"source_type IN ('npm', 'pypi', 'maven', 'docker', 'helm', 'nuget', 'deb', 'rpm', 'generic')",
@@ -747,13 +745,12 @@ class UpstreamSource(Base):
class CacheSettings(Base):
"""Global cache settings (singleton table).
Controls behavior of the upstream caching system including air-gap mode.
Controls behavior of the upstream caching system.
"""
__tablename__ = "cache_settings"
id = Column(Integer, primary_key=True, default=1)
allow_public_internet = Column(Boolean, default=True, nullable=False)
auto_create_system_projects = Column(Boolean, default=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(

View File

@@ -7866,7 +7866,6 @@ from .upstream import (
UpstreamTimeoutError,
UpstreamHTTPError,
UpstreamSSLError,
AirGapError,
FileSizeExceededError as UpstreamFileSizeExceededError,
SourceNotFoundError,
SourceDisabledError,
@@ -8021,10 +8020,6 @@ def cache_artifact(
- Optionally creates tag in user project
- Records URL mapping for provenance
**Air-Gap Mode:**
When `allow_public_internet` is false, only URLs matching private
(non-public) upstream sources are allowed.
**Example (curl):**
```bash
curl -X POST "http://localhost:8080/api/v1/cache" \\
@@ -8118,8 +8113,6 @@ def cache_artifact(
cache_request.url,
expected_hash=cache_request.expected_hash,
)
except AirGapError as e:
raise HTTPException(status_code=403, detail=str(e))
except SourceDisabledError as e:
raise HTTPException(status_code=503, detail=str(e))
except UpstreamHTTPError as e:
@@ -8333,7 +8326,6 @@ def _env_source_to_response(env_source) -> UpstreamSourceResponse:
source_type=env_source.source_type,
url=env_source.url,
enabled=env_source.enabled,
is_public=env_source.is_public,
auth_type=env_source.auth_type,
username=env_source.username,
has_password=bool(env_source.password),
@@ -8417,7 +8409,6 @@ def list_upstream_sources(
source_type=s.source_type,
url=s.url,
enabled=s.enabled,
is_public=s.is_public,
auth_type=s.auth_type,
username=s.username,
has_password=s.has_password(),
@@ -8466,7 +8457,6 @@ def create_upstream_source(
"source_type": "npm",
"url": "https://npm.internal.corp",
"enabled": true,
"is_public": false,
"auth_type": "basic",
"username": "reader",
"password": "secret123",
@@ -8488,7 +8478,6 @@ def create_upstream_source(
source_type=source_create.source_type,
url=source_create.url,
enabled=source_create.enabled,
is_public=source_create.is_public,
auth_type=source_create.auth_type,
username=source_create.username,
priority=source_create.priority,
@@ -8528,7 +8517,6 @@ def create_upstream_source(
source_type=source.source_type,
url=source.url,
enabled=source.enabled,
is_public=source.is_public,
auth_type=source.auth_type,
username=source.username,
has_password=source.has_password(),
@@ -8576,7 +8564,6 @@ def get_upstream_source(
source_type=source.source_type,
url=source.url,
enabled=source.enabled,
is_public=source.is_public,
auth_type=source.auth_type,
username=source.username,
has_password=source.has_password(),
@@ -8663,10 +8650,6 @@ def update_upstream_source(
changes["enabled"] = {"old": source.enabled, "new": source_update.enabled}
source.enabled = source_update.enabled
if source_update.is_public is not None and source_update.is_public != source.is_public:
changes["is_public"] = {"old": source.is_public, "new": source_update.is_public}
source.is_public = source_update.is_public
if source_update.auth_type is not None and source_update.auth_type != source.auth_type:
changes["auth_type"] = {"old": source.auth_type, "new": source_update.auth_type}
source.auth_type = source_update.auth_type
@@ -8719,7 +8702,6 @@ def update_upstream_source(
source_type=source.source_type,
url=source.url,
enabled=source.enabled,
is_public=source.is_public,
auth_type=source.auth_type,
username=source.username,
has_password=source.has_password(),
@@ -8860,12 +8842,10 @@ def get_cache_settings(
Admin-only endpoint for viewing cache configuration.
**Settings:**
- `allow_public_internet`: When false, blocks all requests to sources marked `is_public=true` (air-gap mode)
- `auto_create_system_projects`: When true, system projects (`_npm`, etc.) are created automatically on first cache
**Environment variable overrides:**
Settings can be overridden via environment variables:
- `ORCHARD_CACHE_ALLOW_PUBLIC_INTERNET`: Overrides `allow_public_internet`
- `ORCHARD_CACHE_AUTO_CREATE_SYSTEM_PROJECTS`: Overrides `auto_create_system_projects`
When an env var override is active, the `*_env_override` field will contain the override value.
@@ -8874,12 +8854,6 @@ def get_cache_settings(
db_settings = _get_cache_settings(db)
# Apply env var overrides
allow_public_internet = db_settings.allow_public_internet
allow_public_internet_env_override = None
if app_settings.cache_allow_public_internet is not None:
allow_public_internet = app_settings.cache_allow_public_internet
allow_public_internet_env_override = app_settings.cache_allow_public_internet
auto_create_system_projects = db_settings.auto_create_system_projects
auto_create_system_projects_env_override = None
if app_settings.cache_auto_create_system_projects is not None:
@@ -8887,9 +8861,7 @@ def get_cache_settings(
auto_create_system_projects_env_override = app_settings.cache_auto_create_system_projects
return CacheSettingsResponse(
allow_public_internet=allow_public_internet,
auto_create_system_projects=auto_create_system_projects,
allow_public_internet_env_override=allow_public_internet_env_override,
auto_create_system_projects_env_override=auto_create_system_projects_env_override,
created_at=db_settings.created_at,
updated_at=db_settings.updated_at,
@@ -8915,16 +8887,11 @@ def update_cache_settings(
Supports partial updates - only provided fields are updated.
**Settings:**
- `allow_public_internet`: When false, enables air-gap mode (blocks public sources)
- `auto_create_system_projects`: When false, system projects must be created manually
**Note:** Environment variables can override these settings. When overridden,
the `*_env_override` fields in the response indicate the effective value.
Updates to the database will be saved but won't take effect until the env var is removed.
**Warning:** Changing `allow_public_internet` to false will immediately block
all cache requests to public sources. This is a security-sensitive setting
and is logged prominently.
"""
app_settings = get_settings()
settings = _get_cache_settings(db)
@@ -8932,26 +8899,6 @@ def update_cache_settings(
# Track changes for audit log
changes = {}
if settings_update.allow_public_internet is not None:
if settings_update.allow_public_internet != settings.allow_public_internet:
changes["allow_public_internet"] = {
"old": settings.allow_public_internet,
"new": settings_update.allow_public_internet,
}
settings.allow_public_internet = settings_update.allow_public_internet
# Log prominently for security audit
if not settings_update.allow_public_internet:
logger.warning(
f"AIR-GAP MODE ENABLED by {current_user.username} - "
f"all public internet access is now blocked"
)
else:
logger.warning(
f"AIR-GAP MODE DISABLED by {current_user.username} - "
f"public internet access is now allowed"
)
if settings_update.auto_create_system_projects is not None:
if settings_update.auto_create_system_projects != settings.auto_create_system_projects:
changes["auto_create_system_projects"] = {
@@ -8961,11 +8908,9 @@ def update_cache_settings(
settings.auto_create_system_projects = settings_update.auto_create_system_projects
if changes:
# Audit log with security flag for air-gap changes
is_security_change = "allow_public_internet" in changes
_log_audit(
db,
action="cache_settings.update" if not is_security_change else "cache_settings.security_update",
action="cache_settings.update",
resource="cache-settings",
user_id=current_user.username,
source_ip=request.client.host if request.client else None,
@@ -8976,12 +8921,6 @@ def update_cache_settings(
db.refresh(settings)
# Apply env var overrides for the response
allow_public_internet = settings.allow_public_internet
allow_public_internet_env_override = None
if app_settings.cache_allow_public_internet is not None:
allow_public_internet = app_settings.cache_allow_public_internet
allow_public_internet_env_override = app_settings.cache_allow_public_internet
auto_create_system_projects = settings.auto_create_system_projects
auto_create_system_projects_env_override = None
if app_settings.cache_auto_create_system_projects is not None:
@@ -8989,9 +8928,7 @@ def update_cache_settings(
auto_create_system_projects_env_override = app_settings.cache_auto_create_system_projects
return CacheSettingsResponse(
allow_public_internet=allow_public_internet,
auto_create_system_projects=auto_create_system_projects,
allow_public_internet_env_override=allow_public_internet_env_override,
auto_create_system_projects_env_override=auto_create_system_projects_env_override,
created_at=settings.created_at,
updated_at=settings.updated_at,

View File

@@ -1214,7 +1214,6 @@ class UpstreamSourceCreate(BaseModel):
source_type: str = "generic"
url: str
enabled: bool = False
is_public: bool = True
auth_type: str = "none"
username: Optional[str] = None
password: Optional[str] = None # Write-only
@@ -1271,7 +1270,6 @@ class UpstreamSourceUpdate(BaseModel):
source_type: Optional[str] = None
url: Optional[str] = None
enabled: Optional[bool] = None
is_public: Optional[bool] = None
auth_type: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None # Write-only, None = keep existing, empty string = clear
@@ -1331,7 +1329,6 @@ class UpstreamSourceResponse(BaseModel):
source_type: str
url: str
enabled: bool
is_public: bool
auth_type: str
username: Optional[str]
has_password: bool # True if password is set
@@ -1347,9 +1344,7 @@ class UpstreamSourceResponse(BaseModel):
class CacheSettingsResponse(BaseModel):
"""Global cache settings response"""
allow_public_internet: bool
auto_create_system_projects: bool
allow_public_internet_env_override: Optional[bool] = None # Set if overridden by env var
auto_create_system_projects_env_override: Optional[bool] = None # Set if overridden by env var
created_at: Optional[datetime] = None # May be None for legacy data
updated_at: Optional[datetime] = None # May be None for legacy data
@@ -1360,7 +1355,6 @@ class CacheSettingsResponse(BaseModel):
class CacheSettingsUpdate(BaseModel):
"""Update cache settings (partial)"""
allow_public_internet: Optional[bool] = None
auto_create_system_projects: Optional[bool] = None

View File

@@ -57,10 +57,6 @@ class UpstreamSSLError(UpstreamError):
pass
class AirGapError(UpstreamError):
"""Request blocked due to air-gap mode."""
pass
class FileSizeExceededError(UpstreamError):
@@ -156,12 +152,6 @@ class UpstreamClient:
# Sort sources by priority (lower = higher priority)
self.sources = sorted(self.sources, key=lambda s: s.priority)
def _get_allow_public_internet(self) -> bool:
"""Get the allow_public_internet setting."""
if self.cache_settings is None:
return True # Default to allowing if no settings provided
return self.cache_settings.allow_public_internet
def _match_source(self, url: str) -> Optional[UpstreamSource]:
"""
Find the upstream source that matches the given URL.
@@ -288,7 +278,6 @@ class UpstreamClient:
FetchResult with content, hash, size, and headers.
Raises:
AirGapError: If air-gap mode blocks the request.
SourceDisabledError: If the matching source is disabled.
UpstreamConnectionError: On connection failures.
UpstreamTimeoutError: On timeout.
@@ -301,19 +290,6 @@ class UpstreamClient:
# Match URL to source
source = self._match_source(url)
# Check air-gap mode
allow_public = self._get_allow_public_internet()
if not allow_public:
if source is None:
raise AirGapError(
f"Air-gap mode enabled: URL does not match any configured upstream source: {url}"
)
if source.is_public:
raise AirGapError(
f"Air-gap mode enabled: Cannot fetch from public source '{source.name}'"
)
# Check if source is enabled (if we have a match)
if source is not None and not source.enabled:
raise SourceDisabledError(
@@ -536,7 +512,8 @@ class UpstreamClient:
Test connectivity to an upstream source.
Performs a HEAD request to the source URL to verify connectivity
and authentication.
and authentication. Does not follow redirects - a 3xx response
is considered successful since it proves the server is reachable.
Args:
source: The upstream source to test.
@@ -564,7 +541,7 @@ class UpstreamClient:
source.url,
headers=headers,
auth=auth,
follow_redirects=True,
follow_redirects=False,
)
# Consider 2xx and 3xx as success, also 405 (Method Not Allowed)
# since some servers don't support HEAD
@@ -582,5 +559,7 @@ class UpstreamClient:
return (False, f"Connection timed out: {e}", None)
except httpx.ReadTimeout as e:
return (False, f"Read timed out: {e}", None)
except httpx.TooManyRedirects as e:
return (False, f"Too many redirects: {e}", None)
except Exception as e:
return (False, f"Error: {e}", None)