From c9276a8009df1bfff899caeb10410d736e3ca867 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Sun, 11 Jan 2026 22:27:32 -0800
Subject: [PATCH 01/12] feat: add GET /manifest endpoint for mothership service
 discovery

Implement a manifest endpoint on LoadBalancer handlers to serve flash_manifest.json
for cross-endpoint routing. The endpoint is conditionally registered when the
FLASH_IS_MOTHERSHIP=true environment variable is set, enabling child endpoints
to fetch function/resource metadata from the mothership.

Changes:
- Add /manifest to reserved paths in manifest builder
- Implement conditional GET /manifest endpoint in lb_handler factory
- Returns 200 with manifest JSON on success, 404 if not found
- Endpoint only registers for LoadBalancer resources with env var set
- Add comprehensive unit and integration tests (18 unit, 4 integration)
---
 .../cli/commands/build_utils/manifest.py      |   2 +-
 src/tetra_rp/runtime/lb_handler.py            |  38 ++-
 tests/integration/test_lb_remote_execution.py | 146 +++++++++
 tests/unit/runtime/test_lb_handler.py         | 307 ++++++++++++++++++
 4 files changed, 490 insertions(+), 3 deletions(-)
 create mode 100644 tests/unit/runtime/test_lb_handler.py

diff --git a/src/tetra_rp/cli/commands/build_utils/manifest.py b/src/tetra_rp/cli/commands/build_utils/manifest.py
index 2664150f..bd0261cd 100644
--- a/src/tetra_rp/cli/commands/build_utils/manifest.py
+++ b/src/tetra_rp/cli/commands/build_utils/manifest.py
@@ -8,7 +8,7 @@
 
 from .scanner import RemoteFunctionMetadata
 
-RESERVED_PATHS = ["/execute", "/ping"]
+RESERVED_PATHS = ["/execute", "/ping", "/manifest"]
 
 
 @dataclass
diff --git a/src/tetra_rp/runtime/lb_handler.py b/src/tetra_rp/runtime/lb_handler.py
index 82fec707..4193dd02 100644
--- a/src/tetra_rp/runtime/lb_handler.py
+++ b/src/tetra_rp/runtime/lb_handler.py
@@ -1,8 +1,10 @@
 """Factory for creating FastAPI load-balanced handlers.
 
 This module provides the factory function for generating FastAPI applications
-that handle load-balanced serverless endpoints. It supports both user-defined
-HTTP routes and the framework's /execute endpoint for @remote function execution.
+that handle load-balanced serverless endpoints. It supports:
+- User-defined HTTP routes
+- /execute endpoint for @remote function execution (LiveLoadBalancer only)
+- /manifest endpoint for mothership service discovery (when FLASH_IS_MOTHERSHIP=true)
 
 Security Model:
     The /execute endpoint accepts and executes serialized function code. This is
@@ -13,14 +15,20 @@
     4. In production, API authentication should protect the /execute endpoint
 
     Users should NOT expose the /execute endpoint to untrusted clients.
+
+    The /manifest endpoint returns deployment metadata and is safe to expose
+    publicly as it contains only structural information about deployed functions.
 """
 
 import inspect
 import logging
+import os
 from typing import Any, Callable, Dict
 
 from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
 
+from .generic_handler import load_manifest
 from .serialization import (
     deserialize_args,
     deserialize_kwargs,
@@ -163,6 +171,32 @@ async def execute_remote_function(request: Request) -> Dict[str, Any]:
                 logger.error(f"Unexpected error in /execute endpoint: {e}")
                 return {"success": False, "error": f"Unexpected error: {e}"}
 
+    # Register /manifest endpoint for mothership discovery (if enabled)
+    if os.getenv("FLASH_IS_MOTHERSHIP", "").lower() == "true":
+
+        @app.get("/manifest")
+        async def get_manifest() -> JSONResponse:
+            """Mothership discovery endpoint.
+
+            Returns the flash_manifest.json content for service discovery.
+            Only available when FLASH_IS_MOTHERSHIP=true environment variable is set.
+
+            Returns:
+                JSONResponse with manifest content or 404 if not found
+            """
+            manifest_dict = load_manifest()
+
+            if not manifest_dict or not manifest_dict.get("resources"):
+                return JSONResponse(
+                    status_code=404,
+                    content={
+                        "error": "Manifest not found",
+                        "detail": "flash_manifest.json could not be loaded",
+                    },
+                )
+
+            return JSONResponse(status_code=200, content=manifest_dict)
+
     # Register user-defined routes from registry
     for (method, path), handler in route_registry.items():
         method_upper = method.upper()
diff --git a/tests/integration/test_lb_remote_execution.py b/tests/integration/test_lb_remote_execution.py
index 4d34abf3..2aca85d0 100644
--- a/tests/integration/test_lb_remote_execution.py
+++ b/tests/integration/test_lb_remote_execution.py
@@ -304,3 +304,149 @@ def get_status():
             assert scanner.resource_types["test-api"] == "LiveLoadBalancer"
             assert "deployed-api" in scanner.resource_types
             assert scanner.resource_types["deployed-api"] == "LoadBalancerSlsResource"
+
+
+class TestManifestEndpointIntegration:
+    """Integration tests for GET /manifest endpoint."""
+
+    def test_manifest_endpoint_in_live_load_balancer(self, monkeypatch):
+        """Test manifest endpoint in LiveLoadBalancer with FLASH_IS_MOTHERSHIP=true."""
+        from unittest.mock import patch
+        from fastapi.testclient import TestClient
+
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        # Create a LiveLoadBalancer
+        lb = LiveLoadBalancer(name="test-mothership")
+
+        # Define a simple function on the mothership
+        @remote(lb, method="GET", path="/api/hello")
+        async def hello():
+            return {"message": "hello"}
+
+        # Create manifest data
+        test_manifest = {
+            "version": "1.0",
+            "generated_at": "2024-01-15T10:30:00Z",
+            "project_name": "test-app",
+            "resources": {
+                "test-mothership": {
+                    "resource_type": "LiveLoadBalancer",
+                    "handler_file": "handler_test_mothership.py",
+                    "functions": [
+                        {
+                            "name": "hello",
+                            "module": "test_module",
+                            "is_async": True,
+                            "is_class": False,
+                            "http_method": "GET",
+                            "http_path": "/api/hello",
+                        }
+                    ],
+                }
+            },
+            "function_registry": {"hello": "test-mothership"},
+            "routes": {"test-mothership": {"GET /api/hello": "hello"}},
+        }
+
+        # Mock load_manifest to return test manifest
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
+        ):
+            from tetra_rp.runtime.lb_handler import create_lb_handler
+
+            # Create handler with manifest endpoint enabled
+            route_registry = {("GET", "/api/hello"): hello}
+            app = create_lb_handler(route_registry, include_execute=True)
+            client = TestClient(app)
+
+            # Verify /manifest endpoint returns manifest
+            response = client.get("/manifest")
+            assert response.status_code == 200
+            assert response.json() == test_manifest
+
+    def test_manifest_endpoint_excluded_when_env_not_set(self):
+        """Test manifest endpoint is not available when FLASH_IS_MOTHERSHIP not set."""
+        from fastapi.testclient import TestClient
+        from tetra_rp.runtime.lb_handler import create_lb_handler
+
+        # Create handler without env var set
+        app = create_lb_handler({}, include_execute=False)
+        client = TestClient(app)
+
+        # Verify /manifest returns 404
+        response = client.get("/manifest")
+        assert response.status_code == 404
+
+    def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
+        """Test manifest endpoint with LoadBalancerSlsResource."""
+        from unittest.mock import patch
+        from fastapi.testclient import TestClient
+
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        # Create test manifest for deployed endpoint
+        test_manifest = {
+            "version": "1.0",
+            "generated_at": "2024-01-15T10:30:00Z",
+            "project_name": "deployed-app",
+            "resources": {
+                "gpu-worker": {
+                    "resource_type": "LoadBalancerSlsResource",
+                    "handler_file": "handler_gpu_worker.py",
+                    "functions": [
+                        {
+                            "name": "process_image",
+                            "module": "workers.gpu",
+                            "is_async": True,
+                            "is_class": False,
+                            "http_method": "POST",
+                            "http_path": "/api/process",
+                        }
+                    ],
+                }
+            },
+            "function_registry": {"process_image": "gpu-worker"},
+        }
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
+        ):
+            from tetra_rp.runtime.lb_handler import create_lb_handler
+
+            # Create deployed handler (not LiveLoadBalancer)
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            # Verify /manifest endpoint is available
+            response = client.get("/manifest")
+            assert response.status_code == 200
+            assert response.json() == test_manifest
+
+    def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
+        """Test that /manifest endpoint coexists with /ping health check."""
+        from unittest.mock import patch
+        from fastapi.testclient import TestClient
+
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        test_manifest = {
+            "version": "1.0",
+            "resources": {"test": {}},
+            "function_registry": {},
+        }
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
+        ):
+            from tetra_rp.runtime.lb_handler import create_lb_handler
+
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            # Verify both endpoints exist
+            manifest_response = client.get("/manifest")
+            assert manifest_response.status_code == 200
+
+            ping_response = client.get("/ping")
+            assert ping_response.status_code == 404  # Ping not auto-added by factory
diff --git a/tests/unit/runtime/test_lb_handler.py b/tests/unit/runtime/test_lb_handler.py
new file mode 100644
index 00000000..e02c6aa0
--- /dev/null
+++ b/tests/unit/runtime/test_lb_handler.py
@@ -0,0 +1,307 @@
+"""Unit tests for LoadBalancer handler factory."""
+
+from unittest.mock import patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from tetra_rp.runtime.lb_handler import create_lb_handler
+
+
+class TestManifestEndpoint:
+    """Tests for GET /manifest endpoint."""
+
+    @pytest.fixture
+    def sample_manifest(self):
+        """Sample manifest for testing."""
+        return {
+            "version": "1.0",
+            "generated_at": "2024-01-15T10:30:00Z",
+            "project_name": "test-app",
+            "resources": {
+                "gpu_config": {
+                    "resource_type": "LoadBalancerSlsResource",
+                    "handler_file": "handler_gpu_config.py",
+                    "functions": [
+                        {
+                            "name": "process_image",
+                            "module": "workers.gpu",
+                            "is_async": True,
+                            "is_class": False,
+                            "http_method": "POST",
+                            "http_path": "/api/process",
+                        }
+                    ],
+                }
+            },
+            "function_registry": {"process_image": "gpu_config"},
+            "routes": {"gpu_config": {"POST /api/process": "process_image"}},
+        }
+
+    def test_manifest_endpoint_registered_when_env_var_true(
+        self, sample_manifest, monkeypatch
+    ):
+        """Verify /manifest endpoint exists when FLASH_IS_MOTHERSHIP=true."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
+        ):
+            app = create_lb_handler({}, include_execute=False)
+            routes = [route.path for route in app.routes]
+
+            assert "/manifest" in routes
+
+    def test_manifest_endpoint_not_registered_when_env_var_false(
+        self, sample_manifest, monkeypatch
+    ):
+        """Verify /manifest endpoint doesn't exist when FLASH_IS_MOTHERSHIP=false."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "false")
+
+        app = create_lb_handler({}, include_execute=False)
+        routes = [route.path for route in app.routes]
+
+        assert "/manifest" not in routes
+
+    def test_manifest_endpoint_not_registered_when_env_var_missing(
+        self, sample_manifest
+    ):
+        """Verify /manifest endpoint doesn't exist when env var not set."""
+        app = create_lb_handler({}, include_execute=False)
+        client = TestClient(app)
+
+        response = client.get("/manifest")
+        assert response.status_code == 404
+
+    def test_manifest_endpoint_returns_200_with_valid_manifest(
+        self, sample_manifest, monkeypatch
+    ):
+        """Test happy path - endpoint returns 200 with valid manifest."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
+        ):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+
+            assert response.status_code == 200
+            assert response.json() == sample_manifest
+
+    def test_manifest_endpoint_returns_404_when_manifest_missing(self, monkeypatch):
+        """Test endpoint returns 404 when manifest file not found."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch("tetra_rp.runtime.lb_handler.load_manifest", return_value={}):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+
+            assert response.status_code == 404
+            data = response.json()
+            assert data["error"] == "Manifest not found"
+            assert "could not be loaded" in data["detail"]
+
+    def test_manifest_endpoint_case_insensitive_env_var_true(
+        self, sample_manifest, monkeypatch
+    ):
+        """Test endpoint registration with different case variations of 'true'."""
+        for env_value in ["True", "TRUE", "TrUe"]:
+            monkeypatch.setenv("FLASH_IS_MOTHERSHIP", env_value)
+
+            with patch(
+                "tetra_rp.runtime.lb_handler.load_manifest",
+                return_value=sample_manifest,
+            ):
+                app = create_lb_handler({}, include_execute=False)
+                routes = [route.path for route in app.routes]
+
+                assert "/manifest" in routes
+
+    def test_manifest_endpoint_case_insensitive_env_var_false(self, monkeypatch):
+        """Test endpoint not registered with non-'true' values."""
+        for env_value in ["False", "false", "yes", "1", ""]:
+            monkeypatch.setenv("FLASH_IS_MOTHERSHIP", env_value)
+
+            app = create_lb_handler({}, include_execute=False)
+            routes = [route.path for route in app.routes]
+
+            assert "/manifest" not in routes
+
+    def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch):
+        """Test that manifest response has correct structure."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
+        ):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+            data = response.json()
+
+            # Verify structure
+            assert "version" in data
+            assert "generated_at" in data
+            assert "project_name" in data
+            assert "resources" in data
+            assert "function_registry" in data
+
+    def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
+        """Test endpoint behavior when manifest has no resources."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        empty_manifest = {
+            "version": "1.0",
+            "project_name": "test",
+            "resources": {},
+            "function_registry": {},
+        }
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=empty_manifest
+        ):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+
+            # Should return 404 if no resources
+            assert response.status_code == 404
+
+    def test_manifest_endpoint_with_none_manifest(self, monkeypatch):
+        """Test endpoint behavior when load_manifest returns None."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch("tetra_rp.runtime.lb_handler.load_manifest", return_value=None):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+
+            assert response.status_code == 404
+
+    def test_manifest_endpoint_coexists_with_execute(
+        self, sample_manifest, monkeypatch
+    ):
+        """Test that /manifest endpoint coexists with /execute endpoint."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
+        ):
+            app = create_lb_handler({}, include_execute=True)
+            routes = [route.path for route in app.routes]
+
+            assert "/manifest" in routes
+            assert "/execute" in routes
+
+    def test_manifest_endpoint_coexists_with_user_routes(
+        self, sample_manifest, monkeypatch
+    ):
+        """Test that /manifest endpoint coexists with user-defined routes."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        async def dummy_handler():
+            return {"result": "ok"}
+
+        route_registry = {("GET", "/api/health"): dummy_handler}
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
+        ):
+            app = create_lb_handler(route_registry, include_execute=False)
+            routes = [route.path for route in app.routes]
+
+            assert "/manifest" in routes
+            assert "/api/health" in routes
+
+    def test_manifest_endpoint_content_type(self, sample_manifest, monkeypatch):
+        """Test that /manifest endpoint returns proper JSON content-type."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
+        ):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+
+            assert response.headers["content-type"] == "application/json"
+
+    def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
+        """Test endpoint with complex multi-resource manifest."""
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        complex_manifest = {
+            "version": "1.0",
+            "generated_at": "2024-01-15T10:30:00Z",
+            "project_name": "complex-app",
+            "resources": {
+                "gpu_config": {
+                    "resource_type": "LoadBalancerSlsResource",
+                    "handler_file": "handler_gpu.py",
+                    "functions": [
+                        {
+                            "name": "process_gpu",
+                            "module": "workers.gpu",
+                            "is_async": True,
+                            "is_class": False,
+                        }
+                    ],
+                },
+                "cpu_config": {
+                    "resource_type": "ServerlessEndpoint",
+                    "handler_file": "handler_cpu.py",
+                    "functions": [
+                        {
+                            "name": "process_cpu",
+                            "module": "workers.cpu",
+                            "is_async": True,
+                            "is_class": False,
+                        }
+                    ],
+                },
+            },
+            "function_registry": {
+                "process_gpu": "gpu_config",
+                "process_cpu": "cpu_config",
+            },
+        }
+
+        with patch(
+            "tetra_rp.runtime.lb_handler.load_manifest", return_value=complex_manifest
+        ):
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            response = client.get("/manifest")
+
+            assert response.status_code == 200
+            data = response.json()
+            assert len(data["resources"]) == 2
+            assert "gpu_config" in data["resources"]
+            assert "cpu_config" in data["resources"]
+
+
+class TestExecuteEndpointStillWorks:
+    """Tests to ensure /execute endpoint still works after manifest changes."""
+
+    def test_execute_endpoint_still_available_with_live_load_balancer(self):
+        """Verify /execute endpoint is still registered for LiveLoadBalancer."""
+        app = create_lb_handler({}, include_execute=True)
+        routes = [route.path for route in app.routes]
+
+        assert "/execute" in routes
+
+    def test_execute_endpoint_not_included_for_deployed(self):
+        """Verify /execute endpoint is not registered for deployed LoadBalancer."""
+        app = create_lb_handler({}, include_execute=False)
+        routes = [route.path for route in app.routes]
+
+        assert "/execute" not in routes

From 11fecb2fbb8c1bb317e3cbf25e8ea1c07208fb33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 02:30:09 -0800
Subject: [PATCH 02/12] docs: convert ASCII diagrams to MermaidJS

- Local Execution Flow: Shows synchronous path for functions in manifest
- Remote Execution Flow: Shows serialization, HTTP, and deserialization steps
- Manifest Synchronization: Shows cache-first approach with GQL fallback

Uses high-contrast MermaidJS styling with saturated colors and white text
for maximum readability as per project guidelines.
---
 docs/Cross_Endpoint_Routing.md | 191 ++++++++++++++++++++++++++-------
 1 file changed, 155 insertions(+), 36 deletions(-)

diff --git a/docs/Cross_Endpoint_Routing.md b/docs/Cross_Endpoint_Routing.md
index 6d059636..1a4330c6 100644
--- a/docs/Cross_Endpoint_Routing.md
+++ b/docs/Cross_Endpoint_Routing.md
@@ -640,46 +640,70 @@ Add new configuration by:
 
 #### Local Execution Flow
 
-```
-Function Call
-    ↓
-ProductionWrapper.wrap_function_execution()
-    ↓
-ServiceRegistry.get_resource_for_function()
-    ↓
-Manifest Lookup (resource = None)
-    ↓
-Local Execution (original_stub_func)
-    ↓
-Result
+```mermaid
+flowchart TD
+    A["Function Call"]
+    B["ProductionWrapper.wrap_function_execution()"]
+    C["ServiceRegistry.get_resource_for_function()"]
+    D["Manifest Lookup<br/>resource = None"]
+    E["Local Execution<br/>original_stub_func"]
+    F["Result"]
+
+    A --> B
+    B --> C
+    C --> D
+    D --> E
+    E --> F
+
+    style A fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style B fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style C fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style D fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style E fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style F fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
 ```
 
 #### Remote Execution Flow
 
-```
-Function Call
-    ↓
-ProductionWrapper.wrap_function_execution()
-    ↓
-ServiceRegistry.get_resource_for_function()
-    ↓
-Manifest Lookup (resource found)
-    ↓
-Ensure Directory Loaded
-    ↓
-DirectoryClient.get_endpoints()
-    ↓
-Get Remote Endpoint URL
-    ↓
-Serialize Arguments (cloudpickle → base64)
-    ↓
-HTTP POST to Remote Endpoint
-    ↓
-Remote Function Execution
-    ↓
-Deserialize Result (base64 → cloudpickle)
-    ↓
-Result
+```mermaid
+flowchart TD
+    A["Function Call"]
+    B["ProductionWrapper.wrap_function_execution()"]
+    C["ServiceRegistry.get_resource_for_function()"]
+    D["Manifest Lookup<br/>resource found"]
+    E["Ensure Directory Loaded"]
+    F["DirectoryClient.get_endpoints()"]
+    G["Get Remote Endpoint URL"]
+    H["Serialize Arguments<br/>cloudpickle → base64"]
+    I["HTTP POST to Remote Endpoint"]
+    J["Remote Function Execution"]
+    K["Deserialize Result<br/>base64 → cloudpickle"]
+    L["Result"]
+
+    A --> B
+    B --> C
+    C --> D
+    D --> E
+    E --> F
+    F --> G
+    G --> H
+    H --> I
+    I --> J
+    J --> K
+    K --> L
+
+    style A fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style B fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style C fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style D fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style E fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style F fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style G fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style H fill:#f57c00,stroke:#e65100,stroke-width:3px,color:#fff
+    style I fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style J fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style K fill:#f57c00,stroke:#e65100,stroke-width:3px,color:#fff
+    style L fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
 ```
 
 ### Design Decisions
@@ -920,6 +944,101 @@ client = DirectoryClient(mothership_url=...)
 endpoints = await client.get_endpoints()
 ```
 
+## Manifest Synchronization with RunPod GraphQL API
+
+### Overview
+
+The Mothership's GET /manifest endpoint is designed to pull configuration from RunPod's
+GraphQL API, which serves as the single source of truth for manifest data. This enables
+centralized configuration management and ensures all child endpoints receive consistent
+routing information.
+
+### Architecture
+
+```mermaid
+flowchart TD
+    A["Child Endpoint<br/>GET /manifest"]
+    B["Mothership"]
+    C["ManifestFetcher"]
+    D{Cache Valid?}
+    E["Serve Cached<br/>Manifest"]
+    F["Fetch from RunPod<br/>GraphQL API"]
+    G["Update<br/>flash_manifest.json"]
+    H["Cache Result<br/>TTL: 300s"]
+    I["Serve Manifest"]
+    J["Fallback:<br/>Load Local File"]
+
+    A -->|Request| B
+    B --> C
+    C --> D
+    D -->|Yes| E
+    D -->|No| F
+    E --> I
+    F --> G
+    G --> H
+    H --> I
+    F -->|Fails| J
+    J --> I
+
+    style A fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style B fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style C fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style D fill:#f57c00,stroke:#e65100,stroke-width:3px,color:#fff
+    style E fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style F fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style G fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style H fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style I fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style J fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+```
+
+### How It Works
+
+1. **Source of Truth**: The RunPod GraphQL API holds the authoritative manifest configuration
+2. **Caching Proxy**: The Mothership fetches from RunPod GQL and caches locally (5 min TTL)
+3. **Local Persistence**: The fetched manifest is written to `flash_manifest.json`
+4. **Graceful Fallback**: If RunPod GQL is unavailable, the Mothership serves the local file
+5. **Cache Invalidation**: Automatic expiry after the TTL; manual invalidation is supported
+
+### Implementation Status
+
+**Current (Placeholder)**:
+- `ManifestFetcher` class with caching infrastructure
+- Uses existing `RunpodGraphQLClient` for API communication
+- Falls back to local `flash_manifest.json` (GQL fetch raises `NotImplementedError`)
+- Cache TTL: 300 seconds (configurable)
+
+**Future (Full Implementation)**:
+- Implement `getManifest` query in `ManifestFetcher._fetch_from_gql()`
+- Add `saveManifest` mutation for updating manifest in RunPod
+- Real-time cache invalidation via webhooks
+- Health checks and retry logic
+
+### Configuration
+
+```bash
+# Enable Mothership mode (required for /manifest endpoint)
+export FLASH_IS_MOTHERSHIP=true
+
+# Optional: Identify this mothership instance
+export RUNPOD_ENDPOINT_ID=mothership-prod-1
+
+# Required for RunPod GraphQL API access
+export RUNPOD_API_KEY=your-api-key-here
+```
+
+### Cache Behavior
+
+- **Default TTL**: 300 seconds (5 minutes)
+- **Cache Key**: Per-mothership instance (no cross-instance cache)
+- **Concurrency-Safe**: Uses `asyncio.Lock` to serialize concurrent requests on one event loop (coroutine-safe, not thread-safe)
+- **Manual Invalidation**: `fetcher.invalidate_cache()` for testing
+
+### Historical Context
+
+A previous `StateManagerClient` (commit b19bf7c) used a REST API. The current placeholder
+prepares for a GQL-based architecture with improved caching and error handling.
+
 ## Key Implementation Highlights
 
 ### Design Focus

From 6dd03f13b5809b89e67e742667afb8600920b839 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 04:03:44 -0800
Subject: [PATCH 03/12] feat: add ManifestFetcher for caching manifest from
 RunPod GraphQL

- Add ManifestFetcher class with caching infrastructure (TTL: 300s)
- Integrate ManifestFetcher into lb_handler /manifest endpoint
- Use RunpodGraphQLClient for API communication
- Fall back to local flash_manifest.json when API unavailable
- Add comprehensive tests for ManifestFetcher and lb_handler
---
 src/tetra_rp/runtime/lb_handler.py            |  28 ++-
 src/tetra_rp/runtime/manifest_fetcher.py      | 192 ++++++++++++++++++
 tests/integration/test_lb_remote_execution.py |  41 ++--
 tests/unit/runtime/test_lb_handler.py         | 148 +++++++++++---
 tests/unit/runtime/test_manifest_fetcher.py   | 164 +++++++++++++++
 5 files changed, 523 insertions(+), 50 deletions(-)
 create mode 100644 src/tetra_rp/runtime/manifest_fetcher.py
 create mode 100644 tests/unit/runtime/test_manifest_fetcher.py

diff --git a/src/tetra_rp/runtime/lb_handler.py b/src/tetra_rp/runtime/lb_handler.py
index 4193dd02..495261d2 100644
--- a/src/tetra_rp/runtime/lb_handler.py
+++ b/src/tetra_rp/runtime/lb_handler.py
@@ -23,12 +23,12 @@
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, Optional
 
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 
-from .generic_handler import load_manifest
+from .manifest_fetcher import ManifestFetcher
 from .serialization import (
     deserialize_args,
     deserialize_kwargs,
@@ -37,6 +37,17 @@
 
 logger = logging.getLogger(__name__)
 
+# Module-level manifest fetcher (singleton, reused across requests)
+_manifest_fetcher: Optional[ManifestFetcher] = None
+
+
+def _get_manifest_fetcher() -> ManifestFetcher:
+    """Get or create the manifest fetcher singleton."""
+    global _manifest_fetcher
+    if _manifest_fetcher is None:
+        _manifest_fetcher = ManifestFetcher()
+    return _manifest_fetcher
+
 
 def create_lb_handler(
     route_registry: Dict[tuple[str, str], Callable], include_execute: bool = False
@@ -178,20 +189,27 @@ async def execute_remote_function(request: Request) -> Dict[str, Any]:
         async def get_manifest() -> JSONResponse:
             """Mothership discovery endpoint.
 
-            Returns the flash_manifest.json content for service discovery.
+            Fetches manifest from RunPod GraphQL API (source of truth), caches it
+            locally, and serves to child endpoints. Falls back to local file if
+            RunPod API is unavailable.
+
             Only available when FLASH_IS_MOTHERSHIP=true environment variable is set.
 
             Returns:
                 JSONResponse with manifest content or 404 if not found
             """
-            manifest_dict = load_manifest()
+            fetcher = _get_manifest_fetcher()
+            mothership_id = os.getenv("RUNPOD_ENDPOINT_ID")
+
+            # Fetch manifest (from cache, RunPod GQL, or local file)
+            manifest_dict = await fetcher.get_manifest(mothership_id)
 
             if not manifest_dict or not manifest_dict.get("resources"):
                 return JSONResponse(
                     status_code=404,
                     content={
                         "error": "Manifest not found",
-                        "detail": "flash_manifest.json could not be loaded",
+                        "detail": "Could not load manifest from RunPod or local file",
                     },
                 )
 
diff --git a/src/tetra_rp/runtime/manifest_fetcher.py b/src/tetra_rp/runtime/manifest_fetcher.py
new file mode 100644
index 00000000..8815add7
--- /dev/null
+++ b/src/tetra_rp/runtime/manifest_fetcher.py
@@ -0,0 +1,192 @@
+"""Manifest fetcher with RunPod GQL integration and caching.
+
+This module provides manifest fetching from RunPod GraphQL API (source of truth)
+with local file caching and fallback.
+"""
+
+import asyncio
+import json
+import logging
+import time
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from .config import DEFAULT_CACHE_TTL
+from .generic_handler import load_manifest
+
+logger = logging.getLogger(__name__)
+
+
+class ManifestFetcher:
+    """Fetches and caches manifest from RunPod GraphQL API.
+
+    RunPod's GraphQL API is the source of truth for manifest data. This
+    fetcher pulls from it using RunpodGraphQLClient, caches locally, and
+    falls back to local file if RunPod API is unavailable.
+    """
+
+    def __init__(
+        self,
+        cache_ttl: int = DEFAULT_CACHE_TTL,
+        manifest_path: Optional[Path] = None,
+    ):
+        """Initialize manifest fetcher.
+
+        Args:
+            cache_ttl: Cache time-to-live in seconds (default: 300)
+            manifest_path: Optional path to local manifest file
+        """
+        self.cache_ttl = cache_ttl
+        self.manifest_path = manifest_path
+
+        # Cache state
+        self._cached_manifest: Optional[Dict[str, Any]] = None
+        self._cache_loaded_at: float = 0
+        self._cache_lock = asyncio.Lock()
+
+    async def get_manifest(
+        self,
+        mothership_id: Optional[str] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """Get manifest from cache or fetch from RunPod GraphQL API.
+
+        Flow:
+        1. Check if cached and not expired → return cached
+        2. If expired/not cached → fetch from RunPod GraphQL API
+        3. Update local flash_manifest.json with fetched data
+        4. Cache the result
+        5. Return manifest
+
+        If RunPod GQL fetch fails, falls back to local file.
+
+        Args:
+            mothership_id: Optional mothership endpoint ID for tracking
+
+        Returns:
+            Manifest dictionary or None if unavailable
+        """
+        async with self._cache_lock:
+            now = time.time()
+            cache_age = now - self._cache_loaded_at
+
+            # Return cached if still valid
+            if self._cached_manifest and cache_age < self.cache_ttl:
+                logger.debug(
+                    f"Serving cached manifest (age: {cache_age:.1f}s, "
+                    f"TTL: {self.cache_ttl}s)"
+                )
+                return self._cached_manifest
+
+            # Cache expired or not loaded - fetch from RunPod GQL
+            logger.debug("Cache expired or empty, fetching from RunPod GraphQL API")
+
+            try:
+                # Fetch from RunPod GraphQL API (placeholder)
+                manifest = await self._fetch_from_gql(mothership_id)
+
+                # Update local flash_manifest.json
+                if manifest:
+                    self._update_local_file(manifest)
+
+                    # Update cache
+                    self._cached_manifest = manifest
+                    self._cache_loaded_at = now
+
+                    logger.info(
+                        f"Manifest fetched from RunPod GQL and cached "
+                        f"({len(manifest.get('resources', {}))} resources)"
+                    )
+                    return manifest
+
+            except NotImplementedError:
+                logger.debug(
+                    "RunPod GQL fetch not implemented, falling back to local file"
+                )
+            except Exception as e:
+                logger.warning(
+                    f"RunPod GQL fetch failed: {e}, falling back to local file"
+                )
+
+            # Fallback: load from local file
+            manifest = load_manifest(self.manifest_path)
+            if manifest:
+                # Cache the fallback manifest
+                self._cached_manifest = manifest
+                self._cache_loaded_at = now
+                logger.debug("Loaded and cached manifest from local file")
+
+            return manifest
+
+    async def _fetch_from_gql(
+        self,
+        mothership_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Fetch manifest from RunPod GraphQL API.
+
+        TBD: Future implementation will query RunPod's GraphQL API
+        to retrieve the manifest configuration.
+
+        Args:
+            mothership_id: Optional mothership endpoint ID
+
+        Returns:
+            Manifest dictionary from RunPod GQL
+
+        Raises:
+            NotImplementedError: Placeholder for future implementation
+
+        Note:
+            Future implementation will use RunpodGraphQLClient:
+
+            ```python
+            async with RunpodGraphQLClient() as client:
+                query = '''
+                query GetManifest($mothershipId: ID!) {
+                    getManifest(mothershipId: $mothershipId) {
+                        version
+                        projectName
+                        generatedAt
+                        resources
+                        functionRegistry
+                    }
+                }
+                '''
+                result = await client.execute(query, {"mothershipId": mothership_id})
+                return result["data"]["getManifest"]
+            ```
+        """
+        raise NotImplementedError(
+            "RunPod manifest query not yet implemented. "
+            "Falling back to local flash_manifest.json file."
+        )
+
+    def _update_local_file(self, manifest: Dict[str, Any]) -> None:
+        """Update local flash_manifest.json with fetched data.
+
+        Args:
+            manifest: Manifest dictionary to write
+        """
+        try:
+            # Determine file path
+            if self.manifest_path:
+                file_path = self.manifest_path
+            else:
+                file_path = Path.cwd() / "flash_manifest.json"
+
+            # Write manifest to file
+            with open(file_path, "w") as f:
+                json.dump(manifest, f, indent=2)
+
+            logger.debug(f"Updated local manifest file: {file_path}")
+
+        except Exception as e:
+            logger.warning(f"Failed to update local manifest file: {e}")
+            # Non-critical error - cached manifest still valid
+
+    def invalidate_cache(self) -> None:
+        """Manually invalidate the cache.
+
+        The next get_manifest() call skips the cache and refetches (GQL, then local file).
+        """
+        self._cache_loaded_at = 0
+        logger.debug("Manifest cache invalidated")
diff --git a/tests/integration/test_lb_remote_execution.py b/tests/integration/test_lb_remote_execution.py
index 2aca85d0..8c45022f 100644
--- a/tests/integration/test_lb_remote_execution.py
+++ b/tests/integration/test_lb_remote_execution.py
@@ -309,9 +309,18 @@ def get_status():
 class TestManifestEndpointIntegration:
     """Integration tests for GET /manifest endpoint."""
 
+    @pytest.fixture(autouse=True)
+    def reset_manifest_fetcher(self):
+        """Reset the global manifest fetcher before and after each test."""
+        import tetra_rp.runtime.lb_handler as lb_handler_module
+
+        lb_handler_module._manifest_fetcher = None
+        yield
+        lb_handler_module._manifest_fetcher = None
+
     def test_manifest_endpoint_in_live_load_balancer(self, monkeypatch):
         """Test manifest endpoint in LiveLoadBalancer with FLASH_IS_MOTHERSHIP=true."""
-        from unittest.mock import patch
+        from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
@@ -349,10 +358,12 @@ async def hello():
             "routes": {"test-mothership": {"GET /api/hello": "hello"}},
         }
 
-        # Mock load_manifest to return test manifest
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
-        ):
+        # Mock ManifestFetcher to return test manifest
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=test_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             from tetra_rp.runtime.lb_handler import create_lb_handler
 
             # Create handler with manifest endpoint enabled
@@ -380,7 +391,7 @@ def test_manifest_endpoint_excluded_when_env_not_set(self):
 
     def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
         """Test manifest endpoint with LoadBalancerSlsResource."""
-        from unittest.mock import patch
+        from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
@@ -409,9 +420,11 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
             "function_registry": {"process_image": "gpu-worker"},
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=test_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             from tetra_rp.runtime.lb_handler import create_lb_handler
 
             # Create deployed handler (not LiveLoadBalancer)
@@ -425,7 +438,7 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
 
     def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
         """Test that /manifest endpoint coexists with /ping health check."""
-        from unittest.mock import patch
+        from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
@@ -436,9 +449,11 @@ def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
             "function_registry": {},
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=test_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             from tetra_rp.runtime.lb_handler import create_lb_handler
 
             app = create_lb_handler({}, include_execute=False)
diff --git a/tests/unit/runtime/test_lb_handler.py b/tests/unit/runtime/test_lb_handler.py
index e02c6aa0..966a2ab6 100644
--- a/tests/unit/runtime/test_lb_handler.py
+++ b/tests/unit/runtime/test_lb_handler.py
@@ -11,6 +11,15 @@
 class TestManifestEndpoint:
     """Tests for GET /manifest endpoint."""
 
+    @pytest.fixture(autouse=True)
+    def reset_manifest_fetcher(self):
+        """Reset the global manifest fetcher before and after each test."""
+        import tetra_rp.runtime.lb_handler as lb_handler_module
+
+        lb_handler_module._manifest_fetcher = None
+        yield
+        lb_handler_module._manifest_fetcher = None
+
     @pytest.fixture
     def sample_manifest(self):
         """Sample manifest for testing."""
@@ -42,11 +51,15 @@ def test_manifest_endpoint_registered_when_env_var_true(
         self, sample_manifest, monkeypatch
     ):
         """Verify /manifest endpoint exists when FLASH_IS_MOTHERSHIP=true."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             routes = [route.path for route in app.routes]
 
@@ -77,11 +90,15 @@ def test_manifest_endpoint_returns_200_with_valid_manifest(
         self, sample_manifest, monkeypatch
     ):
         """Test happy path - endpoint returns 200 with valid manifest."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -92,9 +109,15 @@ def test_manifest_endpoint_returns_200_with_valid_manifest(
 
     def test_manifest_endpoint_returns_404_when_manifest_missing(self, monkeypatch):
         """Test endpoint returns 404 when manifest file not found."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch("tetra_rp.runtime.lb_handler.load_manifest", return_value={}):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value={})
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -103,19 +126,22 @@ def test_manifest_endpoint_returns_404_when_manifest_missing(self, monkeypatch):
             assert response.status_code == 404
             data = response.json()
             assert data["error"] == "Manifest not found"
-            assert "could not be loaded" in data["detail"]
+            assert "Could not load" in data["detail"]
 
     def test_manifest_endpoint_case_insensitive_env_var_true(
         self, sample_manifest, monkeypatch
     ):
         """Test endpoint registration with different case variations of 'true'."""
+        from unittest.mock import AsyncMock
+
         for env_value in ["True", "TRUE", "TrUe"]:
             monkeypatch.setenv("FLASH_IS_MOTHERSHIP", env_value)
 
-            with patch(
-                "tetra_rp.runtime.lb_handler.load_manifest",
-                return_value=sample_manifest,
-            ):
+            with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+                mock_fetcher = AsyncMock()
+                mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+                MockFetcher.return_value = mock_fetcher
+
                 app = create_lb_handler({}, include_execute=False)
                 routes = [route.path for route in app.routes]
 
@@ -133,11 +159,15 @@ def test_manifest_endpoint_case_insensitive_env_var_false(self, monkeypatch):
 
     def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch):
         """Test that manifest response has correct structure."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -153,6 +183,8 @@ def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch
 
     def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
         """Test endpoint behavior when manifest has no resources."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
         empty_manifest = {
@@ -162,9 +194,11 @@ def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
             "function_registry": {},
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=empty_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=empty_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -174,10 +208,16 @@ def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
             assert response.status_code == 404
 
     def test_manifest_endpoint_with_none_manifest(self, monkeypatch):
-        """Test endpoint behavior when load_manifest returns None."""
+        """Test endpoint behavior when get_manifest returns None."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch("tetra_rp.runtime.lb_handler.load_manifest", return_value=None):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=None)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -189,11 +229,15 @@ def test_manifest_endpoint_coexists_with_execute(
         self, sample_manifest, monkeypatch
     ):
         """Test that /manifest endpoint coexists with /execute endpoint."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=True)
             routes = [route.path for route in app.routes]
 
@@ -204,6 +248,8 @@ def test_manifest_endpoint_coexists_with_user_routes(
         self, sample_manifest, monkeypatch
     ):
         """Test that /manifest endpoint coexists with user-defined routes."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
         async def dummy_handler():
@@ -211,9 +257,11 @@ async def dummy_handler():
 
         route_registry = {("GET", "/api/health"): dummy_handler}
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler(route_registry, include_execute=False)
             routes = [route.path for route in app.routes]
 
@@ -222,11 +270,15 @@ async def dummy_handler():
 
     def test_manifest_endpoint_content_type(self, sample_manifest, monkeypatch):
         """Test that /manifest endpoint returns proper JSON content-type."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -236,6 +288,8 @@ def test_manifest_endpoint_content_type(self, sample_manifest, monkeypatch):
 
     def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
         """Test endpoint with complex multi-resource manifest."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
         complex_manifest = {
@@ -274,9 +328,11 @@ def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
             },
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=complex_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=complex_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -288,6 +344,34 @@ def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
             assert "gpu_config" in data["resources"]
             assert "cpu_config" in data["resources"]
 
+    def test_manifest_endpoint_uses_fetcher_with_caching(
+        self, sample_manifest, monkeypatch
+    ):
+        """Verify GET /manifest delegates to the shared ManifestFetcher on every request."""
+        from unittest.mock import AsyncMock
+
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
+            app = create_lb_handler({}, include_execute=False)
+            client = TestClient(app)
+
+            # First request
+            response1 = client.get("/manifest")
+            assert response1.status_code == 200
+            assert response1.json() == sample_manifest
+
+            # Second request - should reuse fetcher
+            response2 = client.get("/manifest")
+            assert response2.status_code == 200
+
+            # Verify fetcher was called (once per request)
+            assert mock_fetcher.get_manifest.call_count == 2
+
 
 class TestExecuteEndpointStillWorks:
     """Tests to ensure /execute endpoint still works after manifest changes."""
diff --git a/tests/unit/runtime/test_manifest_fetcher.py b/tests/unit/runtime/test_manifest_fetcher.py
new file mode 100644
index 00000000..f7ae27a1
--- /dev/null
+++ b/tests/unit/runtime/test_manifest_fetcher.py
@@ -0,0 +1,164 @@
+"""Unit tests for ManifestFetcher."""
+
+import asyncio
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from tetra_rp.runtime.manifest_fetcher import ManifestFetcher
+
+
+class TestManifestFetcher:
+    """Test ManifestFetcher caching and GQL integration."""
+
+    @pytest.fixture
+    def sample_manifest(self):
+        """Sample manifest for testing."""
+        return {
+            "version": "1.0",
+            "project_name": "test-app",
+            "resources": {"gpu_config": {"resource_type": "ServerlessEndpoint"}},
+            "function_registry": {"process_gpu": "gpu_config"},
+        }
+
+    @pytest.mark.asyncio
+    async def test_fetch_falls_back_to_local_file_when_gql_not_implemented(
+        self, sample_manifest, tmp_path
+    ):
+        """Verify fetcher falls back to local file when GQL raises NotImplementedError."""
+        # Write sample manifest to temp file
+        manifest_file = tmp_path / "flash_manifest.json"
+        with open(manifest_file, "w") as f:
+            json.dump(sample_manifest, f)
+
+        fetcher = ManifestFetcher(manifest_path=manifest_file)
+        result = await fetcher.get_manifest()
+
+        assert result == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_caching_prevents_multiple_fetches(self, sample_manifest, tmp_path):
+        """Verify cached manifest is reused within TTL."""
+        manifest_file = tmp_path / "flash_manifest.json"
+        with open(manifest_file, "w") as f:
+            json.dump(sample_manifest, f)
+
+        fetcher = ManifestFetcher(cache_ttl=300, manifest_path=manifest_file)
+
+        # First call - loads from file
+        result1 = await fetcher.get_manifest()
+        assert result1 == sample_manifest
+
+        # Second call immediately - should use cache
+        result2 = await fetcher.get_manifest()
+        assert result2 == sample_manifest
+        assert result2 is result1  # Same object reference (cached)
+
+    @pytest.mark.asyncio
+    async def test_cache_expiration_triggers_refetch(self, sample_manifest, tmp_path):
+        """Verify expired cache triggers new fetch."""
+        manifest_file = tmp_path / "flash_manifest.json"
+        with open(manifest_file, "w") as f:
+            json.dump(sample_manifest, f)
+
+        # Very short TTL
+        fetcher = ManifestFetcher(cache_ttl=0.1, manifest_path=manifest_file)
+
+        # First call
+        result1 = await fetcher.get_manifest()
+        assert result1 == sample_manifest
+
+        # Wait for cache to expire
+        await asyncio.sleep(0.2)
+
+        # Second call - cache expired, should refetch
+        result2 = await fetcher.get_manifest()
+        assert result2 == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_fetch_from_gql_raises_not_implemented(self):
+        """Verify GQL fetch placeholder raises NotImplementedError."""
+        fetcher = ManifestFetcher()
+
+        with pytest.raises(NotImplementedError, match="not yet implemented"):
+            await fetcher._fetch_from_gql()
+
+    @pytest.mark.asyncio
+    async def test_update_local_file_writes_manifest(self, sample_manifest, tmp_path):
+        """Verify manifest is written to local file."""
+        manifest_file = tmp_path / "flash_manifest.json"
+        fetcher = ManifestFetcher(manifest_path=manifest_file)
+
+        fetcher._update_local_file(sample_manifest)
+
+        # Verify file was written
+        assert manifest_file.exists()
+        with open(manifest_file) as f:
+            written = json.load(f)
+        assert written == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_cache_invalidation(self, sample_manifest, tmp_path):
+        """Verify manual cache invalidation works."""
+        manifest_file = tmp_path / "flash_manifest.json"
+        with open(manifest_file, "w") as f:
+            json.dump(sample_manifest, f)
+
+        fetcher = ManifestFetcher(cache_ttl=300, manifest_path=manifest_file)
+
+        # Load and cache
+        await fetcher.get_manifest()
+        assert fetcher._cached_manifest is not None
+
+        # Invalidate
+        fetcher.invalidate_cache()
+
+        # Next call should refetch (cache_loaded_at is 0)
+        assert fetcher._cache_loaded_at == 0
+
+    @pytest.mark.asyncio
+    async def test_concurrent_requests_use_lock(self, sample_manifest, tmp_path):
+        """Verify concurrent requests are properly synchronized."""
+        manifest_file = tmp_path / "flash_manifest.json"
+        with open(manifest_file, "w") as f:
+            json.dump(sample_manifest, f)
+
+        fetcher = ManifestFetcher(manifest_path=manifest_file)
+
+        # Make multiple concurrent requests
+        results = await asyncio.gather(
+            fetcher.get_manifest(),
+            fetcher.get_manifest(),
+            fetcher.get_manifest(),
+        )
+
+        # All should return the same manifest
+        assert all(r == sample_manifest for r in results)
+
+    @pytest.mark.asyncio
+    async def test_handles_missing_local_file_gracefully(self):
+        """Verify fetcher handles missing local file gracefully."""
+        # Point to non-existent file
+        fetcher = ManifestFetcher(manifest_path=Path("/nonexistent/manifest.json"))
+
+        # Should fall back to loading from cwd (which also won't exist in test)
+        result = await fetcher.get_manifest()
+
+        # load_manifest returns empty dict when no file is found
+        assert result == {"resources": {}, "function_registry": {}}
+
+    @pytest.mark.asyncio
+    async def test_mothership_id_passed_to_gql(self):
+        """Verify mothership_id is passed through to GQL fetch."""
+        fetcher = ManifestFetcher()
+
+        # Spy on _fetch_from_gql to capture arguments
+        with patch.object(fetcher, "_fetch_from_gql") as mock_fetch:
+            mock_fetch.side_effect = NotImplementedError()
+
+            await fetcher.get_manifest(mothership_id="test-123")
+
+            # Verify mothership_id was passed to fetch
+            mock_fetch.assert_called_once_with("test-123")

From 42edb23f039e4ebabf4e793e2b0ba1978b6328e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 08:38:00 -0800
Subject: [PATCH 04/12] refactor: rename directory terminology to manifest
 throughout codebase

- Rename _directory to _endpoint_registry in ServiceRegistry
- Rename directory_client parameter to manifest_client
- Change API endpoint from /directory to /manifest
- Change JSON response key from "directory" to "manifest"
- Update _ensure_directory_loaded() to _ensure_manifest_loaded()
- Update refresh_directory() to refresh_manifest()
- Update all tests and documentation to reflect new terminology
---
 docs/Cross_Endpoint_Routing.md                | 137 +++++++++---------
 docs/Load_Balancer_Endpoints.md               |   6 +-
 src/tetra_rp/runtime/config.py                |   2 +-
 src/tetra_rp/runtime/exceptions.py            |   2 +-
 src/tetra_rp/runtime/manifest_client.py       |  33 ++---
 src/tetra_rp/runtime/production_wrapper.py    |   9 +-
 src/tetra_rp/runtime/service_registry.py      |  64 ++++----
 .../test_cross_endpoint_routing.py            |  57 ++++----
 tests/unit/runtime/test_manifest_client.py    |  30 ++--
 tests/unit/runtime/test_production_wrapper.py |  10 +-
 tests/unit/runtime/test_service_registry.py   | 114 ++++++++-------
 11 files changed, 234 insertions(+), 230 deletions(-)

diff --git a/docs/Cross_Endpoint_Routing.md b/docs/Cross_Endpoint_Routing.md
index 1a4330c6..341d4b09 100644
--- a/docs/Cross_Endpoint_Routing.md
+++ b/docs/Cross_Endpoint_Routing.md
@@ -59,7 +59,7 @@ The manifest structure:
 
 #### 2. Set Environment Variables
 
-Configure the mothership directory URL (required for remote routing):
+Configure the mothership manifest URL (required for remote routing):
 
 ```bash
 # Required for cross-endpoint routing to work
@@ -149,7 +149,7 @@ The manifest file (`flash_manifest.json`) defines function routing and resource
 
 | Variable | Required | Purpose |
 |----------|----------|---------|
-| `FLASH_MOTHERSHIP_URL` | Yes* | URL of mothership directory service |
+| `FLASH_MOTHERSHIP_URL` | Yes* | URL of mothership manifest service |
 | `RUNPOD_ENDPOINT_ID` | No | Current endpoint ID (for tracing) |
 | `FLASH_MANIFEST_PATH` | No | Explicit path to manifest file |
 
@@ -255,7 +255,7 @@ Functions gracefully fall back to local execution if routing fails:
 async def critical_service(request: dict) -> dict:
     # Routes to critical-endpoint if:
     # - In function_registry
-    # - Directory available
+    # - Manifest available
     # Otherwise executes locally
     return handle_critical(request)
 
@@ -269,11 +269,11 @@ async def helper_function(x: int) -> int:
 
 #### Common Issues
 
-**Directory Unavailable**
+**Manifest Service Unavailable**
 
 If `FLASH_MOTHERSHIP_URL` is not set or unreachable:
 ```
-WARNING: FLASH_MOTHERSHIP_URL not set, directory unavailable
+WARNING: FLASH_MOTHERSHIP_URL not set, manifest service unavailable
 ```
 
 Functions default to local execution. Set the environment variable to enable routing.
@@ -342,8 +342,8 @@ graph TD
     A["Function Call"] -->|"intercepts stub layer"| B["ProductionWrapper"]
 
     B -->|"load service configuration"| C["ServiceRegistry"]
-    C -->|"if not cached"| D["DirectoryClient"]
-    D -->|"query mothership API"| E["Directory<br/>Endpoint URLs"]
+    C -->|"if not cached"| D["ManifestClient"]
+    D -->|"query mothership API"| E["Manifest<br/>Endpoint URLs"]
     E -->|"cache result<br/>TTL 300s"| C
 
     C -->|"lookup in manifest<br/>flash_manifest.json"| F{"Routing<br/>Decision"}
@@ -358,7 +358,7 @@ graph TD
     K --> L["Return Response<br/>base64 → cloudpickle"]
     L --> M["Deserialized Result"]
 
-    N["Error Handling:<br/>- RemoteExecutionError<br/>- SerializationError<br/>- DirectoryUnavailableError"] -.-> H
+    N["Error Handling:<br/>- RemoteExecutionError<br/>- SerializationError<br/>- ManifestServiceUnavailableError"] -.-> H
     N -.-> I
     N -.-> J
 
@@ -405,8 +405,8 @@ class ProductionWrapper:
         **kwargs: Any,
     ) -> Any:
         """Route function execution to local or remote endpoint."""
-        # 1. Load directory (if needed)
-        await self.service_registry._ensure_directory_loaded()
+        # 1. Load manifest (if needed)
+        await self.service_registry._ensure_manifest_loaded()
 
         # 2. Look up function in manifest
         resource = self.service_registry.get_resource_for_function(func.__name__)
@@ -450,30 +450,29 @@ class ServiceRegistry:
     """Service discovery and routing for cross-endpoint function calls."""
 
     def __init__(self, manifest_path: Optional[Path] = None):
-        """Initialize with manifest and optional directory client."""
+        """Initialize with manifest and optional manifest client."""
         self._load_manifest(manifest_path)
-        self._directory_client = DirectoryClient(...)
-        self._directory = {}  # Cached endpoint URLs
-        self._directory_lock = asyncio.Lock()
+        self._manifest_client = ManifestClient(...)
+        self._endpoint_registry = {}  # Cached endpoint URLs
+        self._endpoint_registry_lock = asyncio.Lock()
 
     def get_resource_for_function(self, func_name: str) -> Optional[ServerlessResource]:
         """Get resource config for function from manifest."""
-        # Returns None if:
-        # - Function not in manifest
-        # - Explicitly set to null in manifest
-
-        # Returns ServerlessResource if mapped in manifest
-        config = self._manifest["functions"].get(func_name)
+        # Returns the ServerlessResource if function is mapped in manifest
+        # Returns None if function maps to current endpoint
+        # Raises ValueError if function not found in manifest
+        config = self._manifest.function_registry.get(func_name)
         return self._resolve_resource(config)
 
-    async def _ensure_directory_loaded(self) -> None:
-        """Load directory from mothership with caching (TTL 300s)."""
-        if self._is_directory_fresh():
-            return
+    async def _ensure_manifest_loaded(self) -> None:
+        """Load manifest from mothership if cache expired or not loaded."""
+        async with self._endpoint_registry_lock:
+            now = time.time()
+            cache_age = now - self._endpoint_registry_loaded_at
 
-        async with self._directory_lock:
-            self._directory = await self._directory_client.get_directory()
-            self._directory_loaded_at = time.time()
+            if cache_age > self.cache_ttl:
+                self._endpoint_registry = await self._manifest_client.get_manifest()
+                self._endpoint_registry_loaded_at = now
 ```
 
 **Manifest Format**:
@@ -499,36 +498,36 @@ class ServiceRegistry:
 - `function_registry`: Maps function names to resource config names (null = local)
 - `resources`: Defines resource configurations and their handler details
 
-**Directory Cache**:
+**Manifest Cache**:
 - TTL: 300 seconds (configurable via `DEFAULT_CACHE_TTL`)
 - Thread-safe with `asyncio.Lock()`
-- Graceful fallback if directory unavailable
+- Graceful fallback if manifest service unavailable
 
-#### 3. DirectoryClient
+#### 3. ManifestClient
 
-**Location**: `src/tetra_rp/runtime/directory_client.py`
+**Location**: `src/tetra_rp/runtime/manifest_client.py`
 
-HTTP client for mothership directory service:
+HTTP client for mothership manifest service:
 
 ```python
-class DirectoryClient:
-    """HTTP client for querying mothership directory.
+class ManifestClient:
+    """HTTP client for querying mothership manifest.
 
-    The directory maps resource_config names to their endpoint URLs.
+    The manifest maps resource_config names to their endpoint URLs.
     Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
     """
 
-    async def get_directory(self) -> Dict[str, str]:
-        """Fetch endpoint directory from mothership.
+    async def get_manifest(self) -> Dict[str, str]:
+        """Fetch endpoint manifest from mothership.
 
         Returns:
             Dictionary mapping resource_config_name → endpoint_url.
             Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
 
         Raises:
-            DirectoryUnavailableError: If directory service unavailable after retries.
+            ManifestServiceUnavailableError: If manifest service unavailable after retries.
         """
-        # Queries {mothership_url}/directory endpoint with retry logic
+        # Queries {mothership_url}/manifest endpoint with retry logic
 ```
 
 **Configuration**:
@@ -561,8 +560,8 @@ class ManifestError(FlashRuntimeError):
     """Raised when manifest is invalid, missing, or has unexpected structure."""
     pass
 
-class DirectoryUnavailableError(FlashRuntimeError):
-    """Raised when directory service is unavailable."""
+class ManifestServiceUnavailableError(FlashRuntimeError):
+    """Raised when manifest service is unavailable."""
     pass
 ```
 
@@ -576,8 +575,8 @@ except SerializationError as e:
     logger.error(f"Serialization failed: {e}")
 except ManifestError as e:
     logger.error(f"Manifest configuration error: {e}")
-except DirectoryUnavailableError as e:
-    logger.warning(f"Directory unavailable, using fallback")
+except ManifestServiceUnavailableError as e:
+    logger.warning(f"Manifest service unavailable, using fallback: {e}")
 ```
 
 ### Integration Points
@@ -613,7 +612,7 @@ Functions retrieve remote endpoint info from ResourceManager:
 # ServiceRegistry uses ResourceManager to find endpoint URLs
 resource_manager = ResourceManager()
 endpoint = resource_manager.get_resource_for_function("function_name")
-endpoint_url = endpoint.url  # e.g., "https://api.runpod.io/v1/abc123"
+endpoint_url = endpoint.url  # e.g., "https://api.runpod.io/v2/abc123"
 ```
 
 ### Configuration
@@ -671,8 +670,8 @@ flowchart TD
     B["ProductionWrapper.wrap_function_execution()"]
     C["ServiceRegistry.get_resource_for_function()"]
     D["Manifest Lookup<br/>resource found"]
-    E["Ensure Directory Loaded"]
-    F["DirectoryClient.get_endpoints()"]
+    E["Ensure Manifest Loaded"]
+    F["ManifestClient.get_manifest()"]
     G["Get Remote Endpoint URL"]
     H["Serialize Arguments<br/>cloudpickle → base64"]
     I["HTTP POST to Remote Endpoint"]
@@ -720,11 +719,11 @@ flowchart TD
 
 #### 2. Thread-Safe Async Caching
 
-**Decision**: Use `asyncio.Lock()` for directory cache synchronization
+**Decision**: Use `asyncio.Lock()` for manifest cache synchronization
 
 **Rationale**:
 - Prevents thundering herd on cache expiry
-- Efficient - only one coroutine loads directory
+- Efficient - only one coroutine loads manifest
 - Simple to understand and maintain
 - Follows async/await patterns
 
@@ -740,12 +739,12 @@ flowchart TD
 
 #### 4. Graceful Fallback
 
-**Decision**: Default to local execution if directory unavailable
+**Decision**: Default to local execution if manifest service unavailable
 
 **Rationale**:
 - Maintains application resilience
 - Doesn't fail if mothership unreachable
-- Allows local testing without directory
+- Allows local testing without manifest service
 - Gradual degradation vs catastrophic failure
 
 #### 5. Transparent Routing
@@ -779,15 +778,15 @@ class JsonSerializer:
 2. Update ProductionWrapper to select serializer based on config
 3. Add tests for new format
 
-#### Adding New Directory Backends
+#### Adding New Manifest Backends
 
-To support directories other than mothership:
+To support manifest backends other than mothership:
 
-1. Create client class with `get_directory()` method:
+1. Create client class with `get_manifest()` method:
 ```python
-class CustomDirectoryClient:
-    async def get_directory(self) -> Dict[str, str]:
-        """Fetch directory mapping resource_config_name → endpoint_url."""
+class CustomManifestClient:
+    async def get_manifest(self) -> Dict[str, str]:
+        """Fetch manifest mapping resource_config_name → endpoint_url."""
         # Implementation specific to backend
         return {"resource_name": "https://endpoint.url"}
 ```
@@ -796,11 +795,11 @@ class CustomDirectoryClient:
 ```python
 registry = ServiceRegistry(
     manifest_path=Path("manifest.json"),
-    directory_client=CustomDirectoryClient(...)
+    manifest_client=CustomManifestClient(...)
 )
 ```
 
-3. Update environment variable handling if needed (CustomDirectoryClient can read from env vars)
+3. Update environment variable handling if needed (CustomManifestClient can read from env vars)
 
 #### Adding Routing Policies
 
@@ -830,11 +829,11 @@ class RoutingPolicy:
 **ServiceRegistry Tests** (`tests/unit/runtime/test_service_registry.py`):
 - Manifest loading
 - Resource lookup
-- Directory caching
+- Manifest caching
 - TTL expiry
 - Lock behavior under concurrency
 
-**DirectoryClient Tests** (`tests/unit/runtime/test_directory_client.py`):
+**ManifestClient Tests** (`tests/unit/runtime/test_manifest_client.py`):
 - Successful HTTP requests
 - Error handling
 - Retry logic
@@ -855,7 +854,7 @@ class RoutingPolicy:
 - End-to-end remote execution
 - Function call across endpoints
 - Error handling in real scenarios
-- Directory caching behavior
+- Manifest caching behavior
 - Serialization of complex objects
 
 #### Test Patterns
@@ -904,7 +903,7 @@ logging.basicConfig(level=logging.DEBUG)
 
 # ProductionWrapper logs
 # ServiceRegistry logs
-# DirectoryClient logs
+# ManifestClient logs
 ```
 
 #### Common Debug Scenarios
@@ -914,8 +913,8 @@ logging.basicConfig(level=logging.DEBUG)
 # Check manifest
 print(registry._manifest)
 
-# Check directory
-print(registry._directory)
+# Check cached endpoint URLs
+print(registry._endpoint_registry)
 
 # Check resource lookup
 resource = registry.get_resource_for_function("function_name")
@@ -932,16 +931,16 @@ except Exception as e:
     print(f"Not serializable: {e}")
 ```
 
-**Directory unavailable**:
+**Manifest service unavailable**:
 ```python
 # Check environment variables
 import os
 print(f"FLASH_MOTHERSHIP_URL: {os.getenv('FLASH_MOTHERSHIP_URL')}")
 print(f"RUNPOD_ENDPOINT_ID: {os.getenv('RUNPOD_ENDPOINT_ID')}")
 
-# Check directory client directly
-client = DirectoryClient(mothership_url=...)
-endpoints = await client.get_endpoints()
+# Check manifest client directly
+client = ManifestClient(mothership_url=...)
+endpoints = await client.get_manifest()
 ```
 
 ## Manifest Synchronization with RunPod GraphQL API
@@ -1044,7 +1043,7 @@ prepares for GQL-based architecture with improved caching and error handling.
 ### Design Focus
 
 1. **Transparent Routing**: Functions route automatically without code changes
-2. **Graceful Degradation**: Defaults to local execution if directory unavailable
+2. **Graceful Degradation**: Defaults to local execution if manifest service unavailable
 3. **Type Safety**: Full type hints throughout for IDE support and static analysis
 4. **Thread-Safe Async**: Proper `asyncio.Lock()` usage for concurrent operations
 5. **Clear Error Hierarchy**: Custom exceptions provide actionable error context
@@ -1055,7 +1054,7 @@ Cross-endpoint routing provides:
 
 - **Transparency**: Functions route automatically without manual HTTP calls
 - **Flexibility**: Manifest-based routing enables environment-specific configurations
-- **Resilience**: Graceful fallback to local execution if directory unavailable
+- **Resilience**: Graceful fallback to local execution if manifest service unavailable
 - **Simplicity**: No changes to function code or signatures
 - **Debuggability**: Clear error messages and logging for troubleshooting
 
diff --git a/docs/Load_Balancer_Endpoints.md b/docs/Load_Balancer_Endpoints.md
index ea551884..62db7c7a 100644
--- a/docs/Load_Balancer_Endpoints.md
+++ b/docs/Load_Balancer_Endpoints.md
@@ -35,9 +35,9 @@ Load-balanced endpoints require different provisioning and health check logic th
 
 ### Why This Matters
 
-The Mothership needs to serve as a directory server for child endpoints. This requires:
+The Mothership needs to serve as a manifest server for child endpoints. This requires:
 - HTTP-based service discovery (not queue-based)
-- Ability to expose custom endpoints (`/directory`, `/ping`)
+- Ability to expose custom endpoints (`/manifest`, `/ping`)
 - Health checking to verify children are ready before routing traffic
 
 ## Architecture
@@ -401,6 +401,6 @@ endpoint = LoadBalancerSlsResource(
 ## Next Steps
 
 - **Mothership integration**: Use LoadBalancerSlsResource for Mothership endpoints
-- **Service discovery**: Implement `/directory` endpoint for child endpoint discovery
+- **Service discovery**: Implement `/manifest` endpoint for child endpoint discovery
 - **Auto-provisioning**: Automatic child endpoint deployment on Mothership startup
 - **Cross-endpoint routing**: Route requests between endpoints using service discovery
diff --git a/src/tetra_rp/runtime/config.py b/src/tetra_rp/runtime/config.py
index c0efc11f..974bb5d5 100644
--- a/src/tetra_rp/runtime/config.py
+++ b/src/tetra_rp/runtime/config.py
@@ -5,7 +5,7 @@
 DEFAULT_MAX_RETRIES = 3
 DEFAULT_BACKOFF_BASE = 2
 
-# Directory cache configuration
+# Manifest cache configuration
 DEFAULT_CACHE_TTL = 300  # seconds
 
 # Serialization limits
diff --git a/src/tetra_rp/runtime/exceptions.py b/src/tetra_rp/runtime/exceptions.py
index fec800fd..e072a6ea 100644
--- a/src/tetra_rp/runtime/exceptions.py
+++ b/src/tetra_rp/runtime/exceptions.py
@@ -26,6 +26,6 @@ class ManifestError(FlashRuntimeError):
 
 
 class ManifestServiceUnavailableError(FlashRuntimeError):
-    """Raised when manifest directory service is unavailable."""
+    """Raised when manifest service is unavailable."""
 
     pass
diff --git a/src/tetra_rp/runtime/manifest_client.py b/src/tetra_rp/runtime/manifest_client.py
index bfe69ca8..db845a63 100644
--- a/src/tetra_rp/runtime/manifest_client.py
+++ b/src/tetra_rp/runtime/manifest_client.py
@@ -1,4 +1,4 @@
-"""HTTP client for mothership manifest directory API."""
+"""HTTP client for mothership manifest API."""
 
 import asyncio
 import logging
@@ -17,13 +17,12 @@
 
 
 class ManifestClient:
-    """HTTP client for querying mothership manifest directory service.
+    """HTTP client for querying mothership manifest service.
 
-    Fetches the endpoint registry that maps resource_config names to their
-    deployment URLs. This is the "manifest directory service" - an endpoint
-    registry showing where resources are deployed.
+    Fetches the manifest (endpoint registry) that maps resource_config names to
+    their deployment URLs. The manifest provides service discovery for remote
+    resource endpoints.
 
-    The directory maps resource_config names to their endpoint URLs.
     Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
     """
 
@@ -55,15 +54,15 @@ def __init__(
         self.max_retries = max_retries
         self._client: Optional[httpx.AsyncClient] = None
 
-    async def get_directory(self) -> Dict[str, str]:
-        """Fetch endpoint directory from mothership.
+    async def get_manifest(self) -> Dict[str, str]:
+        """Fetch endpoint manifest from mothership.
 
         Returns:
             Dictionary mapping resource_config_name → endpoint_url.
             Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
 
         Raises:
-            ManifestServiceUnavailableError: If manifest directory service unavailable after retries.
+            ManifestServiceUnavailableError: If manifest service unavailable after retries.
         """
         if httpx is None:
             raise ImportError(
@@ -76,25 +75,25 @@ async def get_directory(self) -> Dict[str, str]:
             try:
                 client = await self._get_client()
                 response = await client.get(
-                    f"{self.mothership_url}/directory",
+                    f"{self.mothership_url}/manifest",
                     timeout=self.timeout,
                 )
 
                 if response.status_code >= 400:
                     raise ManifestServiceUnavailableError(
-                        f"Directory API returned {response.status_code}: "
+                        f"Manifest API returned {response.status_code}: "
                         f"{response.text[:200]}"
                     )
 
                 data = response.json()
-                if "directory" not in data:
+                if "manifest" not in data:
                     raise ManifestServiceUnavailableError(
-                        "Invalid directory response: missing 'directory' key"
+                        "Invalid manifest response: missing 'manifest' key"
                     )
 
-                directory = data["directory"]
-                logger.debug(f"Directory loaded: {len(directory)} endpoints")
-                return directory
+                manifest = data["manifest"]
+                logger.debug(f"Manifest loaded: {len(manifest)} endpoints")
+                return manifest
 
             except (
                 asyncio.TimeoutError,
@@ -112,7 +111,7 @@ async def get_directory(self) -> Dict[str, str]:
                     continue
 
         raise ManifestServiceUnavailableError(
-            f"Failed to fetch manifest directory after {self.max_retries} attempts: {last_exception}"
+            f"Failed to fetch manifest after {self.max_retries} attempts: {last_exception}"
         )
 
     async def _get_client(self) -> httpx.AsyncClient:
diff --git a/src/tetra_rp/runtime/production_wrapper.py b/src/tetra_rp/runtime/production_wrapper.py
index 65ce815d..22a48f9e 100644
--- a/src/tetra_rp/runtime/production_wrapper.py
+++ b/src/tetra_rp/runtime/production_wrapper.py
@@ -26,7 +26,6 @@ def __init__(self, service_registry: ServiceRegistry):
             service_registry: Service registry for routing decisions.
         """
         self.service_registry = service_registry
-        self._directory_loaded = False
 
     async def wrap_function_execution(
         self,
@@ -57,8 +56,8 @@ async def wrap_function_execution(
         """
         function_name = func.__name__
 
-        # Ensure directory is loaded
-        await self.service_registry._ensure_directory_loaded()
+        # Ensure manifest is loaded
+        await self.service_registry._ensure_manifest_loaded()
 
         # Determine routing
         try:
@@ -116,8 +115,8 @@ async def wrap_class_method_execution(
         Raises:
             Exception: If execution fails.
         """
-        # Ensure directory is loaded
-        await self.service_registry._ensure_directory_loaded()
+        # Ensure manifest is loaded
+        await self.service_registry._ensure_manifest_loaded()
 
         class_name = getattr(request, "class_name", None)
 
diff --git a/src/tetra_rp/runtime/service_registry.py b/src/tetra_rp/runtime/service_registry.py
index ddcbcd84..2a2fb865 100644
--- a/src/tetra_rp/runtime/service_registry.py
+++ b/src/tetra_rp/runtime/service_registry.py
@@ -22,14 +22,14 @@ class ServiceRegistry:
     """Service discovery and routing for cross-endpoint function calls.
 
     Loads manifest to map functions to resource configs, queries mothership
-    directory for endpoint URLs, and determines if function calls are local
+    manifest for endpoint URLs, and determines if function calls are local
     or remote.
     """
 
     def __init__(
         self,
         manifest_path: Optional[Path] = None,
-        directory_client: Optional[ManifestClient] = None,
+        manifest_client: Optional[ManifestClient] = None,
         cache_ttl: int = DEFAULT_CACHE_TTL,
     ):
         """Initialize service registry.
@@ -37,17 +37,17 @@ def __init__(
         Args:
             manifest_path: Path to flash_manifest.json. Defaults to
                 FLASH_MANIFEST_PATH env var or auto-detection.
-            directory_client: Manifest service client for mothership API. If None, creates one
+            manifest_client: Manifest service client for mothership API. If None, creates one
                 from FLASH_MOTHERSHIP_URL env var.
-            cache_ttl: Directory cache lifetime in seconds (default: 300).
+            cache_ttl: Manifest cache lifetime in seconds (default: 300).
 
         Raises:
             FileNotFoundError: If manifest_path doesn't exist.
-            ValueError: If required env vars missing for directory_client.
+            ValueError: If required env vars missing for manifest_client.
         """
         self.cache_ttl = cache_ttl
-        self._directory: Dict[str, str] = {}
-        self._directory_loaded_at = 0.0
+        self._endpoint_registry: Dict[str, str] = {}
+        self._endpoint_registry_loaded_at = 0.0
         self._manifest: Manifest = Manifest(
             version="1.0",
             generated_at="",
@@ -55,21 +55,23 @@ def __init__(
             function_registry={},
             resources={},
         )
-        self._directory_lock = asyncio.Lock()
+        self._endpoint_registry_lock = asyncio.Lock()
 
         # Load manifest
         self._load_manifest(manifest_path)
 
         # Initialize manifest client
-        if directory_client is None:
+        if manifest_client is None:
             mothership_url = os.getenv("FLASH_MOTHERSHIP_URL")
             if mothership_url:
-                directory_client = ManifestClient(mothership_url=mothership_url)
+                manifest_client = ManifestClient(mothership_url=mothership_url)
             else:
-                logger.warning("FLASH_MOTHERSHIP_URL not set, directory unavailable")
-                directory_client = None
+                logger.warning(
+                    "FLASH_MOTHERSHIP_URL not set, manifest service unavailable"
+                )
+                manifest_client = None
 
-        self._directory_client = directory_client
+        self._manifest_client = manifest_client
         self._current_endpoint = os.getenv("RUNPOD_ENDPOINT_ID")
 
     def _load_manifest(self, manifest_path: Optional[Path]) -> None:
@@ -127,30 +129,30 @@ def _load_manifest(self, manifest_path: Optional[Path]) -> None:
             resources={},
         )
 
-    async def _ensure_directory_loaded(self) -> None:
-        """Load directory from mothership if cache expired or not loaded."""
-        async with self._directory_lock:
+    async def _ensure_manifest_loaded(self) -> None:
+        """Load manifest from mothership if cache expired or not loaded."""
+        async with self._endpoint_registry_lock:
             now = time.time()
-            cache_age = now - self._directory_loaded_at
+            cache_age = now - self._endpoint_registry_loaded_at
 
             if cache_age > self.cache_ttl:
-                if self._directory_client is None:
-                    logger.debug("Directory client not available, skipping refresh")
+                if self._manifest_client is None:
+                    logger.debug("Manifest client not available, skipping refresh")
                     return
 
                 try:
-                    self._directory = await self._directory_client.get_directory()
-                    self._directory_loaded_at = now
+                    self._endpoint_registry = await self._manifest_client.get_manifest()
+                    self._endpoint_registry_loaded_at = now
                     logger.debug(
-                        f"Directory loaded: {len(self._directory)} endpoints, "
+                        f"Manifest loaded: {len(self._endpoint_registry)} endpoints, "
                         f"cache TTL {self.cache_ttl}s"
                     )
                 except ManifestServiceUnavailableError as e:
                     logger.warning(
-                        f"Failed to load manifest directory: {e}. "
+                        f"Failed to load manifest: {e}. "
                         f"Cross-endpoint routing unavailable."
                     )
-                    self._directory = {}
+                    self._endpoint_registry = {}
 
     def get_endpoint_for_function(self, function_name: str) -> Optional[str]:
         """Get endpoint URL for a function.
@@ -181,12 +183,12 @@ def get_endpoint_for_function(self, function_name: str) -> Optional[str]:
         if resource_config_name == self._current_endpoint:
             return None
 
-        # Check directory for remote endpoint URL
-        endpoint_url = self._directory.get(resource_config_name)
+        # Check cached endpoint registry for remote endpoint URL
+        endpoint_url = self._endpoint_registry.get(resource_config_name)
         if not endpoint_url:
             logger.debug(
-                f"Endpoint URL for '{resource_config_name}' not in directory. "
-                f"Directory has: {list(self._directory.keys())}"
+                f"Endpoint URL for '{resource_config_name}' not in manifest. "
+                f"Manifest has: {list(self._endpoint_registry.keys())}"
             )
 
         return endpoint_url
@@ -260,9 +262,9 @@ def get_current_endpoint_id(self) -> Optional[str]:
         """
         return self._current_endpoint
 
-    def refresh_directory(self) -> None:
-        """Force refresh directory from mothership on next access."""
-        self._directory_loaded_at = 0
+    def refresh_manifest(self) -> None:
+        """Force refresh manifest from mothership on next access."""
+        self._endpoint_registry_loaded_at = 0
 
     def get_manifest(self) -> Manifest:
         """Get loaded manifest.
diff --git a/tests/integration/test_cross_endpoint_routing.py b/tests/integration/test_cross_endpoint_routing.py
index 1b67967e..aab993d1 100644
--- a/tests/integration/test_cross_endpoint_routing.py
+++ b/tests/integration/test_cross_endpoint_routing.py
@@ -74,7 +74,7 @@ async def test_local_function_execution(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -88,12 +88,12 @@ async def test_local_function_execution(self, manifest):
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
 
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
 
-                registry._directory_client = mock_dir_client
-                registry._directory = directory
-                registry._directory_loaded_at = float("inf")
+                registry._manifest_client = mock_manifest_client
+                registry._endpoint_registry = endpoint_registry
+                registry._endpoint_registry_loaded_at = float("inf")
 
                 wrapper = ProductionWrapper(registry)
 
@@ -128,7 +128,7 @@ async def test_remote_function_execution_routing(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -141,11 +141,11 @@ async def test_remote_function_execution_routing(self, manifest):
 
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
-                registry._directory_client = mock_dir_client
-                registry._directory = directory
-                registry._directory_loaded_at = float("inf")
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
+                registry._manifest_client = mock_manifest_client
+                registry._endpoint_registry = endpoint_registry
+                registry._endpoint_registry_loaded_at = float("inf")
 
                 # Mock ServerlessResource
                 mock_resource = AsyncMock()
@@ -183,8 +183,8 @@ async def cpu_task(x):
                 manifest_path.unlink()
 
     @pytest.mark.asyncio
-    async def test_directory_loading_on_demand(self, manifest):
-        """Test that directory is loaded on-demand before routing decision."""
+    async def test_manifest_loading_on_demand(self, manifest):
+        """Test that manifest is loaded on-demand before routing decision."""
         with patch.dict(
             "os.environ",
             {
@@ -192,7 +192,7 @@ async def test_directory_loading_on_demand(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -205,11 +205,11 @@ async def test_directory_loading_on_demand(self, manifest):
 
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
-                registry._directory_client = mock_dir_client
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
+                registry._manifest_client = mock_manifest_client
 
-                assert registry._directory == {}
+                assert registry._endpoint_registry == {}
 
                 wrapper = ProductionWrapper(registry)
 
@@ -230,8 +230,11 @@ async def cpu_task(x):
                         original_stub, cpu_task, None, None, True
                     )
 
-                assert len(registry._directory) > 0
-                assert registry._directory["gpu_config"] == "https://gpu.example.com"
+                assert len(registry._endpoint_registry) > 0
+                assert (
+                    registry._endpoint_registry["gpu_config"]
+                    == "https://gpu.example.com"
+                )
 
             finally:
                 manifest_path.unlink()
@@ -246,7 +249,7 @@ async def test_error_handling_in_remote_execution(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -259,11 +262,11 @@ async def test_error_handling_in_remote_execution(self, manifest):
 
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
-                registry._directory_client = mock_dir_client
-                registry._directory = directory
-                registry._directory_loaded_at = float("inf")
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
+                registry._manifest_client = mock_manifest_client
+                registry._endpoint_registry = endpoint_registry
+                registry._endpoint_registry_loaded_at = float("inf")
 
                 # Mock ServerlessResource that returns error
                 mock_resource = AsyncMock()
diff --git a/tests/unit/runtime/test_manifest_client.py b/tests/unit/runtime/test_manifest_client.py
index 27bb12cc..be48a38c 100644
--- a/tests/unit/runtime/test_manifest_client.py
+++ b/tests/unit/runtime/test_manifest_client.py
@@ -21,7 +21,7 @@ def mock_response(self):
         response = MagicMock()
         response.status_code = 200
         response.json.return_value = {
-            "directory": {
+            "manifest": {
                 "gpu_config": "https://api.runpod.io/v2/gpu123",
                 "cpu_config": "https://api.runpod.io/v2/cpu456",
             },
@@ -53,8 +53,8 @@ def test_init_explicit_over_env(self):
             assert client.mothership_url == "https://explicit.com"
 
     @pytest.mark.asyncio
-    async def test_get_directory_success(self, mock_response):
-        """Test successful directory fetch."""
+    async def test_get_manifest_success(self, mock_response):
+        """Test successful manifest fetch."""
         client = ManifestClient(mothership_url="https://mothership.example.com")
 
         with patch("tetra_rp.runtime.manifest_client.httpx"):
@@ -63,15 +63,15 @@ async def test_get_directory_success(self, mock_response):
             mock_client.get.return_value = mock_response
 
             with patch.object(client, "_get_client", return_value=mock_client):
-                directory = await client.get_directory()
+                manifest = await client.get_manifest()
 
-                assert directory == {
+                assert manifest == {
                     "gpu_config": "https://api.runpod.io/v2/gpu123",
                     "cpu_config": "https://api.runpod.io/v2/cpu456",
                 }
 
     @pytest.mark.asyncio
-    async def test_get_directory_http_error(self):
+    async def test_get_manifest_http_error(self):
         """Test handling of HTTP errors."""
         client = ManifestClient(mothership_url="https://mothership.example.com")
 
@@ -86,10 +86,10 @@ async def test_get_directory_http_error(self):
             mock_get_client.return_value = mock_http_client
 
             with pytest.raises(ManifestServiceUnavailableError, match="500"):
-                await client.get_directory()
+                await client.get_manifest()
 
     @pytest.mark.asyncio
-    async def test_get_directory_timeout(self):
+    async def test_get_manifest_timeout(self):
         """Test handling of request timeout."""
         client = ManifestClient(
             mothership_url="https://mothership.example.com", timeout=0.1
@@ -104,10 +104,10 @@ async def test_get_directory_timeout(self):
             with pytest.raises(
                 ManifestServiceUnavailableError, match="after \\d+ attempts"
             ):
-                await client.get_directory()
+                await client.get_manifest()
 
     @pytest.mark.asyncio
-    async def test_get_directory_retry(self):
+    async def test_get_manifest_retry(self):
         """Test retry logic on transient failure."""
         client = ManifestClient(
             mothership_url="https://mothership.example.com", max_retries=3
@@ -115,7 +115,7 @@ async def test_get_directory_retry(self):
 
         response = MagicMock()
         response.status_code = 200
-        response.json.return_value = {"directory": {"gpu": "https://gpu.example.com"}}
+        response.json.return_value = {"manifest": {"gpu": "https://gpu.example.com"}}
 
         with patch.object(client, "_get_client") as mock_get_client:
             mock_http_client = AsyncMock()
@@ -133,12 +133,12 @@ async def test_get_directory_retry(self):
                 "tetra_rp.runtime.manifest_client.asyncio.sleep",
                 new_callable=AsyncMock,
             ):
-                directory = await client.get_directory()
-                assert directory == {"gpu": "https://gpu.example.com"}
+                manifest = await client.get_manifest()
+                assert manifest == {"gpu": "https://gpu.example.com"}
                 assert mock_http_client.get.call_count == 3
 
     @pytest.mark.asyncio
-    async def test_get_directory_exhaust_retries(self):
+    async def test_get_manifest_exhaust_retries(self):
         """Test failure after exhausting retries."""
         client = ManifestClient(
             mothership_url="https://mothership.example.com", max_retries=2
@@ -157,7 +157,7 @@ async def test_get_directory_exhaust_retries(self):
                 with pytest.raises(
                     ManifestServiceUnavailableError, match="after 2 attempts"
                 ):
-                    await client.get_directory()
+                    await client.get_manifest()
 
     @pytest.mark.asyncio
     async def test_context_manager(self):
diff --git a/tests/unit/runtime/test_production_wrapper.py b/tests/unit/runtime/test_production_wrapper.py
index cc628047..bda5c31d 100644
--- a/tests/unit/runtime/test_production_wrapper.py
+++ b/tests/unit/runtime/test_production_wrapper.py
@@ -19,7 +19,7 @@ class TestProductionWrapper:
     def mock_registry(self):
         """Mock service registry."""
         registry = AsyncMock(spec=ServiceRegistry)
-        registry._ensure_directory_loaded = AsyncMock()
+        registry._ensure_manifest_loaded = AsyncMock()
         return registry
 
     @pytest.fixture
@@ -135,8 +135,8 @@ async def test_wrap_function_remote_error(
             )
 
     @pytest.mark.asyncio
-    async def test_wrap_function_loads_directory(self, wrapper, mock_registry):
-        """Test that directory is loaded before routing decision."""
+    async def test_wrap_function_loads_manifest(self, wrapper, mock_registry):
+        """Test that manifest is loaded before routing decision."""
         mock_registry.get_resource_for_function.return_value = None
 
         async def sample_func():
@@ -147,8 +147,8 @@ async def sample_func():
             original_stub, sample_func, None, None, True
         )
 
-        # Should ensure directory is loaded
-        mock_registry._ensure_directory_loaded.assert_called_once()
+        # Should ensure manifest is loaded
+        mock_registry._ensure_manifest_loaded.assert_called_once()
 
     @pytest.mark.asyncio
     async def test_wrap_class_method_local(self, wrapper, mock_registry, original_stub):
diff --git a/tests/unit/runtime/test_service_registry.py b/tests/unit/runtime/test_service_registry.py
index 8dc88aa1..c7c83aaf 100644
--- a/tests/unit/runtime/test_service_registry.py
+++ b/tests/unit/runtime/test_service_registry.py
@@ -103,19 +103,19 @@ def test_is_local_function_local(self, manifest_file):
             assert registry.is_local_function("inference") is True
 
     def test_is_local_function_remote(self, manifest_file):
-        """Test determining remote function (with directory loaded)."""
+        """Test determining remote function (with manifest loaded)."""
         with patch.dict(os.environ, {"RUNPOD_ENDPOINT_ID": "gpu_config"}):
             mock_client = AsyncMock()
-            mock_client.get_directory.return_value = {
+            mock_client.get_manifest.return_value = {
                 "cpu_config": "https://cpu.example.com"
             }
 
             registry = ServiceRegistry(
-                manifest_path=manifest_file, directory_client=mock_client
+                manifest_path=manifest_file, manifest_client=mock_client
             )
-            # After directory is loaded, CPU tasks should be recognized as remote
+            # After manifest is loaded, CPU tasks should be recognized as remote
             # (but is_local_function doesn't async load, so returns True for now)
-            # This is actually expected behavior - sync method can't load async directory
+            # This is actually expected behavior - sync method can't load async manifest
             assert registry.is_local_function("preprocess") is True
 
     def test_is_local_function_not_in_manifest(self, manifest_file):
@@ -131,11 +131,11 @@ def test_get_endpoint_for_function_local(self, manifest_file):
             endpoint = registry.get_endpoint_for_function("gpu_task")
             assert endpoint is None  # Local returns None
 
-    def test_get_endpoint_for_function_remote_no_directory(self, manifest_file):
-        """Test getting endpoint for remote function without directory."""
+    def test_get_endpoint_for_function_remote_no_manifest(self, manifest_file):
+        """Test getting endpoint for remote function without manifest."""
         with patch.dict(os.environ, {"RUNPOD_ENDPOINT_ID": "gpu_config"}):
             registry = ServiceRegistry(manifest_path=manifest_file)
-            # CPU function is remote, but no directory loaded
+            # CPU function is remote, but no manifest loaded
             endpoint = registry.get_endpoint_for_function("preprocess")
             assert endpoint is None
 
@@ -157,15 +157,17 @@ def test_get_resource_for_function_remote(self, manifest_file):
         """Test getting ServerlessResource for remote function."""
         with patch.dict(os.environ, {"RUNPOD_ENDPOINT_ID": "gpu_config"}):
             mock_client = AsyncMock()
-            mock_client.get_directory.return_value = {
+            mock_client.get_manifest.return_value = {
                 "cpu_config": "https://api.runpod.io/v2/abc123"
             }
 
             registry = ServiceRegistry(
-                manifest_path=manifest_file, directory_client=mock_client
+                manifest_path=manifest_file, manifest_client=mock_client
             )
-            # Manually set directory to simulate loaded state
-            registry._directory = {"cpu_config": "https://api.runpod.io/v2/abc123"}
+            # Manually set endpoint registry to simulate loaded state
+            registry._endpoint_registry = {
+                "cpu_config": "https://api.runpod.io/v2/abc123"
+            }
 
             resource = registry.get_resource_for_function("preprocess")
 
@@ -182,77 +184,77 @@ def test_get_resource_for_function_not_in_manifest(self, manifest_file):
             registry.get_resource_for_function("unknown_function")
 
     @pytest.mark.asyncio
-    async def test_ensure_directory_loaded(self, manifest_file):
-        """Test lazy loading of directory from client."""
-        mock_directory = {
+    async def test_ensure_manifest_loaded(self, manifest_file):
+        """Test lazy loading of manifest from client."""
+        mock_endpoint_registry = {
             "gpu_config": "https://gpu.example.com",
             "cpu_config": "https://cpu.example.com",
         }
 
         mock_client = AsyncMock()
-        mock_client.get_directory.return_value = mock_directory
+        mock_client.get_manifest.return_value = mock_endpoint_registry
 
         registry = ServiceRegistry(
-            manifest_path=manifest_file, directory_client=mock_client, cache_ttl=10
+            manifest_path=manifest_file, manifest_client=mock_client, cache_ttl=10
         )
 
-        # Directory not loaded yet
-        assert registry._directory == {}
+        # Endpoint registry not loaded yet
+        assert registry._endpoint_registry == {}
 
-        # Load directory
-        await registry._ensure_directory_loaded()
+        # Load manifest
+        await registry._ensure_manifest_loaded()
 
-        # Should now have loaded directory
-        assert registry._directory == mock_directory
-        mock_client.get_directory.assert_called_once()
+        # Should now have loaded endpoint registry
+        assert registry._endpoint_registry == mock_endpoint_registry
+        mock_client.get_manifest.assert_called_once()
 
     @pytest.mark.asyncio
-    async def test_ensure_directory_cache_respects_ttl(self, manifest_file):
-        """Test that directory cache respects TTL."""
-        mock_directory = {"gpu_config": "https://gpu.example.com"}
+    async def test_ensure_manifest_cache_respects_ttl(self, manifest_file):
+        """Test that manifest cache respects TTL."""
+        mock_endpoint_registry = {"gpu_config": "https://gpu.example.com"}
 
         mock_client = AsyncMock()
-        mock_client.get_directory.return_value = mock_directory
+        mock_client.get_manifest.return_value = mock_endpoint_registry
 
         registry = ServiceRegistry(
-            manifest_path=manifest_file, directory_client=mock_client, cache_ttl=1
+            manifest_path=manifest_file, manifest_client=mock_client, cache_ttl=1
         )
 
-        # Load directory
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 1
+        # Load manifest
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 1
 
         # Immediate reload should use cache
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 1
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 1
 
         # After TTL, should reload
-        registry._directory_loaded_at = time.time() - 2  # 2 seconds ago
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 2
+        registry._endpoint_registry_loaded_at = time.time() - 2  # 2 seconds ago
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 2
 
     @pytest.mark.asyncio
-    async def test_refresh_directory(self, manifest_file):
-        """Test forcing directory refresh."""
-        mock_directory = {"gpu_config": "https://gpu.example.com"}
+    async def test_refresh_manifest(self, manifest_file):
+        """Test forcing manifest refresh."""
+        mock_endpoint_registry = {"gpu_config": "https://gpu.example.com"}
 
         mock_client = AsyncMock()
-        mock_client.get_directory.return_value = mock_directory
+        mock_client.get_manifest.return_value = mock_endpoint_registry
 
         registry = ServiceRegistry(
-            manifest_path=manifest_file, directory_client=mock_client, cache_ttl=3600
+            manifest_path=manifest_file, manifest_client=mock_client, cache_ttl=3600
         )
 
-        # Load directory
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 1
+        # Load manifest
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 1
 
         # Force refresh
-        registry.refresh_directory()
+        registry.refresh_manifest()
 
         # Next load should fetch again
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 2
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 2
 
     def test_get_manifest(self, manifest_file):
         """Test getting manifest."""
@@ -282,16 +284,16 @@ def test_get_resource_functions_not_found(self, manifest_file):
         functions = registry.get_resource_functions("nonexistent")
         assert functions == []
 
-    def test_init_no_directory_client_no_mothership_url(self, manifest_file):
-        """Test initialization without directory client or URL."""
+    def test_init_no_manifest_client_no_mothership_url(self, manifest_file):
+        """Test initialization without manifest client or URL."""
         with patch.dict(os.environ, {}, clear=True):
             registry = ServiceRegistry(manifest_path=manifest_file)
-            assert registry._directory_client is None
+            assert registry._manifest_client is None
 
     @pytest.mark.asyncio
-    async def test_ensure_directory_loaded_unavailable_client(self, manifest_file):
-        """Test directory loading when client is None."""
-        registry = ServiceRegistry(manifest_path=manifest_file, directory_client=None)
+    async def test_ensure_manifest_loaded_unavailable_client(self, manifest_file):
+        """Test manifest loading when client is None."""
+        registry = ServiceRegistry(manifest_path=manifest_file, manifest_client=None)
         # Should not fail, just log warning
-        await registry._ensure_directory_loaded()
-        assert registry._directory == {}
+        await registry._ensure_manifest_loaded()
+        assert registry._endpoint_registry == {}

From 9bc59b4903d61f04a96bb9b0f36ebd243ff6f0ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 13:24:41 -0800
Subject: [PATCH 05/12] fix: align GET /manifest response format to
 Deployment_Architecture spec

Remove {"manifest": ...} wrapper and return manifest directly per spec
(Deployment_Architecture.md:235-273). Update ManifestClient parser to expect
manifest directly without unwrap logic.

Changes:
- Remove wrapper from GET /manifest endpoint (lb_handler.py:215)
- Update ManifestClient to validate manifest has "resources" key directly
- Replace global _manifest_fetcher with @lru_cache(maxsize=1) (thread-safe cache, no mutable module-level global)
- Update all test assertions to expect unwrapped manifest format

All 636 tests pass, coverage: 66.48%
---
 src/tetra_rp/runtime/lb_handler.py            | 17 +++---
 src/tetra_rp/runtime/manifest_client.py       | 11 ++--
 tests/integration/test_lb_remote_execution.py | 57 +++++++++++++++++++
 tests/unit/runtime/test_lb_handler.py         | 32 +++++------
 tests/unit/runtime/test_manifest_client.py    | 37 +++++++++---
 5 files changed, 115 insertions(+), 39 deletions(-)

diff --git a/src/tetra_rp/runtime/lb_handler.py b/src/tetra_rp/runtime/lb_handler.py
index 495261d2..3647aa52 100644
--- a/src/tetra_rp/runtime/lb_handler.py
+++ b/src/tetra_rp/runtime/lb_handler.py
@@ -23,7 +23,8 @@
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict, Optional
+from functools import lru_cache
+from typing import Any, Callable, Dict
 
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
@@ -37,16 +38,14 @@
 
 logger = logging.getLogger(__name__)
 
-# Module-level manifest fetcher (singleton, reused across requests)
-_manifest_fetcher: Optional[ManifestFetcher] = None
-
 
+@lru_cache(maxsize=1)
 def _get_manifest_fetcher() -> ManifestFetcher:
-    """Get or create the manifest fetcher singleton."""
-    global _manifest_fetcher
-    if _manifest_fetcher is None:
-        _manifest_fetcher = ManifestFetcher()
-    return _manifest_fetcher
+    """Get or create the manifest fetcher singleton.
+
+    Uses @lru_cache for thread-safe lazy initialization.
+    """
+    return ManifestFetcher()
 
 
 def create_lb_handler(
diff --git a/src/tetra_rp/runtime/manifest_client.py b/src/tetra_rp/runtime/manifest_client.py
index db845a63..eb234cbd 100644
--- a/src/tetra_rp/runtime/manifest_client.py
+++ b/src/tetra_rp/runtime/manifest_client.py
@@ -85,14 +85,15 @@ async def get_manifest(self) -> Dict[str, str]:
                         f"{response.text[:200]}"
                     )
 
-                data = response.json()
-                if "manifest" not in data:
+                manifest = response.json()
+                if not isinstance(manifest, dict) or "resources" not in manifest:
                     raise ManifestServiceUnavailableError(
-                        "Invalid manifest response: missing 'manifest' key"
+                        "Invalid manifest response: missing 'resources'"
                     )
 
-                manifest = data["manifest"]
-                logger.debug(f"Manifest loaded: {len(manifest)} endpoints")
+                logger.debug(
+                    f"Manifest loaded: {len(manifest.get('resources', {}))} resources"
+                )
                 return manifest
 
             except (
diff --git a/tests/integration/test_lb_remote_execution.py b/tests/integration/test_lb_remote_execution.py
index 8c45022f..d1413a93 100644
--- a/tests/integration/test_lb_remote_execution.py
+++ b/tests/integration/test_lb_remote_execution.py
@@ -393,8 +393,10 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
         """Test manifest endpoint with LoadBalancerSlsResource."""
         from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
+        from tetra_rp.runtime.lb_handler import _get_manifest_fetcher
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+        _get_manifest_fetcher.cache_clear()
 
         # Create test manifest for deployed endpoint
         test_manifest = {
@@ -436,12 +438,16 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
             assert response.status_code == 200
             assert response.json() == test_manifest
 
+        _get_manifest_fetcher.cache_clear()
+
     def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
         """Test that /manifest endpoint coexists with /ping health check."""
         from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
+        from tetra_rp.runtime.lb_handler import _get_manifest_fetcher
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+        _get_manifest_fetcher.cache_clear()
 
         test_manifest = {
             "version": "1.0",
@@ -465,3 +471,54 @@ def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
 
             ping_response = client.get("/ping")
             assert ping_response.status_code == 404  # Ping not auto-added by factory
+
+        _get_manifest_fetcher.cache_clear()
+
+
+class TestManifestClientToEndpointIntegration:
+    """Integration tests for ManifestClient calling GET /manifest endpoint."""
+
+    def test_manifest_client_can_parse_response(self):
+        """Test ManifestClient can parse manifest response directly."""
+        import asyncio
+        from unittest.mock import patch, AsyncMock, MagicMock
+        from tetra_rp.runtime.manifest_client import ManifestClient
+
+        # Create a manifest to simulate
+        test_manifest = {
+            "version": "1.0",
+            "generated_at": "2024-01-15T10:30:00Z",
+            "project_name": "test-app",
+            "resources": {
+                "gpu_config": {
+                    "resource_type": "LoadBalancerSlsResource",
+                    "handler_file": "handler_gpu.py",
+                    "endpoint_url": "https://api.runpod.io/v2/gpu123",
+                }
+            },
+            "function_registry": {"process_gpu": "gpu_config"},
+        }
+
+        async def test_client_parsing():
+            # Create a mock httpx client that returns the manifest directly
+            mock_http_client = AsyncMock()
+            mock_response = MagicMock()
+            mock_response.status_code = 200
+            mock_response.json.return_value = test_manifest
+            mock_http_client.get = AsyncMock(return_value=mock_response)
+
+            # Create ManifestClient
+            client = ManifestClient(mothership_url="http://localhost:8000")
+
+            # Mock the _get_client to return our mock
+            with patch.object(client, "_get_client", return_value=mock_http_client):
+                # Call get_manifest - should parse the response
+                result = await client.get_manifest()
+
+                # Verify it successfully parsed the manifest
+                assert result == test_manifest
+                assert "gpu_config" in result["resources"]
+                assert result["function_registry"]["process_gpu"] == "gpu_config"
+
+        # Run the async test
+        asyncio.run(test_client_parsing())
diff --git a/tests/unit/runtime/test_lb_handler.py b/tests/unit/runtime/test_lb_handler.py
index 966a2ab6..1da78a4f 100644
--- a/tests/unit/runtime/test_lb_handler.py
+++ b/tests/unit/runtime/test_lb_handler.py
@@ -5,7 +5,7 @@
 import pytest
 from fastapi.testclient import TestClient
 
-from tetra_rp.runtime.lb_handler import create_lb_handler
+from tetra_rp.runtime.lb_handler import create_lb_handler, _get_manifest_fetcher
 
 
 class TestManifestEndpoint:
@@ -13,12 +13,10 @@ class TestManifestEndpoint:
 
     @pytest.fixture(autouse=True)
     def reset_manifest_fetcher(self):
-        """Reset the global manifest fetcher before each test."""
-        import tetra_rp.runtime.lb_handler as lb_handler_module
-
-        lb_handler_module._manifest_fetcher = None
+        """Reset the manifest fetcher cache before each test."""
+        _get_manifest_fetcher.cache_clear()
         yield
-        lb_handler_module._manifest_fetcher = None
+        _get_manifest_fetcher.cache_clear()
 
     @pytest.fixture
     def sample_manifest(self):
@@ -172,14 +170,14 @@ def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch
             client = TestClient(app)
 
             response = client.get("/manifest")
-            data = response.json()
+            manifest = response.json()
 
-            # Verify structure
-            assert "version" in data
-            assert "generated_at" in data
-            assert "project_name" in data
-            assert "resources" in data
-            assert "function_registry" in data
+            # Verify manifest structure
+            assert "version" in manifest
+            assert "generated_at" in manifest
+            assert "project_name" in manifest
+            assert "resources" in manifest
+            assert "function_registry" in manifest
 
     def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
         """Test endpoint behavior when manifest has no resources."""
@@ -339,10 +337,10 @@ def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
             response = client.get("/manifest")
 
             assert response.status_code == 200
-            data = response.json()
-            assert len(data["resources"]) == 2
-            assert "gpu_config" in data["resources"]
-            assert "cpu_config" in data["resources"]
+            manifest = response.json()
+            assert len(manifest["resources"]) == 2
+            assert "gpu_config" in manifest["resources"]
+            assert "cpu_config" in manifest["resources"]
 
     def test_manifest_endpoint_uses_fetcher_with_caching(
         self, sample_manifest, monkeypatch
diff --git a/tests/unit/runtime/test_manifest_client.py b/tests/unit/runtime/test_manifest_client.py
index be48a38c..0578613e 100644
--- a/tests/unit/runtime/test_manifest_client.py
+++ b/tests/unit/runtime/test_manifest_client.py
@@ -21,11 +21,14 @@ def mock_response(self):
         response = MagicMock()
         response.status_code = 200
         response.json.return_value = {
-            "manifest": {
-                "gpu_config": "https://api.runpod.io/v2/gpu123",
-                "cpu_config": "https://api.runpod.io/v2/cpu456",
+            "version": "1.0",
+            "generated_at": "2025-01-03T12:00:00Z",
+            "project_name": "test-app",
+            "resources": {
+                "gpu_config": {"endpoint_url": "https://api.runpod.io/v2/gpu123"},
+                "cpu_config": {"endpoint_url": "https://api.runpod.io/v2/cpu456"},
             },
-            "updated_at": "2025-01-03T12:00:00Z",
+            "function_registry": {},
         }
         return response
 
@@ -66,8 +69,18 @@ async def test_get_manifest_success(self, mock_response):
                 manifest = await client.get_manifest()
 
                 assert manifest == {
-                    "gpu_config": "https://api.runpod.io/v2/gpu123",
-                    "cpu_config": "https://api.runpod.io/v2/cpu456",
+                    "version": "1.0",
+                    "generated_at": "2025-01-03T12:00:00Z",
+                    "project_name": "test-app",
+                    "resources": {
+                        "gpu_config": {
+                            "endpoint_url": "https://api.runpod.io/v2/gpu123"
+                        },
+                        "cpu_config": {
+                            "endpoint_url": "https://api.runpod.io/v2/cpu456"
+                        },
+                    },
+                    "function_registry": {},
                 }
 
     @pytest.mark.asyncio
@@ -115,7 +128,11 @@ async def test_get_manifest_retry(self):
 
         response = MagicMock()
         response.status_code = 200
-        response.json.return_value = {"manifest": {"gpu": "https://gpu.example.com"}}
+        response.json.return_value = {
+            "version": "1.0",
+            "resources": {"gpu": {"endpoint_url": "https://gpu.example.com"}},
+            "function_registry": {},
+        }
 
         with patch.object(client, "_get_client") as mock_get_client:
             mock_http_client = AsyncMock()
@@ -134,7 +151,11 @@ async def test_get_manifest_retry(self):
                 new_callable=AsyncMock,
             ):
                 manifest = await client.get_manifest()
-                assert manifest == {"gpu": "https://gpu.example.com"}
+                assert manifest == {
+                    "version": "1.0",
+                    "resources": {"gpu": {"endpoint_url": "https://gpu.example.com"}},
+                    "function_registry": {},
+                }
                 assert mock_http_client.get.call_count == 3
 
     @pytest.mark.asyncio

From 632fd9ebf6b132cfffde106af71f6629c7b68b0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 02:30:09 -0800
Subject: [PATCH 06/12] docs: convert ASCII diagrams to MermaidJS

- Local Execution Flow: Shows synchronous path for functions in manifest
- Remote Execution Flow: Shows serialization, HTTP, and deserialization steps
- Manifest Synchronization: Shows cache-first approach with GQL fallback

Uses high-contrast MermaidJS styling with saturated colors and white text
for maximum readability as per project guidelines.
---
 docs/Cross_Endpoint_Routing.md | 191 ++++++++++++++++++++++++++-------
 1 file changed, 155 insertions(+), 36 deletions(-)

diff --git a/docs/Cross_Endpoint_Routing.md b/docs/Cross_Endpoint_Routing.md
index 6d059636..1a4330c6 100644
--- a/docs/Cross_Endpoint_Routing.md
+++ b/docs/Cross_Endpoint_Routing.md
@@ -640,46 +640,70 @@ Add new configuration by:
 
 #### Local Execution Flow
 
-```
-Function Call
-    ↓
-ProductionWrapper.wrap_function_execution()
-    ↓
-ServiceRegistry.get_resource_for_function()
-    ↓
-Manifest Lookup (resource = None)
-    ↓
-Local Execution (original_stub_func)
-    ↓
-Result
+```mermaid
+flowchart TD
+    A["Function Call"]
+    B["ProductionWrapper.wrap_function_execution()"]
+    C["ServiceRegistry.get_resource_for_function()"]
+    D["Manifest Lookup<br/>resource = None"]
+    E["Local Execution<br/>original_stub_func"]
+    F["Result"]
+
+    A --> B
+    B --> C
+    C --> D
+    D --> E
+    E --> F
+
+    style A fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style B fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style C fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style D fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style E fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style F fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
 ```
 
 #### Remote Execution Flow
 
-```
-Function Call
-    ↓
-ProductionWrapper.wrap_function_execution()
-    ↓
-ServiceRegistry.get_resource_for_function()
-    ↓
-Manifest Lookup (resource found)
-    ↓
-Ensure Directory Loaded
-    ↓
-DirectoryClient.get_endpoints()
-    ↓
-Get Remote Endpoint URL
-    ↓
-Serialize Arguments (cloudpickle → base64)
-    ↓
-HTTP POST to Remote Endpoint
-    ↓
-Remote Function Execution
-    ↓
-Deserialize Result (base64 → cloudpickle)
-    ↓
-Result
+```mermaid
+flowchart TD
+    A["Function Call"]
+    B["ProductionWrapper.wrap_function_execution()"]
+    C["ServiceRegistry.get_resource_for_function()"]
+    D["Manifest Lookup<br/>resource found"]
+    E["Ensure Directory Loaded"]
+    F["DirectoryClient.get_endpoints()"]
+    G["Get Remote Endpoint URL"]
+    H["Serialize Arguments<br/>cloudpickle → base64"]
+    I["HTTP POST to Remote Endpoint"]
+    J["Remote Function Execution"]
+    K["Deserialize Result<br/>base64 → cloudpickle"]
+    L["Result"]
+
+    A --> B
+    B --> C
+    C --> D
+    D --> E
+    E --> F
+    F --> G
+    G --> H
+    H --> I
+    I --> J
+    J --> K
+    K --> L
+
+    style A fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style B fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style C fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style D fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style E fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style F fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style G fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style H fill:#f57c00,stroke:#e65100,stroke-width:3px,color:#fff
+    style I fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style J fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style K fill:#f57c00,stroke:#e65100,stroke-width:3px,color:#fff
+    style L fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
 ```
 
 ### Design Decisions
@@ -920,6 +944,101 @@ client = DirectoryClient(mothership_url=...)
 endpoints = await client.get_endpoints()
 ```
 
+## Manifest Synchronization with RunPod GraphQL API
+
+### Overview
+
+The Mothership's GET /manifest endpoint pulls configuration from RunPod's GraphQL API,
+which serves as the single source of truth for manifest data. This enables centralized
+configuration management and ensures all child endpoints receive consistent routing
+information.
+
+### Architecture
+
+```mermaid
+flowchart TD
+    A["Child Endpoint<br/>GET /manifest"]
+    B["Mothership"]
+    C["ManifestFetcher"]
+    D{Cache Valid?}
+    E["Serve Cached<br/>Manifest"]
+    F["Fetch from RunPod<br/>GraphQL API"]
+    G["Update<br/>flash_manifest.json"]
+    H["Cache Result<br/>TTL: 300s"]
+    I["Serve Manifest"]
+    J["Fallback:<br/>Load Local File"]
+
+    A -->|Request| B
+    B --> C
+    C --> D
+    D -->|Yes| E
+    D -->|No| F
+    E --> I
+    F --> G
+    G --> H
+    H --> I
+    F -->|Fails| J
+    J --> I
+
+    style A fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style B fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style C fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style D fill:#f57c00,stroke:#e65100,stroke-width:3px,color:#fff
+    style E fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style F fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style G fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+    style H fill:#388e3c,stroke:#1b5e20,stroke-width:3px,color:#fff
+    style I fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+    style J fill:#d32f2f,stroke:#b71c1c,stroke-width:3px,color:#fff
+```
+
+### How It Works
+
+1. **Source of Truth**: RunPod GraphQL API holds the authoritative manifest configuration
+2. **Caching Proxy**: Mothership fetches from RunPod GQL, caches locally (5 min TTL)
+3. **Local Persistence**: Fetched manifest written to `flash_manifest.json`
+4. **Graceful Fallback**: If RunPod GQL unavailable, serves local file
+5. **Cache Invalidation**: Automatic expiry after TTL, manual invalidation supported
+
+### Implementation Status
+
+**Current (Placeholder)**:
+- `ManifestFetcher` class with caching infrastructure
+- Will use the existing `RunpodGraphQLClient` for API communication (not yet wired in)
+- Falls back to local `flash_manifest.json` (GQL fetch raises `NotImplementedError`)
+- Cache TTL: 300 seconds (configurable)
+
+**Future (Full Implementation)**:
+- Implement `getManifest` query in `ManifestFetcher._fetch_from_gql()`
+- Add `saveManifest` mutation for updating manifest in RunPod
+- Real-time cache invalidation via webhooks
+- Health checks and retry logic
+
+### Configuration
+
+```bash
+# Enable Mothership mode (required for /manifest endpoint)
+export FLASH_IS_MOTHERSHIP=true
+
+# Optional: Identify this mothership instance
+export RUNPOD_ENDPOINT_ID=mothership-prod-1
+
+# Required for RunPod GraphQL API access
+export RUNPOD_API_KEY=your-api-key-here
+```
+
+### Cache Behavior
+
+- **Default TTL**: 300 seconds (5 minutes)
+- **Cache Key**: Per-mothership instance (no cross-instance cache)
+- **Concurrency-Safe**: Uses `asyncio.Lock` to serialize concurrent requests on one event loop (not thread-safe)
+- **Manual Invalidation**: `fetcher.invalidate_cache()` for testing
+
+### Historical Context
+
+A previous `StateManagerClient` (commit b19bf7c) used a REST API. The current placeholder
+prepares for a GQL-based architecture with improved caching and error handling.
+
 ## Key Implementation Highlights
 
 ### Design Focus

From 375f1caa9add3408a755c44a08bbb10536dbc45e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 04:03:44 -0800
Subject: [PATCH 07/12] feat: add ManifestFetcher for caching manifest from
 RunPod GraphQL

- Add ManifestFetcher class with caching infrastructure (TTL: 300s)
- Integrate ManifestFetcher into lb_handler /manifest endpoint
- Use RunpodGraphQLClient for API communication
- Fall back to local flash_manifest.json when API unavailable
- Add comprehensive tests for ManifestFetcher and lb_handler
---
 src/tetra_rp/runtime/lb_handler.py            |  28 ++-
 src/tetra_rp/runtime/manifest_fetcher.py      | 192 ++++++++++++++++++
 tests/integration/test_lb_remote_execution.py |  41 ++--
 tests/unit/runtime/test_lb_handler.py         | 148 +++++++++++---
 tests/unit/runtime/test_manifest_fetcher.py   | 164 +++++++++++++++
 5 files changed, 523 insertions(+), 50 deletions(-)
 create mode 100644 src/tetra_rp/runtime/manifest_fetcher.py
 create mode 100644 tests/unit/runtime/test_manifest_fetcher.py

diff --git a/src/tetra_rp/runtime/lb_handler.py b/src/tetra_rp/runtime/lb_handler.py
index 4193dd02..495261d2 100644
--- a/src/tetra_rp/runtime/lb_handler.py
+++ b/src/tetra_rp/runtime/lb_handler.py
@@ -23,12 +23,12 @@
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict
+from typing import Any, Callable, Dict, Optional
 
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 
-from .generic_handler import load_manifest
+from .manifest_fetcher import ManifestFetcher
 from .serialization import (
     deserialize_args,
     deserialize_kwargs,
@@ -37,6 +37,17 @@
 
 logger = logging.getLogger(__name__)
 
+# Module-level manifest fetcher (singleton, reused across requests)
+_manifest_fetcher: Optional[ManifestFetcher] = None
+
+
+def _get_manifest_fetcher() -> ManifestFetcher:
+    """Get or create the manifest fetcher singleton."""
+    global _manifest_fetcher
+    if _manifest_fetcher is None:
+        _manifest_fetcher = ManifestFetcher()
+    return _manifest_fetcher
+
 
 def create_lb_handler(
     route_registry: Dict[tuple[str, str], Callable], include_execute: bool = False
@@ -178,20 +189,27 @@ async def execute_remote_function(request: Request) -> Dict[str, Any]:
         async def get_manifest() -> JSONResponse:
             """Mothership discovery endpoint.
 
-            Returns the flash_manifest.json content for service discovery.
+            Fetches manifest from RunPod GraphQL API (source of truth), caches it
+            locally, and serves to child endpoints. Falls back to local file if
+            RunPod API is unavailable.
+
             Only available when FLASH_IS_MOTHERSHIP=true environment variable is set.
 
             Returns:
                 JSONResponse with manifest content or 404 if not found
             """
-            manifest_dict = load_manifest()
+            fetcher = _get_manifest_fetcher()
+            mothership_id = os.getenv("RUNPOD_ENDPOINT_ID")
+
+            # Fetch manifest (from cache, RunPod GQL, or local file)
+            manifest_dict = await fetcher.get_manifest(mothership_id)
 
             if not manifest_dict or not manifest_dict.get("resources"):
                 return JSONResponse(
                     status_code=404,
                     content={
                         "error": "Manifest not found",
-                        "detail": "flash_manifest.json could not be loaded",
+                        "detail": "Could not load manifest from RunPod or local file",
                     },
                 )
 
diff --git a/src/tetra_rp/runtime/manifest_fetcher.py b/src/tetra_rp/runtime/manifest_fetcher.py
new file mode 100644
index 00000000..8815add7
--- /dev/null
+++ b/src/tetra_rp/runtime/manifest_fetcher.py
@@ -0,0 +1,192 @@
+"""Manifest fetcher with RunPod GQL integration and caching.
+
+This module provides manifest fetching from RunPod GraphQL API (source of truth)
+with local file caching and fallback.
+"""
+
+import asyncio
+import json
+import logging
+import time
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from .config import DEFAULT_CACHE_TTL
+from .generic_handler import load_manifest
+
+logger = logging.getLogger(__name__)
+
+
+class ManifestFetcher:
+    """Fetches and caches manifest from RunPod GraphQL API.
+
+    RunPod's GraphQL API is the source of truth for manifest data. This
+    fetcher pulls from it using RunpodGraphQLClient, caches locally, and
+    falls back to local file if RunPod API is unavailable.
+    """
+
+    def __init__(
+        self,
+        cache_ttl: int = DEFAULT_CACHE_TTL,
+        manifest_path: Optional[Path] = None,
+    ):
+        """Initialize manifest fetcher.
+
+        Args:
+            cache_ttl: Cache time-to-live in seconds (default: 300)
+            manifest_path: Optional path to local manifest file
+        """
+        self.cache_ttl = cache_ttl
+        self.manifest_path = manifest_path
+
+        # Cache state
+        self._cached_manifest: Optional[Dict[str, Any]] = None
+        self._cache_loaded_at: float = 0
+        self._cache_lock = asyncio.Lock()
+
+    async def get_manifest(
+        self,
+        mothership_id: Optional[str] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """Get manifest from cache or fetch from RunPod GraphQL API.
+
+        Flow:
+        1. Check if cached and not expired → return cached
+        2. If expired/not cached → fetch from RunPod GraphQL API
+        3. Update local flash_manifest.json with fetched data
+        4. Cache the result
+        5. Return manifest
+
+        If RunPod GQL fetch fails, falls back to local file.
+
+        Args:
+            mothership_id: Optional mothership endpoint ID for tracking
+
+        Returns:
+            Manifest dictionary or None if unavailable
+        """
+        async with self._cache_lock:
+            now = time.time()
+            cache_age = now - self._cache_loaded_at
+
+            # Return cached if still valid
+            if self._cached_manifest and cache_age < self.cache_ttl:
+                logger.debug(
+                    f"Serving cached manifest (age: {cache_age:.1f}s, "
+                    f"TTL: {self.cache_ttl}s)"
+                )
+                return self._cached_manifest
+
+            # Cache expired or not loaded - fetch from RunPod GQL
+            logger.debug("Cache expired or empty, fetching from RunPod GraphQL API")
+
+            try:
+                # Fetch from RunPod GraphQL API (placeholder)
+                manifest = await self._fetch_from_gql(mothership_id)
+
+                # Update local flash_manifest.json
+                if manifest:
+                    self._update_local_file(manifest)
+
+                    # Update cache
+                    self._cached_manifest = manifest
+                    self._cache_loaded_at = now
+
+                    logger.info(
+                        f"Manifest fetched from RunPod GQL and cached "
+                        f"({len(manifest.get('resources', {}))} resources)"
+                    )
+                    return manifest
+
+            except NotImplementedError:
+                logger.debug(
+                    "RunPod GQL fetch not implemented, falling back to local file"
+                )
+            except Exception as e:
+                logger.warning(
+                    f"RunPod GQL fetch failed: {e}, falling back to local file"
+                )
+
+            # Fallback: load from local file
+            manifest = load_manifest(self.manifest_path)
+            if manifest:
+                # Cache the fallback manifest
+                self._cached_manifest = manifest
+                self._cache_loaded_at = now
+                logger.debug("Loaded and cached manifest from local file")
+
+            return manifest
+
+    async def _fetch_from_gql(
+        self,
+        mothership_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Fetch manifest from RunPod GraphQL API.
+
+        TBD: Future implementation will query RunPod's GraphQL API
+        to retrieve the manifest configuration.
+
+        Args:
+            mothership_id: Optional mothership endpoint ID
+
+        Returns:
+            Manifest dictionary from RunPod GQL
+
+        Raises:
+            NotImplementedError: Placeholder for future implementation
+
+        Note:
+            Future implementation will use RunpodGraphQLClient:
+
+            ```python
+            async with RunpodGraphQLClient() as client:
+                query = '''
+                query GetManifest($mothershipId: ID!) {
+                    getManifest(mothershipId: $mothershipId) {
+                        version
+                        projectName
+                        generatedAt
+                        resources
+                        functionRegistry
+                    }
+                }
+                '''
+                result = await client.execute(query, {"mothershipId": mothership_id})
+                return result["data"]["getManifest"]
+            ```
+        """
+        raise NotImplementedError(
+            "RunPod manifest query not yet implemented. "
+            "Falling back to local flash_manifest.json file."
+        )
+
+    def _update_local_file(self, manifest: Dict[str, Any]) -> None:
+        """Update local flash_manifest.json with fetched data.
+
+        Args:
+            manifest: Manifest dictionary to write
+        """
+        try:
+            # Determine file path
+            if self.manifest_path:
+                file_path = self.manifest_path
+            else:
+                file_path = Path.cwd() / "flash_manifest.json"
+
+            # Write manifest to file
+            with open(file_path, "w") as f:
+                json.dump(manifest, f, indent=2)
+
+            logger.debug(f"Updated local manifest file: {file_path}")
+
+        except Exception as e:
+            logger.warning(f"Failed to update local manifest file: {e}")
+            # Non-critical error - cached manifest still valid
+
+    def invalidate_cache(self) -> None:
+        """Manually invalidate the cache.
+
+        Next get_manifest() call will fetch from GQL.
+        """
+        self._cache_loaded_at = 0
+        logger.debug("Manifest cache invalidated")
diff --git a/tests/integration/test_lb_remote_execution.py b/tests/integration/test_lb_remote_execution.py
index 2aca85d0..8c45022f 100644
--- a/tests/integration/test_lb_remote_execution.py
+++ b/tests/integration/test_lb_remote_execution.py
@@ -309,9 +309,18 @@ def get_status():
 class TestManifestEndpointIntegration:
     """Integration tests for GET /manifest endpoint."""
 
+    @pytest.fixture(autouse=True)
+    def reset_manifest_fetcher(self):
+        """Reset the global manifest fetcher before each test."""
+        import tetra_rp.runtime.lb_handler as lb_handler_module
+
+        lb_handler_module._manifest_fetcher = None
+        yield
+        lb_handler_module._manifest_fetcher = None
+
     def test_manifest_endpoint_in_live_load_balancer(self, monkeypatch):
         """Test manifest endpoint in LiveLoadBalancer with FLASH_IS_MOTHERSHIP=true."""
-        from unittest.mock import patch
+        from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
@@ -349,10 +358,12 @@ async def hello():
             "routes": {"test-mothership": {"GET /api/hello": "hello"}},
         }
 
-        # Mock load_manifest to return test manifest
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
-        ):
+        # Mock ManifestFetcher to return test manifest
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=test_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             from tetra_rp.runtime.lb_handler import create_lb_handler
 
             # Create handler with manifest endpoint enabled
@@ -380,7 +391,7 @@ def test_manifest_endpoint_excluded_when_env_not_set(self):
 
     def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
         """Test manifest endpoint with LoadBalancerSlsResource."""
-        from unittest.mock import patch
+        from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
@@ -409,9 +420,11 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
             "function_registry": {"process_image": "gpu-worker"},
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=test_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             from tetra_rp.runtime.lb_handler import create_lb_handler
 
             # Create deployed handler (not LiveLoadBalancer)
@@ -425,7 +438,7 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
 
     def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
         """Test that /manifest endpoint coexists with /ping health check."""
-        from unittest.mock import patch
+        from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
@@ -436,9 +449,11 @@ def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
             "function_registry": {},
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=test_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=test_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             from tetra_rp.runtime.lb_handler import create_lb_handler
 
             app = create_lb_handler({}, include_execute=False)
diff --git a/tests/unit/runtime/test_lb_handler.py b/tests/unit/runtime/test_lb_handler.py
index e02c6aa0..966a2ab6 100644
--- a/tests/unit/runtime/test_lb_handler.py
+++ b/tests/unit/runtime/test_lb_handler.py
@@ -11,6 +11,15 @@
 class TestManifestEndpoint:
     """Tests for GET /manifest endpoint."""
 
+    @pytest.fixture(autouse=True)
+    def reset_manifest_fetcher(self):
+        """Reset the global manifest fetcher before each test."""
+        import tetra_rp.runtime.lb_handler as lb_handler_module
+
+        lb_handler_module._manifest_fetcher = None
+        yield
+        lb_handler_module._manifest_fetcher = None
+
     @pytest.fixture
     def sample_manifest(self):
         """Sample manifest for testing."""
@@ -42,11 +51,15 @@ def test_manifest_endpoint_registered_when_env_var_true(
         self, sample_manifest, monkeypatch
     ):
         """Verify /manifest endpoint exists when FLASH_IS_MOTHERSHIP=true."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             routes = [route.path for route in app.routes]
 
@@ -77,11 +90,15 @@ def test_manifest_endpoint_returns_200_with_valid_manifest(
         self, sample_manifest, monkeypatch
     ):
         """Test happy path - endpoint returns 200 with valid manifest."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -92,9 +109,15 @@ def test_manifest_endpoint_returns_200_with_valid_manifest(
 
     def test_manifest_endpoint_returns_404_when_manifest_missing(self, monkeypatch):
         """Test endpoint returns 404 when manifest file not found."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch("tetra_rp.runtime.lb_handler.load_manifest", return_value={}):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value={})
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -103,19 +126,22 @@ def test_manifest_endpoint_returns_404_when_manifest_missing(self, monkeypatch):
             assert response.status_code == 404
             data = response.json()
             assert data["error"] == "Manifest not found"
-            assert "could not be loaded" in data["detail"]
+            assert "Could not load" in data["detail"]
 
     def test_manifest_endpoint_case_insensitive_env_var_true(
         self, sample_manifest, monkeypatch
     ):
         """Test endpoint registration with different case variations of 'true'."""
+        from unittest.mock import AsyncMock
+
         for env_value in ["True", "TRUE", "TrUe"]:
             monkeypatch.setenv("FLASH_IS_MOTHERSHIP", env_value)
 
-            with patch(
-                "tetra_rp.runtime.lb_handler.load_manifest",
-                return_value=sample_manifest,
-            ):
+            with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+                mock_fetcher = AsyncMock()
+                mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+                MockFetcher.return_value = mock_fetcher
+
                 app = create_lb_handler({}, include_execute=False)
                 routes = [route.path for route in app.routes]
 
@@ -133,11 +159,15 @@ def test_manifest_endpoint_case_insensitive_env_var_false(self, monkeypatch):
 
     def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch):
         """Test that manifest response has correct structure."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -153,6 +183,8 @@ def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch
 
     def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
         """Test endpoint behavior when manifest has no resources."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
         empty_manifest = {
@@ -162,9 +194,11 @@ def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
             "function_registry": {},
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=empty_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=empty_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -174,10 +208,16 @@ def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
             assert response.status_code == 404
 
     def test_manifest_endpoint_with_none_manifest(self, monkeypatch):
-        """Test endpoint behavior when load_manifest returns None."""
+        """Test endpoint behavior when get_manifest returns None."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch("tetra_rp.runtime.lb_handler.load_manifest", return_value=None):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=None)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -189,11 +229,15 @@ def test_manifest_endpoint_coexists_with_execute(
         self, sample_manifest, monkeypatch
     ):
         """Test that /manifest endpoint coexists with /execute endpoint."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=True)
             routes = [route.path for route in app.routes]
 
@@ -204,6 +248,8 @@ def test_manifest_endpoint_coexists_with_user_routes(
         self, sample_manifest, monkeypatch
     ):
         """Test that /manifest endpoint coexists with user-defined routes."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
         async def dummy_handler():
@@ -211,9 +257,11 @@ async def dummy_handler():
 
         route_registry = {("GET", "/api/health"): dummy_handler}
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler(route_registry, include_execute=False)
             routes = [route.path for route in app.routes]
 
@@ -222,11 +270,15 @@ async def dummy_handler():
 
     def test_manifest_endpoint_content_type(self, sample_manifest, monkeypatch):
         """Test that /manifest endpoint returns proper JSON content-type."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=sample_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -236,6 +288,8 @@ def test_manifest_endpoint_content_type(self, sample_manifest, monkeypatch):
 
     def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
         """Test endpoint with complex multi-resource manifest."""
+        from unittest.mock import AsyncMock
+
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
 
         complex_manifest = {
@@ -274,9 +328,11 @@ def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
             },
         }
 
-        with patch(
-            "tetra_rp.runtime.lb_handler.load_manifest", return_value=complex_manifest
-        ):
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=complex_manifest)
+            MockFetcher.return_value = mock_fetcher
+
             app = create_lb_handler({}, include_execute=False)
             client = TestClient(app)
 
@@ -288,6 +344,34 @@ def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
             assert "gpu_config" in data["resources"]
             assert "cpu_config" in data["resources"]
 
+    def test_manifest_endpoint_uses_fetcher_with_caching(
+        self, sample_manifest, monkeypatch
+    ):
+        """Verify every GET /manifest is answered through ManifestFetcher.
+
+        The handler is expected to call get_manifest() on each request and
+        leave caching to the fetcher, so two requests mean two fetcher calls.
+        """
+        from unittest.mock import AsyncMock
+
+        monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+
+        with patch("tetra_rp.runtime.lb_handler.ManifestFetcher") as MockFetcher:
+            mock_fetcher = AsyncMock()
+            mock_fetcher.get_manifest = AsyncMock(return_value=sample_manifest)
+            MockFetcher.return_value = mock_fetcher
+            client = TestClient(create_lb_handler({}, include_execute=False))
+
+            # Issue two requests and check status and body of each, so a handler
+            # that only served the first request correctly would be caught.
+            for _ in range(2):
+                response = client.get("/manifest")
+                assert response.status_code == 200
+                assert response.json() == sample_manifest
+
+            # One delegation per request: the handler keeps no cache of its own
+            assert mock_fetcher.get_manifest.call_count == 2
+
 
 class TestExecuteEndpointStillWorks:
     """Tests to ensure /execute endpoint still works after manifest changes."""
diff --git a/tests/unit/runtime/test_manifest_fetcher.py b/tests/unit/runtime/test_manifest_fetcher.py
new file mode 100644
index 00000000..f7ae27a1
--- /dev/null
+++ b/tests/unit/runtime/test_manifest_fetcher.py
@@ -0,0 +1,164 @@
+"""Unit tests for ManifestFetcher."""
+
+import asyncio
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from tetra_rp.runtime.manifest_fetcher import ManifestFetcher
+
+
+class TestManifestFetcher:
+    """Test ManifestFetcher caching and GQL integration."""
+
+    @pytest.fixture
+    def sample_manifest(self):
+        """Minimal manifest: one GPU resource and one function routed to it."""
+        manifest = {"version": "1.0", "project_name": "test-app"}
+        manifest["resources"] = {
+            "gpu_config": {"resource_type": "ServerlessEndpoint"},
+        }
+        manifest["function_registry"] = {"process_gpu": "gpu_config"}
+        return manifest
+
+    @pytest.mark.asyncio
+    async def test_fetch_falls_back_to_local_file_when_gql_not_implemented(
+        self, sample_manifest, tmp_path
+    ):
+        """Fetcher serves the on-disk manifest while the GQL fetch is unimplemented.
+
+        No GQL mock is installed, so the expected manifest is the one written
+        to the temporary file below.
+        """
+        local_copy = tmp_path / "flash_manifest.json"
+        local_copy.write_text(json.dumps(sample_manifest))
+
+        loaded = await ManifestFetcher(manifest_path=local_copy).get_manifest()
+        assert loaded == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_caching_prevents_multiple_fetches(self, sample_manifest, tmp_path):
+        """A second get_manifest() inside the TTL returns the cached object."""
+        local_copy = tmp_path / "flash_manifest.json"
+        local_copy.write_text(json.dumps(sample_manifest))
+
+        # 300s TTL: the cache cannot expire between the two calls below
+        fetcher = ManifestFetcher(cache_ttl=300, manifest_path=local_copy)
+
+        initial = await fetcher.get_manifest()
+        repeated = await fetcher.get_manifest()
+
+        assert initial == sample_manifest
+        assert repeated == sample_manifest
+
+        # Identity (not just equality) shows the same cached object was returned
+        assert repeated is initial
+
+    @pytest.mark.asyncio
+    async def test_cache_expiration_triggers_refetch(self, sample_manifest, tmp_path):
+        """A get_manifest() call made after the TTL elapsed still succeeds.
+
+        NOTE(review): only the returned content is checked; nothing here proves
+        a reload actually happened - consider asserting on the load call count.
+        """
+        local_copy = tmp_path / "flash_manifest.json"
+        local_copy.write_text(json.dumps(sample_manifest))
+
+        ttl_seconds = 0.1
+        fetcher = ManifestFetcher(cache_ttl=ttl_seconds, manifest_path=local_copy)
+
+        before_expiry = await fetcher.get_manifest()
+        assert before_expiry == sample_manifest
+
+        # Sleep for twice the TTL so the cached entry is certainly stale
+        await asyncio.sleep(2 * ttl_seconds)
+
+        assert await fetcher.get_manifest() == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_fetch_from_gql_raises_not_implemented(self):
+        """Verify the GQL fetch stub raises NotImplementedError when awaited."""
+        fetcher = ManifestFetcher()
+        # match= requires the error message to contain "not yet implemented"
+        with pytest.raises(NotImplementedError, match="not yet implemented"):
+            await fetcher._fetch_from_gql()
+
+    @pytest.mark.asyncio
+    async def test_update_local_file_writes_manifest(self, sample_manifest, tmp_path):
+        """_update_local_file() persists the manifest as JSON at manifest_path."""
+        target = tmp_path / "flash_manifest.json"
+
+        # Nothing in this test creates the file beforehand, so its presence
+        # afterwards is attributable to the call under test.
+        ManifestFetcher(manifest_path=target)._update_local_file(sample_manifest)
+
+        assert target.exists()
+
+        # Round-trip through json to compare content rather than formatting
+        assert json.loads(target.read_text()) == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_cache_invalidation(self, sample_manifest, tmp_path):
+        """invalidate_cache() resets the load timestamp of a populated cache.
+
+        NOTE(review): the refetch itself is not exercised here; only the
+        reset timestamp is asserted.
+        """
+        local_copy = tmp_path / "flash_manifest.json"
+        local_copy.write_text(json.dumps(sample_manifest))
+        fetcher = ManifestFetcher(cache_ttl=300, manifest_path=local_copy)
+
+        # Populate the cache first so there is something to invalidate
+        await fetcher.get_manifest()
+        assert fetcher._cached_manifest is not None
+
+        fetcher.invalidate_cache()
+
+        assert fetcher._cache_loaded_at == 0
+
+    @pytest.mark.asyncio
+    async def test_concurrent_requests_use_lock(self, sample_manifest, tmp_path):
+        """Concurrent get_manifest() calls all resolve to the expected manifest.
+
+        NOTE(review): this checks the results only; it would still pass if the
+        fetcher used no lock at all.
+        """
+        local_copy = tmp_path / "flash_manifest.json"
+        local_copy.write_text(json.dumps(sample_manifest))
+        fetcher = ManifestFetcher(manifest_path=local_copy)
+
+        # Three coroutines scheduled together on the same fetcher instance
+        pending = [fetcher.get_manifest() for _ in range(3)]
+        results = await asyncio.gather(*pending)
+
+        # Every caller must receive the full manifest
+        for manifest in results:
+            assert manifest == sample_manifest
+
+    @pytest.mark.asyncio
+    async def test_handles_missing_local_file_gracefully(self, tmp_path, monkeypatch):
+        """Verify fetcher returns an empty manifest when no manifest file exists."""
+        # Run from an empty directory so the cwd fallback cannot find a stray manifest
+        monkeypatch.chdir(tmp_path)
+        fetcher = ManifestFetcher(manifest_path=Path("/nonexistent/manifest.json"))
+
+        result = await fetcher.get_manifest()
+
+        # load_manifest returns empty dict when no file is found
+        assert result == {"resources": {}, "function_registry": {}}
+
+    @pytest.mark.asyncio
+    async def test_mothership_id_passed_to_gql(self):
+        """get_manifest(mothership_id=...) forwards the id to _fetch_from_gql()."""
+        fetcher = ManifestFetcher()
+
+        # Replace the GQL call with a spy that raises NotImplementedError, the
+        # same error type the real placeholder raises.
+        with patch.object(
+            fetcher, "_fetch_from_gql", side_effect=NotImplementedError()
+        ) as gql_spy:
+            await fetcher.get_manifest(mothership_id="test-123")
+
+            gql_spy.assert_called_once_with("test-123")

From 1da8ee2c095e9b06853f19a7b2a23d2a541d5f38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 08:38:00 -0800
Subject: [PATCH 08/12] refactor: rename directory terminology to manifest
 throughout codebase

- Rename _directory to _endpoint_registry in ServiceRegistry
- Rename directory_client parameter to manifest_client
- Change API endpoint from /directory to /manifest
- Change JSON response key from "directory" to "manifest"
- Update _ensure_directory_loaded() to _ensure_manifest_loaded()
- Update refresh_directory() to refresh_manifest()
- Update all tests and documentation to reflect new terminology
---
 docs/Cross_Endpoint_Routing.md                | 137 +++++++++---------
 docs/Load_Balancer_Endpoints.md               |   6 +-
 src/tetra_rp/runtime/config.py                |   2 +-
 src/tetra_rp/runtime/exceptions.py            |   2 +-
 src/tetra_rp/runtime/manifest_client.py       |  33 ++---
 src/tetra_rp/runtime/production_wrapper.py    |   9 +-
 src/tetra_rp/runtime/service_registry.py      |  64 ++++----
 .../test_cross_endpoint_routing.py            |  57 ++++----
 tests/unit/runtime/test_manifest_client.py    |  30 ++--
 tests/unit/runtime/test_production_wrapper.py |  10 +-
 tests/unit/runtime/test_service_registry.py   | 114 ++++++++-------
 11 files changed, 234 insertions(+), 230 deletions(-)

diff --git a/docs/Cross_Endpoint_Routing.md b/docs/Cross_Endpoint_Routing.md
index 1a4330c6..341d4b09 100644
--- a/docs/Cross_Endpoint_Routing.md
+++ b/docs/Cross_Endpoint_Routing.md
@@ -59,7 +59,7 @@ The manifest structure:
 
 #### 2. Set Environment Variables
 
-Configure the mothership directory URL (required for remote routing):
+Configure the mothership manifest URL (required for remote routing):
 
 ```bash
 # Required for cross-endpoint routing to work
@@ -149,7 +149,7 @@ The manifest file (`flash_manifest.json`) defines function routing and resource
 
 | Variable | Required | Purpose |
 |----------|----------|---------|
-| `FLASH_MOTHERSHIP_URL` | Yes* | URL of mothership directory service |
+| `FLASH_MOTHERSHIP_URL` | Yes* | URL of mothership manifest service |
 | `RUNPOD_ENDPOINT_ID` | No | Current endpoint ID (for tracing) |
 | `FLASH_MANIFEST_PATH` | No | Explicit path to manifest file |
 
@@ -255,7 +255,7 @@ Functions gracefully fall back to local execution if routing fails:
 async def critical_service(request: dict) -> dict:
     # Routes to critical-endpoint if:
     # - In function_registry
-    # - Directory available
+    # - Manifest available
     # Otherwise executes locally
     return handle_critical(request)
 
@@ -269,11 +269,11 @@ async def helper_function(x: int) -> int:
 
 #### Common Issues
 
-**Directory Unavailable**
+**Manifest Service Unavailable**
 
 If `FLASH_MOTHERSHIP_URL` is not set or unreachable:
 ```
-WARNING: FLASH_MOTHERSHIP_URL not set, directory unavailable
+WARNING: FLASH_MOTHERSHIP_URL not set, manifest service unavailable
 ```
 
 Functions default to local execution. Set the environment variable to enable routing.
@@ -342,8 +342,8 @@ graph TD
     A["Function Call"] -->|"intercepts stub layer"| B["ProductionWrapper"]
 
     B -->|"load service configuration"| C["ServiceRegistry"]
-    C -->|"if not cached"| D["DirectoryClient"]
-    D -->|"query mothership API"| E["Directory<br/>Endpoint URLs"]
+    C -->|"if not cached"| D["ManifestClient"]
+    D -->|"query mothership API"| E["Manifest<br/>Endpoint URLs"]
     E -->|"cache result<br/>TTL 300s"| C
 
     C -->|"lookup in manifest<br/>flash_manifest.json"| F{"Routing<br/>Decision"}
@@ -358,7 +358,7 @@ graph TD
     K --> L["Return Response<br/>base64 → cloudpickle"]
     L --> M["Deserialized Result"]
 
-    N["Error Handling:<br/>- RemoteExecutionError<br/>- SerializationError<br/>- DirectoryUnavailableError"] -.-> H
+    N["Error Handling:<br/>- RemoteExecutionError<br/>- SerializationError<br/>- ManifestServiceUnavailableError"] -.-> H
     N -.-> I
     N -.-> J
 
@@ -405,8 +405,8 @@ class ProductionWrapper:
         **kwargs: Any,
     ) -> Any:
         """Route function execution to local or remote endpoint."""
-        # 1. Load directory (if needed)
-        await self.service_registry._ensure_directory_loaded()
+        # 1. Load manifest (if needed)
+        await self.service_registry._ensure_manifest_loaded()
 
         # 2. Look up function in manifest
         resource = self.service_registry.get_resource_for_function(func.__name__)
@@ -450,30 +450,29 @@ class ServiceRegistry:
     """Service discovery and routing for cross-endpoint function calls."""
 
     def __init__(self, manifest_path: Optional[Path] = None):
-        """Initialize with manifest and optional directory client."""
+        """Initialize with manifest and optional manifest client."""
         self._load_manifest(manifest_path)
-        self._directory_client = DirectoryClient(...)
-        self._directory = {}  # Cached endpoint URLs
-        self._directory_lock = asyncio.Lock()
+        self._manifest_client = ManifestClient(...)
+        self._endpoint_registry = {}  # Cached endpoint URLs
+        self._endpoint_registry_lock = asyncio.Lock()
 
     def get_resource_for_function(self, func_name: str) -> Optional[ServerlessResource]:
         """Get resource config for function from manifest."""
-        # Returns None if:
-        # - Function not in manifest
-        # - Explicitly set to null in manifest
-
-        # Returns ServerlessResource if mapped in manifest
-        config = self._manifest["functions"].get(func_name)
+        # Returns the ServerlessResource if function is mapped in manifest
+        # Returns None if function maps to current endpoint
+        # Raises ValueError if function not found in manifest
+        config = self._manifest.function_registry.get(func_name)
         return self._resolve_resource(config)
 
-    async def _ensure_directory_loaded(self) -> None:
-        """Load directory from mothership with caching (TTL 300s)."""
-        if self._is_directory_fresh():
-            return
+    async def _ensure_manifest_loaded(self) -> None:
+        """Load manifest from mothership if cache expired or not loaded."""
+        async with self._endpoint_registry_lock:
+            now = time.time()
+            cache_age = now - self._endpoint_registry_loaded_at
 
-        async with self._directory_lock:
-            self._directory = await self._directory_client.get_directory()
-            self._directory_loaded_at = time.time()
+            if cache_age > self.cache_ttl:
+                self._endpoint_registry = await self._manifest_client.get_manifest()
+                self._endpoint_registry_loaded_at = now
 ```
 
 **Manifest Format**:
@@ -499,36 +498,36 @@ class ServiceRegistry:
 - `function_registry`: Maps function names to resource config names (null = local)
 - `resources`: Defines resource configurations and their handler details
 
-**Directory Cache**:
+**Manifest Cache**:
 - TTL: 300 seconds (configurable via `DEFAULT_CACHE_TTL`)
 - Thread-safe with `asyncio.Lock()`
-- Graceful fallback if directory unavailable
+- Graceful fallback if manifest service unavailable
 
-#### 3. DirectoryClient
+#### 3. ManifestClient
 
-**Location**: `src/tetra_rp/runtime/directory_client.py`
+**Location**: `src/tetra_rp/runtime/manifest_client.py`
 
-HTTP client for mothership directory service:
+HTTP client for mothership manifest service:
 
 ```python
-class DirectoryClient:
-    """HTTP client for querying mothership directory.
+class ManifestClient:
+    """HTTP client for querying mothership manifest.
 
-    The directory maps resource_config names to their endpoint URLs.
+    The manifest maps resource_config names to their endpoint URLs.
     Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
     """
 
-    async def get_directory(self) -> Dict[str, str]:
-        """Fetch endpoint directory from mothership.
+    async def get_manifest(self) -> Dict[str, str]:
+        """Fetch endpoint manifest from mothership.
 
         Returns:
             Dictionary mapping resource_config_name → endpoint_url.
             Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
 
         Raises:
-            DirectoryUnavailableError: If directory service unavailable after retries.
+            ManifestServiceUnavailableError: If manifest service unavailable after retries.
         """
-        # Queries {mothership_url}/directory endpoint with retry logic
+        # Queries {mothership_url}/manifest endpoint with retry logic
 ```
 
 **Configuration**:
@@ -561,8 +560,8 @@ class ManifestError(FlashRuntimeError):
     """Raised when manifest is invalid, missing, or has unexpected structure."""
     pass
 
-class DirectoryUnavailableError(FlashRuntimeError):
-    """Raised when directory service is unavailable."""
+class ManifestServiceUnavailableError(FlashRuntimeError):
+    """Raised when manifest service is unavailable."""
     pass
 ```
 
@@ -576,8 +575,8 @@ except SerializationError as e:
     logger.error(f"Serialization failed: {e}")
 except ManifestError as e:
     logger.error(f"Manifest configuration error: {e}")
-except DirectoryUnavailableError as e:
-    logger.warning(f"Directory unavailable, using fallback")
+except ManifestServiceUnavailableError as e:
+    logger.warning(f"Manifest service unavailable, using fallback: {e}")
 ```
 
 ### Integration Points
@@ -613,7 +612,7 @@ Functions retrieve remote endpoint info from ResourceManager:
 # ServiceRegistry uses ResourceManager to find endpoint URLs
 resource_manager = ResourceManager()
 endpoint = resource_manager.get_resource_for_function("function_name")
-endpoint_url = endpoint.url  # e.g., "https://api.runpod.io/v1/abc123"
+endpoint_url = endpoint.url  # e.g., "https://api.runpod.io/v2/abc123"
 ```
 
 ### Configuration
@@ -671,8 +670,8 @@ flowchart TD
     B["ProductionWrapper.wrap_function_execution()"]
     C["ServiceRegistry.get_resource_for_function()"]
     D["Manifest Lookup<br/>resource found"]
-    E["Ensure Directory Loaded"]
-    F["DirectoryClient.get_endpoints()"]
+    E["Ensure Manifest Loaded"]
+    F["ManifestClient.get_manifest()"]
     G["Get Remote Endpoint URL"]
     H["Serialize Arguments<br/>cloudpickle → base64"]
     I["HTTP POST to Remote Endpoint"]
@@ -720,11 +719,11 @@ flowchart TD
 
 #### 2. Thread-Safe Async Caching
 
-**Decision**: Use `asyncio.Lock()` for directory cache synchronization
+**Decision**: Use `asyncio.Lock()` for manifest cache synchronization
 
 **Rationale**:
 - Prevents thundering herd on cache expiry
-- Efficient - only one coroutine loads directory
+- Efficient - only one coroutine loads manifest
 - Simple to understand and maintain
 - Follows async/await patterns
 
@@ -740,12 +739,12 @@ flowchart TD
 
 #### 4. Graceful Fallback
 
-**Decision**: Default to local execution if directory unavailable
+**Decision**: Default to local execution if manifest service unavailable
 
 **Rationale**:
 - Maintains application resilience
 - Doesn't fail if mothership unreachable
-- Allows local testing without directory
+- Allows local testing without manifest service
 - Gradual degradation vs catastrophic failure
 
 #### 5. Transparent Routing
@@ -779,15 +778,15 @@ class JsonSerializer:
 2. Update ProductionWrapper to select serializer based on config
 3. Add tests for new format
 
-#### Adding New Directory Backends
+#### Adding New Manifest Backends
 
-To support directories other than mothership:
+To support manifest backends other than mothership:
 
-1. Create client class with `get_directory()` method:
+1. Create client class with `get_manifest()` method:
 ```python
-class CustomDirectoryClient:
-    async def get_directory(self) -> Dict[str, str]:
-        """Fetch directory mapping resource_config_name → endpoint_url."""
+class CustomManifestClient:
+    async def get_manifest(self) -> Dict[str, str]:
+        """Fetch manifest mapping resource_config_name → endpoint_url."""
         # Implementation specific to backend
         return {"resource_name": "https://endpoint.url"}
 ```
@@ -796,11 +795,11 @@ class CustomDirectoryClient:
 ```python
 registry = ServiceRegistry(
     manifest_path=Path("manifest.json"),
-    directory_client=CustomDirectoryClient(...)
+    manifest_client=CustomManifestClient(...)
 )
 ```
 
-3. Update environment variable handling if needed (CustomDirectoryClient can read from env vars)
+3. Update environment variable handling if needed (CustomManifestClient can read from env vars)
 
 #### Adding Routing Policies
 
@@ -830,11 +829,11 @@ class RoutingPolicy:
 **ServiceRegistry Tests** (`tests/unit/runtime/test_service_registry.py`):
 - Manifest loading
 - Resource lookup
-- Directory caching
+- Manifest caching
 - TTL expiry
 - Lock behavior under concurrency
 
-**DirectoryClient Tests** (`tests/unit/runtime/test_directory_client.py`):
+**ManifestClient Tests** (`tests/unit/runtime/test_manifest_client.py`):
 - Successful HTTP requests
 - Error handling
 - Retry logic
@@ -855,7 +854,7 @@ class RoutingPolicy:
 - End-to-end remote execution
 - Function call across endpoints
 - Error handling in real scenarios
-- Directory caching behavior
+- Manifest caching behavior
 - Serialization of complex objects
 
 #### Test Patterns
@@ -904,7 +903,7 @@ logging.basicConfig(level=logging.DEBUG)
 
 # ProductionWrapper logs
 # ServiceRegistry logs
-# DirectoryClient logs
+# ManifestClient logs
 ```
 
 #### Common Debug Scenarios
@@ -914,8 +913,8 @@ logging.basicConfig(level=logging.DEBUG)
 # Check manifest
 print(registry._manifest)
 
-# Check directory
-print(registry._directory)
+# Check cached endpoint URLs
+print(registry._endpoint_registry)
 
 # Check resource lookup
 resource = registry.get_resource_for_function("function_name")
@@ -932,16 +931,16 @@ except Exception as e:
     print(f"Not serializable: {e}")
 ```
 
-**Directory unavailable**:
+**Manifest service unavailable**:
 ```python
 # Check environment variables
 import os
 print(f"FLASH_MOTHERSHIP_URL: {os.getenv('FLASH_MOTHERSHIP_URL')}")
 print(f"RUNPOD_ENDPOINT_ID: {os.getenv('RUNPOD_ENDPOINT_ID')}")
 
-# Check directory client directly
-client = DirectoryClient(mothership_url=...)
-endpoints = await client.get_endpoints()
+# Check manifest client directly
+client = ManifestClient(mothership_url=...)
+endpoints = await client.get_manifest()
 ```
 
 ## Manifest Synchronization with RunPod GraphQL API
@@ -1044,7 +1043,7 @@ prepares for GQL-based architecture with improved caching and error handling.
 ### Design Focus
 
 1. **Transparent Routing**: Functions route automatically without code changes
-2. **Graceful Degradation**: Defaults to local execution if directory unavailable
+2. **Graceful Degradation**: Defaults to local execution if manifest service unavailable
 3. **Type Safety**: Full type hints throughout for IDE support and static analysis
 4. **Thread-Safe Async**: Proper `asyncio.Lock()` usage for concurrent operations
 5. **Clear Error Hierarchy**: Custom exceptions provide actionable error context
@@ -1055,7 +1054,7 @@ Cross-endpoint routing provides:
 
 - **Transparency**: Functions route automatically without manual HTTP calls
 - **Flexibility**: Manifest-based routing enables environment-specific configurations
-- **Resilience**: Graceful fallback to local execution if directory unavailable
+- **Resilience**: Graceful fallback to local execution if manifest service unavailable
 - **Simplicity**: No changes to function code or signatures
 - **Debuggability**: Clear error messages and logging for troubleshooting
 
diff --git a/docs/Load_Balancer_Endpoints.md b/docs/Load_Balancer_Endpoints.md
index ea551884..62db7c7a 100644
--- a/docs/Load_Balancer_Endpoints.md
+++ b/docs/Load_Balancer_Endpoints.md
@@ -35,9 +35,9 @@ Load-balanced endpoints require different provisioning and health check logic th
 
 ### Why This Matters
 
-The Mothership needs to serve as a directory server for child endpoints. This requires:
+The Mothership needs to serve as a manifest server for child endpoints. This requires:
 - HTTP-based service discovery (not queue-based)
-- Ability to expose custom endpoints (`/directory`, `/ping`)
+- Ability to expose custom endpoints (`/manifest`, `/ping`)
 - Health checking to verify children are ready before routing traffic
 
 ## Architecture
@@ -401,6 +401,6 @@ endpoint = LoadBalancerSlsResource(
 ## Next Steps
 
 - **Mothership integration**: Use LoadBalancerSlsResource for Mothership endpoints
-- **Service discovery**: Implement `/directory` endpoint for child endpoint discovery
+- **Service discovery**: Implement `/manifest` endpoint for child endpoint discovery
 - **Auto-provisioning**: Automatic child endpoint deployment on Mothership startup
 - **Cross-endpoint routing**: Route requests between endpoints using service discovery
diff --git a/src/tetra_rp/runtime/config.py b/src/tetra_rp/runtime/config.py
index c0efc11f..974bb5d5 100644
--- a/src/tetra_rp/runtime/config.py
+++ b/src/tetra_rp/runtime/config.py
@@ -5,7 +5,7 @@
 DEFAULT_MAX_RETRIES = 3
 DEFAULT_BACKOFF_BASE = 2
 
-# Directory cache configuration
+# Manifest cache configuration
 DEFAULT_CACHE_TTL = 300  # seconds
 
 # Serialization limits
diff --git a/src/tetra_rp/runtime/exceptions.py b/src/tetra_rp/runtime/exceptions.py
index fec800fd..e072a6ea 100644
--- a/src/tetra_rp/runtime/exceptions.py
+++ b/src/tetra_rp/runtime/exceptions.py
@@ -26,6 +26,6 @@ class ManifestError(FlashRuntimeError):
 
 
 class ManifestServiceUnavailableError(FlashRuntimeError):
-    """Raised when manifest directory service is unavailable."""
+    """Raised when manifest service is unavailable."""
 
     pass
diff --git a/src/tetra_rp/runtime/manifest_client.py b/src/tetra_rp/runtime/manifest_client.py
index bfe69ca8..db845a63 100644
--- a/src/tetra_rp/runtime/manifest_client.py
+++ b/src/tetra_rp/runtime/manifest_client.py
@@ -1,4 +1,4 @@
-"""HTTP client for mothership manifest directory API."""
+"""HTTP client for mothership manifest API."""
 
 import asyncio
 import logging
@@ -17,13 +17,12 @@
 
 
 class ManifestClient:
-    """HTTP client for querying mothership manifest directory service.
+    """HTTP client for querying mothership manifest service.
 
-    Fetches the endpoint registry that maps resource_config names to their
-    deployment URLs. This is the "manifest directory service" - an endpoint
-    registry showing where resources are deployed.
+    Fetches the manifest (endpoint registry) that maps resource_config names to
+    their deployment URLs. The manifest provides service discovery for remote
+    resource endpoints.
 
-    The directory maps resource_config names to their endpoint URLs.
     Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
     """
 
@@ -55,15 +54,15 @@ def __init__(
         self.max_retries = max_retries
         self._client: Optional[httpx.AsyncClient] = None
 
-    async def get_directory(self) -> Dict[str, str]:
-        """Fetch endpoint directory from mothership.
+    async def get_manifest(self) -> Dict[str, str]:
+        """Fetch endpoint manifest from mothership.
 
         Returns:
             Dictionary mapping resource_config_name → endpoint_url.
             Example: {"gpu_config": "https://api.runpod.io/v2/abc123"}
 
         Raises:
-            ManifestServiceUnavailableError: If manifest directory service unavailable after retries.
+            ManifestServiceUnavailableError: If manifest service unavailable after retries.
         """
         if httpx is None:
             raise ImportError(
@@ -76,25 +75,25 @@ async def get_directory(self) -> Dict[str, str]:
             try:
                 client = await self._get_client()
                 response = await client.get(
-                    f"{self.mothership_url}/directory",
+                    f"{self.mothership_url}/manifest",
                     timeout=self.timeout,
                 )
 
                 if response.status_code >= 400:
                     raise ManifestServiceUnavailableError(
-                        f"Directory API returned {response.status_code}: "
+                        f"Manifest API returned {response.status_code}: "
                         f"{response.text[:200]}"
                     )
 
                 data = response.json()
-                if "directory" not in data:
+                if "manifest" not in data:
                     raise ManifestServiceUnavailableError(
-                        "Invalid directory response: missing 'directory' key"
+                        "Invalid manifest response: missing 'manifest' key"
                     )
 
-                directory = data["directory"]
-                logger.debug(f"Directory loaded: {len(directory)} endpoints")
-                return directory
+                manifest = data["manifest"]
+                logger.debug(f"Manifest loaded: {len(manifest)} endpoints")
+                return manifest
 
             except (
                 asyncio.TimeoutError,
@@ -112,7 +111,7 @@ async def get_directory(self) -> Dict[str, str]:
                     continue
 
         raise ManifestServiceUnavailableError(
-            f"Failed to fetch manifest directory after {self.max_retries} attempts: {last_exception}"
+            f"Failed to fetch manifest after {self.max_retries} attempts: {last_exception}"
         )
 
     async def _get_client(self) -> httpx.AsyncClient:
diff --git a/src/tetra_rp/runtime/production_wrapper.py b/src/tetra_rp/runtime/production_wrapper.py
index 65ce815d..22a48f9e 100644
--- a/src/tetra_rp/runtime/production_wrapper.py
+++ b/src/tetra_rp/runtime/production_wrapper.py
@@ -26,7 +26,6 @@ def __init__(self, service_registry: ServiceRegistry):
             service_registry: Service registry for routing decisions.
         """
         self.service_registry = service_registry
-        self._directory_loaded = False
 
     async def wrap_function_execution(
         self,
@@ -57,8 +56,8 @@ async def wrap_function_execution(
         """
         function_name = func.__name__
 
-        # Ensure directory is loaded
-        await self.service_registry._ensure_directory_loaded()
+        # Ensure manifest is loaded
+        await self.service_registry._ensure_manifest_loaded()
 
         # Determine routing
         try:
@@ -116,8 +115,8 @@ async def wrap_class_method_execution(
         Raises:
             Exception: If execution fails.
         """
-        # Ensure directory is loaded
-        await self.service_registry._ensure_directory_loaded()
+        # Ensure manifest is loaded
+        await self.service_registry._ensure_manifest_loaded()
 
         class_name = getattr(request, "class_name", None)
 
diff --git a/src/tetra_rp/runtime/service_registry.py b/src/tetra_rp/runtime/service_registry.py
index ddcbcd84..2a2fb865 100644
--- a/src/tetra_rp/runtime/service_registry.py
+++ b/src/tetra_rp/runtime/service_registry.py
@@ -22,14 +22,14 @@ class ServiceRegistry:
     """Service discovery and routing for cross-endpoint function calls.
 
     Loads manifest to map functions to resource configs, queries mothership
-    directory for endpoint URLs, and determines if function calls are local
+    manifest for endpoint URLs, and determines if function calls are local
     or remote.
     """
 
     def __init__(
         self,
         manifest_path: Optional[Path] = None,
-        directory_client: Optional[ManifestClient] = None,
+        manifest_client: Optional[ManifestClient] = None,
         cache_ttl: int = DEFAULT_CACHE_TTL,
     ):
         """Initialize service registry.
@@ -37,17 +37,17 @@ def __init__(
         Args:
             manifest_path: Path to flash_manifest.json. Defaults to
                 FLASH_MANIFEST_PATH env var or auto-detection.
-            directory_client: Manifest service client for mothership API. If None, creates one
+            manifest_client: Manifest service client for mothership API. If None, creates one
                 from FLASH_MOTHERSHIP_URL env var.
-            cache_ttl: Directory cache lifetime in seconds (default: 300).
+            cache_ttl: Manifest cache lifetime in seconds (default: 300).
 
         Raises:
             FileNotFoundError: If manifest_path doesn't exist.
-            ValueError: If required env vars missing for directory_client.
+            ValueError: If required env vars missing for manifest_client.
         """
         self.cache_ttl = cache_ttl
-        self._directory: Dict[str, str] = {}
-        self._directory_loaded_at = 0.0
+        self._endpoint_registry: Dict[str, str] = {}
+        self._endpoint_registry_loaded_at = 0.0
         self._manifest: Manifest = Manifest(
             version="1.0",
             generated_at="",
@@ -55,21 +55,23 @@ def __init__(
             function_registry={},
             resources={},
         )
-        self._directory_lock = asyncio.Lock()
+        self._endpoint_registry_lock = asyncio.Lock()
 
         # Load manifest
         self._load_manifest(manifest_path)
 
         # Initialize manifest client
-        if directory_client is None:
+        if manifest_client is None:
             mothership_url = os.getenv("FLASH_MOTHERSHIP_URL")
             if mothership_url:
-                directory_client = ManifestClient(mothership_url=mothership_url)
+                manifest_client = ManifestClient(mothership_url=mothership_url)
             else:
-                logger.warning("FLASH_MOTHERSHIP_URL not set, directory unavailable")
-                directory_client = None
+                logger.warning(
+                    "FLASH_MOTHERSHIP_URL not set, manifest service unavailable"
+                )
+                manifest_client = None
 
-        self._directory_client = directory_client
+        self._manifest_client = manifest_client
         self._current_endpoint = os.getenv("RUNPOD_ENDPOINT_ID")
 
     def _load_manifest(self, manifest_path: Optional[Path]) -> None:
@@ -127,30 +129,30 @@ def _load_manifest(self, manifest_path: Optional[Path]) -> None:
             resources={},
         )
 
-    async def _ensure_directory_loaded(self) -> None:
-        """Load directory from mothership if cache expired or not loaded."""
-        async with self._directory_lock:
+    async def _ensure_manifest_loaded(self) -> None:
+        """Load manifest from mothership if cache expired or not loaded."""
+        async with self._endpoint_registry_lock:
             now = time.time()
-            cache_age = now - self._directory_loaded_at
+            cache_age = now - self._endpoint_registry_loaded_at
 
             if cache_age > self.cache_ttl:
-                if self._directory_client is None:
-                    logger.debug("Directory client not available, skipping refresh")
+                if self._manifest_client is None:
+                    logger.debug("Manifest client not available, skipping refresh")
                     return
 
                 try:
-                    self._directory = await self._directory_client.get_directory()
-                    self._directory_loaded_at = now
+                    self._endpoint_registry = await self._manifest_client.get_manifest()
+                    self._endpoint_registry_loaded_at = now
                     logger.debug(
-                        f"Directory loaded: {len(self._directory)} endpoints, "
+                        f"Manifest loaded: {len(self._endpoint_registry)} endpoints, "
                         f"cache TTL {self.cache_ttl}s"
                     )
                 except ManifestServiceUnavailableError as e:
                     logger.warning(
-                        f"Failed to load manifest directory: {e}. "
+                        f"Failed to load manifest: {e}. "
                         f"Cross-endpoint routing unavailable."
                     )
-                    self._directory = {}
+                    self._endpoint_registry = {}
 
     def get_endpoint_for_function(self, function_name: str) -> Optional[str]:
         """Get endpoint URL for a function.
@@ -181,12 +183,12 @@ def get_endpoint_for_function(self, function_name: str) -> Optional[str]:
         if resource_config_name == self._current_endpoint:
             return None
 
-        # Check directory for remote endpoint URL
-        endpoint_url = self._directory.get(resource_config_name)
+        # Check manifest for remote endpoint URL
+        endpoint_url = self._endpoint_registry.get(resource_config_name)
         if not endpoint_url:
             logger.debug(
-                f"Endpoint URL for '{resource_config_name}' not in directory. "
-                f"Directory has: {list(self._directory.keys())}"
+                f"Endpoint URL for '{resource_config_name}' not in manifest. "
+                f"Manifest has: {list(self._endpoint_registry.keys())}"
             )
 
         return endpoint_url
@@ -260,9 +262,9 @@ def get_current_endpoint_id(self) -> Optional[str]:
         """
         return self._current_endpoint
 
-    def refresh_directory(self) -> None:
-        """Force refresh directory from mothership on next access."""
-        self._directory_loaded_at = 0
+    def refresh_manifest(self) -> None:
+        """Force refresh manifest from mothership on next access."""
+        self._endpoint_registry_loaded_at = 0
 
     def get_manifest(self) -> Manifest:
         """Get loaded manifest.
diff --git a/tests/integration/test_cross_endpoint_routing.py b/tests/integration/test_cross_endpoint_routing.py
index 1b67967e..aab993d1 100644
--- a/tests/integration/test_cross_endpoint_routing.py
+++ b/tests/integration/test_cross_endpoint_routing.py
@@ -74,7 +74,7 @@ async def test_local_function_execution(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -88,12 +88,12 @@ async def test_local_function_execution(self, manifest):
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
 
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
 
-                registry._directory_client = mock_dir_client
-                registry._directory = directory
-                registry._directory_loaded_at = float("inf")
+                registry._manifest_client = mock_manifest_client
+                registry._endpoint_registry = endpoint_registry
+                registry._endpoint_registry_loaded_at = float("inf")
 
                 wrapper = ProductionWrapper(registry)
 
@@ -128,7 +128,7 @@ async def test_remote_function_execution_routing(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -141,11 +141,11 @@ async def test_remote_function_execution_routing(self, manifest):
 
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
-                registry._directory_client = mock_dir_client
-                registry._directory = directory
-                registry._directory_loaded_at = float("inf")
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
+                registry._manifest_client = mock_manifest_client
+                registry._endpoint_registry = endpoint_registry
+                registry._endpoint_registry_loaded_at = float("inf")
 
                 # Mock ServerlessResource
                 mock_resource = AsyncMock()
@@ -183,8 +183,8 @@ async def cpu_task(x):
                 manifest_path.unlink()
 
     @pytest.mark.asyncio
-    async def test_directory_loading_on_demand(self, manifest):
-        """Test that directory is loaded on-demand before routing decision."""
+    async def test_manifest_loading_on_demand(self, manifest):
+        """Test that manifest is loaded on-demand before routing decision."""
         with patch.dict(
             "os.environ",
             {
@@ -192,7 +192,7 @@ async def test_directory_loading_on_demand(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -205,11 +205,11 @@ async def test_directory_loading_on_demand(self, manifest):
 
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
-                registry._directory_client = mock_dir_client
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
+                registry._manifest_client = mock_manifest_client
 
-                assert registry._directory == {}
+                assert registry._endpoint_registry == {}
 
                 wrapper = ProductionWrapper(registry)
 
@@ -230,8 +230,11 @@ async def cpu_task(x):
                         original_stub, cpu_task, None, None, True
                     )
 
-                assert len(registry._directory) > 0
-                assert registry._directory["gpu_config"] == "https://gpu.example.com"
+                assert len(registry._endpoint_registry) > 0
+                assert (
+                    registry._endpoint_registry["gpu_config"]
+                    == "https://gpu.example.com"
+                )
 
             finally:
                 manifest_path.unlink()
@@ -246,7 +249,7 @@ async def test_error_handling_in_remote_execution(self, manifest):
                 "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
             },
         ):
-            directory = {
+            endpoint_registry = {
                 "gpu_config": "https://gpu.example.com",
                 "cpu_config": "https://cpu.example.com",
             }
@@ -259,11 +262,11 @@ async def test_error_handling_in_remote_execution(self, manifest):
 
             try:
                 registry = ServiceRegistry(manifest_path=manifest_path)
-                mock_dir_client = AsyncMock(spec=ManifestClient)
-                mock_dir_client.get_directory.return_value = directory
-                registry._directory_client = mock_dir_client
-                registry._directory = directory
-                registry._directory_loaded_at = float("inf")
+                mock_manifest_client = AsyncMock(spec=ManifestClient)
+                mock_manifest_client.get_manifest.return_value = endpoint_registry
+                registry._manifest_client = mock_manifest_client
+                registry._endpoint_registry = endpoint_registry
+                registry._endpoint_registry_loaded_at = float("inf")
 
                 # Mock ServerlessResource that returns error
                 mock_resource = AsyncMock()
diff --git a/tests/unit/runtime/test_manifest_client.py b/tests/unit/runtime/test_manifest_client.py
index 27bb12cc..be48a38c 100644
--- a/tests/unit/runtime/test_manifest_client.py
+++ b/tests/unit/runtime/test_manifest_client.py
@@ -21,7 +21,7 @@ def mock_response(self):
         response = MagicMock()
         response.status_code = 200
         response.json.return_value = {
-            "directory": {
+            "manifest": {
                 "gpu_config": "https://api.runpod.io/v2/gpu123",
                 "cpu_config": "https://api.runpod.io/v2/cpu456",
             },
@@ -53,8 +53,8 @@ def test_init_explicit_over_env(self):
             assert client.mothership_url == "https://explicit.com"
 
     @pytest.mark.asyncio
-    async def test_get_directory_success(self, mock_response):
-        """Test successful directory fetch."""
+    async def test_get_manifest_success(self, mock_response):
+        """Test successful manifest fetch."""
         client = ManifestClient(mothership_url="https://mothership.example.com")
 
         with patch("tetra_rp.runtime.manifest_client.httpx"):
@@ -63,15 +63,15 @@ async def test_get_directory_success(self, mock_response):
             mock_client.get.return_value = mock_response
 
             with patch.object(client, "_get_client", return_value=mock_client):
-                directory = await client.get_directory()
+                manifest = await client.get_manifest()
 
-                assert directory == {
+                assert manifest == {
                     "gpu_config": "https://api.runpod.io/v2/gpu123",
                     "cpu_config": "https://api.runpod.io/v2/cpu456",
                 }
 
     @pytest.mark.asyncio
-    async def test_get_directory_http_error(self):
+    async def test_get_manifest_http_error(self):
         """Test handling of HTTP errors."""
         client = ManifestClient(mothership_url="https://mothership.example.com")
 
@@ -86,10 +86,10 @@ async def test_get_directory_http_error(self):
             mock_get_client.return_value = mock_http_client
 
             with pytest.raises(ManifestServiceUnavailableError, match="500"):
-                await client.get_directory()
+                await client.get_manifest()
 
     @pytest.mark.asyncio
-    async def test_get_directory_timeout(self):
+    async def test_get_manifest_timeout(self):
         """Test handling of request timeout."""
         client = ManifestClient(
             mothership_url="https://mothership.example.com", timeout=0.1
@@ -104,10 +104,10 @@ async def test_get_directory_timeout(self):
             with pytest.raises(
                 ManifestServiceUnavailableError, match="after \\d+ attempts"
             ):
-                await client.get_directory()
+                await client.get_manifest()
 
     @pytest.mark.asyncio
-    async def test_get_directory_retry(self):
+    async def test_get_manifest_retry(self):
         """Test retry logic on transient failure."""
         client = ManifestClient(
             mothership_url="https://mothership.example.com", max_retries=3
@@ -115,7 +115,7 @@ async def test_get_directory_retry(self):
 
         response = MagicMock()
         response.status_code = 200
-        response.json.return_value = {"directory": {"gpu": "https://gpu.example.com"}}
+        response.json.return_value = {"manifest": {"gpu": "https://gpu.example.com"}}
 
         with patch.object(client, "_get_client") as mock_get_client:
             mock_http_client = AsyncMock()
@@ -133,12 +133,12 @@ async def test_get_directory_retry(self):
                 "tetra_rp.runtime.manifest_client.asyncio.sleep",
                 new_callable=AsyncMock,
             ):
-                directory = await client.get_directory()
-                assert directory == {"gpu": "https://gpu.example.com"}
+                manifest = await client.get_manifest()
+                assert manifest == {"gpu": "https://gpu.example.com"}
                 assert mock_http_client.get.call_count == 3
 
     @pytest.mark.asyncio
-    async def test_get_directory_exhaust_retries(self):
+    async def test_get_manifest_exhaust_retries(self):
         """Test failure after exhausting retries."""
         client = ManifestClient(
             mothership_url="https://mothership.example.com", max_retries=2
@@ -157,7 +157,7 @@ async def test_get_directory_exhaust_retries(self):
                 with pytest.raises(
                     ManifestServiceUnavailableError, match="after 2 attempts"
                 ):
-                    await client.get_directory()
+                    await client.get_manifest()
 
     @pytest.mark.asyncio
     async def test_context_manager(self):
diff --git a/tests/unit/runtime/test_production_wrapper.py b/tests/unit/runtime/test_production_wrapper.py
index cc628047..bda5c31d 100644
--- a/tests/unit/runtime/test_production_wrapper.py
+++ b/tests/unit/runtime/test_production_wrapper.py
@@ -19,7 +19,7 @@ class TestProductionWrapper:
     def mock_registry(self):
         """Mock service registry."""
         registry = AsyncMock(spec=ServiceRegistry)
-        registry._ensure_directory_loaded = AsyncMock()
+        registry._ensure_manifest_loaded = AsyncMock()
         return registry
 
     @pytest.fixture
@@ -135,8 +135,8 @@ async def test_wrap_function_remote_error(
             )
 
     @pytest.mark.asyncio
-    async def test_wrap_function_loads_directory(self, wrapper, mock_registry):
-        """Test that directory is loaded before routing decision."""
+    async def test_wrap_function_loads_manifest(self, wrapper, mock_registry):
+        """Test that manifest is loaded before routing decision."""
         mock_registry.get_resource_for_function.return_value = None
 
         async def sample_func():
@@ -147,8 +147,8 @@ async def sample_func():
             original_stub, sample_func, None, None, True
         )
 
-        # Should ensure directory is loaded
-        mock_registry._ensure_directory_loaded.assert_called_once()
+        # Should ensure manifest is loaded
+        mock_registry._ensure_manifest_loaded.assert_called_once()
 
     @pytest.mark.asyncio
     async def test_wrap_class_method_local(self, wrapper, mock_registry, original_stub):
diff --git a/tests/unit/runtime/test_service_registry.py b/tests/unit/runtime/test_service_registry.py
index 8dc88aa1..c7c83aaf 100644
--- a/tests/unit/runtime/test_service_registry.py
+++ b/tests/unit/runtime/test_service_registry.py
@@ -103,19 +103,19 @@ def test_is_local_function_local(self, manifest_file):
             assert registry.is_local_function("inference") is True
 
     def test_is_local_function_remote(self, manifest_file):
-        """Test determining remote function (with directory loaded)."""
+        """Test determining remote function (with manifest loaded)."""
         with patch.dict(os.environ, {"RUNPOD_ENDPOINT_ID": "gpu_config"}):
             mock_client = AsyncMock()
-            mock_client.get_directory.return_value = {
+            mock_client.get_manifest.return_value = {
                 "cpu_config": "https://cpu.example.com"
             }
 
             registry = ServiceRegistry(
-                manifest_path=manifest_file, directory_client=mock_client
+                manifest_path=manifest_file, manifest_client=mock_client
             )
-            # After directory is loaded, CPU tasks should be recognized as remote
+            # After manifest is loaded, CPU tasks should be recognized as remote
             # (but is_local_function doesn't async load, so returns True for now)
-            # This is actually expected behavior - sync method can't load async directory
+            # This is actually expected behavior - sync method can't load async manifest
             assert registry.is_local_function("preprocess") is True
 
     def test_is_local_function_not_in_manifest(self, manifest_file):
@@ -131,11 +131,11 @@ def test_get_endpoint_for_function_local(self, manifest_file):
             endpoint = registry.get_endpoint_for_function("gpu_task")
             assert endpoint is None  # Local returns None
 
-    def test_get_endpoint_for_function_remote_no_directory(self, manifest_file):
-        """Test getting endpoint for remote function without directory."""
+    def test_get_endpoint_for_function_remote_no_manifest(self, manifest_file):
+        """Test getting endpoint for remote function without manifest."""
         with patch.dict(os.environ, {"RUNPOD_ENDPOINT_ID": "gpu_config"}):
             registry = ServiceRegistry(manifest_path=manifest_file)
-            # CPU function is remote, but no directory loaded
+            # CPU function is remote, but no manifest loaded
             endpoint = registry.get_endpoint_for_function("preprocess")
             assert endpoint is None
 
@@ -157,15 +157,17 @@ def test_get_resource_for_function_remote(self, manifest_file):
         """Test getting ServerlessResource for remote function."""
         with patch.dict(os.environ, {"RUNPOD_ENDPOINT_ID": "gpu_config"}):
             mock_client = AsyncMock()
-            mock_client.get_directory.return_value = {
+            mock_client.get_manifest.return_value = {
                 "cpu_config": "https://api.runpod.io/v2/abc123"
             }
 
             registry = ServiceRegistry(
-                manifest_path=manifest_file, directory_client=mock_client
+                manifest_path=manifest_file, manifest_client=mock_client
             )
-            # Manually set directory to simulate loaded state
-            registry._directory = {"cpu_config": "https://api.runpod.io/v2/abc123"}
+            # Manually set endpoint registry to simulate loaded state
+            registry._endpoint_registry = {
+                "cpu_config": "https://api.runpod.io/v2/abc123"
+            }
 
             resource = registry.get_resource_for_function("preprocess")
 
@@ -182,77 +184,77 @@ def test_get_resource_for_function_not_in_manifest(self, manifest_file):
             registry.get_resource_for_function("unknown_function")
 
     @pytest.mark.asyncio
-    async def test_ensure_directory_loaded(self, manifest_file):
-        """Test lazy loading of directory from client."""
-        mock_directory = {
+    async def test_ensure_manifest_loaded(self, manifest_file):
+        """Test lazy loading of manifest from client."""
+        mock_endpoint_registry = {
             "gpu_config": "https://gpu.example.com",
             "cpu_config": "https://cpu.example.com",
         }
 
         mock_client = AsyncMock()
-        mock_client.get_directory.return_value = mock_directory
+        mock_client.get_manifest.return_value = mock_endpoint_registry
 
         registry = ServiceRegistry(
-            manifest_path=manifest_file, directory_client=mock_client, cache_ttl=10
+            manifest_path=manifest_file, manifest_client=mock_client, cache_ttl=10
         )
 
-        # Directory not loaded yet
-        assert registry._directory == {}
+        # Endpoint registry not loaded yet
+        assert registry._endpoint_registry == {}
 
-        # Load directory
-        await registry._ensure_directory_loaded()
+        # Load manifest
+        await registry._ensure_manifest_loaded()
 
-        # Should now have loaded directory
-        assert registry._directory == mock_directory
-        mock_client.get_directory.assert_called_once()
+        # Should now have loaded endpoint registry
+        assert registry._endpoint_registry == mock_endpoint_registry
+        mock_client.get_manifest.assert_called_once()
 
     @pytest.mark.asyncio
-    async def test_ensure_directory_cache_respects_ttl(self, manifest_file):
-        """Test that directory cache respects TTL."""
-        mock_directory = {"gpu_config": "https://gpu.example.com"}
+    async def test_ensure_manifest_cache_respects_ttl(self, manifest_file):
+        """Test that manifest cache respects TTL."""
+        mock_endpoint_registry = {"gpu_config": "https://gpu.example.com"}
 
         mock_client = AsyncMock()
-        mock_client.get_directory.return_value = mock_directory
+        mock_client.get_manifest.return_value = mock_endpoint_registry
 
         registry = ServiceRegistry(
-            manifest_path=manifest_file, directory_client=mock_client, cache_ttl=1
+            manifest_path=manifest_file, manifest_client=mock_client, cache_ttl=1
         )
 
-        # Load directory
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 1
+        # Load manifest
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 1
 
         # Immediate reload should use cache
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 1
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 1
 
         # After TTL, should reload
-        registry._directory_loaded_at = time.time() - 2  # 2 seconds ago
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 2
+        registry._endpoint_registry_loaded_at = time.time() - 2  # 2 seconds ago
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 2
 
     @pytest.mark.asyncio
-    async def test_refresh_directory(self, manifest_file):
-        """Test forcing directory refresh."""
-        mock_directory = {"gpu_config": "https://gpu.example.com"}
+    async def test_refresh_manifest(self, manifest_file):
+        """Test forcing manifest refresh."""
+        mock_endpoint_registry = {"gpu_config": "https://gpu.example.com"}
 
         mock_client = AsyncMock()
-        mock_client.get_directory.return_value = mock_directory
+        mock_client.get_manifest.return_value = mock_endpoint_registry
 
         registry = ServiceRegistry(
-            manifest_path=manifest_file, directory_client=mock_client, cache_ttl=3600
+            manifest_path=manifest_file, manifest_client=mock_client, cache_ttl=3600
         )
 
-        # Load directory
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 1
+        # Load manifest
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 1
 
         # Force refresh
-        registry.refresh_directory()
+        registry.refresh_manifest()
 
         # Next load should fetch again
-        await registry._ensure_directory_loaded()
-        assert mock_client.get_directory.call_count == 2
+        await registry._ensure_manifest_loaded()
+        assert mock_client.get_manifest.call_count == 2
 
     def test_get_manifest(self, manifest_file):
         """Test getting manifest."""
@@ -282,16 +284,16 @@ def test_get_resource_functions_not_found(self, manifest_file):
         functions = registry.get_resource_functions("nonexistent")
         assert functions == []
 
-    def test_init_no_directory_client_no_mothership_url(self, manifest_file):
-        """Test initialization without directory client or URL."""
+    def test_init_no_manifest_client_no_mothership_url(self, manifest_file):
+        """Test initialization without manifest client or URL."""
         with patch.dict(os.environ, {}, clear=True):
             registry = ServiceRegistry(manifest_path=manifest_file)
-            assert registry._directory_client is None
+            assert registry._manifest_client is None
 
     @pytest.mark.asyncio
-    async def test_ensure_directory_loaded_unavailable_client(self, manifest_file):
-        """Test directory loading when client is None."""
-        registry = ServiceRegistry(manifest_path=manifest_file, directory_client=None)
+    async def test_ensure_manifest_loaded_unavailable_client(self, manifest_file):
+        """Test manifest loading when client is None."""
+        registry = ServiceRegistry(manifest_path=manifest_file, manifest_client=None)
         # Should not fail, just log warning
-        await registry._ensure_directory_loaded()
-        assert registry._directory == {}
+        await registry._ensure_manifest_loaded()
+        assert registry._endpoint_registry == {}

From 9bed355b55391d5188b541e935e691134daf9132 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Mon, 12 Jan 2026 13:24:41 -0800
Subject: [PATCH 09/12] fix: align GET /manifest response format to
 Deployment_Architecture spec

Remove {"manifest": ...} wrapper and return manifest directly per spec
(Deployment_Architecture.md:235-273). Update ManifestClient parser to expect
manifest directly without unwrap logic.

Changes:
- Remove wrapper from GET /manifest endpoint (lb_handler.py:215)
- Update ManifestClient to validate manifest has "resources" key directly
- Replace global _manifest_fetcher with @lru_cache(maxsize=1); the cache is
  thread-safe, though concurrent first calls may each construct a fetcher
- Update all test assertions to expect unwrapped manifest format

All 636 tests pass, coverage: 66.48%
---
 src/tetra_rp/runtime/lb_handler.py            | 17 +++---
 src/tetra_rp/runtime/manifest_client.py       | 11 ++--
 tests/integration/test_lb_remote_execution.py | 57 +++++++++++++++++++
 tests/unit/runtime/test_lb_handler.py         | 32 +++++------
 tests/unit/runtime/test_manifest_client.py    | 37 +++++++++---
 5 files changed, 115 insertions(+), 39 deletions(-)

diff --git a/src/tetra_rp/runtime/lb_handler.py b/src/tetra_rp/runtime/lb_handler.py
index 495261d2..3647aa52 100644
--- a/src/tetra_rp/runtime/lb_handler.py
+++ b/src/tetra_rp/runtime/lb_handler.py
@@ -23,7 +23,8 @@
 import inspect
 import logging
 import os
-from typing import Any, Callable, Dict, Optional
+from functools import lru_cache
+from typing import Any, Callable, Dict
 
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
@@ -37,16 +38,14 @@
 
 logger = logging.getLogger(__name__)
 
-# Module-level manifest fetcher (singleton, reused across requests)
-_manifest_fetcher: Optional[ManifestFetcher] = None
-
 
+@lru_cache(maxsize=1)
 def _get_manifest_fetcher() -> ManifestFetcher:
-    """Get or create the manifest fetcher singleton."""
-    global _manifest_fetcher
-    if _manifest_fetcher is None:
-        _manifest_fetcher = ManifestFetcher()
-    return _manifest_fetcher
+    """Get or create the manifest fetcher singleton.
+
+    Uses @lru_cache(maxsize=1) to lazily create and cache a single instance.
+    """
+    return ManifestFetcher()
 
 
 def create_lb_handler(
diff --git a/src/tetra_rp/runtime/manifest_client.py b/src/tetra_rp/runtime/manifest_client.py
index db845a63..eb234cbd 100644
--- a/src/tetra_rp/runtime/manifest_client.py
+++ b/src/tetra_rp/runtime/manifest_client.py
@@ -85,14 +85,15 @@ async def get_manifest(self) -> Dict[str, str]:
                         f"{response.text[:200]}"
                     )
 
-                data = response.json()
-                if "manifest" not in data:
+                manifest = response.json()
+                if not isinstance(manifest, dict) or "resources" not in manifest:
                     raise ManifestServiceUnavailableError(
-                        "Invalid manifest response: missing 'manifest' key"
+                        "Invalid manifest response: missing 'resources'"
                     )
 
-                manifest = data["manifest"]
-                logger.debug(f"Manifest loaded: {len(manifest)} endpoints")
+                logger.debug(
+                    f"Manifest loaded: {len(manifest.get('resources', {}))} resources"
+                )
                 return manifest
 
             except (
diff --git a/tests/integration/test_lb_remote_execution.py b/tests/integration/test_lb_remote_execution.py
index 8c45022f..d1413a93 100644
--- a/tests/integration/test_lb_remote_execution.py
+++ b/tests/integration/test_lb_remote_execution.py
@@ -393,8 +393,10 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
         """Test manifest endpoint with LoadBalancerSlsResource."""
         from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
+        from tetra_rp.runtime.lb_handler import _get_manifest_fetcher
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+        _get_manifest_fetcher.cache_clear()
 
         # Create test manifest for deployed endpoint
         test_manifest = {
@@ -436,12 +438,16 @@ def test_manifest_endpoint_with_deployed_lb_resource(self, monkeypatch):
             assert response.status_code == 200
             assert response.json() == test_manifest
 
+        _get_manifest_fetcher.cache_clear()
+
     def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
         """Test that /manifest endpoint coexists with /ping health check."""
         from unittest.mock import patch, AsyncMock
         from fastapi.testclient import TestClient
+        from tetra_rp.runtime.lb_handler import _get_manifest_fetcher
 
         monkeypatch.setenv("FLASH_IS_MOTHERSHIP", "true")
+        _get_manifest_fetcher.cache_clear()
 
         test_manifest = {
             "version": "1.0",
@@ -465,3 +471,54 @@ def test_manifest_endpoint_coexists_with_ping(self, monkeypatch):
 
             ping_response = client.get("/ping")
             assert ping_response.status_code == 404  # Ping not auto-added by factory
+
+        _get_manifest_fetcher.cache_clear()
+
+
+class TestManifestClientToEndpointIntegration:
+    """Integration tests for ManifestClient calling GET /manifest endpoint."""
+
+    def test_manifest_client_can_parse_response(self):
+        """Test ManifestClient can parse manifest response directly."""
+        import asyncio
+        from unittest.mock import patch, AsyncMock, MagicMock
+        from tetra_rp.runtime.manifest_client import ManifestClient
+
+        # Create a manifest to simulate
+        test_manifest = {
+            "version": "1.0",
+            "generated_at": "2024-01-15T10:30:00Z",
+            "project_name": "test-app",
+            "resources": {
+                "gpu_config": {
+                    "resource_type": "LoadBalancerSlsResource",
+                    "handler_file": "handler_gpu.py",
+                    "endpoint_url": "https://api.runpod.io/v2/gpu123",
+                }
+            },
+            "function_registry": {"process_gpu": "gpu_config"},
+        }
+
+        async def test_client_parsing():
+            # Create a mock httpx client that returns the manifest directly
+            mock_http_client = AsyncMock()
+            mock_response = MagicMock()
+            mock_response.status_code = 200
+            mock_response.json.return_value = test_manifest
+            mock_http_client.get = AsyncMock(return_value=mock_response)
+
+            # Create ManifestClient
+            client = ManifestClient(mothership_url="http://localhost:8000")
+
+            # Mock the _get_client to return our mock
+            with patch.object(client, "_get_client", return_value=mock_http_client):
+                # Call get_manifest - should parse the response
+                result = await client.get_manifest()
+
+                # Verify it successfully parsed the manifest
+                assert result == test_manifest
+                assert "gpu_config" in result["resources"]
+                assert result["function_registry"]["process_gpu"] == "gpu_config"
+
+        # Run the async test
+        asyncio.run(test_client_parsing())
diff --git a/tests/unit/runtime/test_lb_handler.py b/tests/unit/runtime/test_lb_handler.py
index 966a2ab6..1da78a4f 100644
--- a/tests/unit/runtime/test_lb_handler.py
+++ b/tests/unit/runtime/test_lb_handler.py
@@ -5,7 +5,7 @@
 import pytest
 from fastapi.testclient import TestClient
 
-from tetra_rp.runtime.lb_handler import create_lb_handler
+from tetra_rp.runtime.lb_handler import create_lb_handler, _get_manifest_fetcher
 
 
 class TestManifestEndpoint:
@@ -13,12 +13,10 @@ class TestManifestEndpoint:
 
     @pytest.fixture(autouse=True)
     def reset_manifest_fetcher(self):
-        """Reset the global manifest fetcher before each test."""
-        import tetra_rp.runtime.lb_handler as lb_handler_module
-
-        lb_handler_module._manifest_fetcher = None
+        """Reset the manifest fetcher cache before each test."""
+        _get_manifest_fetcher.cache_clear()
         yield
-        lb_handler_module._manifest_fetcher = None
+        _get_manifest_fetcher.cache_clear()
 
     @pytest.fixture
     def sample_manifest(self):
@@ -172,14 +170,14 @@ def test_manifest_endpoint_response_structure(self, sample_manifest, monkeypatch
             client = TestClient(app)
 
             response = client.get("/manifest")
-            data = response.json()
+            manifest = response.json()
 
-            # Verify structure
-            assert "version" in data
-            assert "generated_at" in data
-            assert "project_name" in data
-            assert "resources" in data
-            assert "function_registry" in data
+            # Verify manifest structure
+            assert "version" in manifest
+            assert "generated_at" in manifest
+            assert "project_name" in manifest
+            assert "resources" in manifest
+            assert "function_registry" in manifest
 
     def test_manifest_endpoint_with_empty_resources(self, monkeypatch):
         """Test endpoint behavior when manifest has no resources."""
@@ -339,10 +337,10 @@ def test_manifest_endpoint_with_complex_manifest(self, monkeypatch):
             response = client.get("/manifest")
 
             assert response.status_code == 200
-            data = response.json()
-            assert len(data["resources"]) == 2
-            assert "gpu_config" in data["resources"]
-            assert "cpu_config" in data["resources"]
+            manifest = response.json()
+            assert len(manifest["resources"]) == 2
+            assert "gpu_config" in manifest["resources"]
+            assert "cpu_config" in manifest["resources"]
 
     def test_manifest_endpoint_uses_fetcher_with_caching(
         self, sample_manifest, monkeypatch
diff --git a/tests/unit/runtime/test_manifest_client.py b/tests/unit/runtime/test_manifest_client.py
index be48a38c..0578613e 100644
--- a/tests/unit/runtime/test_manifest_client.py
+++ b/tests/unit/runtime/test_manifest_client.py
@@ -21,11 +21,14 @@ def mock_response(self):
         response = MagicMock()
         response.status_code = 200
         response.json.return_value = {
-            "manifest": {
-                "gpu_config": "https://api.runpod.io/v2/gpu123",
-                "cpu_config": "https://api.runpod.io/v2/cpu456",
+            "version": "1.0",
+            "generated_at": "2025-01-03T12:00:00Z",
+            "project_name": "test-app",
+            "resources": {
+                "gpu_config": {"endpoint_url": "https://api.runpod.io/v2/gpu123"},
+                "cpu_config": {"endpoint_url": "https://api.runpod.io/v2/cpu456"},
             },
-            "updated_at": "2025-01-03T12:00:00Z",
+            "function_registry": {},
         }
         return response
 
@@ -66,8 +69,18 @@ async def test_get_manifest_success(self, mock_response):
                 manifest = await client.get_manifest()
 
                 assert manifest == {
-                    "gpu_config": "https://api.runpod.io/v2/gpu123",
-                    "cpu_config": "https://api.runpod.io/v2/cpu456",
+                    "version": "1.0",
+                    "generated_at": "2025-01-03T12:00:00Z",
+                    "project_name": "test-app",
+                    "resources": {
+                        "gpu_config": {
+                            "endpoint_url": "https://api.runpod.io/v2/gpu123"
+                        },
+                        "cpu_config": {
+                            "endpoint_url": "https://api.runpod.io/v2/cpu456"
+                        },
+                    },
+                    "function_registry": {},
                 }
 
     @pytest.mark.asyncio
@@ -115,7 +128,11 @@ async def test_get_manifest_retry(self):
 
         response = MagicMock()
         response.status_code = 200
-        response.json.return_value = {"manifest": {"gpu": "https://gpu.example.com"}}
+        response.json.return_value = {
+            "version": "1.0",
+            "resources": {"gpu": {"endpoint_url": "https://gpu.example.com"}},
+            "function_registry": {},
+        }
 
         with patch.object(client, "_get_client") as mock_get_client:
             mock_http_client = AsyncMock()
@@ -134,7 +151,11 @@ async def test_get_manifest_retry(self):
                 new_callable=AsyncMock,
             ):
                 manifest = await client.get_manifest()
-                assert manifest == {"gpu": "https://gpu.example.com"}
+                assert manifest == {
+                    "version": "1.0",
+                    "resources": {"gpu": {"endpoint_url": "https://gpu.example.com"}},
+                    "function_registry": {},
+                }
                 assert mock_http_client.get.call_count == 3
 
     @pytest.mark.asyncio

From f34f0469e856ca70365c70bcd9ad5e8e2b2eb80e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Wed, 14 Jan 2026 01:40:41 -0800
Subject: [PATCH 10/12] fix: make function_code and class_code optional for
 Flash deployments

Remove the validation that requires function_code and class_code to be present,
allowing Flash deployment requests where code is pre-deployed in /app.

Changes:
- Remove function_code requirement for execution_type='function'
- Remove class_code requirement for execution_type='class'
- Add documentation explaining optional fields for Flash deployments

This enables a dual-mode runtime where the same handler serves both:
- Live Serverless (with code in request)
- Flash Deployed Apps (without code in request)
---
 src/tetra_rp/protos/remote_execution.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/tetra_rp/protos/remote_execution.py b/src/tetra_rp/protos/remote_execution.py
index ab10bf11..a5c9f39c 100644
--- a/src/tetra_rp/protos/remote_execution.py
+++ b/src/tetra_rp/protos/remote_execution.py
@@ -84,26 +84,24 @@ class FunctionRequest(BaseModel):
 
     @model_validator(mode="after")
     def validate_execution_requirements(self) -> "FunctionRequest":
-        """Validate that required fields are provided based on execution_type"""
+        """Validate that required fields are provided based on execution_type.
+
+        Note: function_code and class_code are optional to support Flash deployments
+        where code is pre-deployed and not sent with the request.
+        """
         if self.execution_type == "function":
             if self.function_name is None:
                 raise ValueError(
                     'function_name is required when execution_type is "function"'
                 )
-            if self.function_code is None:
-                raise ValueError(
-                    'function_code is required when execution_type is "function"'
-                )
+            # function_code is optional - absent for Flash deployments
 
         elif self.execution_type == "class":
             if self.class_name is None:
                 raise ValueError(
                     'class_name is required when execution_type is "class"'
                 )
-            if self.class_code is None:
-                raise ValueError(
-                    'class_code is required when execution_type is "class"'
-                )
+            # class_code is optional - absent for Flash deployments
 
         return self
 

From 041643e715302064895a43c5f0718139a46a0dc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Wed, 14 Jan 2026 15:30:34 -0800
Subject: [PATCH 11/12] fix: update environment variables after merge
 resolution

- Replace FLASH_MOTHERSHIP_URL with FLASH_MOTHERSHIP_ID in integration tests
- Update tests to use FLASH_RESOURCE_NAME (with RUNPOD_ENDPOINT_ID fallback)
- Apply ruff formatting to service_registry.py
- All quality checks passing (706 tests, 63.52% coverage)
---
 src/tetra_rp/runtime/service_registry.py       |  4 +++-
 .../integration/test_cross_endpoint_routing.py | 18 +++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/tetra_rp/runtime/service_registry.py b/src/tetra_rp/runtime/service_registry.py
index e55176ee..870283bd 100644
--- a/src/tetra_rp/runtime/service_registry.py
+++ b/src/tetra_rp/runtime/service_registry.py
@@ -75,7 +75,9 @@ def __init__(
                     logger.warning(f"Failed to initialize manifest client: {e}")
                     manifest_client = None
             else:
-                logger.debug("FLASH_MOTHERSHIP_ID not set, manifest service unavailable")
+                logger.debug(
+                    "FLASH_MOTHERSHIP_ID not set, manifest service unavailable"
+                )
                 manifest_client = None
 
         self._manifest_client = manifest_client
diff --git a/tests/integration/test_cross_endpoint_routing.py b/tests/integration/test_cross_endpoint_routing.py
index aab993d1..c17e56ab 100644
--- a/tests/integration/test_cross_endpoint_routing.py
+++ b/tests/integration/test_cross_endpoint_routing.py
@@ -70,8 +70,8 @@ async def test_local_function_execution(self, manifest):
         with patch.dict(
             "os.environ",
             {
-                "RUNPOD_ENDPOINT_ID": "gpu_config",
-                "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
+                "FLASH_RESOURCE_NAME": "gpu_config",
+                "FLASH_MOTHERSHIP_ID": "mothership-test",
             },
         ):
             endpoint_registry = {
@@ -124,8 +124,8 @@ async def test_remote_function_execution_routing(self, manifest):
         with patch.dict(
             "os.environ",
             {
-                "RUNPOD_ENDPOINT_ID": "gpu_config",
-                "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
+                "FLASH_RESOURCE_NAME": "gpu_config",
+                "FLASH_MOTHERSHIP_ID": "mothership-test",
             },
         ):
             endpoint_registry = {
@@ -188,8 +188,8 @@ async def test_manifest_loading_on_demand(self, manifest):
         with patch.dict(
             "os.environ",
             {
-                "RUNPOD_ENDPOINT_ID": "gpu_config",
-                "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
+                "FLASH_RESOURCE_NAME": "gpu_config",
+                "FLASH_MOTHERSHIP_ID": "mothership-test",
             },
         ):
             endpoint_registry = {
@@ -245,8 +245,8 @@ async def test_error_handling_in_remote_execution(self, manifest):
         with patch.dict(
             "os.environ",
             {
-                "RUNPOD_ENDPOINT_ID": "gpu_config",
-                "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
+                "FLASH_RESOURCE_NAME": "gpu_config",
+                "FLASH_MOTHERSHIP_ID": "mothership-test",
             },
         ):
             endpoint_registry = {
@@ -316,7 +316,7 @@ def test_factory_creates_complete_system(self):
                 "os.environ",
                 {
                     "RUNPOD_ENDPOINT_ID": "resource1",
-                    "FLASH_MOTHERSHIP_URL": "https://mothership.example.com",
+                    "FLASH_MOTHERSHIP_ID": "mothership-test",
                 },
             ):
                 wrapper = create_production_wrapper()

From 436562f2caafbda9943f7824ef161b5c9d21c156 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= <dean.quinanola@runpod.io>
Date: Wed, 14 Jan 2026 15:36:17 -0800
Subject: [PATCH 12/12] docs: align ServiceRegistry signature with
 implementation

- Add missing manifest_client and cache_ttl parameters to __init__ docs
- Document FLASH_RESOURCE_NAME and RUNPOD_ENDPOINT_ID env vars in docstring
- Show _current_endpoint initialization logic
- Match actual code implementation exactly
---
 docs/Cross_Endpoint_Routing.md | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/docs/Cross_Endpoint_Routing.md b/docs/Cross_Endpoint_Routing.md
index 44afa63f..0af5e7b1 100644
--- a/docs/Cross_Endpoint_Routing.md
+++ b/docs/Cross_Endpoint_Routing.md
@@ -453,12 +453,35 @@ Manages service discovery and manifest loading:
 class ServiceRegistry:
     """Service discovery and routing for cross-endpoint function calls."""
 
-    def __init__(self, manifest_path: Optional[Path] = None):
-        """Initialize with manifest and optional manifest client."""
+    def __init__(
+        self,
+        manifest_path: Optional[Path] = None,
+        manifest_client: Optional[ManifestClient] = None,
+        cache_ttl: int = DEFAULT_CACHE_TTL,
+    ):
+        """Initialize service registry.
+
+        Args:
+            manifest_path: Path to flash_manifest.json. Defaults to
+                FLASH_MANIFEST_PATH env var or auto-detection.
+            manifest_client: Manifest service client for mothership API. If None,
+                creates one from FLASH_MOTHERSHIP_ID env var.
+            cache_ttl: Manifest cache lifetime in seconds (default: 300).
+
+        Environment Variables (for local vs remote detection):
+            FLASH_RESOURCE_NAME: Resource config name for this endpoint (child endpoints).
+                Identifies which resource config this endpoint represents in the manifest.
+            RUNPOD_ENDPOINT_ID: Endpoint ID (used as fallback for mothership identification).
+        """
         self._load_manifest(manifest_path)
-        self._manifest_client = ManifestClient(...)
+        self._manifest_client = manifest_client or ManifestClient()
         self._endpoint_registry = {}  # Cached endpoint URLs
         self._endpoint_registry_lock = asyncio.Lock()
+        # Child endpoints use FLASH_RESOURCE_NAME to identify which resource they represent
+        # Mothership doesn't have FLASH_RESOURCE_NAME, so falls back to RUNPOD_ENDPOINT_ID
+        self._current_endpoint = os.getenv("FLASH_RESOURCE_NAME") or os.getenv(
+            "RUNPOD_ENDPOINT_ID"
+        )
 
     def get_resource_for_function(self, func_name: str) -> Optional[ServerlessResource]:
         """Get resource config for function from manifest."""