diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 38b4a42..d8906ad 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -128,10 +128,103 @@ jobs:
echo "Testing CPU handler in Docker environment..."
docker run --rm tetra-rp-cpu:test ./test-handler.sh
+ docker-test-lb:
+ runs-on: ubuntu-latest
+ needs: [test, lint]
+ steps:
+ - name: Clear Space
+ if: github.event_name == 'pull_request'
+ run: |
+ rm -rf /usr/share/dotnet
+ rm -rf /opt/ghc
+ rm -rf "/usr/local/share/boost"
+ rm -rf "$AGENT_TOOLSDIRECTORY"
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v4
+ with:
+ enable-cache: true
+
+ - name: Setup dependencies
+ run: |
+ uv sync
+ git submodule update
+ cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/
+
+ - name: Build Load Balancer Docker image
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ./Dockerfile-lb
+ platforms: linux/amd64
+ push: false
+ tags: tetra-rp-lb:test
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+ load: true
+
+ docker-test-lb-cpu:
+ runs-on: ubuntu-latest
+ needs: [test, lint]
+ steps:
+ - name: Clear Space
+ if: github.event_name == 'pull_request'
+ run: |
+ rm -rf /usr/share/dotnet
+ rm -rf /opt/ghc
+ rm -rf "/usr/local/share/boost"
+ rm -rf "$AGENT_TOOLSDIRECTORY"
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v4
+ with:
+ enable-cache: true
+
+ - name: Setup dependencies
+ run: |
+ uv sync
+ git submodule update
+ cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/
+
+ - name: Build CPU Load Balancer Docker image
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ./Dockerfile-lb-cpu
+ platforms: linux/amd64
+ push: false
+ tags: tetra-rp-lb-cpu:test
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+ load: true
release:
runs-on: ubuntu-latest
- needs: [test, lint, docker-test]
+ needs: [test, lint, docker-test, docker-test-lb, docker-test-lb-cpu]
if: github.ref == 'refs/heads/main'
outputs:
release_created: ${{ steps.release.outputs.release_created }}
@@ -255,6 +348,111 @@ jobs:
cache-from: type=gha
cache-to: type=gha,mode=max
+ docker-main-lb:
+ runs-on: ubuntu-latest
+ needs: [test, lint, docker-test, docker-test-lb, release]
+ if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created
+ steps:
+ - name: Clear Space
+ run: |
+ rm -rf /usr/share/dotnet
+ rm -rf /opt/ghc
+ rm -rf "/usr/local/share/boost"
+ rm -rf "$AGENT_TOOLSDIRECTORY"
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v4
+ with:
+ enable-cache: true
+
+ - name: Setup dependencies
+ run: |
+ uv sync
+ git submodule update
+ cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/
+
+ - name: Build and push Load Balancer Docker image (main)
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ./Dockerfile-lb
+ platforms: linux/amd64
+ push: true
+ tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb:main
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+
+ docker-main-lb-cpu:
+ runs-on: ubuntu-latest
+ needs: [test, lint, docker-test, docker-test-lb-cpu, release]
+ if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created
+ steps:
+ - name: Clear Space
+ run: |
+ rm -rf /usr/share/dotnet
+ rm -rf /opt/ghc
+ rm -rf "/usr/local/share/boost"
+ rm -rf "$AGENT_TOOLSDIRECTORY"
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v4
+ with:
+ enable-cache: true
+
+ - name: Setup dependencies
+ run: |
+ uv sync
+ git submodule update
+ cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/
+
+ - name: Build and push CPU Load Balancer Docker image (main)
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ./Dockerfile-lb-cpu
+ platforms: linux/amd64
+ push: true
+ tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb-cpu:main
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
docker-prod-gpu:
runs-on: ubuntu-latest
@@ -380,4 +578,130 @@ jobs:
tags: ${{ steps.meta-cpu.outputs.tags }}
labels: ${{ steps.meta-cpu.outputs.labels }}
cache-from: type=gha
+ cache-to: type=gha,mode=max
+
+ docker-prod-lb:
+ runs-on: ubuntu-latest
+ needs: [release]
+ if: needs.release.outputs.release_created
+ steps:
+ - name: Clear Space
+ run: |
+ rm -rf /usr/share/dotnet
+ rm -rf /opt/ghc
+ rm -rf "/usr/local/share/boost"
+ rm -rf "$AGENT_TOOLSDIRECTORY"
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Extract Load Balancer metadata
+ id: meta-lb
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb
+ tags: |
+ type=semver,pattern={{version}}
+ type=raw,value=latest,enable={{is_default_branch}}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v4
+ with:
+ enable-cache: true
+
+ - name: Setup dependencies
+ run: |
+ uv sync
+ git submodule update
+ cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/
+
+ - name: Build and push Load Balancer Docker image (prod)
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ./Dockerfile-lb
+ platforms: linux/amd64
+ push: true
+ tags: ${{ steps.meta-lb.outputs.tags }}
+ labels: ${{ steps.meta-lb.outputs.labels }}
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+
+ docker-prod-lb-cpu:
+ runs-on: ubuntu-latest
+ needs: [release]
+ if: needs.release.outputs.release_created
+ steps:
+ - name: Clear Space
+ run: |
+ rm -rf /usr/share/dotnet
+ rm -rf /opt/ghc
+ rm -rf "/usr/local/share/boost"
+ rm -rf "$AGENT_TOOLSDIRECTORY"
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ submodules: recursive
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Extract CPU Load Balancer metadata
+ id: meta-lb-cpu
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb-cpu
+ tags: |
+ type=semver,pattern={{version}}
+ type=raw,value=latest,enable={{is_default_branch}}
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v4
+ with:
+ enable-cache: true
+
+ - name: Setup dependencies
+ run: |
+ uv sync
+ git submodule update
+ cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/
+
+ - name: Build and push CPU Load Balancer Docker image (prod)
+ uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: ./Dockerfile-lb-cpu
+ platforms: linux/amd64
+ push: true
+ tags: ${{ steps.meta-lb-cpu.outputs.tags }}
+ labels: ${{ steps.meta-lb-cpu.outputs.labels }}
+ cache-from: type=gha
cache-to: type=gha,mode=max
\ No newline at end of file
diff --git a/Dockerfile-lb b/Dockerfile-lb
new file mode 100644
index 0000000..40c7999
--- /dev/null
+++ b/Dockerfile-lb
@@ -0,0 +1,39 @@
+FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
+
+WORKDIR /app
+
+# Prevent interactive prompts during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+# Set timezone to avoid tzdata prompts
+ENV TZ=Etc/UTC
+
+# Enable HuggingFace transfer acceleration
+ENV HF_HUB_ENABLE_HF_TRANSFER=1
+# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
+ENV HF_HOME=/hf-cache
+
+# Configure APT cache to persist under /root/.cache for volume sync
+RUN mkdir -p /root/.cache/apt/archives/partial \
+ && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache
+
+# Install system dependencies and uv
+RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
+ build-essential curl ca-certificates nala git \
+ && curl -LsSf https://astral.sh/uv/install.sh | sh \
+ && cp ~/.local/bin/uv /usr/local/bin/uv \
+ && chmod +x /usr/local/bin/uv \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Copy app code and install dependencies
+COPY README.md pyproject.toml uv.lock ./
+COPY src/ ./
+RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
+ && uv pip install --system -r requirements.txt
+
+EXPOSE 80
+
+# CMD will be overridden by RunPod at runtime to run the specific generated handler
+# The handler factory generates handler_{resource_name}.py files
+# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80
+CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]
diff --git a/Dockerfile-lb-cpu b/Dockerfile-lb-cpu
new file mode 100644
index 0000000..af6ea3d
--- /dev/null
+++ b/Dockerfile-lb-cpu
@@ -0,0 +1,34 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Prevent interactive prompts during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+# Set timezone to avoid tzdata prompts
+ENV TZ=Etc/UTC
+
+# Configure APT cache to persist under /root/.cache for volume sync
+RUN mkdir -p /root/.cache/apt/archives/partial \
+ && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache
+
+# Install system dependencies and uv
+RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
+ build-essential curl ca-certificates nala git \
+ && curl -LsSf https://astral.sh/uv/install.sh | sh \
+ && cp ~/.local/bin/uv /usr/local/bin/uv \
+ && chmod +x /usr/local/bin/uv \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+# Copy app code and install dependencies
+COPY README.md pyproject.toml uv.lock ./
+COPY src/ ./
+RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
+ && uv pip install --system -r requirements.txt
+
+EXPOSE 80
+
+# CMD will be overridden by RunPod at runtime to run the specific generated handler
+# The handler factory generates handler_{resource_name}.py files
+# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80
+CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]
diff --git a/Makefile b/Makefile
index e6c24ed..6889111 100644
--- a/Makefile
+++ b/Makefile
@@ -58,6 +58,20 @@ build-cpu: setup # Build CPU-only Docker image (linux/amd64)
-t $(FULL_IMAGE_CPU) \
. --load
+build-lb: setup # Build Load Balancer Docker image (linux/amd64)
+ docker buildx build \
+ --platform linux/amd64 \
+ -f Dockerfile-lb \
+ -t $(IMAGE)-lb:$(TAG) \
+ . --load
+
+build-lb-cpu: setup # Build CPU-only Load Balancer Docker image (linux/amd64)
+ docker buildx build \
+ --platform linux/amd64 \
+ -f Dockerfile-lb-cpu \
+ -t $(IMAGE)-lb-cpu:$(TAG) \
+ . --load
+
# Test commands
test: # Run all tests
uv run pytest tests/ -v
@@ -77,6 +91,9 @@ test-fast: # Run tests with fast-fail mode
test-handler: # Test handler locally with all test_*.json files
cd src && ./test-handler.sh
+test-lb-handler: # Test Load Balancer handler with /execute endpoint
+ cd src && ./test-lb-handler.sh
+
# Smoke Tests (local on Mac OS)
smoketest-macos-build: setup # Build Mac OS Docker image (macos/arm64)
@@ -89,6 +106,26 @@ smoketest-macos-build: setup # Build Mac OS Docker image (macos/arm64)
smoketest-macos: smoketest-macos-build # Test Docker image locally
docker run --rm $(FULL_IMAGE)-mac ./test-handler.sh
+smoketest-macos-lb-build: setup # Build Mac OS Load Balancer Docker image (macos/arm64)
+ docker buildx build \
+ --platform linux/arm64 \
+ -f Dockerfile-lb \
+ -t $(IMAGE)-lb:mac \
+ . --load
+
+smoketest-macos-lb: smoketest-macos-lb-build # Test Load Balancer Docker image locally
+ docker run --rm $(IMAGE)-lb:mac ./test-lb-handler.sh
+
+smoketest-macos-lb-cpu-build: setup # Build Mac OS CPU-only Load Balancer Docker image (macos/arm64)
+ docker buildx build \
+ --platform linux/arm64 \
+ -f Dockerfile-lb-cpu \
+ -t $(IMAGE)-lb-cpu:mac \
+ . --load
+
+smoketest-macos-lb-cpu: smoketest-macos-lb-cpu-build # Test CPU-only Load Balancer Docker image locally
+ docker run --rm $(IMAGE)-lb-cpu:mac ./test-lb-handler.sh
+
# Linting commands
lint: # Check code with ruff
uv run ruff check .
diff --git a/docs/Docker_Build_Pipeline.md b/docs/Docker_Build_Pipeline.md
new file mode 100644
index 0000000..b76aab6
--- /dev/null
+++ b/docs/Docker_Build_Pipeline.md
@@ -0,0 +1,212 @@
+# Docker Build Pipeline
+
+This document covers the CI/CD infrastructure for building and deploying worker-tetra Docker images.
+
+## Overview
+
+The worker-tetra repository maintains four Docker images:
+
+1. **GPU Image** (`runpod/tetra-rp`) - Queue-based serverless worker with CUDA
+2. **CPU Image** (`runpod/tetra-rp-cpu`) - Queue-based serverless worker for CPU-only
+3. **Load Balancer GPU Image** (`runpod/tetra-rp-lb`) - HTTP-based Load Balancer worker with CUDA
+4. **Load Balancer CPU Image** (`runpod/tetra-rp-lb-cpu`) - HTTP-based Load Balancer worker for CPU-only
+
+All images are automatically built and pushed via GitHub Actions workflows.
+
+## Pipeline Stages
+
+### Stage 1: Pull Request Testing
+
+**Trigger**: All pull requests and pushes to main
+
+**Jobs**:
+- `docker-test` - Validates GPU image builds
+- `docker-test-lb` - Validates Load Balancer GPU image builds
+- `docker-test-lb-cpu` - Validates Load Balancer CPU image builds
+
+**What it does**:
+- Builds the Docker images locally
+- Validates Dockerfile syntax
+- Checks that all dependencies resolve
+- Does NOT push to Docker Hub
+
+**Status checks**: Required to pass before merging
+
+### Stage 2: Main Branch Deployment
+
+**Trigger**: Pushes to main branch (when no release is created)
+
+**Jobs**:
+- `docker-main-gpu` - Pushes GPU image
+- `docker-main-cpu` - Pushes CPU image
+- `docker-main-lb` - Pushes Load Balancer GPU image
+- `docker-main-lb-cpu` - Pushes Load Balancer CPU image
+
+**What it does**:
+- Builds the Docker images
+- Pushes to Docker Hub with `:main` tag
+- Useful for testing development versions
+- Skipped if a release was just created
+
+**Image tags**:
+- `runpod/tetra-rp:main`
+- `runpod/tetra-rp-cpu:main`
+- `runpod/tetra-rp-lb:main`
+- `runpod/tetra-rp-lb-cpu:main`
+
+### Stage 3: Release Deployment
+
+**Trigger**: When a release is created via release-please
+
+**Jobs**:
+- `docker-prod-gpu` - Pushes GPU image with version tags
+- `docker-prod-cpu` - Pushes CPU image with version tags
+- `docker-prod-lb` - Pushes Load Balancer GPU image with version tags
+- `docker-prod-lb-cpu` - Pushes Load Balancer CPU image with version tags
+
+**What it does**:
+- Builds the Docker images
+- Pushes with semantic version tags (e.g., `0.7.3`)
+- Also pushes `:latest` tag
+- Automatically triggered by release-please
+
+**Image tags**:
+- `runpod/tetra-rp:0.7.3` and `runpod/tetra-rp:latest`
+- `runpod/tetra-rp-cpu:0.7.3` and `runpod/tetra-rp-cpu:latest`
+- `runpod/tetra-rp-lb:0.7.3` and `runpod/tetra-rp-lb:latest`
+- `runpod/tetra-rp-lb-cpu:0.7.3` and `runpod/tetra-rp-lb-cpu:latest`
+
+## Pipeline Flow
+
+```mermaid
+graph TD
+ A["Pull Request"] --> B["test"]
+ A --> C["lint"]
+ B --> D["docker-test"]
+ C --> D
+ B --> E["docker-test-lb"]
+ C --> E
+ D --> F["release"]
+ E --> F
+ F --> G["docker-main-gpu"]
+ F --> H["docker-main-cpu"]
+ F --> I["docker-main-lb"]
+
+ J["Main Branch Push
with Release"] --> K["release-please"]
+ K --> L["docker-prod-gpu"]
+ K --> M["docker-prod-cpu"]
+ K --> N["docker-prod-lb"]
+
+ style D fill:#FF9800,stroke:#E65100,stroke-width:2px,color:#fff
+ style E fill:#FF9800,stroke:#E65100,stroke-width:2px,color:#fff
+ style G fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff
+ style H fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff
+ style I fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff
+ style L fill:#F44336,stroke:#C62828,stroke-width:2px,color:#fff
+ style M fill:#F44336,stroke:#C62828,stroke-width:2px,color:#fff
+ style N fill:#F44336,stroke:#C62828,stroke-width:2px,color:#fff
+ style F fill:#2196F3,stroke:#1565C0,stroke-width:2px,color:#fff
+```
+
+## Building Locally
+
+### Build GPU Image
+
+```bash
+make build-gpu
+```
+
+Builds `runpod/tetra-rp:local` for testing locally.
+
+### Build CPU Image
+
+```bash
+make build-cpu
+```
+
+Builds `runpod/tetra-rp-cpu:local` for testing locally.
+
+### Build Load Balancer Image
+
+```bash
+make build-lb
+```
+
+Builds `runpod/tetra-rp-lb:local` for testing locally.
+
+### Build CPU Load Balancer Image
+
+```bash
+make build-lb-cpu
+```
+
+Builds `runpod/tetra-rp-lb-cpu:local` for testing locally.
+
+### Build All Images
+
+```bash
+make build
+```
+
+Builds both GPU and CPU images (does not build Load Balancer image).
+
+## Docker Hub Credentials
+
+The pipeline requires Docker Hub credentials configured as GitHub repository secrets:
+
+- **DOCKERHUB_USERNAME** - Docker Hub username
+- **DOCKERHUB_TOKEN** - Docker Hub password or personal access token
+
+These are used by the `docker/login-action` step to authenticate with Docker Hub.
+
+## Release Process
+
+Releases are managed automatically via `release-please`:
+
+1. **Detect conventional commits** on main branch
+2. **Create pull request** with version bump and changelog
+3. **User merges** the release PR
+4. **release-please** creates a Git tag and release
+5. **CI/CD** triggers production image builds and pushes
+
+### Commit Message Convention
+
+Use conventional commit messages to trigger releases:
+
+- `feat:` - Feature (triggers minor version bump)
+- `fix:` - Bug fix (triggers patch version bump)
+- `BREAKING CHANGE:` - Breaking change (triggers major version bump)
+
+Example:
+```bash
+git commit -m "feat(executor): add async function execution support"
+git commit -m "fix(handler): correct dependency installation order"
+```
+
+## Troubleshooting
+
+### Docker Build Fails
+
+1. Check `pyproject.toml` syntax
+2. Verify `Dockerfile*` line endings (LF not CRLF)
+3. Ensure `uv.lock` is up to date: `uv lock --upgrade`
+4. Test locally: `make build-lb` (for Load Balancer)
+
+### Image Not Pushed to Docker Hub
+
+1. Verify Docker Hub credentials are set in GitHub secrets
+2. Check CI/CD job logs for authentication errors
+3. Ensure you're on main branch for main builds
+4. Ensure release tag exists for production builds
+
+### Release PR Not Created
+
+1. Check that commits follow conventional commit format
+2. Ensure `release-please` workflow is enabled
+3. Verify release-please app has access to repository
+4. Check repository settings > Actions permissions
+
+## See Also
+
+- [Load Balancer Docker Infrastructure](./Load_Balancer_Docker_Infrastructure.md)
+- [System Python Runtime Architecture](./System_Python_Runtime_Architecture.md)
diff --git a/docs/Load_Balancer_Docker_Infrastructure.md b/docs/Load_Balancer_Docker_Infrastructure.md
new file mode 100644
index 0000000..19a2675
--- /dev/null
+++ b/docs/Load_Balancer_Docker_Infrastructure.md
@@ -0,0 +1,340 @@
+# Load Balancer Docker Infrastructure
+
+## Overview
+
+This document covers the Docker infrastructure supporting **LiveLoadBalancerSLSResource remote code execution** in worker-tetra. This enables executing serialized Python functions via HTTP `/execute` endpoint during local development and testing.
+
+### Purpose
+
+- **Remote code execution**: Execute arbitrary Python functions via HTTP POST to `/execute` endpoint
+- **Function serialization**: Send function code, arguments, and dependencies as JSON payload
+- **Local development**: LiveLoadBalancer resource provides `/execute` for testing before production deployment
+- **HTTP-based communication**: Direct HTTP requests/responses instead of RunPod job queue
+
+### Integration with tetra-rp
+
+The tetra-rp submodule (branch `deanq/ae-1102-load-balancer-sls-resource`) provides:
+
+- **Resource Classes**: `LoadBalancerSlsResource` (base) and `LiveLoadBalancer` (local dev with /execute)
+- **Stub Implementation**: `LoadBalancerSlsStub` routes HTTP requests to handler functions
+- **Protocol**: `FunctionRequest`/`FunctionResponse` for serialized code execution
+
+Worker-tetra provides the Docker infrastructure to run the handler that processes these requests.
+
+## Architecture
+
+### Complete Flow
+
+```mermaid
+graph TB
+ subgraph SDK["tetra-rp SDK"]
+ A["LiveLoadBalancerSLSResource
Resource Config"]
+ B["LoadBalancerSlsStub
Client-side stub"]
+ C["FunctionRequest/Response
Protocol"]
+ end
+
+ subgraph Infrastructure["worker-tetra"]
+ D["Dockerfile-lb
FastAPI + uvicorn"]
+ E["src/handler.py
Serverless entry point"]
+ F["RemoteExecutor
Executes serialized code"]
+ end
+
+ subgraph CICD["CI/CD Pipeline"]
+ G["docker-test-lb
Validate image builds"]
+ H["docker-main-lb
Push dev image"]
+ I["docker-prod-lb
Push release image"]
+ end
+
+ subgraph Registry["Docker Hub"]
+ J["runpod/tetra-rp-lb:tag
Base Image"]
+ end
+
+ subgraph Runtime["RunPod Deployment"]
+ K["Load Balancer Endpoint
Port 80"]
+ L["FastAPI Handler
POST /execute"]
+ end
+
+ A --> B
+ C --> B
+ B -->|HTTP POST| L
+ D --> E
+ E --> F
+ F -->|executes| L
+ D --> G
+ G --> H
+ G --> I
+ H --> J
+ I --> J
+ J --> K
+ K --> L
+
+ style SDK fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff
+ style Infrastructure fill:#F57C00,stroke:#BF360C,stroke-width:3px,color:#fff
+ style CICD fill:#2E7D32,stroke:#1B5E20,stroke-width:3px,color:#fff
+ style Registry fill:#C62828,stroke:#7F0000,stroke-width:3px,color:#fff
+ style Runtime fill:#00695C,stroke:#003D33,stroke-width:3px,color:#fff
+```
+
+### /execute Endpoint Execution
+
+```mermaid
+sequenceDiagram
+ participant Client as tetra-rp SDK
LoadBalancerSlsStub
+ participant Handler as FastAPI Handler
POST /execute
+ participant Executor as RemoteExecutor
+ participant System as System
+
+ Client->>Handler: HTTP POST /execute
+ Handler->>Handler: Parse FunctionRequest JSON
+ Handler->>Executor: ExecuteFunction(request)
+ Executor->>System: Install dependencies
+ Executor->>System: Execute function code
+ Executor->>System: Capture output/errors
+ Executor->>Handler: Return FunctionResponse
+ Handler->>Client: HTTP 200 with result JSON
+```
+
+## Docker Image (Dockerfile-lb)
+
+### Base Image
+
+```dockerfile
+FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
+```
+
+Uses PyTorch CUDA runtime to support GPU-accelerated functions. This matches the queue-based Dockerfile but with different CMD and dependencies.
+
+### Key Configuration
+
+| Aspect | Queue-Based (Dockerfile) | Load Balancer (Dockerfile-lb) |
+|--------|------------------------|------|
+| Handler Entry | `runpod.serverless.start()` | `uvicorn lb_handler:app` |
+| Communication | RunPod job queue | HTTP requests |
+| Port | Job polling (no port) | Port 80 |
+| Handler | Single `handler.py` | Generated `handler_*.py` |
+| Framework | RunPod SDK | FastAPI + uvicorn |
+| Request Type | Batch (job queue) | Synchronous HTTP |
+
+### Dependencies
+
+Added to `pyproject.toml`:
+
+```toml
+dependencies = [
+ "fastapi>=0.115.0",
+ "uvicorn[standard]>=0.34.0",
+ # ... existing dependencies ...
+]
+```
+
+**FastAPI**: Web framework for HTTP routing and request handling
+**uvicorn**: ASGI server for running FastAPI applications
+
+### Environment Variables
+
+```dockerfile
+ENV HF_HUB_ENABLE_HF_TRANSFER=1 # HuggingFace acceleration
+ENV HF_HOME=/hf-cache # Cache location
+ENV DEBIAN_FRONTEND=noninteractive # No interactive prompts
+ENV TZ=Etc/UTC # Timezone
+```
+
+### CMD Strategy
+
+```dockerfile
+CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]
+```
+
+This is a **placeholder**. RunPod overrides the CMD at runtime to run the specific generated handler:
+
+```bash
+uvicorn handler_api:app --host 0.0.0.0 --port 80
+```
+
+Different endpoints can use different generated handlers (e.g., `handler_api`, `handler_service`).
+
+## Building the Image
+
+### Local Build
+
+Build the Load Balancer image locally for testing:
+
+```bash
+make build-lb
+```
+
+This builds and tags as `runpod/tetra-rp-lb:local` and loads into local Docker daemon.
+
+### CI/CD Build
+
+The image is automatically built and pushed via GitHub Actions:
+
+- **Pull requests**: `docker-test-lb` job validates the build
+- **Main branch**: `docker-main-lb` job pushes `:main` tag
+- **Releases**: `docker-prod-lb` job pushes semantic version and `:latest` tags
+
+See [Docker Build Pipeline](./Docker_Build_Pipeline.md) for details on the CI/CD process.
+
+## Handler Implementation
+
+The handler processes `/execute` requests by:
+
+1. **Receiving** HTTP POST to `/execute` with `FunctionRequest` JSON payload
+2. **Parsing** the request containing:
+ - `function_code`: Source code of function to execute
+ - `function_name`: Name of function to call
+ - `args`: Base64-encoded cloudpickle serialized arguments
+ - `kwargs`: Base64-encoded cloudpickle serialized keyword arguments
+ - `dependencies`: List of Python packages to install
+ - `system_dependencies`: List of system packages to install
+3. **Installing** dependencies as needed
+4. **Executing** the function in an isolated namespace
+5. **Capturing** stdout, stderr, logs, and return value
+6. **Returning** `FunctionResponse` with result or error
+
+### /execute Request Example
+
+```bash
+curl -X POST http://localhost:80/execute \
+ -H "Content-Type: application/json" \
+ -d '{
+ "function_name": "add",
+ "function_code": "def add(a, b):\n return a + b",
+ "args": ["", ""],
+ "dependencies": []
+ }'
+```
+
+### /execute Response Example
+
+```json
+{
+ "success": true,
+ "result": "",
+ "stdout": null,
+ "error": null
+}
+```
+
+## Local Development with LiveLoadBalancer
+
+LiveLoadBalancer enables testing the `/execute` endpoint locally before deployment:
+
+```python
+from tetra_rp import remote, LiveLoadBalancer
+
+config = LiveLoadBalancer(name="test-endpoint")
+
+@remote(config)
+def my_function(x):
+ return x * 2
+```
+
+When using `LiveLoadBalancer`:
+- The `/execute` endpoint is **available** for local testing
+- You can serialize and send function code via HTTP
+- Test the full execution pipeline locally
+
+When deployed as `LoadBalancerSlsResource`:
+- The `/execute` endpoint is **not available** (security)
+- Only user-defined routes are exposed
+- Production endpoints prevent arbitrary code execution
+
+## Testing the Handler
+
+Test the Load Balancer handler's `/execute` endpoint locally:
+
+### Using the Test Script
+
+```bash
+make test-lb-handler
+```
+
+This script:
+1. Starts the FastAPI server on port 80
+2. Validates the `/ping` health endpoint
+3. Runs all `tests/test_*.json` files against `/execute`
+4. Reports pass/fail results
+5. Cleans up the server
+
+### Manual Testing
+
+Start the server:
+
+```bash
+cd src && uvicorn lb_handler:app --port 80
+```
+
+Test the health endpoint:
+
+```bash
+curl http://localhost:80/ping
+```
+
+Test the execute endpoint:
+
+```bash
+curl -X POST http://localhost:80/execute \
+ -H "Content-Type: application/json" \
+ -d '{
+ "function_name": "add",
+ "function_code": "def add(a, b):\n return a + b",
+ "args": ["base64_encoded_5", "base64_encoded_3"],
+ "dependencies": []
+ }'
+```
+
+### Test File Format
+
+Create test files in `src/tests/test_*.json`:
+
+```json
+{
+ "function_name": "my_function",
+ "function_code": "def my_function(x):\n return x * 2",
+ "args": ["base64_encoded_arg"],
+ "kwargs": {},
+ "dependencies": [],
+ "system_dependencies": [],
+ "accelerate_downloads": false
+}
+```
+
+Note: Arguments must be base64-encoded cloudpickle serialized values.
+
+## Troubleshooting
+
+### Docker Build Fails
+
+1. Check `pyproject.toml` for syntax errors
+2. Verify `Dockerfile-lb` line endings (CRLF vs LF)
+3. Ensure uv.lock is up to date: `uv lock --upgrade`
+4. Run `make build-lb` to validate locally
+
+### /execute Endpoint Not Responding
+
+1. Verify endpoint is deployed and in "Ready" state
+2. Check port 80 is exposed: `EXPOSE 80` in Dockerfile-lb
+3. Verify FastAPI handler is started: check container logs
+4. Test with simple request first
+
+### Function Execution Fails
+
+1. Check dependencies are listed in `FunctionRequest`
+2. Verify function code syntax
+3. Check for import errors in dependencies
+4. Review captured `stdout` and `error` fields in response
+
+### Missing Dependencies in Handler
+
+1. Add package to `pyproject.toml`
+2. Update uv.lock: `uv lock`
+3. Rebuild Docker image: `make build-lb`
+
+## See Also
+
+- [Docker Build Pipeline](./Docker_Build_Pipeline.md) - CI/CD infrastructure for building and pushing images
+- [System Python Runtime Architecture](./System_Python_Runtime_Architecture.md) - Details on the execution engine
+- [tetra-rp Load Balancer Runtime Architecture](../tetra-rp/docs/LoadBalancer_Runtime_Architecture.md) - SDK-level architecture
+- [Using @remote with Load Balancer](../tetra-rp/docs/Using_Remote_With_LoadBalancer.md) - SDK usage guide
+- [Load Balancer Endpoints](../tetra-rp/docs/Load_Balancer_Endpoints.md) - Endpoint configuration
diff --git a/pyproject.toml b/pyproject.toml
index 624e941..e112993 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,8 @@ dependencies = [
"runpod",
"hf_transfer>=0.1.0",
"huggingface_hub>=0.32.0",
+ "fastapi>=0.115.0",
+ "uvicorn[standard]>=0.34.0",
]
[dependency-groups]
diff --git a/src/lb_handler.py b/src/lb_handler.py
new file mode 100644
index 0000000..2cdf14b
--- /dev/null
+++ b/src/lb_handler.py
@@ -0,0 +1,69 @@
+"""Load Balancer handler for executing remote functions via HTTP.
+
+This handler provides a FastAPI application for the Load Balancer runtime.
+It supports:
+- /ping: Health check endpoint (required by RunPod Load Balancer)
+- /execute: Remote function execution via HTTP POST
+
+The handler uses worker-tetra's RemoteExecutor for function execution.
+
+For generated handlers from flash build:
+- Those handlers extend this with user-defined routes
+- They use the same execution engine
+"""
+
+from typing import Any, Dict
+
+from fastapi import FastAPI
+
+from logger import setup_logging
+from remote_execution import FunctionRequest, FunctionResponse
+from remote_executor import RemoteExecutor
+
+# Initialize logging configuration
+setup_logging()
+
+# Create FastAPI app
+app = FastAPI(title="Load Balancer Handler")
+
+
+@app.get("/ping")
+async def ping() -> Dict[str, Any]:
+ """Ping endpoint for health checks (RunPod Load Balancer requirement).
+
+ Returns HTTP 200 when healthy. RunPod measures cold start by tracking
+ the transition from 204 (initializing) to 200 (healthy).
+ """
+ return {"status": "healthy"}
+
+
+@app.post("/execute")
+async def execute(request: Dict[str, Any]) -> Dict[str, Any]:
+ """Execute a remote function via HTTP POST request.
+
+ Expects FunctionRequest JSON payload.
+ Supports both direct FunctionRequest format and RunPod wrapped format.
+ """
+ output: FunctionResponse
+
+ try:
+ executor = RemoteExecutor()
+ # Handle both direct FunctionRequest and RunPod wrapped format
+ request_data = request.get("input", request)
+ input_data = FunctionRequest(**request_data)
+ output = await executor.ExecuteFunction(input_data)
+
+ except Exception as error:
+ output = FunctionResponse(
+ success=False,
+ error=f"Error in handler: {str(error)}",
+ )
+
+ return output.model_dump()
+
+
+if __name__ == "__main__":
+ import uvicorn
+
+ # Local development server for testing
+ uvicorn.run(app, host="0.0.0.0", port=80)
diff --git a/src/test-handler.sh b/src/test-handler.sh
index 1304a8d..303e3ec 100755
--- a/src/test-handler.sh
+++ b/src/test-handler.sh
@@ -15,9 +15,16 @@ for test_file in tests/test_*.json; do
test_count=$((test_count + 1))
echo "Testing with $test_file..."
- # Run the test and capture output using system Python directly
- output=$(python handler.py --test_input "$(cat "$test_file")" 2>&1)
- exit_code=$?
+ # Run the test and capture output
+ # In Docker: python is available and has system-installed packages
+ # Locally: use uv run to manage dependencies
+ if command -v python &> /dev/null; then
+ output=$(python handler.py --test_input "$(cat "$test_file")" 2>&1)
+ exit_code=$?
+ else
+ output=$(uv run python3 handler.py --test_input "$(cat "$test_file")" 2>&1)
+ exit_code=$?
+ fi
if [ $exit_code -eq 0 ]; then
echo "✓ $test_file: PASSED"
diff --git a/src/test-lb-handler.sh b/src/test-lb-handler.sh
new file mode 100755
index 0000000..775e102
--- /dev/null
+++ b/src/test-lb-handler.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+set -e
+
+echo "Testing Load Balancer handler with /execute endpoint..."
+
+# Configuration
+PORT="${PORT:-80}"
+HOST="localhost"
+TEST_TIMEOUT=30
+SERVER_PID=""
+
+# Cleanup function
+cleanup() {
+ if [ -n "$SERVER_PID" ]; then
+ echo "Stopping FastAPI server (PID: $SERVER_PID)..."
+ kill $SERVER_PID 2>/dev/null || true
+ wait $SERVER_PID 2>/dev/null || true
+ fi
+}
+
+# Set up trap to ensure cleanup on exit
+trap cleanup EXIT
+
+# Start FastAPI server in background
+echo "Starting FastAPI server on port $PORT..."
+PYTHONPATH=. uv run python3 -m uvicorn lb_handler:app --host $HOST --port $PORT --log-level error > /tmp/lb_handler.log 2>&1 &
+SERVER_PID=$!
+
+# Wait for server to be ready
+echo "Waiting for server to be ready..."
+attempt=0
+while [ $attempt -lt $TEST_TIMEOUT ]; do
+    if curl -s -f "http://$HOST:$PORT/ping" > /dev/null 2>&1; then
+ echo "✓ Server is ready"
+ break
+ fi
+ attempt=$((attempt + 1))
+ sleep 1
+
+ if [ $attempt -eq $TEST_TIMEOUT ]; then
+ echo "✗ Server failed to start after ${TEST_TIMEOUT}s"
+ echo "Server logs:"
+ cat /tmp/lb_handler.log
+ exit 1
+ fi
+done
+
+# Test /ping endpoint
+echo ""
+echo "Testing /ping endpoint..."
+health_response=$(curl -s "http://$HOST:$PORT/ping")
+echo "Response: $health_response"
+
+# Run /execute tests
+echo ""
+echo "Testing /execute endpoint with test files..."
+
+failed_tests=""
+test_count=0
+passed_count=0
+
+for test_file in tests/test_*.json; do
+ if [ ! -f "$test_file" ]; then
+ echo "No test_*.json files found"
+ exit 1
+ fi
+
+ test_count=$((test_count + 1))
+ echo ""
+ echo "Testing with $test_file..."
+
+ # Send request to /execute endpoint
+ response=$(curl -s -X POST "http://$HOST:$PORT/execute" \
+ -H "Content-Type: application/json" \
+ -d "$(cat "$test_file")")
+
+ # Check if response contains success or error
+ if echo "$response" | grep -q '"success":true'; then
+ echo "✓ $test_file: PASSED"
+ echo " Result: $(echo "$response" | python3 -m json.tool 2>/dev/null | head -5)"
+ passed_count=$((passed_count + 1))
+ elif echo "$response" | grep -q '"success":false'; then
+ echo "✗ $test_file: FAILED"
+ echo " Error: $(echo "$response" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d.get('error', 'Unknown error'))" 2>/dev/null || echo 'Unknown error')"
+ failed_tests="$failed_tests $test_file"
+ else
+ echo "✗ $test_file: FAILED (Invalid response)"
+ echo " Response: $(echo "$response" | head -c 100)"
+ failed_tests="$failed_tests $test_file"
+ fi
+done
+
+echo ""
+echo "============================================"
+echo "Test Results: $passed_count/$test_count tests passed"
+echo "============================================"
+
+if [ -z "$failed_tests" ]; then
+ echo "✓ All tests passed!"
+ exit 0
+else
+ echo "✗ Failed tests:$failed_tests"
+ exit 1
+fi
diff --git a/src/tests/test_lb_simple_function.json b/src/tests/test_lb_simple_function.json
new file mode 100644
index 0000000..3b6fccb
--- /dev/null
+++ b/src/tests/test_lb_simple_function.json
@@ -0,0 +1,9 @@
+{
+ "input": {
+ "function_name": "simple_add",
+ "function_code": "def simple_add(a, b):\n \"\"\"Add two numbers.\"\"\"\n return a + b",
+ "args": ["gAVLBS4=", "gAVLAy4="],
+ "kwargs": {},
+ "dependencies": []
+ }
+}
diff --git a/tests/integration/test_dependency_management.py b/tests/integration/test_dependency_management.py
index b77b960..ab410ee 100644
--- a/tests/integration/test_dependency_management.py
+++ b/tests/integration/test_dependency_management.py
@@ -18,8 +18,9 @@ def test_install_python_dependencies_integration(self):
success=True, stdout="Successfully installed package-1.0.0"
)
+ # Use a package that's unlikely to be installed
result = executor.dependency_installer.install_dependencies(
- ["requests", "numpy"]
+ ["nonexistent-test-package-integration-12345"]
)
assert result.success is True
diff --git a/tests/unit/test_dependency_installer.py b/tests/unit/test_dependency_installer.py
index e30a81c..5f132d4 100644
--- a/tests/unit/test_dependency_installer.py
+++ b/tests/unit/test_dependency_installer.py
@@ -213,7 +213,8 @@ def test_install_dependencies_success(self, mock_subprocess):
success=True, stdout="Successfully installed"
)
- result = self.installer.install_dependencies(["requests", "numpy"])
+ # Use a package that's unlikely to be installed
+ result = self.installer.install_dependencies(["nonexistent-test-package-12345"])
assert result.success is True
assert "Successfully installed" in result.stdout
@@ -246,8 +247,9 @@ def test_install_dependencies_with_acceleration_enabled(self, mock_subprocess):
success=True, stdout="Successfully installed with UV"
)
+ # Use a package that's unlikely to be installed
result = self.installer.install_dependencies(
- ["requests", "numpy"], accelerate_downloads=True
+ ["nonexistent-test-package-uv-12345"], accelerate_downloads=True
)
assert result.success is True
@@ -262,8 +264,9 @@ def test_install_dependencies_with_acceleration_disabled(self, mock_subprocess):
success=True, stdout="Successfully installed with pip"
)
+ # Use a package that's unlikely to be installed
result = self.installer.install_dependencies(
- ["requests", "numpy"], accelerate_downloads=False
+ ["nonexistent-test-package-pip-12345"], accelerate_downloads=False
)
assert result.success is True
diff --git a/tetra-rp b/tetra-rp
index d64b7f2..b57748f 160000
--- a/tetra-rp
+++ b/tetra-rp
@@ -1 +1 @@
-Subproject commit d64b7f22e1d320e775a7a4561b04e4bd26b0470f
+Subproject commit b57748fce829b85e09d4e917dd03778dfe0ebc44
diff --git a/uv.lock b/uv.lock
index 4b6e3c0..509a961 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2614,15 +2614,17 @@ wheels = [
[[package]]
name = "worker-tetra"
-version = "0.7.1"
+version = "0.7.2"
source = { virtual = "." }
dependencies = [
{ name = "cloudpickle" },
+ { name = "fastapi" },
{ name = "hf-transfer" },
{ name = "huggingface-hub" },
{ name = "pydantic" },
{ name = "requests" },
{ name = "runpod" },
+ { name = "uvicorn", extra = ["standard"] },
]
[package.dev-dependencies]
@@ -2640,11 +2642,13 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "cloudpickle", specifier = ">=3.1.1" },
+ { name = "fastapi", specifier = ">=0.115.0" },
{ name = "hf-transfer", specifier = ">=0.1.0" },
{ name = "huggingface-hub", specifier = ">=0.32.0" },
{ name = "pydantic", specifier = ">=2.11.4" },
{ name = "requests", specifier = ">=2.25.0" },
{ name = "runpod" },
+ { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
]
[package.metadata.requires-dev]