diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 38b4a42..d8906ad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -128,10 +128,103 @@ jobs: echo "Testing CPU handler in Docker environment..." docker run --rm tetra-rp-cpu:test ./test-handler.sh + docker-test-lb: + runs-on: ubuntu-latest + needs: [test, lint] + steps: + - name: Clear Space + if: github.event_name == 'pull_request' + run: | + rm -rf /usr/share/dotnet + rm -rf /opt/ghc + rm -rf "/usr/local/share/boost" + rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Setup dependencies + run: | + uv sync + git submodule update + cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/ + + - name: Build Load Balancer Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ./Dockerfile-lb + platforms: linux/amd64 + push: false + tags: tetra-rp-lb:test + cache-from: type=gha + cache-to: type=gha,mode=max + load: true + + docker-test-lb-cpu: + runs-on: ubuntu-latest + needs: [test, lint] + steps: + - name: Clear Space + if: github.event_name == 'pull_request' + run: | + rm -rf /usr/share/dotnet + rm -rf /opt/ghc + rm -rf "/usr/local/share/boost" + rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Setup dependencies + run: | + uv sync + git submodule update + cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/ + + - name: Build CPU Load Balancer Docker image + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile-lb-cpu + platforms: linux/amd64 + push: false + tags: tetra-rp-lb-cpu:test + cache-from: type=gha + cache-to: type=gha,mode=max + load: true release: runs-on: ubuntu-latest - needs: [test, lint, docker-test] + needs: [test, lint, docker-test, docker-test-lb, docker-test-lb-cpu] if: github.ref == 'refs/heads/main' outputs: release_created: ${{ steps.release.outputs.release_created }} @@ -255,6 +348,111 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max + docker-main-lb: + runs-on: ubuntu-latest + needs: [test, lint, docker-test, docker-test-lb, release] + if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created + steps: + - name: Clear Space + run: | + rm -rf /usr/share/dotnet + rm -rf /opt/ghc + rm -rf "/usr/local/share/boost" + rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + 
+ - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Setup dependencies + run: | + uv sync + git submodule update + cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/ + + - name: Build and push Load Balancer Docker image (main) + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile-lb + platforms: linux/amd64 + push: true + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb:main + cache-from: type=gha + cache-to: type=gha,mode=max + + docker-main-lb-cpu: + runs-on: ubuntu-latest + needs: [test, lint, docker-test, docker-test-lb-cpu, release] + if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created + steps: + - name: Clear Space + run: | + rm -rf /usr/share/dotnet + rm -rf /opt/ghc + rm -rf "/usr/local/share/boost" + rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Setup dependencies + run: | + uv sync + git submodule update + cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/ + + - name: Build and push CPU Load Balancer Docker image (main) + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ./Dockerfile-lb-cpu + platforms: linux/amd64 + push: true + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb-cpu:main + cache-from: type=gha + cache-to: type=gha,mode=max docker-prod-gpu: runs-on: ubuntu-latest @@ -380,4 +578,130 @@ jobs: tags: ${{ steps.meta-cpu.outputs.tags }} labels: ${{ steps.meta-cpu.outputs.labels }} cache-from: type=gha + cache-to: type=gha,mode=max + + docker-prod-lb: + runs-on: ubuntu-latest + needs: [release] + if: needs.release.outputs.release_created + steps: + - name: Clear Space + run: | + rm -rf /usr/share/dotnet + rm -rf /opt/ghc + rm -rf "/usr/local/share/boost" + rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Extract Load Balancer metadata + id: meta-lb + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb + tags: | + type=semver,pattern={{version}} + type=raw,value=latest,enable={{is_default_branch}} + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Setup dependencies + run: | + uv sync + git submodule update + cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/ + + - name: Build and push Load Balancer Docker image (prod) + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ./Dockerfile-lb + platforms: linux/amd64 + push: true + tags: ${{ steps.meta-lb.outputs.tags }} + labels: ${{ steps.meta-lb.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + docker-prod-lb-cpu: + runs-on: ubuntu-latest + needs: [release] + if: needs.release.outputs.release_created + steps: + - name: Clear Space + run: | + rm -rf /usr/share/dotnet + rm -rf /opt/ghc + rm -rf "/usr/local/share/boost" + rm -rf "$AGENT_TOOLSDIRECTORY" + + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Extract CPU Load Balancer metadata + id: meta-lb-cpu + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-lb-cpu + tags: | + type=semver,pattern={{version}} + type=raw,value=latest,enable={{is_default_branch}} + + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Setup dependencies + run: | + uv sync + git submodule update + cp tetra-rp/src/tetra_rp/protos/remote_execution.py src/ + + - name: Build and push CPU Load Balancer Docker image (prod) + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ./Dockerfile-lb-cpu + platforms: linux/amd64 + push: true + tags: ${{ steps.meta-lb-cpu.outputs.tags }} + labels: ${{ steps.meta-lb-cpu.outputs.labels }} + cache-from: type=gha cache-to: type=gha,mode=max \ No newline at end of file diff --git a/Dockerfile-lb b/Dockerfile-lb new file mode 100644 index 0000000..40c7999 --- /dev/null +++ b/Dockerfile-lb @@ -0,0 +1,39 @@ +FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime + +WORKDIR /app + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive +# Set timezone to avoid tzdata prompts +ENV TZ=Etc/UTC + +# Enable HuggingFace transfer acceleration +ENV HF_HUB_ENABLE_HF_TRANSFER=1 +# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync +ENV HF_HOME=/hf-cache + +# Configure APT cache to persist under /root/.cache for volume sync +RUN mkdir -p /root/.cache/apt/archives/partial \ + && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache + +# Install system dependencies and uv +RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ + build-essential curl ca-certificates nala git \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && cp ~/.local/bin/uv /usr/local/bin/uv \ + && chmod +x /usr/local/bin/uv \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy app code and install dependencies +COPY README.md pyproject.toml uv.lock ./ +COPY src/ ./ +RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \ + && uv pip install --system -r requirements.txt + +EXPOSE 80 + +# CMD will be overridden by RunPod at runtime to run the specific generated handler +# The handler factory generates handler_{resource_name}.py files +# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80 +CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"] diff --git a/Dockerfile-lb-cpu b/Dockerfile-lb-cpu new file 
mode 100644 index 0000000..af6ea3d --- /dev/null +++ b/Dockerfile-lb-cpu @@ -0,0 +1,34 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive +# Set timezone to avoid tzdata prompts +ENV TZ=Etc/UTC + +# Configure APT cache to persist under /root/.cache for volume sync +RUN mkdir -p /root/.cache/apt/archives/partial \ + && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache + +# Install system dependencies and uv +RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ + build-essential curl ca-certificates nala git \ + && curl -LsSf https://astral.sh/uv/install.sh | sh \ + && cp ~/.local/bin/uv /usr/local/bin/uv \ + && chmod +x /usr/local/bin/uv \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy app code and install dependencies +COPY README.md pyproject.toml uv.lock ./ +COPY src/ ./ +RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \ + && uv pip install --system -r requirements.txt + +EXPOSE 80 + +# CMD will be overridden by RunPod at runtime to run the specific generated handler +# The handler factory generates handler_{resource_name}.py files +# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80 +CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"] diff --git a/Makefile b/Makefile index e6c24ed..6889111 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,20 @@ build-cpu: setup # Build CPU-only Docker image (linux/amd64) -t $(FULL_IMAGE_CPU) \ . --load +build-lb: setup # Build Load Balancer Docker image (linux/amd64) + docker buildx build \ + --platform linux/amd64 \ + -f Dockerfile-lb \ + -t $(IMAGE)-lb:$(TAG) \ + . 
--load + +build-lb-cpu: setup # Build CPU-only Load Balancer Docker image (linux/amd64) + docker buildx build \ + --platform linux/amd64 \ + -f Dockerfile-lb-cpu \ + -t $(IMAGE)-lb-cpu:$(TAG) \ + . --load + # Test commands test: # Run all tests uv run pytest tests/ -v @@ -77,6 +91,9 @@ test-fast: # Run tests with fast-fail mode test-handler: # Test handler locally with all test_*.json files cd src && ./test-handler.sh +test-lb-handler: # Test Load Balancer handler with /execute endpoint + cd src && ./test-lb-handler.sh + # Smoke Tests (local on Mac OS) smoketest-macos-build: setup # Build Mac OS Docker image (macos/arm64) @@ -89,6 +106,26 @@ smoketest-macos-build: setup # Build Mac OS Docker image (macos/arm64) smoketest-macos: smoketest-macos-build # Test Docker image locally docker run --rm $(FULL_IMAGE)-mac ./test-handler.sh +smoketest-macos-lb-build: setup # Build Mac OS Load Balancer Docker image (macos/arm64) + docker buildx build \ + --platform linux/arm64 \ + -f Dockerfile-lb \ + -t $(IMAGE)-lb:mac \ + . --load + +smoketest-macos-lb: smoketest-macos-lb-build # Test Load Balancer Docker image locally + docker run --rm $(IMAGE)-lb:mac ./test-lb-handler.sh + +smoketest-macos-lb-cpu-build: setup # Build Mac OS CPU-only Load Balancer Docker image (macos/arm64) + docker buildx build \ + --platform linux/arm64 \ + -f Dockerfile-lb-cpu \ + -t $(IMAGE)-lb-cpu:mac \ + . --load + +smoketest-macos-lb-cpu: smoketest-macos-lb-cpu-build # Test CPU-only Load Balancer Docker image locally + docker run --rm $(IMAGE)-lb-cpu:mac ./test-lb-handler.sh + # Linting commands lint: # Check code with ruff uv run ruff check . diff --git a/docs/Docker_Build_Pipeline.md b/docs/Docker_Build_Pipeline.md new file mode 100644 index 0000000..b76aab6 --- /dev/null +++ b/docs/Docker_Build_Pipeline.md @@ -0,0 +1,212 @@ +# Docker Build Pipeline + +This document covers the CI/CD infrastructure for building and deploying worker-tetra Docker images. 
+ +## Overview + +The worker-tetra repository maintains four Docker images: + +1. **GPU Image** (`runpod/tetra-rp`) - Queue-based serverless worker with CUDA +2. **CPU Image** (`runpod/tetra-rp-cpu`) - Queue-based serverless worker for CPU-only +3. **Load Balancer GPU Image** (`runpod/tetra-rp-lb`) - HTTP-based Load Balancer worker with CUDA +4. **Load Balancer CPU Image** (`runpod/tetra-rp-lb-cpu`) - HTTP-based Load Balancer worker for CPU-only + +All images are automatically built and pushed via GitHub Actions workflows. + +## Pipeline Stages + +### Stage 1: Pull Request Testing + +**Trigger**: All pull requests and pushes to main + +**Jobs**: +- `docker-test` - Validates GPU image builds +- `docker-test-lb` - Validates Load Balancer GPU image builds +- `docker-test-lb-cpu` - Validates Load Balancer CPU image builds + +**What it does**: +- Builds the Docker images locally +- Validates Dockerfile syntax +- Checks that all dependencies resolve +- Does NOT push to Docker Hub + +**Status checks**: Required to pass before merging + +### Stage 2: Main Branch Deployment + +**Trigger**: Pushes to main branch (when no release is created) + +**Jobs**: +- `docker-main-gpu` - Pushes GPU image +- `docker-main-cpu` - Pushes CPU image +- `docker-main-lb` - Pushes Load Balancer GPU image +- `docker-main-lb-cpu` - Pushes Load Balancer CPU image + +**What it does**: +- Builds the Docker images +- Pushes to Docker Hub with `:main` tag +- Useful for testing development versions +- Skipped if a release was just created + +**Image tags**: +- `runpod/tetra-rp:main` +- `runpod/tetra-rp-cpu:main` +- `runpod/tetra-rp-lb:main` +- `runpod/tetra-rp-lb-cpu:main` + +### Stage 3: Release Deployment + +**Trigger**: When a release is created via release-please + +**Jobs**: +- `docker-prod-gpu` - Pushes GPU image with version tags +- `docker-prod-cpu` - Pushes CPU image with version tags +- `docker-prod-lb` - Pushes Load Balancer GPU image with version tags +- `docker-prod-lb-cpu` - Pushes Load 
Balancer CPU image with version tags + +**What it does**: +- Builds the Docker images +- Pushes with semantic version tags (e.g., `0.7.3`) +- Also pushes `:latest` tag +- Automatically triggered by release-please + +**Image tags**: +- `runpod/tetra-rp:0.7.3` and `runpod/tetra-rp:latest` +- `runpod/tetra-rp-cpu:0.7.3` and `runpod/tetra-rp-cpu:latest` +- `runpod/tetra-rp-lb:0.7.3` and `runpod/tetra-rp-lb:latest` +- `runpod/tetra-rp-lb-cpu:0.7.3` and `runpod/tetra-rp-lb-cpu:latest` + +## Pipeline Flow + +```mermaid +graph TD + A["Pull Request"] --> B["test"] + A --> C["lint"] + B --> D["docker-test"] + C --> D + B --> E["docker-test-lb"] + C --> E + D --> F["release"] + E --> F + F --> G["docker-main-gpu"] + F --> H["docker-main-cpu"] + F --> I["docker-main-lb"] + + J["Main Branch Push
with Release"] --> K["release-please"] + K --> L["docker-prod-gpu"] + K --> M["docker-prod-cpu"] + K --> N["docker-prod-lb"] + + style D fill:#FF9800,stroke:#E65100,stroke-width:2px,color:#fff + style E fill:#FF9800,stroke:#E65100,stroke-width:2px,color:#fff + style G fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff + style H fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff + style I fill:#4CAF50,stroke:#2E7D32,stroke-width:2px,color:#fff + style L fill:#F44336,stroke:#C62828,stroke-width:2px,color:#fff + style M fill:#F44336,stroke:#C62828,stroke-width:2px,color:#fff + style N fill:#F44336,stroke:#C62828,stroke-width:2px,color:#fff + style F fill:#2196F3,stroke:#1565C0,stroke-width:2px,color:#fff +``` + +## Building Locally + +### Build GPU Image + +```bash +make build-gpu +``` + +Builds `runpod/tetra-rp:local` for testing locally. + +### Build CPU Image + +```bash +make build-cpu +``` + +Builds `runpod/tetra-rp-cpu:local` for testing locally. + +### Build Load Balancer Image + +```bash +make build-lb +``` + +Builds `runpod/tetra-rp-lb:local` for testing locally. + +### Build CPU Load Balancer Image + +```bash +make build-lb-cpu +``` + +Builds `runpod/tetra-rp-lb-cpu:local` for testing locally. + +### Build All Images + +```bash +make build +``` + +Builds both GPU and CPU images (does not build Load Balancer image). + +## Docker Hub Credentials + +The pipeline requires Docker Hub credentials configured as GitHub repository secrets: + +- **DOCKERHUB_USERNAME** - Docker Hub username +- **DOCKERHUB_TOKEN** - Docker Hub password or personal access token + +These are used by the `docker/login-action` step to authenticate with Docker Hub. + +## Release Process + +Releases are managed automatically via `release-please`: + +1. **Detect conventional commits** on main branch +2. **Create pull request** with version bump and changelog +3. **User merges** the release PR +4. **release-please** creates a Git tag and release +5. 
**CI/CD** triggers production image builds and pushes + +### Commit Message Convention + +Use conventional commit messages to trigger releases: + +- `feat:` - Feature (triggers minor version bump) +- `fix:` - Bug fix (triggers patch version bump) +- `BREAKING CHANGE:` - Breaking change (triggers major version bump) + +Example: +```bash +git commit -m "feat(executor): add async function execution support" +git commit -m "fix(handler): correct dependency installation order" +``` + +## Troubleshooting + +### Docker Build Fails + +1. Check `pyproject.toml` syntax +2. Verify `Dockerfile*` line endings (LF not CRLF) +3. Ensure `uv.lock` is up to date: `uv lock --upgrade` +4. Test locally: `make build-lb` (for Load Balancer) + +### Image Not Pushed to Docker Hub + +1. Verify Docker Hub credentials are set in GitHub secrets +2. Check CI/CD job logs for authentication errors +3. Ensure you're on main branch for main builds +4. Ensure release tag exists for production builds + +### Release PR Not Created + +1. Check that commits follow conventional commit format +2. Ensure `release-please` workflow is enabled +3. Verify release-please app has access to repository +4. Check repository settings > Actions permissions + +## See Also + +- [Load Balancer Docker Infrastructure](./Load_Balancer_Docker_Infrastructure.md) +- [System Python Runtime Architecture](./System_Python_Runtime_Architecture.md) diff --git a/docs/Load_Balancer_Docker_Infrastructure.md b/docs/Load_Balancer_Docker_Infrastructure.md new file mode 100644 index 0000000..19a2675 --- /dev/null +++ b/docs/Load_Balancer_Docker_Infrastructure.md @@ -0,0 +1,340 @@ +# Load Balancer Docker Infrastructure + +## Overview + +This document covers the Docker infrastructure supporting **LiveLoadBalancerSLSResource remote code execution** in worker-tetra. This enables executing serialized Python functions via HTTP `/execute` endpoint during local development and testing. 
+ +### Purpose + +- **Remote code execution**: Execute arbitrary Python functions via HTTP POST to `/execute` endpoint +- **Function serialization**: Send function code, arguments, and dependencies as JSON payload +- **Local development**: LiveLoadBalancer resource provides `/execute` for testing before production deployment +- **HTTP-based communication**: Direct HTTP requests/responses instead of RunPod job queue + +### Integration with tetra-rp + +The tetra-rp submodule (branch `deanq/ae-1102-load-balancer-sls-resource`) provides: + +- **Resource Classes**: `LoadBalancerSlsResource` (base) and `LiveLoadBalancer` (local dev with /execute) +- **Stub Implementation**: `LoadBalancerSlsStub` routes HTTP requests to handler functions +- **Protocol**: `FunctionRequest`/`FunctionResponse` for serialized code execution + +Worker-tetra provides the Docker infrastructure to run the handler that processes these requests. + +## Architecture + +### Complete Flow + +```mermaid +graph TB + subgraph SDK["tetra-rp SDK"] + A["LiveLoadBalancerSLSResource
Resource Config"] + B["LoadBalancerSlsStub
Client-side stub"] + C["FunctionRequest/Response
Protocol"] + end + + subgraph Infrastructure["worker-tetra"] + D["Dockerfile-lb
FastAPI + uvicorn"] + E["src/handler.py
Serverless entry point"] + F["RemoteExecutor
Executes serialized code"] + end + + subgraph CICD["CI/CD Pipeline"] + G["docker-test-lb
Validate image builds"] + H["docker-main-lb
Push dev image"] + I["docker-prod-lb
Push release image"] + end + + subgraph Registry["Docker Hub"] + J["runpod/tetra-rp-lb:tag
Base Image"] + end + + subgraph Runtime["RunPod Deployment"] + K["Load Balancer Endpoint
Port 80"] + L["FastAPI Handler
POST /execute"] + end + + A --> B + C --> B + B -->|HTTP POST| L + D --> E + E --> F + F -->|executes| L + D --> G + G --> H + G --> I + H --> J + I --> J + J --> K + K --> L + + style SDK fill:#1976d2,stroke:#0d47a1,stroke-width:3px,color:#fff + style Infrastructure fill:#F57C00,stroke:#BF360C,stroke-width:3px,color:#fff + style CICD fill:#2E7D32,stroke:#1B5E20,stroke-width:3px,color:#fff + style Registry fill:#C62828,stroke:#7F0000,stroke-width:3px,color:#fff + style Runtime fill:#00695C,stroke:#003D33,stroke-width:3px,color:#fff +``` + +### /execute Endpoint Execution + +```mermaid +sequenceDiagram + participant Client as tetra-rp SDK
LoadBalancerSlsStub + participant Handler as FastAPI Handler
POST /execute + participant Executor as RemoteExecutor + participant System as System + + Client->>Handler: HTTP POST /execute + Handler->>Handler: Parse FunctionRequest JSON + Handler->>Executor: ExecuteFunction(request) + Executor->>System: Install dependencies + Executor->>System: Execute function code + Executor->>System: Capture output/errors + Executor->>Handler: Return FunctionResponse + Handler->>Client: HTTP 200 with result JSON +``` + +## Docker Image (Dockerfile-lb) + +### Base Image + +```dockerfile +FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime +``` + +Uses PyTorch CUDA runtime to support GPU-accelerated functions. This matches the queue-based Dockerfile but with different CMD and dependencies. + +### Key Configuration + +| Aspect | Queue-Based (Dockerfile) | Load Balancer (Dockerfile-lb) | +|--------|------------------------|------| +| Handler Entry | `runpod.serverless.start()` | `uvicorn handler:app` | +| Communication | RunPod job queue | HTTP requests | +| Port | Job polling (no port) | Port 80 | +| Handler | Single `handler.py` | Generated `handler_*.py` | +| Framework | RunPod SDK | FastAPI + uvicorn | +| Request Type | Batch (job queue) | Synchronous HTTP | + +### Dependencies + +Added to `pyproject.toml`: + +```toml +dependencies = [ + "fastapi>=0.115.0", + "uvicorn[standard]>=0.34.0", + # ... existing dependencies ... +] +``` + +**FastAPI**: Web framework for HTTP routing and request handling +**uvicorn**: ASGI server for running FastAPI applications + +### Environment Variables + +```dockerfile +ENV HF_HUB_ENABLE_HF_TRANSFER=1 # HuggingFace acceleration +ENV HF_HOME=/hf-cache # Cache location +ENV DEBIAN_FRONTEND=noninteractive # No interactive prompts +ENV TZ=Etc/UTC # Timezone +``` + +### CMD Strategy + +```dockerfile +CMD ["uvicorn", "handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"] +``` + +This is a **placeholder**. 
RunPod overrides the CMD at runtime to run the specific generated handler: + +```bash +uvicorn handler_api:app --host 0.0.0.0 --port 80 +``` + +Different endpoints can use different generated handlers (e.g., `handler_api`, `handler_service`). + +## Building the Image + +### Local Build + +Build the Load Balancer image locally for testing: + +```bash +make build-lb +``` + +This builds and tags as `runpod/tetra-rp-lb:local` and loads into local Docker daemon. + +### CI/CD Build + +The image is automatically built and pushed via GitHub Actions: + +- **Pull requests**: `docker-test-lb` job validates the build +- **Main branch**: `docker-main-lb` job pushes `:main` tag +- **Releases**: `docker-prod-lb` job pushes semantic version and `:latest` tags + +See [Docker Build Pipeline](./Docker_Build_Pipeline.md) for details on the CI/CD process. + +## Handler Implementation + +The handler processes `/execute` requests by: + +1. **Receiving** HTTP POST to `/execute` with `FunctionRequest` JSON payload +2. **Parsing** the request containing: + - `function_code`: Source code of function to execute + - `function_name`: Name of function to call + - `args`: Base64-encoded cloudpickle serialized arguments + - `kwargs`: Base64-encoded cloudpickle serialized keyword arguments + - `dependencies`: List of Python packages to install + - `system_dependencies`: List of system packages to install +3. **Installing** dependencies as needed +4. **Executing** the function in an isolated namespace +5. **Capturing** stdout, stderr, logs, and return value +6. 
**Returning** `FunctionResponse` with result or error + +### /execute Request Example + +```bash +curl -X POST http://localhost:80/execute \ + -H "Content-Type: application/json" \ + -d '{ + "function_name": "add", + "function_code": "def add(a, b):\n return a + b", + "args": ["", ""], + "dependencies": [] + }' +``` + +### /execute Response Example + +```json +{ + "success": true, + "result": "", + "stdout": null, + "error": null +} +``` + +## Local Development with LiveLoadBalancer + +LiveLoadBalancer enables testing the `/execute` endpoint locally before deployment: + +```python +from tetra_rp import remote, LiveLoadBalancer + +config = LiveLoadBalancer(name="test-endpoint") + +@remote(config) +def my_function(x): + return x * 2 +``` + +When using `LiveLoadBalancer`: +- The `/execute` endpoint is **available** for local testing +- You can serialize and send function code via HTTP +- Test the full execution pipeline locally + +When deployed as `LoadBalancerSlsResource`: +- The `/execute` endpoint is **not available** (security) +- Only user-defined routes are exposed +- Production endpoints prevent arbitrary code execution + +## Testing the Handler + +Test the Load Balancer handler's `/execute` endpoint locally: + +### Using the Test Script + +```bash +make test-lb-handler +``` + +This script: +1. Starts the FastAPI server on port 80 +2. Validates the `/health` endpoint +3. Runs all `tests/test_*.json` files against `/execute` +4. Reports pass/fail results +5. 
Cleans up the server + +### Manual Testing + +Start the server: + +```bash +uvicorn src.lb_handler:app --port 80 +``` + +Test the health endpoint: + +```bash +curl http://localhost:80/health +``` + +Test the execute endpoint: + +```bash +curl -X POST http://localhost:80/execute \ + -H "Content-Type: application/json" \ + -d '{ + "function_name": "add", + "function_code": "def add(a, b):\n return a + b", + "args": ["base64_encoded_5", "base64_encoded_3"], + "dependencies": [] + }' +``` + +### Test File Format + +Create test files in `src/tests/test_*.json`: + +```json +{ + "function_name": "my_function", + "function_code": "def my_function(x):\n return x * 2", + "args": ["base64_encoded_arg"], + "kwargs": {}, + "dependencies": [], + "system_dependencies": [], + "accelerate_downloads": false +} +``` + +Note: Arguments must be base64-encoded cloudpickle serialized values. + +## Troubleshooting + +### Docker Build Fails + +1. Check `pyproject.toml` for syntax errors +2. Verify `Dockerfile-lb` line endings (CRLF vs LF) +3. Ensure uv.lock is up to date: `uv lock --upgrade` +4. Run `make build-lb` to validate locally + +### /execute Endpoint Not Responding + +1. Verify endpoint is deployed and in "Ready" state +2. Check port 80 is exposed: `EXPOSE 80` in Dockerfile-lb +3. Verify FastAPI handler is started: check container logs +4. Test with simple request first + +### Function Execution Fails + +1. Check dependencies are listed in `FunctionRequest` +2. Verify function code syntax +3. Check for import errors in dependencies +4. Review captured `stdout` and `error` fields in response + +### Missing Dependencies in Handler + +1. Add package to `pyproject.toml` +2. Update uv.lock: `uv lock` +3. 
Rebuild Docker image: `make build-lb` + +## See Also + +- [Docker Build Pipeline](./Docker_Build_Pipeline.md) - CI/CD infrastructure for building and pushing images +- [System Python Runtime Architecture](./System_Python_Runtime_Architecture.md) - Details on the execution engine +- [tetra-rp Load Balancer Runtime Architecture](../tetra-rp/docs/LoadBalancer_Runtime_Architecture.md) - SDK-level architecture +- [Using @remote with Load Balancer](../tetra-rp/docs/Using_Remote_With_LoadBalancer.md) - SDK usage guide +- [Load Balancer Endpoints](../tetra-rp/docs/Load_Balancer_Endpoints.md) - Endpoint configuration diff --git a/pyproject.toml b/pyproject.toml index 624e941..e112993 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,8 @@ dependencies = [ "runpod", "hf_transfer>=0.1.0", "huggingface_hub>=0.32.0", + "fastapi>=0.115.0", + "uvicorn[standard]>=0.34.0", ] [dependency-groups] diff --git a/src/lb_handler.py b/src/lb_handler.py new file mode 100644 index 0000000..2cdf14b --- /dev/null +++ b/src/lb_handler.py @@ -0,0 +1,69 @@ +"""Load Balancer handler for executing remote functions via HTTP. + +This handler provides a FastAPI application for the Load Balancer runtime. +It supports: +- /ping: Health check endpoint (required by RunPod Load Balancer) +- /execute: Remote function execution via HTTP POST + +The handler uses worker-tetra's RemoteExecutor for function execution. 
+ +For generated handlers from flash build: +- Those handlers extend this with user-defined routes +- They use the same execution engine +""" + +from typing import Any, Dict + +from fastapi import FastAPI + +from logger import setup_logging +from remote_execution import FunctionRequest, FunctionResponse +from remote_executor import RemoteExecutor + +# Initialize logging configuration +setup_logging() + +# Create FastAPI app +app = FastAPI(title="Load Balancer Handler") + + +@app.get("/ping") +async def ping() -> Dict[str, Any]: + """Ping endpoint for health checks (RunPod Load Balancer requirement). + + Returns HTTP 200 when healthy. RunPod measures cold start by tracking + the transition from 204 (initializing) to 200 (healthy). + """ + return {"status": "healthy"} + + +@app.post("/execute") +async def execute(request: Dict[str, Any]) -> Dict[str, Any]: + """Execute a remote function via HTTP POST request. + + Expects FunctionRequest JSON payload. + Supports both direct FunctionRequest format and RunPod wrapped format. + """ + output: FunctionResponse + + try: + executor = RemoteExecutor() + # Handle both direct FunctionRequest and RunPod wrapped format + request_data = request.get("input", request) + input_data = FunctionRequest(**request_data) + output = await executor.ExecuteFunction(input_data) + + except Exception as error: + output = FunctionResponse( + success=False, + error=f"Error in handler: {str(error)}", + ) + + return output.model_dump() + + +if __name__ == "__main__": + import uvicorn + + # Local development server for testing + uvicorn.run(app, host="0.0.0.0", port=80) diff --git a/src/test-handler.sh b/src/test-handler.sh index 1304a8d..303e3ec 100755 --- a/src/test-handler.sh +++ b/src/test-handler.sh @@ -15,9 +15,16 @@ for test_file in tests/test_*.json; do test_count=$((test_count + 1)) echo "Testing with $test_file..." 
-    # Run the test and capture output using system Python directly
-    output=$(python handler.py --test_input "$(cat "$test_file")" 2>&1)
-    exit_code=$?
+    # Run the test and capture output
+    # In Docker: python is available and has system-installed packages
+    # Locally: use uv run to manage dependencies
+    if command -v python &> /dev/null; then
+        output=$(python handler.py --test_input "$(cat "$test_file")" 2>&1)
+        exit_code=$?
+    else
+        output=$(uv run python3 handler.py --test_input "$(cat "$test_file")" 2>&1)
+        exit_code=$?
+    fi
 
     if [ $exit_code -eq 0 ]; then
         echo "✓ $test_file: PASSED"
diff --git a/src/test-lb-handler.sh b/src/test-lb-handler.sh
new file mode 100755
index 0000000..775e102
--- /dev/null
+++ b/src/test-lb-handler.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+set -e
+
+echo "Testing Load Balancer handler with /execute endpoint..."
+
+# Configuration
+PORT=80
+HOST="localhost"
+TEST_TIMEOUT=30
+SERVER_PID=""
+
+# Cleanup function
+cleanup() {
+    if [ -n "$SERVER_PID" ]; then
+        echo "Stopping FastAPI server (PID: $SERVER_PID)..."
+        kill $SERVER_PID 2>/dev/null || true
+        wait $SERVER_PID 2>/dev/null || true
+    fi
+}
+
+# Set up trap to ensure cleanup on exit
+trap cleanup EXIT
+
+# Start FastAPI server in background
+echo "Starting FastAPI server on port $PORT..."
+PYTHONPATH=. uv run python3 -m uvicorn lb_handler:app --host $HOST --port $PORT --log-level error > /tmp/lb_handler.log 2>&1 &
+SERVER_PID=$!
+
+# Wait for server to be ready
+echo "Waiting for server to be ready..."
+attempt=0
+while [ $attempt -lt $TEST_TIMEOUT ]; do
+    if curl -s -f "http://$HOST:$PORT/ping" > /dev/null 2>&1; then
+        echo "✓ Server is ready"
+        break
+    fi
+    attempt=$((attempt + 1))
+    sleep 1
+
+    if [ $attempt -eq $TEST_TIMEOUT ]; then
+        echo "✗ Server failed to start after ${TEST_TIMEOUT}s"
+        echo "Server logs:"
+        cat /tmp/lb_handler.log
+        exit 1
+    fi
+done
+
+# Test /ping endpoint (lb_handler.py exposes /ping, not /health)
+echo ""
+echo "Testing /ping endpoint..."
+health_response=$(curl -s "http://$HOST:$PORT/ping")
+echo "Response: $health_response"
+
+# Run /execute tests
+echo ""
+echo "Testing /execute endpoint with test files..."
+
+failed_tests=""
+test_count=0
+passed_count=0
+
+for test_file in tests/test_*.json; do
+    if [ ! -f "$test_file" ]; then
+        echo "No test_*.json files found"
+        exit 1
+    fi
+
+    test_count=$((test_count + 1))
+    echo ""
+    echo "Testing with $test_file..."
+
+    # Send request to /execute endpoint
+    response=$(curl -s -X POST "http://$HOST:$PORT/execute" \
+        -H "Content-Type: application/json" \
+        -d "$(cat "$test_file")")
+
+    # Check if response contains success or error
+    if echo "$response" | grep -q '"success":true'; then
+        echo "✓ $test_file: PASSED"
+        echo "  Result: $(echo "$response" | python3 -m json.tool 2>/dev/null | head -5)"
+        passed_count=$((passed_count + 1))
+    elif echo "$response" | grep -q '"success":false'; then
+        echo "✗ $test_file: FAILED"
+        echo "  Error: $(echo "$response" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d.get('error', 'Unknown error'))" 2>/dev/null || echo 'Unknown error')"
+        failed_tests="$failed_tests $test_file"
+    else
+        echo "✗ $test_file: FAILED (Invalid response)"
+        echo "  Response: $(echo "$response" | head -c 100)"
+        failed_tests="$failed_tests $test_file"
+    fi
+done
+
+echo ""
+echo "============================================"
+echo "Test Results: $passed_count/$test_count tests passed"
+echo "============================================"
+
+if [ -z "$failed_tests" ]; then
+    echo "✓ All tests passed!"
+ exit 0 +else + echo "✗ Failed tests:$failed_tests" + exit 1 +fi diff --git a/src/tests/test_lb_simple_function.json b/src/tests/test_lb_simple_function.json new file mode 100644 index 0000000..3b6fccb --- /dev/null +++ b/src/tests/test_lb_simple_function.json @@ -0,0 +1,9 @@ +{ + "input": { + "function_name": "simple_add", + "function_code": "def simple_add(a, b):\n \"\"\"Add two numbers.\"\"\"\n return a + b", + "args": ["gAVLBS4=", "gAVLAy4="], + "kwargs": {}, + "dependencies": [] + } +} diff --git a/tests/integration/test_dependency_management.py b/tests/integration/test_dependency_management.py index b77b960..ab410ee 100644 --- a/tests/integration/test_dependency_management.py +++ b/tests/integration/test_dependency_management.py @@ -18,8 +18,9 @@ def test_install_python_dependencies_integration(self): success=True, stdout="Successfully installed package-1.0.0" ) + # Use a package that's unlikely to be installed result = executor.dependency_installer.install_dependencies( - ["requests", "numpy"] + ["nonexistent-test-package-integration-12345"] ) assert result.success is True diff --git a/tests/unit/test_dependency_installer.py b/tests/unit/test_dependency_installer.py index e30a81c..5f132d4 100644 --- a/tests/unit/test_dependency_installer.py +++ b/tests/unit/test_dependency_installer.py @@ -213,7 +213,8 @@ def test_install_dependencies_success(self, mock_subprocess): success=True, stdout="Successfully installed" ) - result = self.installer.install_dependencies(["requests", "numpy"]) + # Use a package that's unlikely to be installed + result = self.installer.install_dependencies(["nonexistent-test-package-12345"]) assert result.success is True assert "Successfully installed" in result.stdout @@ -246,8 +247,9 @@ def test_install_dependencies_with_acceleration_enabled(self, mock_subprocess): success=True, stdout="Successfully installed with UV" ) + # Use a package that's unlikely to be installed result = self.installer.install_dependencies( - ["requests", 
"numpy"], accelerate_downloads=True + ["nonexistent-test-package-uv-12345"], accelerate_downloads=True ) assert result.success is True @@ -262,8 +264,9 @@ def test_install_dependencies_with_acceleration_disabled(self, mock_subprocess): success=True, stdout="Successfully installed with pip" ) + # Use a package that's unlikely to be installed result = self.installer.install_dependencies( - ["requests", "numpy"], accelerate_downloads=False + ["nonexistent-test-package-pip-12345"], accelerate_downloads=False ) assert result.success is True diff --git a/tetra-rp b/tetra-rp index d64b7f2..b57748f 160000 --- a/tetra-rp +++ b/tetra-rp @@ -1 +1 @@ -Subproject commit d64b7f22e1d320e775a7a4561b04e4bd26b0470f +Subproject commit b57748fce829b85e09d4e917dd03778dfe0ebc44 diff --git a/uv.lock b/uv.lock index 4b6e3c0..509a961 100644 --- a/uv.lock +++ b/uv.lock @@ -2614,15 +2614,17 @@ wheels = [ [[package]] name = "worker-tetra" -version = "0.7.1" +version = "0.7.2" source = { virtual = "." } dependencies = [ { name = "cloudpickle" }, + { name = "fastapi" }, { name = "hf-transfer" }, { name = "huggingface-hub" }, { name = "pydantic" }, { name = "requests" }, { name = "runpod" }, + { name = "uvicorn", extra = ["standard"] }, ] [package.dev-dependencies] @@ -2640,11 +2642,13 @@ dev = [ [package.metadata] requires-dist = [ { name = "cloudpickle", specifier = ">=3.1.1" }, + { name = "fastapi", specifier = ">=0.115.0" }, { name = "hf-transfer", specifier = ">=0.1.0" }, { name = "huggingface-hub", specifier = ">=0.32.0" }, { name = "pydantic", specifier = ">=2.11.4" }, { name = "requests", specifier = ">=2.25.0" }, { name = "runpod" }, + { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" }, ] [package.metadata.requires-dev]