From 07232afb4e41b6ce03fc21a0f027dba0de35c5a3 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 14:52:33 -0400 Subject: [PATCH 1/7] feat(sdk): support inference_v3 config_file mount Bump platform-api-python-client to 4.10.0 so ConfigFileMount is importable, and add a thin helper in centml/sdk/utils/config_file.py that reads a file off disk and returns a populated ConfigFileMount. Example create_inference.py updated to show the helper in context. Server (platform PRs #3656 and #3667) owns all field-level validation (64 KiB content cap, filename charset, mount_path rules); the helper deliberately stays validation-free so the SDK does not drift when server limits change. Signed-off-by: Honglin Cao --- centml/sdk/utils/config_file.py | 13 +++++++++ examples/sdk/create_inference.py | 8 ++++++ tests/test_sdk_config_file_helper.py | 41 ++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 centml/sdk/utils/config_file.py create mode 100644 tests/test_sdk_config_file_helper.py diff --git a/centml/sdk/utils/config_file.py b/centml/sdk/utils/config_file.py new file mode 100644 index 00000000..fef42070 --- /dev/null +++ b/centml/sdk/utils/config_file.py @@ -0,0 +1,13 @@ +import os +from typing import Optional + +from platform_api_python_client import ConfigFileMount + + +# Load a file off disk into a ConfigFileMount. Field-level validation +# (size cap, filename charset, mount_path rules) is intentionally left +# to the API so SDK doesn't drift when server limits change. +def load_config_file_mount(path: str, mount_path: str, filename: Optional[str] = None) -> ConfigFileMount: + with open(path, "r", encoding="utf-8") as f: + content = f.read() + return ConfigFileMount(filename=filename or os.path.basename(path), mount_path=mount_path, content=content) diff --git a/examples/sdk/create_inference.py b/examples/sdk/create_inference.py index 5bbe365d..2e5b4b40 100644 --- a/examples/sdk/create_inference.py +++ b/examples/sdk/create_inference.py @@ -1,6 +1,7 @@ import centml from centml.sdk.api import get_centml_client from centml.sdk import DeploymentType, CreateInferenceV3DeploymentRequest, UserVaultType +from centml.sdk.utils.config_file import load_config_file_mount def main(): @@ -22,6 +23,13 @@ def main(): max_unavailable=0, # Keep all pods available during updates healthcheck="/", concurrency=10, + # Helper reads ./nginx.conf and wraps it; pass an inline + # ConfigFileMount(filename=..., mount_path=..., content=...) if + # the content is already in memory. + config_file=load_config_file_mount( + path="./nginx.conf", + mount_path="/etc/nginx/conf.d/default.conf", + ), ) response = cclient.create_inference(request) print("Create deployment response: ", response) diff --git a/tests/test_sdk_config_file_helper.py b/tests/test_sdk_config_file_helper.py new file mode 100644 index 00000000..db9e52c4 --- /dev/null +++ b/tests/test_sdk_config_file_helper.py @@ -0,0 +1,41 @@ +"""Tests for centml.sdk.utils.config_file.load_config_file_mount.""" + +import pytest + +from centml.sdk.utils.config_file import load_config_file_mount + + +def test_default_filename_from_basename(tmp_path): + src = tmp_path / "nginx.conf" + src.write_text("server { listen 80; }\n") + + mount = load_config_file_mount(str(src), "/etc/nginx/conf.d/default.conf") + + assert mount.filename == "nginx.conf" + assert mount.mount_path == "/etc/nginx/conf.d/default.conf" + assert mount.content == "server { listen 80; }\n" + + +def test_explicit_filename_overrides_basename(tmp_path): + src = tmp_path / "local.txt" + src.write_text("payload") + + mount = load_config_file_mount(str(src), "/app/etc/remote.conf", filename="remote.conf") + + assert mount.filename == "remote.conf" + assert mount.mount_path == "/app/etc/remote.conf" + assert mount.content == "payload" + + +def test_utf8_multibyte_content_roundtrips(tmp_path): + src = tmp_path / "i18n.conf" + src.write_text("配置内容 = 测试\n", encoding="utf-8") + + mount = load_config_file_mount(str(src), "/etc/app/i18n.conf") + + assert mount.content == "配置内容 = 测试\n" + + +def test_missing_file_raises_filenotfound(tmp_path): + with pytest.raises(FileNotFoundError): + load_config_file_mount(str(tmp_path / "does-not-exist.conf"), "/etc/x") From 5b90163e3a6fa4afa87d8928a707211793fea75d Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 15:28:34 -0400 Subject: [PATCH 2/7] fix(sdk): correct mount_path semantics in config_file example/helper mount_path is the parent directory inside the container; the chart concatenates mount_path + "/" + filename for the final volumeMount target. The original example treated mount_path as the full file path, which mounted the file as a subdirectory and crashed nginx with an OCI "cannot create subdirectories in : not a directory" error. - Helper comment now states the mount_path/filename relationship. - Example uses mount_path="/etc/nginx/conf.d", path="./default.conf" so the file lands at /etc/nginx/conf.d/default.conf as nginx expects. - Unit tests use directory mount_paths to match real usage. Discovered while running pod-level e2e smoke against local minikube. Signed-off-by: Honglin Cao --- centml/sdk/utils/config_file.py | 7 ++++--- examples/sdk/create_inference.py | 12 +++++++----- tests/test_sdk_config_file_helper.py | 10 +++++----- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/centml/sdk/utils/config_file.py b/centml/sdk/utils/config_file.py index fef42070..acf531b0 100644 --- a/centml/sdk/utils/config_file.py +++ b/centml/sdk/utils/config_file.py @@ -4,9 +4,10 @@ from platform_api_python_client import ConfigFileMount -# Load a file off disk into a ConfigFileMount. Field-level validation -# (size cap, filename charset, mount_path rules) is intentionally left -# to the API so SDK doesn't drift when server limits change. +# Load a file off disk into a ConfigFileMount. `mount_path` is the parent +# directory inside the container; the file lands at `mount_path/filename`. +# Field-level validation (size cap, filename charset, mount_path rules) is +# left to the API so SDK doesn't drift when server limits change. def load_config_file_mount(path: str, mount_path: str, filename: Optional[str] = None) -> ConfigFileMount: with open(path, "r", encoding="utf-8") as f: content = f.read() diff --git a/examples/sdk/create_inference.py b/examples/sdk/create_inference.py index 2e5b4b40..5863d665 100644 --- a/examples/sdk/create_inference.py +++ b/examples/sdk/create_inference.py @@ -23,12 +23,14 @@ def main(): max_unavailable=0, # Keep all pods available during updates healthcheck="/", concurrency=10, - # Helper reads ./nginx.conf and wraps it; pass an inline - # ConfigFileMount(filename=..., mount_path=..., content=...) if - # the content is already in memory. + # Mounts ./default.conf at /etc/nginx/conf.d/default.conf. mount_path + # is the parent directory; filename defaults to os.path.basename(path) + # so the resulting file lands at mount_path/filename. Pass an inline + # ConfigFileMount(filename=..., mount_path=..., content=...) if the + # content is already in memory. config_file=load_config_file_mount( - path="./nginx.conf", - mount_path="/etc/nginx/conf.d/default.conf", + path="./default.conf", + mount_path="/etc/nginx/conf.d", ), ) response = cclient.create_inference(request) diff --git a/tests/test_sdk_config_file_helper.py b/tests/test_sdk_config_file_helper.py index db9e52c4..9470a2d5 100644 --- a/tests/test_sdk_config_file_helper.py +++ b/tests/test_sdk_config_file_helper.py @@ -9,10 +9,10 @@ def test_default_filename_from_basename(tmp_path): src = tmp_path / "nginx.conf" src.write_text("server { listen 80; }\n") - mount = load_config_file_mount(str(src), "/etc/nginx/conf.d/default.conf") + mount = load_config_file_mount(str(src), "/etc/nginx/conf.d") assert mount.filename == "nginx.conf" - assert mount.mount_path == "/etc/nginx/conf.d/default.conf" + assert mount.mount_path == "/etc/nginx/conf.d" assert mount.content == "server { listen 80; }\n" @@ -20,10 +20,10 @@ def test_explicit_filename_overrides_basename(tmp_path): src = tmp_path / "local.txt" src.write_text("payload") - mount = load_config_file_mount(str(src), "/app/etc/remote.conf", filename="remote.conf") + mount = load_config_file_mount(str(src), "/app/etc", filename="remote.conf") assert mount.filename == "remote.conf" - assert mount.mount_path == "/app/etc/remote.conf" + assert mount.mount_path == "/app/etc" assert mount.content == "payload" @@ -31,7 +31,7 @@ def test_utf8_multibyte_content_roundtrips(tmp_path): src = tmp_path / "i18n.conf" src.write_text("配置内容 = 测试\n", encoding="utf-8") - mount = load_config_file_mount(str(src), "/etc/app/i18n.conf") + mount = load_config_file_mount(str(src), "/etc/app") assert mount.content == "配置内容 = 测试\n" From aa47d7ea971b102161f6496dee56e3e7a69e030b Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 16:57:17 -0400 Subject: [PATCH 3/7] docs(examples): add vLLM inference example with chat_template config_file Per review feedback on #136: add a separate vLLM example showing config_file used to mount a chat-template Jinja file that vLLM consumes via --chat-template. Existing nginx example stays as the minimal config_file demo; this one shows the realistic LLM-serving shape (vllm/vllm-openai image, command override, GPU instance). Signed-off-by: Honglin Cao --- examples/sdk/create_inference_vllm.py | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/sdk/create_inference_vllm.py diff --git a/examples/sdk/create_inference_vllm.py b/examples/sdk/create_inference_vllm.py new file mode 100644 index 00000000..ddd277a4 --- /dev/null +++ b/examples/sdk/create_inference_vllm.py @@ -0,0 +1,50 @@ +import centml +from centml.sdk.api import get_centml_client +from centml.sdk import CreateInferenceV3DeploymentRequest +from centml.sdk.utils.config_file import load_config_file_mount + + +def main(): + with get_centml_client() as cclient: + # Mounts ./chat_template.jinja at /etc/vllm/chat_template.jinja and + # tells vLLM to use it via --chat-template. mount_path is the parent + # directory; filename defaults to os.path.basename(path). + request = CreateInferenceV3DeploymentRequest( + name="vllm-llama", + cluster_id=1000, + hardware_instance_id=1001, # GPU instance + image_url="vllm/vllm-openai:latest", + port=8000, + min_replicas=1, + max_replicas=1, + initial_replicas=1, + max_surge=1, + max_unavailable=0, + healthcheck="/health", + concurrency=10, + env_vars={"HF_TOKEN": ""}, + command=( + "python -m vllm.entrypoints.openai.api_server " + "--model meta-llama/Llama-3.2-3B-Instruct " + "--port 8000 " + "--chat-template /etc/vllm/chat_template.jinja" + ), + config_file=load_config_file_mount(path="./chat_template.jinja", mount_path="/etc/vllm"), + ) + response = cclient.create_inference(request) + print("Create deployment response: ", response) + + deployment = cclient.get_inference(response.id) + print("Deployment details: ", deployment) + + ''' + ### Pause the deployment + cclient.pause(deployment.id) + + ### Delete the deployment + cclient.delete(deployment.id) + ''' + + +if __name__ == "__main__": + main() From 608e3c410f46a12de463ab526528b5f3bc863c56 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 17:00:49 -0400 Subject: [PATCH 4/7] style(examples): black-format create_inference.py scripts/format.sh uses --skip-magic-trailing-comma; load_config_file_mount call fits on one line under line-length 120 so collapse the multi-line form. Signed-off-by: Honglin Cao --- examples/sdk/create_inference.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/examples/sdk/create_inference.py b/examples/sdk/create_inference.py index 5863d665..f8126e4a 100644 --- a/examples/sdk/create_inference.py +++ b/examples/sdk/create_inference.py @@ -28,10 +28,7 @@ def main(): # so the resulting file lands at mount_path/filename. Pass an inline # ConfigFileMount(filename=..., mount_path=..., content=...) if the # content is already in memory. - config_file=load_config_file_mount( - path="./default.conf", - mount_path="/etc/nginx/conf.d", - ), + config_file=load_config_file_mount(path="./default.conf", mount_path="/etc/nginx/conf.d"), ) response = cclient.create_inference(request) print("Create deployment response: ", response) From 2095ab54b3d9824947c5ec11286a6e52f5ff55d3 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 17:22:37 -0400 Subject: [PATCH 5/7] docs(examples): drive vllm example via --config (#136 review) michaelshin's review: a config_file is most useful when it carries the full vLLM startup config (model, dtype, gpu-memory-utilization, speculative-config, tool-call-parser, etc.) consumed via --config, not just a chat template. - Switch the example command to `--config /etc/vllm/vllm_config.yaml`. - Add sibling examples/sdk/vllm_config.yaml with the Llama-3.1-8B + EAGLE3 speculative-decoding setup from review feedback so the example is self-runnable. Signed-off-by: Honglin Cao --- examples/sdk/create_inference_vllm.py | 19 +++++++++---------- examples/sdk/vllm_config.yaml | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 10 deletions(-) create mode 100644 examples/sdk/vllm_config.yaml diff --git a/examples/sdk/create_inference_vllm.py b/examples/sdk/create_inference_vllm.py index ddd277a4..3db31ac6 100644 --- a/examples/sdk/create_inference_vllm.py +++ b/examples/sdk/create_inference_vllm.py @@ -6,9 +6,13 @@ def main(): with get_centml_client() as cclient: - # Mounts ./chat_template.jinja at /etc/vllm/chat_template.jinja and - # tells vLLM to use it via --chat-template. mount_path is the parent - # directory; filename defaults to os.path.basename(path). + # Mounts ./vllm_config.yaml at /etc/vllm/vllm_config.yaml and lets vLLM + # consume the whole config via --config. mount_path is the parent + # directory; filename defaults to os.path.basename(path) so the file + # lands at mount_path/filename. The sibling vllm_config.yaml in this + # directory shows a realistic Llama-3.1-8B + EAGLE3 speculative-decoding + # setup; edit it (model, dtype, tensor-parallel-size, speculative-config, + # etc.) to match the workload before deploying. request = CreateInferenceV3DeploymentRequest( name="vllm-llama", cluster_id=1000, @@ -23,13 +27,8 @@ def main(): healthcheck="/health", concurrency=10, env_vars={"HF_TOKEN": ""}, - command=( - "python -m vllm.entrypoints.openai.api_server " - "--model meta-llama/Llama-3.2-3B-Instruct " - "--port 8000 " - "--chat-template /etc/vllm/chat_template.jinja" - ), - config_file=load_config_file_mount(path="./chat_template.jinja", mount_path="/etc/vllm"), + command="python -m vllm.entrypoints.openai.api_server --port 8000 --config /etc/vllm/vllm_config.yaml", + config_file=load_config_file_mount(path="./vllm_config.yaml", mount_path="/etc/vllm"), ) response = cclient.create_inference(request) print("Create deployment response: ", response) diff --git a/examples/sdk/vllm_config.yaml b/examples/sdk/vllm_config.yaml new file mode 100644 index 00000000..89a05aaa --- /dev/null +++ b/examples/sdk/vllm_config.yaml @@ -0,0 +1,23 @@ +model: meta-llama/Llama-3.1-8B-Instruct +tokenizer: meta-llama/Llama-3.1-8B-Instruct +runner: generate +dtype: auto +gpu-memory-utilization: 0.9 +max-num-seqs: 2048 +tokenizer-mode: auto +seed: 0 +tensor-parallel-size: 1 +pipeline-parallel-size: 1 +block-size: 16 +attention-backend: FLASHINFER +distributed-executor-backend: uni +enable-prefix-caching: true +enable-chunked-prefill: true +max-num-batched-tokens: 1024 +speculative-config: + method: eagle3 + model: centml/EAGLE3-Llama3.1-8B-Instruct + num_speculative_tokens: 3 + draft_tensor_parallel_size: 1 +enable-auto-tool-choice: true +tool-call-parser: llama3_json From 82decf1dca57853035d21eccc0a72c65c8b0cb88 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 17:51:48 -0400 Subject: [PATCH 6/7] fix(examples): vllm example uses python3 + moves port into yaml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found during dev B200 verification (Qwen2.5-0.5B variant of this shape): - vllm/vllm-openai image has python3 only, no `python` symlink — the K8s container fails to start with `exec: "python": executable file not found in $PATH` because the platform's `command` field overrides the image's ENTRYPOINT entirely. - The platform helm path passes numeric arg tokens to the Rollout spec as integers, and the K8s API server rejects them (`args[3] ... must be of type string: "integer"`). Moving `port: 8000` into the YAML config keeps every CLI token a non-numeric string while still letting vLLM pick up the port — and matches the "all config in one file" intent of --config. Verified end-to-end on dev (cluster c-01-c-11-centml-org, hw x1-large-b200): created deployment via SDK → rollout HEALTHY at t+76s → POST /v1/chat/completions returned HTTP 200 with a real Qwen completion. Signed-off-by: Honglin Cao --- examples/sdk/create_inference_vllm.py | 2 +- examples/sdk/vllm_config.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/sdk/create_inference_vllm.py b/examples/sdk/create_inference_vllm.py index 3db31ac6..33d16523 100644 --- a/examples/sdk/create_inference_vllm.py +++ b/examples/sdk/create_inference_vllm.py @@ -27,7 +27,7 @@ def main(): healthcheck="/health", concurrency=10, env_vars={"HF_TOKEN": ""}, - command="python -m vllm.entrypoints.openai.api_server --port 8000 --config /etc/vllm/vllm_config.yaml", + command="python3 -m vllm.entrypoints.openai.api_server --config /etc/vllm/vllm_config.yaml", config_file=load_config_file_mount(path="./vllm_config.yaml", mount_path="/etc/vllm"), ) response = cclient.create_inference(request) diff --git a/examples/sdk/vllm_config.yaml b/examples/sdk/vllm_config.yaml index 89a05aaa..cddd94bd 100644 --- a/examples/sdk/vllm_config.yaml +++ b/examples/sdk/vllm_config.yaml @@ -1,3 +1,4 @@ +port: 8000 model: meta-llama/Llama-3.1-8B-Instruct tokenizer: meta-llama/Llama-3.1-8B-Instruct runner: generate From 009be9b41be2fd07069a6960977f2082fb669b17 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 21 May 2026 17:58:27 -0400 Subject: [PATCH 7/7] fix(sdk): address codex review feedback on #136 - helper: open with newline="" so CRLF/CR line endings reach the server byte-faithful instead of being silently normalized to \n. Adds a regression test for a Windows-authored config (b"line1\r\nline2\r\n"). - example: ship examples/sdk/default.conf so create_inference.py runs as-is without the user having to discover and create an extra file (matches how create_inference_vllm.py ships vllm_config.yaml). Signed-off-by: Honglin Cao --- centml/sdk/utils/config_file.py | 4 +++- examples/sdk/default.conf | 7 +++++++ tests/test_sdk_config_file_helper.py | 11 +++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 examples/sdk/default.conf diff --git a/centml/sdk/utils/config_file.py b/centml/sdk/utils/config_file.py index acf531b0..4cedc9b2 100644 --- a/centml/sdk/utils/config_file.py +++ b/centml/sdk/utils/config_file.py @@ -9,6 +9,8 @@ # Field-level validation (size cap, filename charset, mount_path rules) is # left to the API so SDK doesn't drift when server limits change. def load_config_file_mount(path: str, mount_path: str, filename: Optional[str] = None) -> ConfigFileMount: - with open(path, "r", encoding="utf-8") as f: + # newline="" disables universal-newline translation so CRLF/CR line + # endings reach the server byte-faithful instead of being normalized to \n. + with open(path, "r", encoding="utf-8", newline="") as f: content = f.read() return ConfigFileMount(filename=filename or os.path.basename(path), mount_path=mount_path, content=content) diff --git a/examples/sdk/default.conf b/examples/sdk/default.conf new file mode 100644 index 00000000..aa75ddfb --- /dev/null +++ b/examples/sdk/default.conf @@ -0,0 +1,7 @@ +server { + listen 8080; + location / { + return 200 "hello from config_file\n"; + add_header Content-Type text/plain; + } +} diff --git a/tests/test_sdk_config_file_helper.py b/tests/test_sdk_config_file_helper.py index 9470a2d5..212e94f2 100644 --- a/tests/test_sdk_config_file_helper.py +++ b/tests/test_sdk_config_file_helper.py @@ -39,3 +39,14 @@ def test_utf8_multibyte_content_roundtrips(tmp_path): def test_missing_file_raises_filenotfound(tmp_path): with pytest.raises(FileNotFoundError): load_config_file_mount(str(tmp_path / "does-not-exist.conf"), "/etc/x") + + +def test_preserves_crlf_line_endings(tmp_path): + # Windows-authored configs use \r\n; the helper must not silently + # normalize them to \n when uploading to the server. + src = tmp_path / "windows.conf" + src.write_bytes(b"line1\r\nline2\r\n") + + mount = load_config_file_mount(str(src), "/etc/app") + + assert mount.content == "line1\r\nline2\r\n"