From 650879828eb1e9673d854b5a7ba1d47f6f88f416 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Tue, 19 May 2026 22:21:48 +0800 Subject: [PATCH 1/8] =?UTF-8?q?feat(runtime):=20foundations=20=E2=80=94=20?= =?UTF-8?q?YAML=20parser=20+=20render=20+=20exit=20code=20extension?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR1 of the ar runtime command group. Pure underlying utilities, no command surface yet. - _utils/runtime_constants.py — SYSTEM_TAG_CLI / ARTIFACT_TYPE_CONTAINER / default endpoint / polling defaults / timeout defaults. - _utils/agentruntime_yaml.py — k8s-style YAML parser for kind=AgentRuntime with 17 validation rules: apiVersion/kind/name regex, mandatory container + image, forbidden fields (spec.code, metadata.tags, metadata.systemTags), workspace xor workspaceId, imageRegistryType enum, registryConfig for CUSTOM, log project/logstore pair, endpoint name uniqueness, targetVersion/routing mutex, routing weight numeric + sum=100, NAS/OSS mount required fields, scaling target >= minInstances. - _utils/runtime_render.py — Parsed* dataclasses to SDK 0.0.200 inputs. CLI auto-injects system_tags=["x-agentrun-cli"] and artifact_type="Container"; injects default endpoint when spec.endpoints is omitted; endpoint_needs_update detects drift on description / target_version / routing / disable_public_network_access. - _utils/error.py — adds EXIT_RESOURCE_FAILED=5 + EXIT_TIMEOUT=6 with RuntimePollFailed / RuntimePollTimeout exception types; existing 0/1/2/3/4 codes untouched. Coverage: 95.78%. All tests pass. 
Signed-off-by: Sodawyx --- src/agentrun_cli/_utils/agentruntime_yaml.py | 642 +++++++++++++++ src/agentrun_cli/_utils/error.py | 45 ++ src/agentrun_cli/_utils/runtime_constants.py | 26 + src/agentrun_cli/_utils/runtime_render.py | 369 +++++++++ tests/unit/test_error_runtime_extension.py | 63 ++ tests/unit/test_runtime_constants.py | 32 + tests/unit/test_runtime_render.py | 302 +++++++ tests/unit/test_runtime_yaml.py | 796 +++++++++++++++++++ 8 files changed, 2275 insertions(+) create mode 100644 src/agentrun_cli/_utils/agentruntime_yaml.py create mode 100644 src/agentrun_cli/_utils/runtime_constants.py create mode 100644 src/agentrun_cli/_utils/runtime_render.py create mode 100644 tests/unit/test_error_runtime_extension.py create mode 100644 tests/unit/test_runtime_constants.py create mode 100644 tests/unit/test_runtime_render.py create mode 100644 tests/unit/test_runtime_yaml.py diff --git a/src/agentrun_cli/_utils/agentruntime_yaml.py b/src/agentrun_cli/_utils/agentruntime_yaml.py new file mode 100644 index 0000000..af8da29 --- /dev/null +++ b/src/agentrun_cli/_utils/agentruntime_yaml.py @@ -0,0 +1,642 @@ +"""YAML schema parsing for ``ar runtime apply / render``. + +Schema (k8s-style, single document, ``kind: AgentRuntime``):: + + apiVersion: agentrun/v1 + kind: AgentRuntime + metadata: + name: + description: + workspace: # XOR workspaceId + workspaceId: + spec: + container: # required (Container mode only) + image: # required + command: [, ...] + port: + imageRegistryType: + acrInstanceId: + registryConfig: ... + cpu / memory / port / diskSize + enableSessionIsolation + protocol: {type, settings} + network: {mode, vpcId, vswitchIds, securityGroupId} + healthCheck / log / env + credentialName / executionRoleArn + sessionConcurrencyLimitPerInstance / sessionIdleTimeoutSeconds + nas / ossMount + endpoints: [...] # None → inject default; [] → no endpoints + +See ``projects/agent-infra-build-runit/design/runtime-cli-design.md`` §2 for the +complete field list. 
# The only apiVersion / kind pair accepted by the document validator.
SUPPORTED_API_VERSION = "agentrun/v1"
SUPPORTED_KIND = "AgentRuntime"

# metadata.name: 1-63 characters drawn from [a-z0-9-].
# The end anchor is ``\Z`` rather than ``$``: ``$`` also matches just before a
# trailing "\n", so a name such as "my-agent\n" (easy to produce with a YAML
# block scalar) would otherwise pass ``_NAME_RE.match`` and be accepted.
_NAME_RE = re.compile(r"^[a-z0-9-]{1,63}\Z")
http_get_url: str | None = None + initial_delay_seconds: int | None = None + period_seconds: int | None = None + timeout_seconds: int | None = None + failure_threshold: int | None = None + success_threshold: int | None = None + + +@dataclass +class ParsedLog: + project: str + logstore: str + + +@dataclass +class ParsedNasMountPoint: + server_addr: str + mount_dir: str + enable_tls: bool | None = None + + +@dataclass +class ParsedNas: + user_id: int | None = None + group_id: int | None = None + mount_points: list[ParsedNasMountPoint] = field(default_factory=list) + + +@dataclass +class ParsedOssMountPoint: + bucket_name: str + mount_dir: str + bucket_path: str | None = None + endpoint: str | None = None + read_only: bool | None = None + + +@dataclass +class ParsedOssMount: + mount_points: list[ParsedOssMountPoint] = field(default_factory=list) + + +@dataclass +class ParsedScheduledPolicy: + name: str | None = None + schedule_expression: str | None = None + start_time: str | None = None + end_time: str | None = None + target: int | None = None + time_zone: str | None = None + + +@dataclass +class ParsedScaling: + min_instances: int | None = None + scheduled_policies: list[ParsedScheduledPolicy] = field(default_factory=list) + + +@dataclass +class ParsedEndpoint: + name: str + description: str | None = None + target_version: str | None = None + routing: list[tuple[str, float]] | None = None + disable_public_network_access: bool | None = None + scaling: ParsedScaling | None = None + + +@dataclass +class ParsedAgentRuntime: + name: str + container: ParsedContainer + description: str | None = None + workspace_name: str | None = None + workspace_id: str | None = None + cpu: float | None = None + memory: int | None = None + port: int | None = None + disk_size: int | None = None + enable_session_isolation: bool | None = None + protocol: ParsedProtocol | None = None + network: ParsedNetwork | None = None + health_check: ParsedHealthCheck | None = None + log: ParsedLog | None 
def _parse_container(raw: dict) -> ParsedContainer:
    """Validate ``spec.container`` and convert it to a ``ParsedContainer``.

    Args:
        raw: The ``spec.container`` mapping from the YAML document.

    Returns:
        The parsed container. ``command`` is ``None`` when absent or empty and
        ``registry_config`` is ``None`` when no ``registryConfig`` is given.

    Raises:
        YamlSchemaError: If ``image`` is missing or not a non-empty string,
            ``imageRegistryType`` is not one of ACR|ACREE|CUSTOM,
            ``registryConfig`` is missing for CUSTOM or is not a mapping, or
            ``command`` is not a list.
    """
    image = raw.get("image")
    if not isinstance(image, str) or not image:
        raise YamlSchemaError("spec.container.image is required and must be a string.")

    image_registry_type = raw.get("imageRegistryType")
    if image_registry_type is not None and image_registry_type not in (
        "ACR",
        "ACREE",
        "CUSTOM",
    ):
        raise YamlSchemaError(
            f"spec.container.imageRegistryType {image_registry_type!r} must be "
            "one of ACR|ACREE|CUSTOM."
        )

    rc_raw = raw.get("registryConfig")
    registry_config = None
    if image_registry_type == "CUSTOM":
        if not isinstance(rc_raw, dict):
            raise YamlSchemaError(
                "spec.container.registryConfig is required when "
                "imageRegistryType=CUSTOM."
            )
        registry_config = _parse_registry_config(rc_raw)
    elif rc_raw is not None:
        # registryConfig is tolerated for ACR/ACREE, but it still has to be a
        # mapping; otherwise the ``.get`` calls in _parse_registry_config would
        # fail with AttributeError instead of a schema error.
        if not isinstance(rc_raw, dict):
            raise YamlSchemaError("spec.container.registryConfig must be a mapping.")
        registry_config = _parse_registry_config(rc_raw)

    command_raw = raw.get("command")
    command = None
    if command_raw:
        # list("python app.py") would silently split a scalar into single
        # characters, so only a real sequence is accepted.
        if not isinstance(command_raw, (list, tuple)):
            raise YamlSchemaError(
                f"spec.container.command must be a list, got {command_raw!r}."
            )
        command = list(command_raw)

    return ParsedContainer(
        image=image,
        command=command,
        port=raw.get("port"),
        image_registry_type=image_registry_type,
        acr_instance_id=raw.get("acrInstanceId"),
        registry_config=registry_config,
    )
def _parse_log(raw) -> ParsedLog | None:
    """Validate ``spec.log`` and convert it to a ``ParsedLog``.

    Args:
        raw: The value of ``spec.log``; ``None`` when the key is absent.

    Returns:
        ``None`` when ``raw`` is ``None`` or when neither ``project`` nor
        ``logstore`` carries a value; otherwise a ``ParsedLog`` with both.

    Raises:
        YamlSchemaError: If ``raw`` is not a mapping, or only one of
            ``project`` / ``logstore`` is set.
    """
    if raw is None:
        return None
    if not isinstance(raw, dict):
        raise YamlSchemaError("spec.log must be a mapping.")
    project = raw.get("project")
    logstore = raw.get("logstore")
    if bool(project) != bool(logstore):
        raise YamlSchemaError(
            "spec.log.project and spec.log.logstore must be set together."
        )
    # The pairing check above treats empty values as "unset", so the same
    # truthiness test is used here. Testing ``project is None`` would let
    # ``project: ""`` / ``logstore: ""`` through as ParsedLog("", "").
    if not project:
        return None
    return ParsedLog(project=project, logstore=logstore)
def _parse_routing(routing_raw, ep_name: str) -> list[tuple[str, float]]:
    """Validate one endpoint's ``routing`` list and return (version, weight) pairs.

    Raises:
        YamlSchemaError: If the value is not a non-empty list of mappings, an
            entry lacks ``version`` or ``weight``, a weight is not a number in
            the range 0..100, or the weights do not sum to 100.
    """
    if not isinstance(routing_raw, list) or not routing_raw:
        raise YamlSchemaError(
            f"endpoint {ep_name!r}: routing must be a non-empty list."
        )
    pairs: list[tuple[str, float]] = []
    total = 0.0
    for entry in routing_raw:
        if not isinstance(entry, dict):
            raise YamlSchemaError(
                f"endpoint {ep_name!r}: routing[*] must be a mapping."
            )
        version = entry.get("version")
        weight = entry.get("weight")
        if version is None or weight is None:
            raise YamlSchemaError(
                f"endpoint {ep_name!r}: routing[*] requires version and weight."
            )
        try:
            # bool is an int subclass: float(True) == 1.0 would pass as a weight.
            weight_f = None if isinstance(weight, bool) else float(weight)
        except (TypeError, ValueError):
            weight_f = None
        if weight_f is None:
            raise YamlSchemaError(
                f"endpoint {ep_name!r}: routing[*].weight must be a number, "
                f"got {weight!r}."
            )
        # Written as a positive range test so NaN (every comparison is False)
        # is rejected too. A NaN total would otherwise slip through the sum
        # check below, as would pairs like 150 / -50.
        if not 0.0 <= weight_f <= 100.0:
            raise YamlSchemaError(
                f"endpoint {ep_name!r}: routing[*].weight must be between 0 and "
                f"100, got {weight!r}."
            )
        pairs.append((str(version), weight_f))
        total += weight_f
    if abs(total - 100.0) > 1e-6:
        raise YamlSchemaError(
            f"endpoint {ep_name!r}: routing weights must sum to 100 (got {total})."
        )
    return pairs


def _parse_endpoints(raw) -> list[ParsedEndpoint] | None:
    """Return None if key absent; [] if explicitly empty; else parsed list.

    Args:
        raw: The value of ``spec.endpoints`` (``None`` when the key is absent).

    Raises:
        YamlSchemaError: If ``raw`` is not a list, an entry is not a mapping,
            a name is missing or duplicated, ``targetVersion`` and ``routing``
            are both given, or ``routing`` / ``scaling`` fail validation.
    """
    if raw is None:
        return None
    if not isinstance(raw, list):
        raise YamlSchemaError("spec.endpoints must be a list.")
    if not raw:
        return []
    seen: set[str] = set()
    out: list[ParsedEndpoint] = []
    for ep_raw in raw:
        if not isinstance(ep_raw, dict):
            raise YamlSchemaError("spec.endpoints[*] must be a mapping.")
        name = ep_raw.get("name")
        if not isinstance(name, str) or not name:
            raise YamlSchemaError("spec.endpoints[*].name is required.")
        if name in seen:
            raise YamlSchemaError(f"spec.endpoints[*] duplicate name: {name!r}.")
        seen.add(name)
        target_version = ep_raw.get("targetVersion")
        routing_raw = ep_raw.get("routing")
        if target_version is not None and routing_raw is not None:
            raise YamlSchemaError(
                f"endpoint {name!r}: targetVersion and routing are mutually exclusive."
            )
        routing = None if routing_raw is None else _parse_routing(routing_raw, name)
        scaling = _parse_scaling(ep_raw.get("scaling"), name)
        out.append(
            ParsedEndpoint(
                name=name,
                description=ep_raw.get("description"),
                target_version=target_version,
                routing=routing,
                disable_public_network_access=ep_raw.get("disablePublicNetworkAccess"),
                scaling=scaling,
            )
        )
    return out
+ ) + kind = doc.get("kind") + if kind != SUPPORTED_KIND: + raise YamlSchemaError( + f"Unsupported kind {kind!r}; expected {SUPPORTED_KIND!r}." + ) + + metadata = _require_mapping(doc.get("metadata"), "metadata") + name = metadata.get("name") + if not isinstance(name, str) or not name: + raise YamlSchemaError("metadata.name is required and must be a string.") + if not _NAME_RE.match(name): + raise YamlSchemaError( + f"metadata.name {name!r} is invalid; must match [a-z0-9-]{{1,63}}." + ) + + spec = _require_mapping(doc.get("spec"), "spec") + + if "code" in spec: + raise YamlSchemaError( + "spec.code is not supported; this CLI only supports Container mode." + ) + if "tags" in metadata: + raise YamlSchemaError( + "metadata.tags is not supported; SDK 0.0.200 removed the tags field." + ) + if "systemTags" in metadata: + raise YamlSchemaError( + "metadata.systemTags is reserved; system_tags is managed by the CLI." + ) + workspace = metadata.get("workspace") + workspace_id = metadata.get("workspaceId") + if workspace is not None and workspace_id is not None: + raise YamlSchemaError( + "metadata.workspace and metadata.workspaceId are mutually exclusive." 
def parse_yaml_file(path: str) -> list[ParsedAgentRuntime]:
    """Read the UTF-8 file at ``path`` and hand its text to ``parse_yaml_text``."""
    with open(path, encoding="utf-8") as stream:
        text = stream.read()
    return parse_yaml_text(text)
class RuntimePollFailed(Exception):
    """Signals that a runtime/endpoint ended in a ``*_FAILED`` terminal status.

    The constructor arguments are kept as same-named attributes. The exception
    message has the form ``"<kind> '<name>' entered <status>: <reason>"``, with
    ``(no reason)`` substituted when ``reason`` is missing or empty.
    """

    def __init__(
        self,
        resource_kind: str,
        name: str,
        status: str,
        reason: str | None = None,
    ):
        detail = reason or "(no reason)"
        super().__init__(f"{resource_kind} {name!r} entered {status}: {detail}")
        self.resource_kind = resource_kind
        self.name = name
        self.status = status
        self.reason = reason
+""" + +SYSTEM_TAG_CLI = "x-agentrun-cli" +"""Auto-injected into ``system_tags`` for every Runtime/Endpoint create or update +issued by this CLI. SDK 0.0.200 removed the user-facing ``tags`` field; the +``system_tags`` slot is the only place such markers can live.""" + +ARTIFACT_TYPE_CONTAINER = "Container" +"""Forced ``artifact_type`` value — this CLI only supports Container mode.""" + +DEFAULT_ENDPOINT_NAME = "default" +DEFAULT_TARGET_VERSION = "LATEST" + +POLL_INITIAL_INTERVAL = 3.0 # seconds +POLL_MAX_INTERVAL = 10.0 # seconds (cap of exponential backoff) +POLL_BACKOFF_FACTOR = 1.5 +ENDPOINT_POLL_CONCURRENCY = 4 # parallel endpoint pollers + +DEFAULT_APPLY_TIMEOUT_SECONDS = 600 # 10 min +DEFAULT_DELETE_TIMEOUT_SECONDS = 300 # 5 min + +ENV_POLL_CONCURRENCY = "AGENTRUN_CLI_ENDPOINT_POLL_CONCURRENCY" +"""Env override for ``ENDPOINT_POLL_CONCURRENCY`` (clamped 1..16).""" diff --git a/src/agentrun_cli/_utils/runtime_render.py b/src/agentrun_cli/_utils/runtime_render.py new file mode 100644 index 0000000..a42b6da --- /dev/null +++ b/src/agentrun_cli/_utils/runtime_render.py @@ -0,0 +1,369 @@ +"""Render ParsedAgentRuntime → SDK 0.0.200 create/update inputs. 
+ +This module owns all "CLI auto-injection" logic: +- ``system_tags`` always includes ``x-agentrun-cli`` +- ``artifact_type`` is forced to ``Container`` +- If user omitted ``spec.endpoints`` entirely, a single default endpoint is + injected (name=``default``, target_version=``LATEST``) +""" + +from __future__ import annotations + +from agentrun_cli._utils.agentruntime_yaml import ( + ParsedAgentRuntime, + ParsedContainer, + ParsedEndpoint, + ParsedScaling, +) +from agentrun_cli._utils.runtime_constants import ( + ARTIFACT_TYPE_CONTAINER, + DEFAULT_ENDPOINT_NAME, + DEFAULT_TARGET_VERSION, + SYSTEM_TAG_CLI, +) + + +def _sdk_models(): + """Lazy import of SDK models so import is cheap.""" + from agentrun.agent_runtime.model import ( + AgentRuntimeContainer, + AgentRuntimeCreateInput, + AgentRuntimeEndpointCreateInput, + AgentRuntimeEndpointRoutingConfig, + AgentRuntimeEndpointRoutingWeight, + AgentRuntimeEndpointUpdateInput, + AgentRuntimeHealthCheckConfig, + AgentRuntimeLogConfig, + AgentRuntimeProtocolConfig, + AgentRuntimeProtocolType, + AgentRuntimeUpdateInput, + NASConfig, + NASMountConfig, + OSSMountConfig, + OSSMountPoint, + ProtocolSettings, + RegistryAuthConfig, + RegistryCertConfig, + RegistryConfig, + RegistryNetworkConfig, + ScalingConfig, + ScheduledPolicy, + ) + from agentrun.utils.model import NetworkConfig, NetworkMode + + return { + "container": AgentRuntimeContainer, + "create_input": AgentRuntimeCreateInput, + "update_input": AgentRuntimeUpdateInput, + "endpoint_create": AgentRuntimeEndpointCreateInput, + "endpoint_update": AgentRuntimeEndpointUpdateInput, + "routing_cfg": AgentRuntimeEndpointRoutingConfig, + "routing_weight": AgentRuntimeEndpointRoutingWeight, + "health": AgentRuntimeHealthCheckConfig, + "log": AgentRuntimeLogConfig, + "protocol_cfg": AgentRuntimeProtocolConfig, + "protocol_type": AgentRuntimeProtocolType, + "nas": NASConfig, + "nas_mount": NASMountConfig, + "oss": OSSMountConfig, + "oss_mount": OSSMountPoint, + 
"protocol_settings": ProtocolSettings, + "registry_auth": RegistryAuthConfig, + "registry_cert": RegistryCertConfig, + "registry": RegistryConfig, + "registry_net": RegistryNetworkConfig, + "scaling": ScalingConfig, + "scheduled_policy": ScheduledPolicy, + "net_cfg": NetworkConfig, + "net_mode": NetworkMode, + } + + +def _build_container(p: ParsedContainer, m): + rc = None + if p.registry_config: + rc = m["registry"]( + auth_config=( + m["registry_auth"]( + user_name=p.registry_config.auth.user_name, + password=p.registry_config.auth.password, + ) + if p.registry_config.auth + else None + ), + cert_config=( + m["registry_cert"]( + insecure=p.registry_config.cert.insecure, + root_ca_cert_base_64=p.registry_config.cert.root_ca_cert_base_64, + ) + if p.registry_config.cert + else None + ), + network_config=( + m["registry_net"]( + vpc_id=p.registry_config.network.vpc_id, + v_switch_id=p.registry_config.network.v_switch_id, + security_group_id=p.registry_config.network.security_group_id, + ) + if p.registry_config.network + else None + ), + ) + return m["container"]( + image=p.image, + command=p.command, + port=p.port, + image_registry_type=p.image_registry_type, + acr_instance_id=p.acr_instance_id, + registry_config=rc, + ) + + +def to_runtime_create_input(p: ParsedAgentRuntime): + m = _sdk_models() + return m["create_input"]( + agent_runtime_name=p.name, + description=p.description, + workspace_name=p.workspace_name, + workspace_id=p.workspace_id, + artifact_type=ARTIFACT_TYPE_CONTAINER, + system_tags=[SYSTEM_TAG_CLI], + container_configuration=_build_container(p.container, m), + cpu=p.cpu, + memory=p.memory, + port=p.port, + disk_size=p.disk_size, + enable_session_isolation=p.enable_session_isolation, + protocol_configuration=_build_protocol(p.protocol, m), + network_configuration=_build_network(p.network, m), + health_check_configuration=_build_health(p.health_check, m), + log_configuration=_build_log(p.log, m), + environment_variables=p.env, + 
def _build_protocol(p, m):
    """Build the SDK protocol configuration from a parsed ``spec.protocol``.

    Args:
        p: Parsed protocol (attributes ``type`` and ``settings``) or ``None``.
        m: Mapping of short keys to SDK model classes; the keys used here are
            ``protocol_settings``, ``protocol_type`` and ``protocol_cfg``.

    Returns:
        ``None`` when ``p`` is ``None``; otherwise ``m["protocol_cfg"](...)``.
        An empty or missing settings list is passed on as ``None``, and a
        missing protocol type falls back to ``m["protocol_type"].HTTP``.
    """
    if p is None:
        return None

    # Attribute names are identical on the parsed setting and the SDK model.
    setting_fields = (
        "type",
        "name",
        "path",
        "path_prefix",
        "method",
        "request_content_type",
        "response_content_type",
        "headers",
        "input_body_json_schema",
        "output_body_json_schema",
        "a2a_agent_card",
        "a2a_agent_card_url",
        "config",
    )
    if p.settings:
        converted = [
            m["protocol_settings"](
                **{attr: getattr(item, attr) for attr in setting_fields}
            )
            for item in p.settings
        ]
    else:
        converted = None

    if p.type:
        protocol_type = m["protocol_type"](p.type)
    else:
        protocol_type = m["protocol_type"].HTTP
    return m["protocol_cfg"](type=protocol_type, protocol_settings=converted)
+ + Rules: + p.endpoints is None → inject a single ``default`` endpoint + p.endpoints == [] → return [] + otherwise → map each parsed endpoint + """ + m = _sdk_models() + if p.endpoints is None: + return [ + m["endpoint_create"]( + agent_runtime_endpoint_name=DEFAULT_ENDPOINT_NAME, + target_version=DEFAULT_TARGET_VERSION, + ) + ] + return [_endpoint_create(ep, m) for ep in p.endpoints] + + +def _endpoint_create(ep: ParsedEndpoint, m): + routing_cfg = None + if ep.routing is not None: + routing_cfg = m["routing_cfg"]( + version_weights=[ + m["routing_weight"](version=v, weight=w) for v, w in ep.routing + ] + ) + scaling_cfg = _build_scaling(ep.scaling, m) + target_version = ep.target_version or ( + None if ep.routing else DEFAULT_TARGET_VERSION + ) + return m["endpoint_create"]( + agent_runtime_endpoint_name=ep.name, + description=ep.description, + target_version=target_version, + routing_configuration=routing_cfg, + disable_public_network_access=ep.disable_public_network_access, + scaling_config=scaling_cfg, + ) + + +def to_endpoint_update_input(ep: ParsedEndpoint): + m = _sdk_models() + routing_cfg = None + if ep.routing is not None: + routing_cfg = m["routing_cfg"]( + version_weights=[ + m["routing_weight"](version=v, weight=w) for v, w in ep.routing + ] + ) + return m["endpoint_update"]( + agent_runtime_endpoint_name=ep.name, + description=ep.description, + target_version=ep.target_version, + routing_configuration=routing_cfg, + disable_public_network_access=ep.disable_public_network_access, + scaling_config=_build_scaling(ep.scaling, m), + ) + + +def _build_scaling(s: ParsedScaling | None, m): + if s is None: + return None + policies = [ + m["scheduled_policy"]( + name=p.name, + schedule_expression=p.schedule_expression, + start_time=p.start_time, + end_time=p.end_time, + target=p.target, + time_zone=p.time_zone, + ) + for p in s.scheduled_policies + ] or None + return m["scaling"](min_instances=s.min_instances, scheduled_policies=policies) + + +def 
endpoint_needs_update(desired: ParsedEndpoint, current) -> bool: + """Return True if a drift exists between the parsed endpoint and a remote one.""" + if getattr(current, "description", None) != desired.description: + return True + if (getattr(current, "target_version", None) or None) != desired.target_version: + if desired.target_version is not None: + return True + cur_rc = getattr(current, "routing_configuration", None) + cur_pairs = None + if cur_rc and getattr(cur_rc, "version_weights", None): + cur_pairs = [ + (w.version, float(w.weight) if w.weight is not None else None) + for w in cur_rc.version_weights + ] + if cur_pairs != desired.routing: + return True + cur_disable = getattr(current, "disable_public_network_access", None) + if cur_disable != desired.disable_public_network_access: + return True + return False diff --git a/tests/unit/test_error_runtime_extension.py b/tests/unit/test_error_runtime_extension.py new file mode 100644 index 0000000..abfcb3c --- /dev/null +++ b/tests/unit/test_error_runtime_extension.py @@ -0,0 +1,63 @@ +"""Coverage for the runtime-specific exit codes added to _utils/error.py.""" + +import pytest + +from agentrun_cli._utils.error import ( + EXIT_RESOURCE_FAILED, + EXIT_TIMEOUT, + RuntimePollFailed, + RuntimePollTimeout, + handle_errors, +) + + +def test_new_exit_code_constants(): + assert EXIT_RESOURCE_FAILED == 5 + assert EXIT_TIMEOUT == 6 + + +def test_handle_errors_maps_poll_failed_to_5(): + @handle_errors + def _cmd(): + raise RuntimePollFailed( + resource_kind="AgentRuntime", + name="my-agent", + status="CREATE_FAILED", + reason="image pull backoff", + ) + + with pytest.raises(SystemExit) as exc: + _cmd() + assert exc.value.code == 5 + + +def test_handle_errors_maps_poll_timeout_to_6(): + @handle_errors + def _cmd(): + raise RuntimePollTimeout( + resource_kind="AgentRuntime", + name="my-agent", + elapsed=600.0, + ) + + with pytest.raises(SystemExit) as exc: + _cmd() + assert exc.value.code == 6 + + +def 
test_existing_exit_codes_unchanged(): + from agentrun_cli._utils.error import ( + EXIT_AUTH_ERROR, + EXIT_BAD_INPUT, + EXIT_NOT_FOUND, + EXIT_SERVER_ERROR, + EXIT_SUCCESS, + ) + + assert ( + EXIT_SUCCESS, + EXIT_NOT_FOUND, + EXIT_BAD_INPUT, + EXIT_AUTH_ERROR, + EXIT_SERVER_ERROR, + ) == (0, 1, 2, 3, 4) diff --git a/tests/unit/test_runtime_constants.py b/tests/unit/test_runtime_constants.py new file mode 100644 index 0000000..5a45bab --- /dev/null +++ b/tests/unit/test_runtime_constants.py @@ -0,0 +1,32 @@ +"""Constants for the `ar runtime` command group.""" + +from agentrun_cli._utils import runtime_constants as C + + +def test_system_tag_cli(): + assert C.SYSTEM_TAG_CLI == "x-agentrun-cli" + + +def test_default_endpoint_and_version(): + assert C.DEFAULT_ENDPOINT_NAME == "default" + assert C.DEFAULT_TARGET_VERSION == "LATEST" + + +def test_poll_defaults(): + assert C.POLL_INITIAL_INTERVAL == 3.0 + assert C.POLL_MAX_INTERVAL == 10.0 + assert C.POLL_BACKOFF_FACTOR == 1.5 + assert C.ENDPOINT_POLL_CONCURRENCY == 4 + + +def test_timeout_defaults(): + assert C.DEFAULT_APPLY_TIMEOUT_SECONDS == 600 + assert C.DEFAULT_DELETE_TIMEOUT_SECONDS == 300 + + +def test_env_concurrency_override_key(): + assert C.ENV_POLL_CONCURRENCY == "AGENTRUN_CLI_ENDPOINT_POLL_CONCURRENCY" + + +def test_artifact_type_container(): + assert C.ARTIFACT_TYPE_CONTAINER == "Container" diff --git a/tests/unit/test_runtime_render.py b/tests/unit/test_runtime_render.py new file mode 100644 index 0000000..ec438ad --- /dev/null +++ b/tests/unit/test_runtime_render.py @@ -0,0 +1,302 @@ +"""Tests for ParsedAgentRuntime → SDK input rendering.""" + +from types import SimpleNamespace + +from agentrun_cli._utils.agentruntime_yaml import ( + ParsedAgentRuntime, + ParsedContainer, + ParsedEndpoint, + ParsedHealthCheck, + ParsedLog, + ParsedNas, + ParsedNasMountPoint, + ParsedNetwork, + ParsedOssMount, + ParsedOssMountPoint, + ParsedProtocol, + ParsedProtocolSetting, + ParsedRegistryAuth, + ParsedRegistryCert, + 
ParsedRegistryConfig, + ParsedRegistryNetwork, + ParsedScaling, + ParsedScheduledPolicy, +) +from agentrun_cli._utils.runtime_constants import ( + ARTIFACT_TYPE_CONTAINER, + DEFAULT_ENDPOINT_NAME, + DEFAULT_TARGET_VERSION, + SYSTEM_TAG_CLI, +) +from agentrun_cli._utils.runtime_render import ( + endpoint_needs_update, + to_endpoint_create_inputs, + to_endpoint_update_input, + to_runtime_create_input, + to_runtime_update_input, +) + + +def _minimal_parsed(): + return ParsedAgentRuntime( + name="my-agent", + container=ParsedContainer(image="img:v1"), + ) + + +def test_create_input_injects_system_tag_and_container_artifact(): + p = _minimal_parsed() + inp = to_runtime_create_input(p) + assert inp.agent_runtime_name == "my-agent" + assert inp.artifact_type == ARTIFACT_TYPE_CONTAINER + assert inp.system_tags == [SYSTEM_TAG_CLI] + assert inp.container_configuration.image == "img:v1" + # code_configuration must not be set + assert inp.code_configuration is None + + +def test_endpoints_none_injects_default(): + p = _minimal_parsed() + inps = to_endpoint_create_inputs(p) + assert len(inps) == 1 + assert inps[0].agent_runtime_endpoint_name == DEFAULT_ENDPOINT_NAME + assert inps[0].target_version == DEFAULT_TARGET_VERSION + + +def test_endpoints_empty_list_returns_empty(): + p = _minimal_parsed() + p.endpoints = [] + assert to_endpoint_create_inputs(p) == [] + + +def test_endpoints_routing_mapped(): + p = _minimal_parsed() + p.endpoints = [ + ParsedEndpoint(name="canary", routing=[("1", 80.0), ("2", 20.0)]), + ] + inps = to_endpoint_create_inputs(p) + rc = inps[0].routing_configuration + assert rc is not None + vs = rc.version_weights + assert [(v.version, v.weight) for v in vs] == [("1", 80.0), ("2", 20.0)] + + +def test_update_input_strips_workspace(): + p = _minimal_parsed() + p.workspace_name = "ws" + upd = to_runtime_update_input(p) + # AgentRuntimeUpdateInput inherits only MutableProps — workspace fields not present. 
+ assert not hasattr(upd, "workspace_name") or upd.workspace_name is None + + +def test_full_spec_round_trip(): + p = ParsedAgentRuntime( + name="my-agent", + container=ParsedContainer(image="img:v1", port=8000), + cpu=4, + memory=8192, + disk_size=20, + port=9000, + enable_session_isolation=True, + protocol=ParsedProtocol(type="HTTP"), + network=ParsedNetwork(mode="PUBLIC", vpc_id=None), + health_check=ParsedHealthCheck(http_get_url="/healthz", period_seconds=10), + log=ParsedLog(project="p", logstore="ls"), + env={"K": "V"}, + nas=ParsedNas( + mount_points=[ParsedNasMountPoint(server_addr="x.nas:/", mount_dir="/mnt")] + ), + oss_mount=ParsedOssMount( + mount_points=[ParsedOssMountPoint(bucket_name="b", mount_dir="/mnt/oss")] + ), + endpoints=[ + ParsedEndpoint( + name="prod", + target_version="LATEST", + scaling=ParsedScaling(min_instances=2), + ) + ], + ) + inp = to_runtime_create_input(p) + assert inp.cpu == 4 + assert inp.memory == 8192 + assert inp.disk_size == 20 + assert inp.environment_variables == {"K": "V"} + assert inp.nas_config and inp.nas_config.mount_points + assert inp.oss_mount_config and inp.oss_mount_config.mount_points + assert inp.health_check_configuration.http_get_url == "/healthz" + assert inp.log_configuration.project == "p" + + eps = to_endpoint_create_inputs(p) + assert eps[0].scaling_config.min_instances == 2 + + +def test_endpoint_needs_update_detects_target_version_change(): + current = SimpleNamespace( + description=None, + target_version="OLD", + routing_configuration=None, + disable_public_network_access=None, + ) + desired = ParsedEndpoint(name="x", target_version="LATEST") + assert endpoint_needs_update(desired, current) is True + + +def test_endpoint_needs_update_no_drift(): + current = SimpleNamespace( + description=None, + target_version="LATEST", + routing_configuration=None, + disable_public_network_access=None, + ) + desired = ParsedEndpoint(name="x") + # desired.target_version is None — _endpoint_create injects LATEST when 
no + # routing is present, but drift detection treats absent desired value as + # "don't compare". Spec for drift: only fields the user set differ. + assert endpoint_needs_update(desired, current) is False + + +# --- additional render coverage ---------------------------------------------- + + +def test_protocol_settings_rendered(): + p = _minimal_parsed() + p.protocol = ParsedProtocol( + type="HTTP", + settings=[ParsedProtocolSetting(type="HTTP", path="/x", method="GET")], + ) + inp = to_runtime_create_input(p) + pc = inp.protocol_configuration + assert pc and pc.protocol_settings and pc.protocol_settings[0].path == "/x" + + +def test_container_full_registry_config_rendered(): + p = _minimal_parsed() + p.container = ParsedContainer( + image="img", + image_registry_type="CUSTOM", + registry_config=ParsedRegistryConfig( + auth=ParsedRegistryAuth(user_name="u", password="p"), # noqa: S106 + cert=ParsedRegistryCert(insecure=True, root_ca_cert_base_64="abc"), + network=ParsedRegistryNetwork( + vpc_id="vpc-1", v_switch_id="vsw-1", security_group_id="sg-1" + ), + ), + ) + inp = to_runtime_create_input(p) + rc = inp.container_configuration.registry_config + assert rc.auth_config.user_name == "u" + assert rc.cert_config.insecure is True + assert rc.network_config.vpc_id == "vpc-1" + + +def test_endpoint_update_input_renders_routing_and_scaling(): + ep = ParsedEndpoint( + name="ep", + description="d", + routing=[("1", 50.0), ("2", 50.0)], + disable_public_network_access=True, + scaling=ParsedScaling( + min_instances=1, + scheduled_policies=[ + ParsedScheduledPolicy( + name="p", + schedule_expression="* * * * *", + start_time="s", + end_time="e", + target=2, + time_zone="UTC", + ) + ], + ), + ) + upd = to_endpoint_update_input(ep) + assert upd.agent_runtime_endpoint_name == "ep" + assert upd.routing_configuration.version_weights[0].version == "1" + assert upd.scaling_config.min_instances == 1 + assert upd.scaling_config.scheduled_policies[0].target == 2 + + +def 
test_endpoint_update_input_no_routing_no_scaling(): + ep = ParsedEndpoint(name="ep", target_version="LATEST") + upd = to_endpoint_update_input(ep) + assert upd.routing_configuration is None + assert upd.scaling_config is None + + +def test_endpoint_needs_update_description_drift(): + current = SimpleNamespace( + description="old", + target_version=None, + routing_configuration=None, + disable_public_network_access=None, + ) + desired = ParsedEndpoint(name="x", description="new") + assert endpoint_needs_update(desired, current) is True + + +def test_endpoint_needs_update_routing_drift(): + cur_rc = SimpleNamespace( + version_weights=[ + SimpleNamespace(version="1", weight=50.0), + SimpleNamespace(version="2", weight=50.0), + ] + ) + current = SimpleNamespace( + description=None, + target_version=None, + routing_configuration=cur_rc, + disable_public_network_access=None, + ) + desired = ParsedEndpoint(name="x", routing=[("1", 80.0), ("2", 20.0)]) + assert endpoint_needs_update(desired, current) is True + + +def test_endpoint_needs_update_disable_public_drift(): + current = SimpleNamespace( + description=None, + target_version=None, + routing_configuration=None, + disable_public_network_access=False, + ) + desired = ParsedEndpoint(name="x", disable_public_network_access=True) + assert endpoint_needs_update(desired, current) is True + + +def test_endpoint_needs_update_routing_weight_none_handled(): + cur_rc = SimpleNamespace( + version_weights=[ + SimpleNamespace(version="1", weight=None), + ] + ) + current = SimpleNamespace( + description=None, + target_version=None, + routing_configuration=cur_rc, + disable_public_network_access=None, + ) + desired = ParsedEndpoint(name="x", routing=[("1", 100.0)]) + assert endpoint_needs_update(desired, current) is True + + +def test_endpoint_create_with_routing_no_target_version(): + p = _minimal_parsed() + p.endpoints = [ParsedEndpoint(name="canary", routing=[("1", 100.0)])] + inps = to_endpoint_create_inputs(p) + # When routing 
is set, target_version stays None (no DEFAULT injection) + assert inps[0].target_version is None + + +def test_network_default_mode_public(): + p = _minimal_parsed() + p.network = ParsedNetwork(mode=None) + inp = to_runtime_create_input(p) + # Should not error out, default to PUBLIC mode + assert inp.network_configuration is not None + + +def test_protocol_with_no_type_defaults_http(): + p = _minimal_parsed() + p.protocol = ParsedProtocol(type=None) + inp = to_runtime_create_input(p) + assert inp.protocol_configuration is not None diff --git a/tests/unit/test_runtime_yaml.py b/tests/unit/test_runtime_yaml.py new file mode 100644 index 0000000..71fb562 --- /dev/null +++ b/tests/unit/test_runtime_yaml.py @@ -0,0 +1,796 @@ +"""Tests for the AgentRuntime YAML parser.""" + +import pytest + +from agentrun_cli._utils.agentruntime_yaml import ( + ParsedAgentRuntime, + ParsedContainer, + ParsedEndpoint, + YamlSchemaError, + parse_yaml_text, +) + + +def test_dataclasses_exist_and_default(): + rt = ParsedAgentRuntime( + name="x", + container=ParsedContainer(image="img:1"), + ) + assert rt.name == "x" + assert rt.container.image == "img:1" + assert rt.endpoints is None # None means "inject default" + + ep = ParsedEndpoint(name="default") + assert ep.target_version is None + assert ep.routing is None + + +MINIMAL_YAML = """ +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: + name: my-agent +spec: + container: + image: registry.example.com/my-agent:v1 +""" + + +def test_parse_minimal_yaml(): + docs = parse_yaml_text(MINIMAL_YAML) + assert len(docs) == 1 + rt = docs[0] + assert rt.name == "my-agent" + assert rt.container.image == "registry.example.com/my-agent:v1" + assert rt.container.command is None + assert rt.endpoints is None # None means CLI must inject default later + # Defaults that should NOT be set at parse time (render layer handles them): + assert rt.cpu is None + assert rt.memory is None + assert rt.port is None + + +def _doc_with(**override): + base = { + 
"apiVersion": "agentrun/v1", + "kind": "AgentRuntime", + "metadata": {"name": "x"}, + "spec": {"container": {"image": "img"}}, + } + base.update(override) + import yaml as _y + + return _y.dump(base) + + +def test_wrong_api_version_rejected(): + with pytest.raises(YamlSchemaError, match="apiVersion"): + parse_yaml_text(_doc_with(apiVersion="wrong/v1")) + + +def test_wrong_kind_rejected(): + with pytest.raises(YamlSchemaError, match="kind"): + parse_yaml_text(_doc_with(kind="Something")) + + +def test_missing_name_rejected(): + with pytest.raises(YamlSchemaError, match="metadata.name"): + parse_yaml_text(_doc_with(metadata={})) + + +def test_bad_name_pattern_rejected(): + with pytest.raises(YamlSchemaError, match="metadata.name"): + parse_yaml_text(_doc_with(metadata={"name": "BadName!"})) + + +def test_missing_container_rejected(): + with pytest.raises(YamlSchemaError, match="spec.container"): + parse_yaml_text(_doc_with(spec={})) + + +def test_missing_image_rejected(): + with pytest.raises(YamlSchemaError, match="image"): + parse_yaml_text(_doc_with(spec={"container": {}})) + + +def test_empty_yaml_rejected(): + with pytest.raises(YamlSchemaError, match="No documents"): + parse_yaml_text("") + + +def test_invalid_yaml_rejected(): + with pytest.raises(YamlSchemaError, match="Invalid YAML"): + parse_yaml_text(":\n: invalid") + + +def test_reject_spec_code(): + with pytest.raises(YamlSchemaError, match="Container mode"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "code": {"zipFile": "abc"}, + } + ) + ) + + +def test_reject_metadata_tags(): + with pytest.raises(YamlSchemaError, match="tags"): + parse_yaml_text(_doc_with(metadata={"name": "x", "tags": ["t1"]})) + + +def test_reject_metadata_system_tags(): + with pytest.raises(YamlSchemaError, match="system_tags"): + parse_yaml_text(_doc_with(metadata={"name": "x", "systemTags": ["x-foo"]})) + + +def test_workspace_xor_workspaceid(): + with pytest.raises(YamlSchemaError, 
match="workspace"): + parse_yaml_text( + _doc_with(metadata={"name": "x", "workspace": "a", "workspaceId": "ws-1"}) + ) + + +def test_container_full_fields(): + text = _doc_with( + spec={ + "container": { + "image": "img:v1", + "command": ["python", "app.py"], + "port": 8080, + "imageRegistryType": "ACREE", + "acrInstanceId": "cri-xxx", + } + } + ) + rt = parse_yaml_text(text)[0] + assert rt.container.command == ["python", "app.py"] + assert rt.container.port == 8080 + assert rt.container.image_registry_type == "ACREE" + assert rt.container.acr_instance_id == "cri-xxx" + + +def test_custom_registry_requires_config(): + text = _doc_with( + spec={"container": {"image": "img", "imageRegistryType": "CUSTOM"}} + ) + with pytest.raises(YamlSchemaError, match="registryConfig"): + parse_yaml_text(text) + + +def test_custom_registry_config_parsed(): + text = _doc_with( + spec={ + "container": { + "image": "img", + "imageRegistryType": "CUSTOM", + "registryConfig": { + "auth": {"userName": "u", "password": "p"}, + "cert": {"insecure": True}, + "network": {"vpcId": "vpc-1"}, + }, + } + } + ) + rc = parse_yaml_text(text)[0].container.registry_config + assert rc is not None + assert rc.auth and rc.auth.user_name == "u" + assert rc.cert and rc.cert.insecure is True + assert rc.network and rc.network.vpc_id == "vpc-1" + + +def test_resource_fields(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "cpu": 4, + "memory": 8192, + "diskSize": 20, + "port": 9100, + "enableSessionIsolation": True, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.cpu == 4 + assert rt.memory == 8192 + assert rt.disk_size == 20 + assert rt.port == 9100 + assert rt.enable_session_isolation is True + + +def test_protocol_parsed(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "protocol": { + "type": "MCP", + "settings": [{"type": "MCP", "path": "/mcp", "method": "POST"}], + }, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.protocol and rt.protocol.type == "MCP" + 
assert rt.protocol.settings and rt.protocol.settings[0].path == "/mcp" + + +def test_network_private_requires_vpc(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "network": {"mode": "PRIVATE"}, + } + ) + with pytest.raises(YamlSchemaError, match="vpcId"): + parse_yaml_text(text) + + +def test_network_private_with_vpc_ok(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "network": { + "mode": "PUBLIC_AND_PRIVATE", + "vpcId": "vpc-1", + "vswitchIds": ["vsw-1"], + "securityGroupId": "sg-1", + }, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.network and rt.network.mode == "PUBLIC_AND_PRIVATE" + assert rt.network.vpc_id == "vpc-1" + assert rt.network.vswitch_ids == ["vsw-1"] + + +def test_log_pair_required(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "log": {"project": "p"}, + } + ) + with pytest.raises(YamlSchemaError, match="log.logstore"): + parse_yaml_text(text) + + +def test_log_both_present(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "log": {"project": "p", "logstore": "ls"}, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.log and rt.log.project == "p" and rt.log.logstore == "ls" + + +def test_env_parsed(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "env": {"LOG_LEVEL": "info", "FOO": "bar"}, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.env == {"LOG_LEVEL": "info", "FOO": "bar"} + + +def test_health_check_parsed(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "healthCheck": { + "httpGetUrl": "/healthz", + "initialDelaySeconds": 5, + "periodSeconds": 10, + "timeoutSeconds": 3, + "failureThreshold": 3, + "successThreshold": 1, + }, + } + ) + rt = parse_yaml_text(text)[0] + hc = rt.health_check + assert hc and hc.http_get_url == "/healthz" and hc.period_seconds == 10 + + +def test_nas_parsed(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "nas": { + "userId": 1000, + "groupId": 1000, + "mountPoints": [ + 
{"serverAddr": "x.nas:/", "mountDir": "/mnt", "enableTLS": True}, + ], + }, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.nas and rt.nas.user_id == 1000 + assert rt.nas.mount_points[0].server_addr == "x.nas:/" + assert rt.nas.mount_points[0].enable_tls is True + + +def test_nas_missing_required_field(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "nas": {"mountPoints": [{"serverAddr": "x.nas:/"}]}, # missing mountDir + } + ) + with pytest.raises(YamlSchemaError, match="mountDir"): + parse_yaml_text(text) + + +def test_oss_parsed(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "ossMount": { + "mountPoints": [ + {"bucketName": "b", "mountDir": "/mnt/oss", "readOnly": True}, + ] + }, + } + ) + rt = parse_yaml_text(text)[0] + assert rt.oss_mount and rt.oss_mount.mount_points[0].bucket_name == "b" + assert rt.oss_mount.mount_points[0].read_only is True + + +def test_oss_missing_required_field(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "ossMount": {"mountPoints": [{"bucketName": "b"}]}, + } + ) + with pytest.raises(YamlSchemaError, match="mountDir"): + parse_yaml_text(text) + + +def test_endpoints_omitted_is_none(): + rt = parse_yaml_text(MINIMAL_YAML)[0] + assert rt.endpoints is None # CLI render layer will inject default + + +def test_endpoints_empty_list_preserved(): + text = _doc_with(spec={"container": {"image": "img"}, "endpoints": []}) + rt = parse_yaml_text(text)[0] + assert rt.endpoints == [] # explicit "no endpoints" + + +def test_endpoint_basic_parsed(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "prod", "targetVersion": "LATEST"}], + } + ) + rt = parse_yaml_text(text)[0] + assert rt.endpoints and rt.endpoints[0].name == "prod" + assert rt.endpoints[0].target_version == "LATEST" + + +def test_endpoint_duplicate_name_rejected(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "a"}, {"name": "a"}], + } + ) + with 
pytest.raises(YamlSchemaError, match="duplicate"): + parse_yaml_text(text) + + +def test_endpoint_target_version_and_routing_mutex(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [ + { + "name": "x", + "targetVersion": "1", + "routing": [{"version": "1", "weight": 100}], + } + ], + } + ) + with pytest.raises(YamlSchemaError, match="mutually exclusive"): + parse_yaml_text(text) + + +def test_endpoint_routing_weight_sum(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [ + { + "name": "x", + "routing": [ + {"version": "1", "weight": 60}, + {"version": "2", "weight": 30}, + ], + } + ], + } + ) + with pytest.raises(YamlSchemaError, match="weight"): + parse_yaml_text(text) + + +def test_endpoint_routing_ok(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [ + { + "name": "x", + "routing": [ + {"version": "1", "weight": 80}, + {"version": "2", "weight": 20}, + ], + } + ], + } + ) + rt = parse_yaml_text(text)[0] + assert rt.endpoints[0].routing == [("1", 80.0), ("2", 20.0)] + + +def test_endpoint_scaling_target_lt_min_rejected(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [ + { + "name": "x", + "scaling": { + "minInstances": 5, + "scheduledPolicies": [ + { + "name": "p", + "target": 3, + "scheduleExpression": "0 * * * *", + }, + ], + }, + } + ], + } + ) + with pytest.raises(YamlSchemaError, match="minInstances"): + parse_yaml_text(text) + + +def test_multi_doc_supported(): + text = ( + _doc_with(metadata={"name": "a"}) + "---\n" + _doc_with(metadata={"name": "b"}) + ) + docs = parse_yaml_text(text) + assert [d.name for d in docs] == ["a", "b"] + + +def test_parse_yaml_file(tmp_path): + from agentrun_cli._utils.agentruntime_yaml import parse_yaml_file + + p = tmp_path / "rt.yaml" + p.write_text(MINIMAL_YAML, encoding="utf-8") + docs = parse_yaml_file(str(p)) + assert docs[0].name == "my-agent" + + +# --- additional coverage for edge / error branches 
----------------------------- + + +def test_top_level_not_mapping(): + with pytest.raises(YamlSchemaError, match="Top level"): + parse_yaml_text("- 1\n- 2\n") + + +def test_metadata_not_mapping(): + with pytest.raises(YamlSchemaError, match="metadata"): + parse_yaml_text( + "apiVersion: agentrun/v1\nkind: AgentRuntime\nmetadata: foo\n" + "spec:\n container:\n image: img\n" + ) + + +def test_protocol_not_mapping(): + with pytest.raises(YamlSchemaError, match="protocol"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "protocol": "bad", + } + ) + ) + + +def test_protocol_settings_not_list(): + with pytest.raises(YamlSchemaError, match="settings"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "protocol": {"type": "HTTP", "settings": "bad"}, + } + ) + ) + + +def test_network_not_mapping(): + with pytest.raises(YamlSchemaError, match="network"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "network": "bad", + } + ) + ) + + +def test_health_check_not_mapping(): + with pytest.raises(YamlSchemaError, match="healthCheck"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "healthCheck": "bad", + } + ) + ) + + +def test_log_not_mapping(): + with pytest.raises(YamlSchemaError, match="log"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "log": "bad", + } + ) + ) + + +def test_env_not_mapping(): + with pytest.raises(YamlSchemaError, match="env"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "env": "bad", + } + ) + ) + + +def test_env_non_string_key(): + # Non-string keys are valid YAML but should be rejected. Build YAML manually. 
+ text = ( + "apiVersion: agentrun/v1\nkind: AgentRuntime\n" + "metadata: {name: x}\n" + "spec:\n container: {image: img}\n env:\n 1: v\n" + ) + with pytest.raises(YamlSchemaError, match="env"): + parse_yaml_text(text) + + +def test_nas_not_mapping(): + with pytest.raises(YamlSchemaError, match="nas"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "nas": "bad", + } + ) + ) + + +def test_nas_mount_point_not_mapping(): + with pytest.raises(YamlSchemaError, match="mountPoints"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "nas": {"mountPoints": ["bad"]}, + } + ) + ) + + +def test_oss_not_mapping(): + with pytest.raises(YamlSchemaError, match="ossMount"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "ossMount": "bad", + } + ) + ) + + +def test_oss_mount_point_not_mapping(): + with pytest.raises(YamlSchemaError, match="mountPoints"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "ossMount": {"mountPoints": ["bad"]}, + } + ) + ) + + +def test_endpoints_not_list(): + with pytest.raises(YamlSchemaError, match="endpoints"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": "bad", + } + ) + ) + + +def test_endpoint_item_not_mapping(): + with pytest.raises(YamlSchemaError, match="endpoints"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": ["bad"], + } + ) + ) + + +def test_endpoint_missing_name(): + with pytest.raises(YamlSchemaError, match="name"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{}], + } + ) + ) + + +def test_endpoint_routing_not_list(): + with pytest.raises(YamlSchemaError, match="routing"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "x", "routing": "bad"}], + } + ) + ) + + +def test_endpoint_routing_item_not_mapping(): + with pytest.raises(YamlSchemaError, 
match="routing"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "x", "routing": ["bad"]}], + } + ) + ) + + +def test_endpoint_routing_missing_version_weight(): + with pytest.raises(YamlSchemaError, match="version and weight"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "x", "routing": [{"version": "1"}]}], + } + ) + ) + + +def test_endpoint_scaling_not_mapping(): + with pytest.raises(YamlSchemaError, match="scaling"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "x", "scaling": "bad"}], + } + ) + ) + + +def test_endpoint_scheduled_policy_not_mapping(): + with pytest.raises(YamlSchemaError, match="scheduledPolicies"): + parse_yaml_text( + _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [ + { + "name": "x", + "scaling": {"scheduledPolicies": ["bad"]}, + } + ], + } + ) + ) + + +def test_document_error_includes_index(): + text = ( + _doc_with(metadata={"name": "ok"}) + "---\n" + _doc_with(apiVersion="wrong/v1") + ) + with pytest.raises(YamlSchemaError, match="Document #2"): + parse_yaml_text(text) + + +def test_registry_config_for_acr_optional_parsed(): + # registryConfig is allowed even when imageRegistryType is ACR + text = _doc_with( + spec={ + "container": { + "image": "img", + "imageRegistryType": "ACR", + "registryConfig": { + "auth": {"userName": "u"}, + }, + } + } + ) + rc = parse_yaml_text(text)[0].container.registry_config + assert rc and rc.auth and rc.auth.user_name == "u" + + +def test_endpoint_minimal_keeps_target_version_none(): + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [{"name": "x"}], + } + ) + rt = parse_yaml_text(text)[0] + assert rt.endpoints[0].target_version is None + assert rt.endpoints[0].routing is None + + +def test_registry_password_not_in_repr(): + from agentrun_cli._utils.agentruntime_yaml import ParsedRegistryAuth + auth = 
ParsedRegistryAuth(user_name="u", password="secret") # noqa: S106 + rendered = repr(auth) + assert "secret" not in rendered + assert "u" in rendered + + +def test_image_registry_type_must_be_known(): + text = _doc_with(spec={ + "container": {"image": "img", "imageRegistryType": "acree"}, + }) + with pytest.raises(YamlSchemaError, match="imageRegistryType"): + parse_yaml_text(text) + + +def test_endpoint_routing_non_numeric_weight(): + text = _doc_with(spec={ + "container": {"image": "img"}, + "endpoints": [{ + "name": "x", + "routing": [{"version": "1", "weight": "abc"}], + }], + }) + with pytest.raises(YamlSchemaError, match="weight"): + parse_yaml_text(text) From b4f654498e9b0c2935055ab0809692c200f81c60 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Tue, 19 May 2026 22:22:27 +0800 Subject: [PATCH 2/8] feat(runtime): polling state machine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR2 of the ar runtime command group. SDK-decoupled polling primitives. - _utils/runtime_state.py — PollConfig dataclass + three polling functions: - poll_until_final: exponential backoff (3s -> 10s), raises RuntimePollFailed on *_FAILED, RuntimePollTimeout on elapsed >= timeout. Status check happens BEFORE timeout check so a resource that becomes READY at the same instant as timeout still succeeds. - poll_until_deleted: caller supplies an is_not_found predicate so the module stays SDK-agnostic; refresh's NotFound -> success; *_FAILED raises RuntimePollFailed. - poll_many_parallel: ThreadPoolExecutor; re-raises first failure; concurrency clamped to [1, len(resources)]; early-return on empty. All time.sleep / time.monotonic calls use qualified time.* form so tests can monkey-patch deterministically. Zero SDK imports — pure duck typing on .status / .refresh() / .status_reason. Coverage: 95.91%. New module at 100%. 
Signed-off-by: Sodawyx --- src/agentrun_cli/_utils/runtime_state.py | 177 ++++++++++++++++ tests/unit/test_runtime_state.py | 254 +++++++++++++++++++++++ 2 files changed, 431 insertions(+) create mode 100644 src/agentrun_cli/_utils/runtime_state.py create mode 100644 tests/unit/test_runtime_state.py diff --git a/src/agentrun_cli/_utils/runtime_state.py b/src/agentrun_cli/_utils/runtime_state.py new file mode 100644 index 0000000..a8f0490 --- /dev/null +++ b/src/agentrun_cli/_utils/runtime_state.py @@ -0,0 +1,177 @@ +"""Async-status polling primitives for AgentRuntime / AgentRuntimeEndpoint. + +These helpers know nothing about the SDK shape — callers pass in any object +that exposes ``status``, ``status_reason`` and ``refresh()`` (duck typing). This keeps +the module trivially mockable and lets us reuse it for both ``AgentRuntime`` and +``AgentRuntimeEndpoint`` (which share ``Status`` semantics). +""" + +from __future__ import annotations + +import time +from collections.abc import Callable +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass +from typing import Any + +from agentrun_cli._utils.error import RuntimePollFailed, RuntimePollTimeout +from agentrun_cli._utils.runtime_constants import ( + ENDPOINT_POLL_CONCURRENCY, + POLL_BACKOFF_FACTOR, + POLL_INITIAL_INTERVAL, + POLL_MAX_INTERVAL, +) + +_FAILED_SUFFIX = "_FAILED" +_READY = "READY" + + +@dataclass +class PollConfig: + initial_interval: float = POLL_INITIAL_INTERVAL + max_interval: float = POLL_MAX_INTERVAL + backoff_factor: float = POLL_BACKOFF_FACTOR + timeout: float = 600.0 + + +def poll_until_final( + resource: Any, + *, + resource_kind: str, + cfg: PollConfig | None = None, + on_tick: Callable[[Any, float], None] | None = None, +) -> Any: + """Block until ``resource.status`` is a final state. + + Final = ``READY`` or any ``*_FAILED``. ``DELETING`` is NOT final (use + ``poll_until_deleted`` for delete paths). 
+ + Side effects: calls ``resource.refresh()`` between checks; sleeps with + exponential backoff capped at ``cfg.max_interval``. + + Raises: + RuntimePollFailed: status ends in ``_FAILED``. + RuntimePollTimeout: elapsed time exceeds ``cfg.timeout``. + """ + cfg = cfg or PollConfig() + start = time.monotonic() + interval = cfg.initial_interval + name = _resource_name(resource) + while True: + status = getattr(resource, "status", None) + elapsed = time.monotonic() - start + if on_tick: + on_tick(resource, elapsed) + if status == _READY: + return resource + if isinstance(status, str) and status.endswith(_FAILED_SUFFIX): + raise RuntimePollFailed( + resource_kind=resource_kind, + name=name, + status=status, + reason=getattr(resource, "status_reason", None), + ) + if elapsed >= cfg.timeout: + raise RuntimePollTimeout( + resource_kind=resource_kind, + name=name, + elapsed=elapsed, + ) + time.sleep(interval) + interval = min(interval * cfg.backoff_factor, cfg.max_interval) + resource.refresh() + + +def poll_until_deleted( + resource: Any, + *, + resource_kind: str, + is_not_found: Callable[[BaseException], bool], + cfg: PollConfig | None = None, + on_tick: Callable[[Any, float], None] | None = None, +) -> None: + """Poll until ``resource.refresh()`` raises a NotFound-like exception. + + The caller supplies ``is_not_found`` because the SDK uses different + exception classes (and we avoid importing them at module load time). 
+ """ + cfg = cfg or PollConfig() + start = time.monotonic() + interval = cfg.initial_interval + name = _resource_name(resource) + while True: + elapsed = time.monotonic() - start + if on_tick: + on_tick(resource, elapsed) + status = getattr(resource, "status", None) + if isinstance(status, str) and status.endswith(_FAILED_SUFFIX): + raise RuntimePollFailed( + resource_kind=resource_kind, + name=name, + status=status, + reason=getattr(resource, "status_reason", None), + ) + if elapsed >= cfg.timeout: + raise RuntimePollTimeout( + resource_kind=resource_kind, + name=name, + elapsed=elapsed, + ) + time.sleep(interval) + interval = min(interval * cfg.backoff_factor, cfg.max_interval) + try: + resource.refresh() + except BaseException as e: # noqa: BLE001 — caller decides + if is_not_found(e): + return + raise + + +def _resource_name(resource: Any) -> str: + for attr in ( + "agent_runtime_name", + "agent_runtime_endpoint_name", + "name", + "agent_runtime_id", + "agent_runtime_endpoint_id", + ): + v = getattr(resource, attr, None) + if v: + return str(v) + return "" + + +def poll_many_parallel( + resources: list, + *, + resource_kind: str, + cfg: PollConfig | None = None, + concurrency: int = ENDPOINT_POLL_CONCURRENCY, + on_tick: Callable[[Any, float], None] | None = None, +) -> list: + """Poll multiple resources to terminal state concurrently. + + Re-raises the first ``RuntimePollFailed`` / ``RuntimePollTimeout`` that + surfaces. Nothing is cancelled on failure: leaving the executor's ``with`` + block waits for every other poller to reach READY, ``*_FAILED`` or its own + timeout first. Results come back in completion order, not input order. 
+ """ + cfg = cfg or PollConfig() + if not resources: + return [] + concurrency = max(1, min(concurrency, len(resources))) + with ThreadPoolExecutor(max_workers=concurrency) as ex: + futures = { + ex.submit( + poll_until_final, + r, + resource_kind=resource_kind, + cfg=cfg, + on_tick=on_tick, + ): r + for r in resources + } + results: list = [] + for fut in as_completed(futures): + results.append(fut.result()) # re-raises first exception + return results diff --git a/tests/unit/test_runtime_state.py b/tests/unit/test_runtime_state.py new file mode 100644 index 0000000..c556166 --- /dev/null +++ b/tests/unit/test_runtime_state.py @@ -0,0 +1,254 @@ +"""Tests for runtime_state polling primitives.""" + +from types import SimpleNamespace + +import pytest + +from agentrun_cli._utils.error import RuntimePollFailed, RuntimePollTimeout +from agentrun_cli._utils.runtime_state import PollConfig, poll_until_final + + +def _mk_resource(statuses, name="my-agent"): + """Build a fake resource whose .refresh() advances through statuses.""" + states = iter(statuses) + res = SimpleNamespace( + status=next(states), + status_reason=None, + agent_runtime_name=name, + name=name, + ) + + def _refresh(*args, **kwargs): + try: + res.status = next(states) + except StopIteration: + pass + return res + + res.refresh = _refresh + return res + + +def test_poll_config_defaults(): + cfg = PollConfig() + assert cfg.initial_interval == 3.0 + assert cfg.max_interval == 10.0 + assert cfg.backoff_factor == 1.5 + assert cfg.timeout == 600.0 + + +def test_poll_config_override(): + cfg = PollConfig(timeout=42.0, initial_interval=1.0) + assert cfg.timeout == 42.0 + assert cfg.initial_interval == 1.0 + + +def test_poll_until_ready(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + res = _mk_resource(["CREATING", "CREATING", "READY"]) + out = poll_until_final( + res, + resource_kind="AgentRuntime", + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + ) + assert out.status == "READY" + + 
+def test_poll_failed_raises(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + res = _mk_resource(["CREATING", "CREATE_FAILED"]) + res.status_reason = "image pull backoff" + with pytest.raises(RuntimePollFailed) as exc: + poll_until_final( + res, + resource_kind="AgentRuntime", + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + ) + assert exc.value.status == "CREATE_FAILED" + assert "image pull" in str(exc.value) + + +def test_poll_timeout(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + res = _mk_resource(["CREATING"] * 50) + fake_clock = iter([0.0, 5.0, 11.0]) # passes timeout=10 on 3rd check + monkeypatch.setattr("time.monotonic", lambda: next(fake_clock)) + with pytest.raises(RuntimePollTimeout): + poll_until_final( + res, + resource_kind="AgentRuntime", + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + ) + + +class FakeNotFound(Exception): + pass + + +def test_poll_until_deleted(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + res = SimpleNamespace( + status="DELETING", status_reason=None, agent_runtime_name="x" + ) + call_count = {"n": 0} + + def _refresh(*a, **k): + call_count["n"] += 1 + if call_count["n"] >= 2: + raise FakeNotFound("gone") + return res + + res.refresh = _refresh + from agentrun_cli._utils.runtime_state import poll_until_deleted + + poll_until_deleted( + res, + resource_kind="AgentRuntime", + is_not_found=lambda e: isinstance(e, FakeNotFound), + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + ) + + +def test_poll_until_delete_failed(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + states = iter(["DELETING", "DELETE_FAILED"]) + res = SimpleNamespace( + status=next(states), + status_reason="quota exceeded", + agent_runtime_name="x", + ) + + def _refresh(*a, **k): + try: + res.status = next(states) + except StopIteration: + pass + return res + + res.refresh = _refresh + from agentrun_cli._utils.runtime_state import poll_until_deleted + + with 
pytest.raises(RuntimePollFailed) as exc: + poll_until_deleted( + res, + resource_kind="AgentRuntime", + is_not_found=lambda e: False, + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + ) + assert "DELETE_FAILED" in str(exc.value) + + +def test_poll_many_parallel_all_ready(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + from agentrun_cli._utils.runtime_state import poll_many_parallel + + res_a = _mk_resource(["CREATING", "READY"], name="a") + res_b = _mk_resource(["CREATING", "CREATING", "READY"], name="b") + out = poll_many_parallel( + [res_a, res_b], + resource_kind="AgentRuntimeEndpoint", + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + concurrency=2, + ) + assert len(out) == 2 + assert all(r.status == "READY" for r in out) + + +def test_poll_many_parallel_one_fails(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + from agentrun_cli._utils.runtime_state import poll_many_parallel + + res_a = _mk_resource(["CREATING", "READY"], name="a") + res_b = _mk_resource(["CREATING", "CREATE_FAILED"], name="b") + with pytest.raises(RuntimePollFailed): + poll_many_parallel( + [res_a, res_b], + resource_kind="AgentRuntimeEndpoint", + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + concurrency=2, + ) + + +def test_poll_until_final_on_tick_invoked(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + res = _mk_resource(["CREATING", "READY"]) + seen: list = [] + poll_until_final( + res, + resource_kind="AgentRuntime", + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + on_tick=lambda r, e: seen.append((r.status, e)), + ) + assert seen # at least one tick + assert seen[0][0] == "CREATING" + + +def test_poll_until_deleted_on_tick_and_timeout(monkeypatch): + """Exercise on_tick + timeout branches in poll_until_deleted.""" + monkeypatch.setattr("time.sleep", lambda *_: None) + res = SimpleNamespace( + status="DELETING", status_reason=None, agent_runtime_name="x" + ) + res.refresh = lambda *a, **k: res # never 
raises + fake_clock = iter([0.0, 11.0]) # passes timeout=10 on 2nd check + monkeypatch.setattr("time.monotonic", lambda: next(fake_clock)) + from agentrun_cli._utils.runtime_state import poll_until_deleted + + seen: list = [] + with pytest.raises(RuntimePollTimeout): + poll_until_deleted( + res, + resource_kind="AgentRuntime", + is_not_found=lambda e: False, + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + on_tick=lambda r, e: seen.append(e), + ) + assert seen + + +def test_poll_until_deleted_refresh_raises_other(monkeypatch): + """Non-not-found exceptions from refresh propagate.""" + monkeypatch.setattr("time.sleep", lambda *_: None) + res = SimpleNamespace( + status="DELETING", status_reason=None, agent_runtime_name="x" + ) + + def _refresh(*a, **k): + raise RuntimeError("boom") + + res.refresh = _refresh + from agentrun_cli._utils.runtime_state import poll_until_deleted + + with pytest.raises(RuntimeError, match="boom"): + poll_until_deleted( + res, + resource_kind="AgentRuntime", + is_not_found=lambda e: False, + cfg=PollConfig(timeout=10.0, initial_interval=0.0), + ) + + +def test_resource_name_fallback_to_unnamed(): + """_resource_name returns '' when no known attr is set.""" + from agentrun_cli._utils.runtime_state import _resource_name + + res = SimpleNamespace() # no name attributes at all + assert _resource_name(res) == "" + + +def test_resource_name_skips_falsy_first_attrs(): + """Skip empty values and fall through to a later populated attr.""" + from agentrun_cli._utils.runtime_state import _resource_name + + res = SimpleNamespace( + agent_runtime_name=None, + agent_runtime_endpoint_name="", + name="resolved", + ) + assert _resource_name(res) == "resolved" + + +def test_poll_many_parallel_empty_list(): + from agentrun_cli._utils.runtime_state import poll_many_parallel + + assert poll_many_parallel([], resource_kind="AgentRuntimeEndpoint") == [] From 1c6e04bb42c7c8dc19c05934d57db8c40a0c2607 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Tue, 19 May 
2026 22:23:03 +0800 Subject: [PATCH 3/8] feat(runtime): apply reconciler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR3 of the ar runtime command group. Diff-and-execute orchestration. - _utils/runtime_reconciler.py — orchestration brain for apply: - Action = Literal["create", "update", "delete", "noop"]. - RuntimeReconcileResult / EndpointAction / ApplyPlan dataclasses. - find_runtime_by_name: list -> filter by agent_runtime_name. - reconcile_runtime: list -> create (absent) or update_by_id (present). - reconcile_endpoints: name-keyed diff, create/update on drift/noop, prune orphans when prune=True (default). Injects single 'default' endpoint when desired is None. No SDK imports — works against duck-typed clients/runtimes; tests inject MagicMock instances. Coverage: 96.01%. New module at 100%. Signed-off-by: Sodawyx --- src/agentrun_cli/_utils/runtime_reconciler.py | 158 ++++++++++++++++ tests/unit/test_runtime_reconciler.py | 169 ++++++++++++++++++ 2 files changed, 327 insertions(+) create mode 100644 src/agentrun_cli/_utils/runtime_reconciler.py create mode 100644 tests/unit/test_runtime_reconciler.py diff --git a/src/agentrun_cli/_utils/runtime_reconciler.py b/src/agentrun_cli/_utils/runtime_reconciler.py new file mode 100644 index 0000000..b12db43 --- /dev/null +++ b/src/agentrun_cli/_utils/runtime_reconciler.py @@ -0,0 +1,158 @@ +"""Apply-time reconciler: list → diff → create/update/delete. + +This module is the orchestration brain for ``ar runtime apply``. It does not +poll — it just submits the SDK calls and hands the in-flight resource handles +back to the command layer (which then drives ``runtime_state.poll_*``). + +Endpoint reconciliation does a by-name diff and supports pruning unknown +endpoints (default on; toggled by ``ar runtime apply --no-prune-endpoints``). 
+""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Literal + +from agentrun_cli._utils.agentruntime_yaml import ( + ParsedAgentRuntime, + ParsedEndpoint, +) +from agentrun_cli._utils.runtime_render import ( + endpoint_needs_update, + to_endpoint_create_inputs, + to_endpoint_update_input, + to_runtime_create_input, + to_runtime_update_input, +) + +Action = Literal["create", "update", "delete", "noop"] + + +@dataclass +class RuntimeReconcileResult: + action: Action + runtime: Any + + +@dataclass +class EndpointAction: + action: Action + name: str + endpoint: Any | None + + +@dataclass +class ApplyPlan: + runtime_result: RuntimeReconcileResult + endpoint_actions: list[EndpointAction] = field(default_factory=list) + + +def find_runtime_by_name(client: Any, name: str): + """List runtimes and return the first whose ``agent_runtime_name`` matches. + + The SDK exposes ``list_all`` returning an iterable of AgentRuntime objects. + Filtering happens client-side; the backend has no name-filter on list. + """ + for rt in client.list_all(): + if getattr(rt, "agent_runtime_name", None) == name: + return rt + return None + + +def reconcile_runtime( + parsed: ParsedAgentRuntime, + *, + client: Any, +) -> RuntimeReconcileResult: + """List by name → create or update_by_id. + + Returns the in-flight resource (status likely CREATING / UPDATING) so the + caller can poll it. + """ + existing = find_runtime_by_name(client, parsed.name) + if existing is None: + runtime = client.create(to_runtime_create_input(parsed)) + return RuntimeReconcileResult(action="create", runtime=runtime) + runtime = client.update_by_id( + existing.agent_runtime_id, + to_runtime_update_input(parsed), + ) + return RuntimeReconcileResult(action="update", runtime=runtime) + + +def reconcile_endpoints( + runtime: Any, + *, + desired: list[ParsedEndpoint] | None, + prune: bool = True, +) -> list[EndpointAction]: + """Apply-time endpoint diff. 
+ + Args: + runtime: a Runtime instance with ``list_endpoints / create_endpoint / + update_endpoint / delete_endpoint`` methods (matches SDK 0.0.200). + desired: parsed list. ``None`` means "user omitted endpoints"; the CLI + injects a single ``default`` endpoint. ``[]`` means "explicitly no + endpoints"; the CLI deletes orphans when ``prune`` is True. + prune: when True, endpoints present remotely but absent from ``desired`` + are deleted. + + Returns: + Ordered list of ``EndpointAction`` capturing what happened. + """ + if desired is None: + # CLI injects a single default endpoint. + desired = [ParsedEndpoint(name="default", target_version="LATEST")] + + current_by_name = { + getattr(ep, "agent_runtime_endpoint_name", None): ep + for ep in runtime.list_endpoints() + } + desired_names = {ep.name for ep in desired} + actions: list[EndpointAction] = [] + + # to_create / to_update / noop + create_inputs = to_endpoint_create_inputs( + # Build a stub-parsed runtime carrying just .endpoints + _StubWithEndpoints(desired) + ) + create_inputs_by_name = {ci.agent_runtime_endpoint_name: ci for ci in create_inputs} + + for ep in desired: + current = current_by_name.get(ep.name) + if current is None: + ci = create_inputs_by_name[ep.name] + new_ep = runtime.create_endpoint(ci) + actions.append( + EndpointAction(action="create", name=ep.name, endpoint=new_ep) + ) + continue + if endpoint_needs_update(ep, current): + updated = runtime.update_endpoint( + current.agent_runtime_endpoint_id, + to_endpoint_update_input(ep), + ) + actions.append( + EndpointAction(action="update", name=ep.name, endpoint=updated) + ) + else: + actions.append( + EndpointAction(action="noop", name=ep.name, endpoint=current) + ) + + # prune + if prune: + for name, current in current_by_name.items(): + if name in desired_names: + continue + runtime.delete_endpoint(current.agent_runtime_endpoint_id) + actions.append(EndpointAction(action="delete", name=name, endpoint=current)) + + return actions + + +class 
_StubWithEndpoints: + """Tiny adapter so we can reuse ``to_endpoint_create_inputs``.""" + + def __init__(self, endpoints): + self.endpoints = endpoints diff --git a/tests/unit/test_runtime_reconciler.py b/tests/unit/test_runtime_reconciler.py new file mode 100644 index 0000000..8dd8e42 --- /dev/null +++ b/tests/unit/test_runtime_reconciler.py @@ -0,0 +1,169 @@ +"""Tests for the apply reconciler.""" + +from types import SimpleNamespace +from unittest.mock import MagicMock + +from agentrun_cli._utils.agentruntime_yaml import ( + ParsedAgentRuntime, + ParsedContainer, + ParsedEndpoint, +) +from agentrun_cli._utils.runtime_reconciler import ( + ApplyPlan, + EndpointAction, + RuntimeReconcileResult, + find_runtime_by_name, + reconcile_endpoints, + reconcile_runtime, +) + + +def test_dataclasses_exist(): + rr = RuntimeReconcileResult(action="create", runtime=SimpleNamespace()) + ea = EndpointAction(action="create", name="default", endpoint=None) + plan = ApplyPlan(runtime_result=rr, endpoint_actions=[ea]) + assert plan.runtime_result.action == "create" + + +def test_find_runtime_by_name_match(): + sdk = MagicMock() + sdk.list_all.return_value = [ + SimpleNamespace(agent_runtime_name="other", agent_runtime_id="ar-1"), + SimpleNamespace(agent_runtime_name="me", agent_runtime_id="ar-2"), + ] + out = find_runtime_by_name(sdk, "me") + assert out is not None and out.agent_runtime_id == "ar-2" + + +def test_find_runtime_by_name_missing(): + sdk = MagicMock() + sdk.list_all.return_value = [] + assert find_runtime_by_name(sdk, "me") is None + + +def _parsed(): + return ParsedAgentRuntime( + name="my-agent", + container=ParsedContainer(image="img:v1"), + ) + + +def test_reconcile_runtime_creates_when_absent(): + client = MagicMock() + client.list_all.return_value = [] + created = SimpleNamespace( + agent_runtime_name="my-agent", + agent_runtime_id="ar-new", + status="CREATING", + ) + client.create.return_value = created + + out = reconcile_runtime(_parsed(), client=client) + 
assert out.action == "create" + assert out.runtime.agent_runtime_id == "ar-new" + client.create.assert_called_once() + + +def test_reconcile_runtime_updates_when_present(): + client = MagicMock() + existing = SimpleNamespace( + agent_runtime_name="my-agent", + agent_runtime_id="ar-1", + status="READY", + ) + client.list_all.return_value = [existing] + updated = SimpleNamespace( + agent_runtime_name="my-agent", + agent_runtime_id="ar-1", + status="UPDATING", + ) + client.update_by_id.return_value = updated + + out = reconcile_runtime(_parsed(), client=client) + assert out.action == "update" + assert out.runtime.status == "UPDATING" + client.update_by_id.assert_called_once() + # First positional arg must be the existing id + assert client.update_by_id.call_args[0][0] == "ar-1" + + +def _ep_remote(name, ep_id, **extra): + return SimpleNamespace( + agent_runtime_endpoint_name=name, + agent_runtime_endpoint_id=ep_id, + target_version=extra.get("target_version"), + routing_configuration=extra.get("routing_configuration"), + description=extra.get("description"), + disable_public_network_access=extra.get("disable_public_network_access"), + ) + + +def test_reconcile_endpoints_creates_default_when_none_desired(): + runtime = MagicMock() + runtime.agent_runtime_id = "ar-1" + runtime.list_endpoints.return_value = [] + runtime.create_endpoint.return_value = _ep_remote("default", "ep-1") + + actions = reconcile_endpoints(runtime, desired=None, prune=True) + assert len(actions) == 1 + assert actions[0].action == "create" + assert actions[0].name == "default" + runtime.create_endpoint.assert_called_once() + + +def test_reconcile_endpoints_empty_list_prunes_existing(): + runtime = MagicMock() + runtime.agent_runtime_id = "ar-1" + runtime.list_endpoints.return_value = [_ep_remote("default", "ep-1")] + + actions = reconcile_endpoints(runtime, desired=[], prune=True) + assert len(actions) == 1 + assert actions[0].action == "delete" + 
runtime.delete_endpoint.assert_called_once_with("ep-1") + + +def test_reconcile_endpoints_no_prune_keeps_existing(): + runtime = MagicMock() + runtime.list_endpoints.return_value = [_ep_remote("orphan", "ep-9")] + + actions = reconcile_endpoints( + runtime, + desired=[ParsedEndpoint(name="prod", target_version="LATEST")], + prune=False, + ) + # Should create "prod"; orphan stays untouched (no delete action). + assert {a.action for a in actions} == {"create"} + runtime.delete_endpoint.assert_not_called() + + +def test_reconcile_endpoints_updates_on_drift(): + runtime = MagicMock() + current = _ep_remote("prod", "ep-1", target_version="OLD") + runtime.list_endpoints.return_value = [current] + runtime.update_endpoint.return_value = _ep_remote( + "prod", + "ep-1", + target_version="LATEST", + ) + + actions = reconcile_endpoints( + runtime, + desired=[ParsedEndpoint(name="prod", target_version="LATEST")], + prune=True, + ) + assert any(a.action == "update" for a in actions) + runtime.update_endpoint.assert_called_once() + + +def test_reconcile_endpoints_noop_when_aligned(): + runtime = MagicMock() + current = _ep_remote("prod", "ep-1", target_version="LATEST") + runtime.list_endpoints.return_value = [current] + + actions = reconcile_endpoints( + runtime, + desired=[ParsedEndpoint(name="prod", target_version="LATEST")], + prune=True, + ) + assert any(a.action == "noop" for a in actions) + runtime.update_endpoint.assert_not_called() From 8396f49e766f8b54e15d2dc6287a858d5cac5bbd Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Tue, 19 May 2026 22:23:47 +0800 Subject: [PATCH 4/8] feat(runtime): ar runtime apply / render / get / list / delete / status (with bilingual docs) PR4-6 of the ar runtime command group. Adds the user-facing CLI surface plus README / docs / index updates and wires the group into main.py. Commands: - apply -f FILE [--wait/--no-wait] [--timeout] [--prune-endpoints/...] 
- render -f FILE - get NAME - list [--created-by-cli] [--workspace] - delete NAME [--wait/--no-wait] [--timeout] [--yes] - status NAME [--wait] [--timeout] Alias: ar rt -> ar runtime. All commands follow AGENTS.md hard rules: - Lazy SDK import (module-level AgentRuntime = None + _lazy_sdk()). - @handle_errors on every Click command. - format_output(ctx, data, quiet_field="name") for output. - build_sdk_config(profile_name, region) for SDK config. Behaviour: - apply: list-then-create-or-update via reconcile_runtime; reconcile_endpoints with prune-on by default; --wait polls runtime + endpoints via runtime_state. - delete: SDK chains endpoint deletes internally; CLI polls via NotFound. - get / status: not-found exits 1. - list --created-by-cli filters by system_tags=x-agentrun-cli. Bilingual docs: - docs/en/runtime.md + docs/zh/runtime.md (option tables identical, prose translated; AGENTS.md doc parity rule). - docs/{en,zh}/index.md: exit-code rows 5/6 + runtime command-group row. - README.md + README_zh.md: Features bullet, Quickstart, command-groups row. Tests: - 8 integration tests under tests/integration/test_runtime_cmd.py covering apply (create / update / failed / timeout), render, get / list (incl. --created-by-cli), delete (missing / happy), status (no-wait / wait), plus real-cli group + rt alias. - Unit coverage for _helpers (parse_duration / serializers / ctx_cfg) and end-to-end happy path through real cli. Coverage: 95.33%. 522 tests pass. 
Signed-off-by: Sodawyx --- README.md | 18 +- README_zh.md | 18 +- docs/en/index.md | 3 + docs/en/runtime.md | 183 ++++++++ docs/zh/index.md | 3 + docs/zh/runtime.md | 179 ++++++++ src/agentrun_cli/commands/runtime/__init__.py | 40 ++ src/agentrun_cli/commands/runtime/_helpers.py | 62 +++ .../commands/runtime/apply_cmd.py | 152 +++++++ src/agentrun_cli/commands/runtime/crud_cmd.py | 62 +++ .../commands/runtime/delete_cmd.py | 89 ++++ .../commands/runtime/render_cmd.py | 59 +++ .../commands/runtime/status_cmd.py | 62 +++ src/agentrun_cli/main.py | 3 + tests/integration/test_runtime_cmd.py | 414 ++++++++++++++++++ tests/unit/test_runtime_apply_e2e.py | 68 +++ tests/unit/test_runtime_helpers.py | 115 +++++ 17 files changed, 1528 insertions(+), 2 deletions(-) create mode 100644 docs/en/runtime.md create mode 100644 docs/zh/runtime.md create mode 100644 src/agentrun_cli/commands/runtime/__init__.py create mode 100644 src/agentrun_cli/commands/runtime/_helpers.py create mode 100644 src/agentrun_cli/commands/runtime/apply_cmd.py create mode 100644 src/agentrun_cli/commands/runtime/crud_cmd.py create mode 100644 src/agentrun_cli/commands/runtime/delete_cmd.py create mode 100644 src/agentrun_cli/commands/runtime/render_cmd.py create mode 100644 src/agentrun_cli/commands/runtime/status_cmd.py create mode 100644 tests/integration/test_runtime_cmd.py create mode 100644 tests/unit/test_runtime_apply_e2e.py create mode 100644 tests/unit/test_runtime_helpers.py diff --git a/README.md b/README.md index 52c779e..5eb32d2 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ agents that you configure declaratively without writing or deploying any runtime - **One-command super agent** — `ar super-agent run` creates a hosted agent and drops you into a chat REPL in seconds. - **Declarative deployment** — Kubernetes-style YAML (`ar sa apply -f superagent.yaml`) for reproducible, version-controlled agents. 
-- **Six resource groups** — `config`, `model`, `sandbox`, `tool`, `skill`, `super-agent`, all following the same `ar ` pattern. +- **Runtime declarative deploy** — `ar runtime apply -f runtime.yaml` builds an Agent Runtime from a container image and waits for it to reach `READY`. +- **Seven resource groups** — `config`, `model`, `sandbox`, `tool`, `skill`, `super-agent`, `runtime`, all following the same `ar ` pattern. - **Multi-profile config** — store multiple sets of credentials in `~/.agentrun/config.json` and switch with `--profile`. - **Multiple output formats** — `json` (default), `table`, `yaml`, and `quiet` for shell piping. - **Agent-friendly** — JSON-by-default output, deterministic exit codes, no interactive prompts when stdin isn't a TTY. @@ -176,6 +177,20 @@ ar sa invoke my-helper -m "Plan my day" --text-only Multi-document YAMLs (`---` separated) let you deploy many agents in one call. +### Deploy a runtime from an image + +```bash +cat > runtime.yaml <<'EOF' +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 +EOF +ar runtime apply -f runtime.yaml +``` + ## Command groups | Group | Alias | Purpose | Docs | @@ -186,6 +201,7 @@ Multi-document YAMLs (`---` separated) let you deploy many agents in one call. 
| `tool` | | MCP and FunctionCall tools | [en](./docs/en/tool.md) · [zh](./docs/zh/tool.md) | | `skill` | | Platform skill packages + local execution | [en](./docs/en/skill.md) · [zh](./docs/zh/skill.md) | | `super-agent` | `sa` | Quickstart / CRUD / declarative / conversation | [en](./docs/en/super-agent.md) · [zh](./docs/zh/super-agent.md) | +| `runtime` | `rt` | Declarative Agent Runtime deploy (container mode) | [en](./docs/en/runtime.md) · [zh](./docs/zh/runtime.md) | ## Documentation diff --git a/README_zh.md b/README_zh.md index d70b48f..80e242c 100644 --- a/README_zh.md +++ b/README_zh.md @@ -12,7 +12,8 @@ Agent)**:一种由平台托管、用户只需声明配置、无需编写或 - **一键拉起超级 Agent** — `ar super-agent run` 一条命令创建托管 Agent 并进入 REPL 对话。 - **声明式部署** — Kubernetes 风格 YAML(`ar sa apply -f superagent.yaml`),可版本化、可重复部署。 -- **六大资源组** — `config`、`model`、`sandbox`、`tool`、`skill`、`super-agent`,统一 `ar ` 模式。 +- **Runtime 声明式部署** — `ar runtime apply -f runtime.yaml` 从容器镜像创建 Agent Runtime 并等待 `READY`。 +- **七大资源组** — `config`、`model`、`sandbox`、`tool`、`skill`、`super-agent`、`runtime`,统一 `ar ` 模式。 - **多 Profile 配置** — `~/.agentrun/config.json` 支持多套凭证,通过 `--profile` 切换。 - **多种输出格式** — 默认 `json`,支持 `table` / `yaml` / `quiet`(适合 shell 管道)。 - **对 Agent 友好** — 默认 JSON 输出、确定性退出码、非 TTY 下不弹交互提示。 @@ -172,6 +173,20 @@ ar sa invoke my-helper -m "帮我规划今天的事情" --text-only 多文档 YAML(用 `---` 分隔)可以一次部署多个 Agent。 +### 从镜像部署一个 Runtime + +```bash +cat > runtime.yaml <<'EOF' +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 +EOF +ar runtime apply -f runtime.yaml +``` + ## 命令组总览 | 命令组 | 别名 | 用途 | 文档 | @@ -182,6 +197,7 @@ ar sa invoke my-helper -m "帮我规划今天的事情" --text-only | `tool` | | MCP 与 FunctionCall 工具 | [en](./docs/en/tool.md) · [zh](./docs/zh/tool.md) | | `skill` | | 平台技能包 + 本地执行 | [en](./docs/en/skill.md) · [zh](./docs/zh/skill.md) | | `super-agent` | `sa` | 一键拉起 / CRUD / 声明式 / 会话管理 | [en](./docs/en/super-agent.md) · 
[zh](./docs/zh/super-agent.md) | +| `runtime` | `rt` | 声明式 Agent Runtime 部署(容器模式) | [en](./docs/en/runtime.md) · [zh](./docs/zh/runtime.md) | ## 文档 diff --git a/docs/en/index.md b/docs/en/index.md index f447eeb..8bd81e0 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -180,6 +180,8 @@ ar sandbox exec "$SANDBOX" --code "print('hello')" | `2` | Bad input | Missing required flag, invalid JSON, mutually-exclusive flags combined, non-TTY missing model. | | `3` | Authentication failure | Invalid AK/SK or insufficient permissions. | | `4` | Server error or timeout | Backend API exception, SSE stream error, `apply --wait` timeout. | +| `5` | Resource failed | `apply` ran and the Runtime/Endpoint reached `*_FAILED`. | +| `6` | Polling timeout | `apply --wait` exceeded `--timeout`. | | `130` | User interrupt | REPL received two Ctrl+C or Ctrl+D. | Errors are written to stderr as JSON: @@ -197,4 +199,5 @@ Errors are written to stderr as JSON: | `sandbox` | `sb` | Sandboxes plus file, process, context, template and browser sub-groups | [sandbox.md](./sandbox.md) | | `tool` | | MCP and FunctionCall tools + sub-tool invocation | [tool.md](./tool.md) | | `skill` | | Platform skill packages + local scan/load/exec | [skill.md](./skill.md) | +| `runtime` | `rt` | Declarative Agent Runtime deploy (container mode) | [runtime.md](./runtime.md) | | `super-agent` | `sa` | Quickstart REPL, declarative deploy, CRUD, conversations | [super-agent.md](./super-agent.md) | diff --git a/docs/en/runtime.md b/docs/en/runtime.md new file mode 100644 index 0000000..8068b11 --- /dev/null +++ b/docs/en/runtime.md @@ -0,0 +1,183 @@ +**English** | [简体中文](../zh/runtime.md) + +# ar runtime + +Manage **Agent Runtimes** declaratively from a YAML file. The CLI only supports +container-mode runtimes (you supply an OCI image; building the image is out of +scope for this command group). 
Endpoints are declared in the same YAML; when `spec.endpoints` is omitted, the +CLI injects a single endpoint named `default` (`targetVersion=LATEST`). + +Also available as the alias `ar rt`. + +> **Heads up:** before running any command here, complete the two one-time +> setup steps in [Prerequisites](./index.md#prerequisites). Missing roles or +> policies surface as exit code `3`. + +## Commands + +- [apply](#apply) — create-or-update from YAML, with status polling. +- [render](#render) — dry-run validate + render to SDK input. +- [get](#get) — fetch one runtime by name. +- [list](#list) — list runtimes; filter by `--created-by-cli` or `--workspace`. +- [delete](#delete) — delete a runtime (waits by default). +- [status](#status) — fetch (and optionally wait for) terminal status. + +Exit codes (in addition to the global table): + +| Code | Meaning | +|------|---------| +| `5` | Runtime or endpoint ended in `CREATE_FAILED`, `UPDATE_FAILED`, or `DELETE_FAILED`. | +| `6` | Polling exceeded `--timeout`. | + +--- + +## apply + +``` +ar runtime apply -f FILE [--wait/--no-wait] [--timeout DURATION] + [--prune-endpoints/--no-prune-endpoints] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `-f`, `--file` | path | yes | | YAML file path (supports multi-document). | +| `--wait/--no-wait` | flag | no | `--wait` | Poll runtime + endpoints to final status. | +| `--timeout` | duration | no | `10m` | Polling timeout. Accepts `Ns`, `Nm`, `Nh`, or bare seconds. | +| `--prune-endpoints/--no-prune-endpoints` | flag | no | `--prune-endpoints` | Delete remote endpoints absent from the YAML. | + +### Examples + +```bash +# Minimal: container-only, default endpoint auto-injected. 
+cat > runtime.yaml <<'EOF' +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 +EOF +ar runtime apply -f runtime.yaml + +# Non-blocking submit (CI-friendly): +ar runtime apply -f runtime.yaml --no-wait + +# Custom timeout: +ar runtime apply -f runtime.yaml --timeout 20m + +# Disable endpoint pruning when migrating between YAML files: +ar runtime apply -f runtime.yaml --no-prune-endpoints +``` + +--- + +## render + +``` +ar runtime render -f FILE +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `-f`, `--file` | path | yes | | YAML file path (supports multi-document). | + +Validates the YAML, applies CLI auto-injection (`system_tags=["x-agentrun-cli"]`, +`artifact_type=Container`, default endpoint when `spec.endpoints` is omitted), +and prints the SDK create-input as JSON without calling the server. Use this to +preview changes before `apply`. + +--- + +## get + +``` +ar runtime get NAME +``` + +Show a single Agent Runtime as JSON. Exits `1` if no runtime with that name exists. + +### Examples + +```bash +ar runtime get my-agent +ar runtime get my-agent --output quiet # prints just the name +``` + +--- + +## list + +``` +ar runtime list [--created-by-cli] [--workspace NAME] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `--created-by-cli` | flag | no | false | Only show runtimes carrying the `x-agentrun-cli` system tag. | +| `--workspace` | string | no | | Restrict to runtimes belonging to the named workspace. 
| + +### Examples + +```bash +ar runtime list +ar runtime list --created-by-cli +ar runtime list --output table +``` + +--- + +## delete + +``` +ar runtime delete NAME [--wait/--no-wait] [--timeout DURATION] [--yes] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `--wait/--no-wait` | flag | no | `--wait` | Block until the runtime is gone (or fails). | +| `--timeout` | duration | no | `5m` | Polling timeout. | +| `--yes` | flag | no | false | Skip the interactive confirmation prompt. | + +### Examples + +```bash +ar runtime delete my-agent --yes +ar runtime delete my-agent --no-wait +``` + +--- + +## status + +``` +ar runtime status NAME [--wait] [--timeout DURATION] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `--wait` | flag | no | false | Poll until the runtime reaches READY / *_FAILED. | +| `--timeout` | duration | no | `10m` | Polling timeout (only meaningful with `--wait`). | + +### Examples + +```bash +ar runtime status my-agent +ar runtime status my-agent --wait --timeout 20m +``` + +--- + +## YAML schema + +See `projects/agent-infra-build-runit/design/runtime-cli-design.md` §2 for the +full field list and validation rules. 
diff --git a/docs/zh/index.md b/docs/zh/index.md index 9aec785..bdba385 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -176,6 +176,8 @@ ar sandbox exec "$SANDBOX" --code "print('hello')" | `2` | 参数错误 | 缺少必填参数、非法 JSON、互斥参数同时出现、非 TTY 缺 model。 | | `3` | 认证失败 | AK/SK 无效或权限不足。 | | `4` | 服务端错误 / 超时 | 后端 API 异常、SSE 流中断、`apply --wait` 超时。 | +| `5` | 资源失败 | `ar runtime` 命令结束时 Runtime/Endpoint 处于 `*_FAILED` 状态。 | +| `6` | 轮询超时 | 带等待的 `ar runtime` 命令(如 `apply --wait`)超过 `--timeout`。 | | `130` | 用户中断 | REPL 中连按两次 Ctrl+C 或 Ctrl+D。 | 错误以 JSON 形式写到 stderr: @@ -193,4 +195,5 @@ ar sandbox exec "$SANDBOX" --code "print('hello')" | `sandbox` | `sb` | 沙箱以及 file / process / context / template / browser 子组 | [sandbox.md](./sandbox.md) | | `tool` | | MCP 与 FunctionCall 工具 + 子工具调用 | [tool.md](./tool.md) | | `skill` | | 平台侧技能包 + 本地 scan / load / exec | [skill.md](./skill.md) | +| `runtime` | `rt` | 声明式 Agent Runtime 部署(容器模式) | [runtime.md](./runtime.md) | | `super-agent` | `sa` | 一键拉起 REPL、声明式部署、CRUD、会话管理 | [super-agent.md](./super-agent.md) | diff --git a/docs/zh/runtime.md b/docs/zh/runtime.md new file mode 100644 index 0000000..764a91f --- /dev/null +++ b/docs/zh/runtime.md @@ -0,0 +1,179 @@ +[English](../en/runtime.md) | **简体中文** + +# ar runtime + +通过 YAML 声明式管理 **Agent Runtime**。本命令组只支持容器模式(用户提供 OCI 镜像; +代码 → 镜像构建不在本命令组范围)。Endpoint 嵌入同一份 YAML;用户不写时 CLI 会自动 +注入一个名为 `default` 的 endpoint(`targetVersion=LATEST`)。 + +别名:`ar rt`。 + +> **提示:** 执行任何命令前,请先完成 [Prerequisites](./index.md#prerequisites) 中的 +> 两步一次性设置。角色或策略缺失会以退出码 `3` 暴露。 + +## 命令 + +- [apply](#apply) — 从 YAML create-or-update,附带状态轮询。 +- [render](#render) — 校验 + 渲染为 SDK 输入(不调用服务端)。 +- [get](#get) — 按名字获取单个 runtime。 +- [list](#list) — 列出 runtime;可用 `--created-by-cli` 或 `--workspace` 过滤。 +- [delete](#delete) — 删除 runtime(默认等待)。 +- [status](#status) — 查看(可选等待)终态状态。 + +退出码(在全局表的基础上扩展): + +| 码 | 含义 | +|----|------| +| `5` | Runtime 或 endpoint 进入 `CREATE_FAILED`、`UPDATE_FAILED` 或 `DELETE_FAILED`。 | +| `6` | 轮询超过 `--timeout`。 | + +--- + +## apply + +``` +ar 
runtime apply -f FILE [--wait/--no-wait] [--timeout DURATION] + [--prune-endpoints/--no-prune-endpoints] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `-f`, `--file` | path | yes | | YAML 文件路径(支持多文档)。 | +| `--wait/--no-wait` | flag | no | `--wait` | 轮询 runtime + endpoints 到终态。 | +| `--timeout` | duration | no | `10m` | 轮询超时。支持 `Ns` / `Nm` / `Nh` 或裸秒数。 | +| `--prune-endpoints/--no-prune-endpoints` | flag | no | `--prune-endpoints` | 删除远端存在但 YAML 缺失的 endpoint。 | + +### Examples + +```bash +# 最小可用:仅容器,CLI 自动注入默认 endpoint +cat > runtime.yaml <<'EOF' +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 +EOF +ar runtime apply -f runtime.yaml + +# CI 场景:异步提交不等待 +ar runtime apply -f runtime.yaml --no-wait + +# 自定义超时 +ar runtime apply -f runtime.yaml --timeout 20m + +# 在 YAML 之间迁移时关闭 prune +ar runtime apply -f runtime.yaml --no-prune-endpoints +``` + +--- + +## render + +``` +ar runtime render -f FILE +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `-f`, `--file` | path | yes | | YAML 文件路径(支持多文档)。 | + +校验 YAML 并应用 CLI 自动注入(`system_tags=["x-agentrun-cli"]`、 +`artifact_type=Container`、`spec.endpoints` 缺省时注入默认 endpoint), +以 JSON 形式打印 SDK create-input,不调用服务端。可在 `apply` 之前用于预览。 + +--- + +## get + +``` +ar runtime get NAME +``` + +以 JSON 形式展示一个 Agent Runtime。不存在则退出码 `1`。 + +### Examples + +```bash +ar runtime get my-agent +ar runtime get my-agent --output quiet # 只打印名字 +``` + +--- + +## list + +``` +ar runtime list [--created-by-cli] [--workspace NAME] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `--created-by-cli` | flag | no | false | 只显示带 `x-agentrun-cli` 系统标签的 runtime。 | +| `--workspace` | string | no | | 按工作空间名过滤。 | + +### Examples 
+ +```bash +ar runtime list +ar runtime list --created-by-cli +ar runtime list --output table +``` + +--- + +## delete + +``` +ar runtime delete NAME [--wait/--no-wait] [--timeout DURATION] [--yes] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `--wait/--no-wait` | flag | no | `--wait` | 阻塞直到资源消失(或失败)。 | +| `--timeout` | duration | no | `5m` | 轮询超时。 | +| `--yes` | flag | no | false | 跳过交互式确认。 | + +### Examples + +```bash +ar runtime delete my-agent --yes +ar runtime delete my-agent --no-wait +``` + +--- + +## status + +``` +ar runtime status NAME [--wait] [--timeout DURATION] +``` + +### Options + +| Flag | Type | Required | Default | Description | +|------|------|----------|---------|-------------| +| `--wait` | flag | no | false | 轮询到 READY / *_FAILED。 | +| `--timeout` | duration | no | `10m` | 轮询超时(仅 `--wait` 有效)。 | + +### Examples + +```bash +ar runtime status my-agent +ar runtime status my-agent --wait --timeout 20m +``` + +--- + +## YAML schema + +完整字段列表与校验规则见 `projects/agent-infra-build-runit/design/runtime-cli-design.md` §2。 diff --git a/src/agentrun_cli/commands/runtime/__init__.py b/src/agentrun_cli/commands/runtime/__init__.py new file mode 100644 index 0000000..e117ad2 --- /dev/null +++ b/src/agentrun_cli/commands/runtime/__init__.py @@ -0,0 +1,40 @@ +"""``ar runtime`` (``ar rt``) — declarative Agent Runtime management. + +See ``projects/agent-infra-build-runit/design/runtime-cli-design.md`` for the +full design. PR4 ships ``apply`` and ``render``; PR5 adds ``get / list / +delete / status``. +""" + +import os + +# Mirror ``main.py``: silence the SDK's "you are using version 0.0.200" +# warning even when the command is invoked through a test harness that +# bypasses ``main`` (e.g. tests/integration/test_runtime_cmd.py::_root). 
+os.environ.setdefault("DISABLE_BREAKING_CHANGES_WARNING", "1") + +import click # noqa: E402 + +from agentrun_cli.commands.runtime import apply_cmd as _apply_mod # noqa: E402 +from agentrun_cli.commands.runtime import crud_cmd as _crud_mod # noqa: E402 +from agentrun_cli.commands.runtime import delete_cmd as _delete_mod # noqa: E402 +from agentrun_cli.commands.runtime import render_cmd as _render_mod # noqa: E402 +from agentrun_cli.commands.runtime import status_cmd as _status_mod # noqa: E402 + + +@click.group( + "runtime", + help="Manage Agent Runtimes declaratively (container mode only).", +) +def runtime_group(): + pass + + +runtime_group.add_command(_apply_mod.apply_cmd) +runtime_group.add_command(_render_mod.render_cmd) +runtime_group.add_command(_crud_mod.get_cmd) +runtime_group.add_command(_crud_mod.list_cmd) +runtime_group.add_command(_delete_mod.delete_cmd) +runtime_group.add_command(_status_mod.status_cmd) + + +__all__ = ["runtime_group"] diff --git a/src/agentrun_cli/commands/runtime/_helpers.py b/src/agentrun_cli/commands/runtime/_helpers.py new file mode 100644 index 0000000..42a340f --- /dev/null +++ b/src/agentrun_cli/commands/runtime/_helpers.py @@ -0,0 +1,62 @@ +"""Shared helpers for ``ar runtime`` commands.""" + +from __future__ import annotations + +import re + + +def ctx_cfg(ctx) -> tuple[str | None, str | None]: + obj = getattr(ctx, "obj", None) or {} + return obj.get("profile"), obj.get("region") + + +def parse_duration(value: str) -> int: + """Parse ``10m`` / ``300s`` / ``1h`` / plain integer seconds → seconds.""" + if isinstance(value, int): + return value + if value is None: + return 0 + m = re.fullmatch(r"(\d+)\s*(s|sec|m|min|h|hr|hour)?", str(value).strip(), + re.IGNORECASE) + if not m: + raise ValueError(f"Invalid duration {value!r}") + n = int(m.group(1)) + unit = (m.group(2) or "s").lower() + if unit.startswith("s"): + return n + if unit.startswith("m"): + return n * 60 + return n * 3600 + + +def serialize_runtime(rt) -> dict: + 
"""Convert an AgentRuntime SDK object to a plain dict.""" + return { + "name": getattr(rt, "agent_runtime_name", None), + "id": getattr(rt, "agent_runtime_id", None), + "arn": getattr(rt, "agent_runtime_arn", None), + "version": getattr(rt, "agent_runtime_version", None), + "status": _coerce_status(getattr(rt, "status", None)), + "statusReason": getattr(rt, "status_reason", None), + "createdAt": getattr(rt, "created_at", None), + "lastUpdatedAt": getattr(rt, "last_updated_at", None), + } + + +def serialize_endpoint(ep) -> dict: + return { + "name": getattr(ep, "agent_runtime_endpoint_name", None), + "id": getattr(ep, "agent_runtime_endpoint_id", None), + "status": _coerce_status(getattr(ep, "status", None)), + "statusReason": getattr(ep, "status_reason", None), + "publicUrl": getattr(ep, "endpoint_public_url", None), + "targetVersion": getattr(ep, "target_version", None), + } + + +def _coerce_status(s): + if s is None: + return None + if hasattr(s, "value"): + return s.value + return str(s) diff --git a/src/agentrun_cli/commands/runtime/apply_cmd.py b/src/agentrun_cli/commands/runtime/apply_cmd.py new file mode 100644 index 0000000..e95da0d --- /dev/null +++ b/src/agentrun_cli/commands/runtime/apply_cmd.py @@ -0,0 +1,152 @@ +"""``ar runtime apply`` — declarative create-or-update.""" + +from __future__ import annotations + +import sys +import time + +import click + +from agentrun_cli._utils.agentruntime_yaml import ( + YamlSchemaError, + parse_yaml_file, +) +from agentrun_cli._utils.config import build_sdk_config +from agentrun_cli._utils.error import EXIT_BAD_INPUT, handle_errors +from agentrun_cli._utils.output import echo_error, format_output +from agentrun_cli._utils.runtime_constants import ( + DEFAULT_APPLY_TIMEOUT_SECONDS, + ENDPOINT_POLL_CONCURRENCY, +) +from agentrun_cli._utils.runtime_reconciler import ( + reconcile_endpoints, + reconcile_runtime, +) +from agentrun_cli._utils.runtime_state import ( + PollConfig, + poll_many_parallel, + poll_until_final, +) 
# Re-exported for monkeypatching in integration tests:
AgentRuntime = None


def _lazy_sdk():
    """Import the SDK only when the command actually runs.

    The class is cached in the module-level ``AgentRuntime`` global, so a value
    patched in by a test is returned unchanged.
    """
    global AgentRuntime
    if AgentRuntime is None:
        from agentrun.agent_runtime import AgentRuntime as _AR
        AgentRuntime = _AR
    return AgentRuntime


def _parse(path):
    """Parse the YAML file, turning schema errors into a bad-input exit."""
    try:
        return parse_yaml_file(path)
    except YamlSchemaError as exc:
        echo_error("InvalidYaml", str(exc))
        raise SystemExit(EXIT_BAD_INPUT) from exc


def _progress(stream, parsed, runtime, elapsed, kind="runtime"):
    """Best-effort stderr progress; silent when stderr is not a TTY.

    ``runtime`` is whichever resource is being polled. For ``kind="runtime"``
    the line is ``[runtime <name>] ...``; for any other kind the polled
    resource's endpoint name (when present) is appended so endpoint ticks are
    not mislabelled as runtime ticks.
    """
    if not stream.isatty():
        return
    label = parsed.name
    if kind != "runtime":
        ep_name = getattr(runtime, "agent_runtime_endpoint_name", None)
        if ep_name:
            label = f"{label}/{ep_name}"
    stream.write(
        f"[{kind} {label}] status={getattr(runtime, 'status', None)} "
        f"({elapsed:.1f}s)\n"
    )


@click.command(
    "apply",
    help=(
        "Create or update an Agent Runtime declaratively from YAML. "
        "By default waits until the runtime (and its endpoints) reach a "
        "terminal status."
    ),
)
@click.option(
    "-f", "--file", "file_path", required=True,
    help="YAML file path (supports multi-document).",
)
@click.option(
    "--wait/--no-wait", default=True, show_default=True,
    help="Poll until the runtime + endpoints reach a final status.",
)
@click.option(
    "--timeout", default="10m", show_default=True,
    help="Polling timeout (e.g. 600s, 10m, 1h).",
)
@click.option(
    "--prune-endpoints/--no-prune-endpoints", default=True, show_default=True,
    help="Delete endpoints that exist remotely but are absent from the YAML.",
)
@click.pass_context
@handle_errors
def apply_cmd(ctx, file_path, wait, timeout, prune_endpoints):
    """Reconcile every document in ``file_path`` and print one result per document.

    For each document the runtime is reconciled first, then its endpoints;
    with ``--wait`` each phase is polled using the same ``--timeout`` budget.
    """
    # Validate the flag before touching the SDK or the file: a malformed
    # duration is user error and must exit with the bad-input code.
    try:
        timeout_seconds = parse_duration(timeout) or DEFAULT_APPLY_TIMEOUT_SECONDS
    except ValueError as exc:
        echo_error("InvalidArgument", f"--timeout: {exc}")
        raise SystemExit(EXIT_BAD_INPUT) from exc

    runtime_cls = _lazy_sdk()
    profile, region = ctx_cfg(ctx)
    build_sdk_config(profile_name=profile, region=region)

    docs = _parse(file_path)
    poll_cfg = PollConfig(timeout=float(timeout_seconds))

    results = []
    for parsed in docs:
        started = time.monotonic()
        rt_res = reconcile_runtime(parsed, client=runtime_cls)
        runtime = rt_res.runtime

        if wait:
            # ``p=parsed`` binds the current document; a bare closure would
            # see the loop variable's final value.
            poll_until_final(
                runtime, resource_kind="AgentRuntime", cfg=poll_cfg,
                on_tick=lambda r, e, p=parsed: _progress(sys.stderr, p, r, e),
            )

        ep_actions = reconcile_endpoints(
            runtime, desired=parsed.endpoints, prune=prune_endpoints,
        )

        if wait:
            # Only created/updated endpoints have a status worth waiting on.
            in_flight = [
                a.endpoint for a in ep_actions
                if a.action in ("create", "update") and a.endpoint is not None
            ]
            poll_many_parallel(
                in_flight, resource_kind="AgentRuntimeEndpoint",
                cfg=poll_cfg, concurrency=ENDPOINT_POLL_CONCURRENCY,
                on_tick=lambda r, e, p=parsed: _progress(
                    sys.stderr, p, r, e, kind="endpoint",
                ),
            )

        results.append({
            "action": rt_res.action,
            "runtime": serialize_runtime(runtime),
            "endpoints": [
                {**serialize_endpoint(a.endpoint or _empty_ep(a.name)),
                 "action": a.action}
                for a in ep_actions
            ],
            "elapsedSeconds": round(time.monotonic() - started, 3),
        })

    format_output(ctx, results, quiet_field="name")


def _empty_ep(name):
    """Build a placeholder endpoint carrying only ``name``.

    Used for actions that have no endpoint object attached, so
    ``serialize_endpoint`` still yields a row for them.
    """
    class _E:
        agent_runtime_endpoint_name = name
        agent_runtime_endpoint_id = None
        status = None
        status_reason = None
        endpoint_public_url = None
        target_version = None
    return _E()
# Monkey-patch entry points used by integration tests:
AgentRuntime = None


def _lazy_sdk():
    """Return the SDK runtime class, importing it on first use only."""
    global AgentRuntime
    if AgentRuntime is not None:
        return AgentRuntime
    from agentrun.agent_runtime import AgentRuntime as _AR
    AgentRuntime = _AR
    return AgentRuntime


@click.command("get", help="Show a single Agent Runtime by name.")
@click.argument("name")
@click.pass_context
@handle_errors
def get_cmd(ctx, name):
    """Print the runtime called ``name``; exit with the not-found code if absent."""
    sdk_runtime = _lazy_sdk()
    profile, region = ctx_cfg(ctx)
    build_sdk_config(profile_name=profile, region=region)
    found = find_runtime_by_name(sdk_runtime, name)
    if found is not None:
        format_output(ctx, serialize_runtime(found), quiet_field="name")
        return
    echo_error("ResourceNotFound",
               f"AgentRuntime {name!r} not found.")
    raise SystemExit(EXIT_NOT_FOUND)
AgentRuntime = None


def _lazy_sdk():
    """Return the SDK runtime class, importing it on first use only."""
    global AgentRuntime
    if AgentRuntime is None:
        from agentrun.agent_runtime import AgentRuntime as _AR
        AgentRuntime = _AR
    return AgentRuntime


def _is_not_found(exc: BaseException) -> bool:
    """Default predicate. SDK raises ``ResourceNotExistError`` after delete.

    Matches on the exception class name so no SDK import is needed here.
    """
    name = type(exc).__name__
    return "NotExist" in name or "NotFound" in name


def _progress(parsed_name, runtime, elapsed):
    """Write one progress line to stderr, only when stderr is a TTY."""
    if sys.stderr.isatty():
        sys.stderr.write(
            f"[runtime {parsed_name}] status={getattr(runtime, 'status', None)} "
            f"({elapsed:.1f}s)\n"
        )


@click.command(
    "delete",
    help=(
        "Delete an Agent Runtime by name. By default waits until the resource "
        "is gone (or fails)."
    ),
)
@click.argument("name")
@click.option("--wait/--no-wait", default=True, show_default=True)
@click.option(
    "--timeout", default="5m", show_default=True,
    help="Polling timeout (e.g. 300s, 5m).",
)
@click.option(
    "--yes", is_flag=True, default=False,
    help="Skip the interactive confirmation.",
)
@click.pass_context
@handle_errors
def delete_cmd(ctx, name, wait, timeout, yes):
    """Delete the runtime called ``name`` and optionally wait until it is gone.

    The confirmation prompt is shown only when ``--yes`` is absent and stdin
    is a TTY.
    """
    # Function-scope import: this module's header imports only EXIT_NOT_FOUND
    # from the error module.
    from agentrun_cli._utils.error import EXIT_BAD_INPUT

    # Validate --timeout BEFORE the destructive call. Previously a malformed
    # value raised only after ``runtime.delete()`` had been issued, so the
    # command errored out with the delete already in flight.
    timeout_seconds = None
    if wait:
        try:
            timeout_seconds = (
                parse_duration(timeout) or DEFAULT_DELETE_TIMEOUT_SECONDS
            )
        except ValueError as exc:
            echo_error("InvalidArgument", f"--timeout: {exc}")
            raise SystemExit(EXIT_BAD_INPUT) from exc

    rt_cls = _lazy_sdk()
    profile, region = ctx_cfg(ctx)
    build_sdk_config(profile_name=profile, region=region)
    runtime = find_runtime_by_name(rt_cls, name)
    if runtime is None:
        echo_error("ResourceNotFound", f"AgentRuntime {name!r} not found.")
        raise SystemExit(EXIT_NOT_FOUND)
    if not yes and sys.stdin.isatty():
        click.confirm(f"Delete AgentRuntime {name!r}?", abort=True)
    runtime.delete()  # SDK chains endpoint deletes internally
    if wait:
        poll_until_deleted(
            runtime, resource_kind="AgentRuntime",
            is_not_found=_is_not_found,
            cfg=PollConfig(timeout=float(timeout_seconds)),
            on_tick=lambda r, e: _progress(name, r, e),
        )
    format_output(
        ctx,
        {"action": "delete", "runtime": serialize_runtime(runtime)},
        quiet_field="name",
    )
AgentRuntime = None


def _lazy_sdk():
    """Return the SDK runtime class, importing it on first use only."""
    global AgentRuntime
    if AgentRuntime is not None:
        return AgentRuntime
    from agentrun.agent_runtime import AgentRuntime as _AR
    AgentRuntime = _AR
    return AgentRuntime


@click.command(
    "status",
    help="Show the status of an Agent Runtime (optionally wait for terminal).",
)
@click.argument("name")
@click.option("--wait", is_flag=True, default=False,
              help="Poll until the runtime reaches READY/*_FAILED.")
@click.option("--timeout", default="10m", show_default=True,
              help="Polling timeout (only with --wait).")
@click.pass_context
@handle_errors
def status_cmd(ctx, name, wait, timeout):
    """Print the runtime's status, polling first when ``--wait`` is given."""
    sdk_runtime = _lazy_sdk()
    profile, region = ctx_cfg(ctx)
    build_sdk_config(profile_name=profile, region=region)

    target = find_runtime_by_name(sdk_runtime, name)
    if target is None:
        echo_error("ResourceNotFound", f"AgentRuntime {name!r} not found.")
        raise SystemExit(EXIT_NOT_FOUND)

    def _tick(rt, elapsed):
        # Progress goes to stderr and only when it is an interactive terminal.
        if not sys.stderr.isatty():
            return False
        return sys.stderr.write(
            f"[runtime {name}] status={getattr(rt, 'status', None)} "
            f"({elapsed:.1f}s)\n"
        )

    if wait:
        budget = parse_duration(timeout) or DEFAULT_APPLY_TIMEOUT_SECONDS
        poll_until_final(
            target, resource_kind="AgentRuntime",
            cfg=PollConfig(timeout=float(budget)),
            on_tick=_tick,
        )
    format_output(ctx, serialize_runtime(target), quiet_field="name")
def _root():
    """Build a root CLI that mounts only ``runtime`` — keeps the test
    independent of PR6's main.py wiring."""
    @click.group()
    @click.option("--profile", default=None)
    @click.option("--region", default=None)
    @click.option("--output", default="json")
    @click.pass_context
    def root(ctx, profile, region, output):
        ctx.ensure_object(dict)
        ctx.obj["profile"] = profile
        ctx.obj["region"] = region
        ctx.obj["output"] = output

    root.add_command(runtime_group)
    return root


def test_runtime_group_registered():
    """Every sub-command mounted on ``runtime_group`` must show up in --help."""
    result = CliRunner().invoke(_root(), ["runtime", "--help"])
    assert result.exit_code == 0
    # The group registers six commands; checking only apply/render would let
    # a dropped get/list/delete/status registration go unnoticed.
    for sub in ("apply", "render", "get", "list", "delete", "status"):
        assert sub in result.output


VALID_YAML = """
apiVersion: agentrun/v1
kind: AgentRuntime
metadata:
  name: my-agent
spec:
  container:
    image: img:v1
"""


def test_render_outputs_rendered_input():
    """``render`` prints the dumped create-input and endpoint inputs as JSON."""
    fake_input = MagicMock()
    fake_input.model_dump.return_value = {
        "agentRuntimeName": "my-agent",
        "artifactType": "Container",
        "systemTags": ["x-agentrun-cli"],
    }
    fake_eps = [MagicMock()]
    fake_eps[0].model_dump.return_value = {
        "agentRuntimeEndpointName": "default",
        "targetVersion": "LATEST",
    }
    with (
        patch(
            "agentrun_cli.commands.runtime.render_cmd.to_runtime_create_input",
            return_value=fake_input,
        ),
        patch(
            "agentrun_cli.commands.runtime.render_cmd.to_endpoint_create_inputs",
            return_value=fake_eps,
        ),
    ):
        runner = CliRunner()
        with runner.isolated_filesystem():
            with open("rt.yaml", "w") as f:
                f.write(VALID_YAML)
            result = runner.invoke(_root(), ["runtime", "render", "-f", "rt.yaml"])
    assert result.exit_code == 0, result.output
    out = json.loads(result.output)
    assert out[0]["kind"] == "AgentRuntime"
    assert out[0]["name"] == "my-agent"
    assert out[0]["renderedCreateInput"]["systemTags"] == ["x-agentrun-cli"]
    assert out[0]["renderedEndpoints"][0]["agentRuntimeEndpointName"] == "default"


def test_render_invalid_yaml_exit_code_2():
    """A schema violation (wrong apiVersion) exits with the bad-input code."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        with open("bad.yaml", "w") as f:
            f.write("apiVersion: wrong/v1\nkind: AgentRuntime\nmetadata: {name: x}\n"
                    "spec: {container: {image: i}}\n")
        result = runner.invoke(_root(), ["runtime", "render", "-f", "bad.yaml"])
    assert result.exit_code == 2


def _make_runtime(name="my-agent", status="READY", rid="ar-1"):
    """Build a stand-in runtime exposing the attributes the serializer reads."""
    return SimpleNamespace(
        agent_runtime_name=name, agent_runtime_id=rid,
        agent_runtime_arn=f"acs:{rid}",
        agent_runtime_version="1",
        status=status, status_reason=None,
        created_at="t0", last_updated_at="t1",
    )


def _make_endpoint(name="default", status="READY", eid="ep-1",
                   url="https://x/"):
    """Build a stand-in endpoint whose ``refresh`` returns itself unchanged."""
    e = SimpleNamespace(
        agent_runtime_endpoint_name=name,
        agent_runtime_endpoint_id=eid,
        status=status, status_reason=None,
        endpoint_public_url=url,
        target_version="LATEST",
        description=None,
        routing_configuration=None,
        disable_public_network_access=None,
    )
    e.refresh = lambda *a, **k: e
    return e
def test_apply_update_path(monkeypatch):
    """An existing runtime is updated and polled from UPDATING to READY."""
    monkeypatch.setattr("time.sleep", lambda *_: None)
    existing = _make_runtime(status="UPDATING")
    pending = iter(["UPDATING", "READY"])

    def _refresh(*args, **kwargs):
        # Each refresh advances the fake status; it stays READY once exhausted.
        existing.status = next(pending, "READY")
        return existing

    existing.refresh = _refresh
    existing.list_endpoints = MagicMock(return_value=[])
    existing.create_endpoint = MagicMock(return_value=_make_endpoint())

    rt_cls = MagicMock()
    rt_cls.list_all.return_value = [existing]
    rt_cls.update_by_id.return_value = existing

    sdk_cfg = patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config",
                    return_value=MagicMock())
    sdk_cls = patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls)
    with sdk_cfg, sdk_cls:
        runner = CliRunner()
        with runner.isolated_filesystem():
            with open("rt.yaml", "w") as f:
                f.write(VALID_YAML)
            result = runner.invoke(_root(), ["runtime", "apply", "-f", "rt.yaml"])
    assert result.exit_code == 0, result.output
    payload = json.loads(result.output)
    assert payload[0]["action"] == "update"


def test_apply_runtime_failed_exits_5(monkeypatch):
    """A runtime stuck in CREATE_FAILED makes ``apply`` exit with code 5."""
    monkeypatch.setattr("time.sleep", lambda *_: None)
    failed = _make_runtime(status="CREATE_FAILED")
    failed.status_reason = "image pull backoff"
    failed.refresh = lambda *a, **k: failed

    rt_cls = MagicMock()
    rt_cls.list_all.return_value = []
    rt_cls.create.return_value = failed

    sdk_cfg = patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config",
                    return_value=MagicMock())
    sdk_cls = patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls)
    with sdk_cfg, sdk_cls:
        runner = CliRunner()
        with runner.isolated_filesystem():
            with open("rt.yaml", "w") as f:
                f.write(VALID_YAML)
            result = runner.invoke(_root(), ["runtime", "apply", "-f", "rt.yaml"])
    assert result.exit_code == 5
+ fake_clock = itertools.chain([0.0, 0.0, 0.5, 999.0], itertools.repeat(999.0)) + monkeypatch.setattr("time.monotonic", lambda: next(fake_clock)) + + rt_cls = MagicMock() + rt_cls.list_all.return_value = [] + rt_cls.create.return_value = rt + + with ( + patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls), + ): + runner = CliRunner() + with runner.isolated_filesystem(): + with open("rt.yaml", "w") as f: + f.write(VALID_YAML) + result = runner.invoke( + _root(), ["runtime", "apply", "-f", "rt.yaml", "--timeout", "1s"], + ) + assert result.exit_code == 6 + + +def test_get_runtime(): + rt = _make_runtime() + rt_cls = MagicMock() + rt_cls.list_all.return_value = [rt] + with ( + patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke(_root(), ["runtime", "get", "my-agent"]) + assert result.exit_code == 0, result.output + out = json.loads(result.output) + assert out["name"] == "my-agent" and out["status"] == "READY" + + +def test_get_runtime_not_found_exit_1(): + rt_cls = MagicMock() + rt_cls.list_all.return_value = [] + with ( + patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke(_root(), ["runtime", "get", "missing"]) + assert result.exit_code == 1 + + +def test_list_runtimes(): + rt_cls = MagicMock() + rt_cls.list_all.return_value = [ + _make_runtime("a", "READY", "ar-a"), + _make_runtime("b", "CREATING", "ar-b"), + ] + with ( + patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke(_root(), ["runtime", "list"]) + assert 
result.exit_code == 0 + out = json.loads(result.output) + assert {r["name"] for r in out} == {"a", "b"} + + +def test_list_runtimes_created_by_cli_filter(): + """``--created-by-cli`` must filter remote list by SYSTEM_TAG_CLI.""" + rt_cls = MagicMock() + cli_runtime = _make_runtime("cli-one", "READY", "ar-cli") + cli_runtime.system_tags = ["x-agentrun-cli"] + other = _make_runtime("manual", "READY", "ar-m") + other.system_tags = ["something-else"] + rt_cls.list_all.return_value = [cli_runtime, other] + with ( + patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke( + _root(), ["runtime", "list", "--created-by-cli"], + ) + assert result.exit_code == 0 + out = json.loads(result.output) + assert {r["name"] for r in out} == {"cli-one"} + + +def test_delete_idempotent_when_missing(): + rt_cls = MagicMock() + rt_cls.list_all.return_value = [] + with ( + patch("agentrun_cli.commands.runtime.delete_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.delete_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke( + _root(), ["runtime", "delete", "missing", "--yes"], + ) + assert result.exit_code == 1 # ResourceNotFound + + +def test_delete_happy_path(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + rt = _make_runtime(status="DELETING") + states = iter([Exception("simulated NotFound")]) + + def _refresh(*a, **k): + try: + raise next(states) + except StopIteration: + return rt + rt.refresh = _refresh + rt.delete = MagicMock() + + rt_cls = MagicMock() + rt_cls.list_all.return_value = [rt] + + # The SystemExit module needs an is_not_found predicate. The integration + # test points the predicate at the simulated exception's message. 
+ with ( + patch("agentrun_cli.commands.runtime.delete_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.delete_cmd.AgentRuntime", rt_cls), + patch( + "agentrun_cli.commands.runtime.delete_cmd._is_not_found", + lambda e: "NotFound" in str(e), + ), + ): + result = CliRunner().invoke( + _root(), ["runtime", "delete", "my-agent", "--yes"], + ) + assert result.exit_code == 0, result.output + rt.delete.assert_called_once() + + +def test_status_no_wait_returns_current(): + rt = _make_runtime(status="CREATING") + rt_cls = MagicMock() + rt_cls.list_all.return_value = [rt] + with ( + patch("agentrun_cli.commands.runtime.status_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.status_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke(_root(), ["runtime", "status", "my-agent"]) + assert result.exit_code == 0 + out = json.loads(result.output) + assert out["status"] == "CREATING" + + +def test_status_wait_until_ready(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + rt = _make_runtime(status="CREATING") + states = iter(["CREATING", "READY"]) + rt.refresh = lambda *a, **k: (setattr(rt, "status", next(states, "READY")) + or rt) + rt_cls = MagicMock() + rt_cls.list_all.return_value = [rt] + with ( + patch("agentrun_cli.commands.runtime.status_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.status_cmd.AgentRuntime", rt_cls), + ): + result = CliRunner().invoke( + _root(), ["runtime", "status", "my-agent", "--wait"], + ) + assert result.exit_code == 0 + out = json.loads(result.output) + assert out["status"] == "READY" + + +from agentrun_cli.main import cli as real_cli # noqa: E402 + + +def test_real_cli_exposes_runtime_group(): + result = CliRunner().invoke(real_cli, ["runtime", "--help"]) + assert result.exit_code == 0 + assert "apply" in result.output + + +def test_real_cli_exposes_rt_alias(): + result = 
CliRunner().invoke(real_cli, ["rt", "--help"]) + assert result.exit_code == 0 + assert "apply" in result.output diff --git a/tests/unit/test_runtime_apply_e2e.py b/tests/unit/test_runtime_apply_e2e.py new file mode 100644 index 0000000..13fd783 --- /dev/null +++ b/tests/unit/test_runtime_apply_e2e.py @@ -0,0 +1,68 @@ +"""End-to-end mock for ar runtime apply happy / failure / timeout paths.""" + +import json +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from click.testing import CliRunner + +from agentrun_cli.main import cli + +VALID_YAML = """ +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: img:v1 +""" + + +def _runtime(name="my-agent", rid="ar-1", status="CREATING"): + rt = SimpleNamespace( + agent_runtime_name=name, agent_runtime_id=rid, + agent_runtime_arn=f"acs:{rid}", + agent_runtime_version="1", + status=status, status_reason=None, + created_at="t", last_updated_at="t", + ) + rt.list_endpoints = MagicMock(return_value=[]) + rt.create_endpoint = MagicMock(return_value=SimpleNamespace( + agent_runtime_endpoint_name="default", + agent_runtime_endpoint_id="ep-1", + status="READY", status_reason=None, + endpoint_public_url="https://x/", + target_version="LATEST", + description=None, routing_configuration=None, + disable_public_network_access=None, + refresh=lambda *a, **k: None, + )) + return rt + + +def test_apply_full_chain(monkeypatch): + monkeypatch.setattr("time.sleep", lambda *_: None) + rt = _runtime() + states = iter(["CREATING", "CREATING", "READY"]) + rt.refresh = lambda *a, **k: (setattr(rt, "status", + next(states, "READY")) or rt) + rt_cls = MagicMock() + rt_cls.list_all.return_value = [] + rt_cls.create.return_value = rt + + with ( + patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", + return_value=MagicMock()), + patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls), + ): + runner = CliRunner() + with runner.isolated_filesystem(): 
+ with open("rt.yaml", "w") as f: + f.write(VALID_YAML) + result = runner.invoke(cli, ["runtime", "apply", "-f", "rt.yaml"]) + assert result.exit_code == 0, result.output + out = json.loads(result.output) + assert out[0]["action"] == "create" + assert out[0]["runtime"]["status"] == "READY" + assert out[0]["endpoints"][0]["status"] == "READY" + assert out[0]["endpoints"][0]["publicUrl"] == "https://x/" diff --git a/tests/unit/test_runtime_helpers.py b/tests/unit/test_runtime_helpers.py new file mode 100644 index 0000000..a34b855 --- /dev/null +++ b/tests/unit/test_runtime_helpers.py @@ -0,0 +1,115 @@ +"""Unit tests for ``agentrun_cli.commands.runtime._helpers``.""" + +from types import SimpleNamespace + +import pytest + +from agentrun_cli.commands.runtime._helpers import ( + _coerce_status, + ctx_cfg, + parse_duration, + serialize_endpoint, + serialize_runtime, +) + + +class TestParseDuration: + def test_int_passthrough(self): + assert parse_duration(42) == 42 + + def test_none_returns_zero(self): + assert parse_duration(None) == 0 + + def test_seconds_default(self): + assert parse_duration("30") == 30 + + def test_explicit_seconds(self): + assert parse_duration("90s") == 90 + assert parse_duration("90sec") == 90 + + def test_minutes(self): + assert parse_duration("10m") == 600 + assert parse_duration("5min") == 300 + + def test_hours(self): + assert parse_duration("2h") == 7200 + assert parse_duration("1hr") == 3600 + assert parse_duration("3hour") == 10800 + + def test_case_insensitive_and_whitespace(self): + assert parse_duration(" 10 M ") == 600 + + def test_invalid_raises(self): + with pytest.raises(ValueError): + parse_duration("ten minutes") + + +class TestCtxCfg: + def test_no_obj(self): + ctx = SimpleNamespace(obj=None) + assert ctx_cfg(ctx) == (None, None) + + def test_with_obj(self): + ctx = SimpleNamespace(obj={"profile": "staging", "region": "cn-hangzhou"}) + assert ctx_cfg(ctx) == ("staging", "cn-hangzhou") + + def test_missing_attr(self): + ctx = 
SimpleNamespace() + assert ctx_cfg(ctx) == (None, None) + + +class TestCoerceStatus: + def test_none(self): + assert _coerce_status(None) is None + + def test_enum_like(self): + class StatusLike: + value = "READY" + assert _coerce_status(StatusLike()) == "READY" + + def test_plain_string(self): + assert _coerce_status("CREATING") == "CREATING" + + +class TestSerializeRuntime: + def test_full_object(self): + rt = SimpleNamespace( + agent_runtime_name="my-agent", + agent_runtime_id="ar-1", + agent_runtime_arn="acs:ar-1", + agent_runtime_version="1", + status="READY", + status_reason=None, + created_at="t0", + last_updated_at="t1", + ) + out = serialize_runtime(rt) + assert out["name"] == "my-agent" + assert out["id"] == "ar-1" + assert out["arn"] == "acs:ar-1" + assert out["status"] == "READY" + + def test_minimal_object(self): + rt = SimpleNamespace() + out = serialize_runtime(rt) + assert all(v is None for v in out.values()) + + +class TestSerializeEndpoint: + def test_full_object(self): + ep = SimpleNamespace( + agent_runtime_endpoint_name="default", + agent_runtime_endpoint_id="ep-1", + status="READY", + status_reason=None, + endpoint_public_url="https://x/", + target_version="LATEST", + ) + out = serialize_endpoint(ep) + assert out["name"] == "default" + assert out["publicUrl"] == "https://x/" + + def test_minimal(self): + ep = SimpleNamespace() + out = serialize_endpoint(ep) + assert all(v is None for v in out.values()) From 9e2a9667135017689c9852c32cf6972be9545cf5 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Wed, 20 May 2026 10:29:55 +0800 Subject: [PATCH 5/8] fix(ci): pin agentrun-inner-test==0.0.200; fix lint, mypy, test failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR4-6 runtime work was developed against an unreleased SDK whose PyPI distribution name is `agentrun-inner-test` (versions 0.0.200+), not `agentrun-sdk` (PyPI tops out at 0.0.36, missing NASConfig and other model classes). 
CI installed 0.0.36 and every runtime_* test crashed at `from agentrun.agent_runtime.model import NASConfig, ...`, which `@handle_errors` funneled to exit code 4 — hence the spurious `assert 4 == 5/6` failures on the apply tests. Switch the dep so CI installs the SDK that actually has the classes: - "agentrun-sdk[core]>=0.0.34" + "agentrun-inner-test[core]==0.0.200" Also fix the lint + type-check jobs that were keeping the PR from going green: - ruff format: apply formatter to the 12 PR4-6 files (no semantic changes, just the format the CI gate requires). - mypy: 6 errors in 4 files - _helpers.py: widen the parse_duration `value` parameter from `str` to `str | int | None` so the int/None branches stop being unreachable. - agentruntime_yaml.py: cast(str, logstore) — the project/logstore pairing check above (bool(project) == bool(logstore)) already proves logstore is set when project is. - runtime_reconciler.py: - cast(ParsedAgentRuntime, _StubWithEndpoints(...)) for the to_endpoint_create_inputs call. - skip None keys in the prune loop (dict key came from getattr(..., None), can be None). Local gate after fixes: ruff format --check + ruff check + mypy clean; 522/522 tests pass on agentrun-inner-test==0.0.200. 
Signed-off-by: Sodawyx Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Sodawyx --- pyproject.toml | 2 +- src/agentrun_cli/_utils/agentruntime_yaml.py | 10 +- src/agentrun_cli/_utils/runtime_reconciler.py | 6 +- src/agentrun_cli/commands/runtime/_helpers.py | 7 +- .../commands/runtime/apply_cmd.py | 60 ++++++--- src/agentrun_cli/commands/runtime/crud_cmd.py | 21 ++-- .../commands/runtime/delete_cmd.py | 20 ++- .../commands/runtime/render_cmd.py | 31 +++-- .../commands/runtime/status_cmd.py | 36 ++++-- tests/integration/test_runtime_cmd.py | 117 ++++++++++++------ tests/unit/test_runtime_apply_e2e.py | 42 ++++--- tests/unit/test_runtime_helpers.py | 1 + tests/unit/test_runtime_state.py | 12 +- tests/unit/test_runtime_yaml.py | 27 ++-- 14 files changed, 249 insertions(+), 143 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8c6ab0d..26d9793 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ ] dependencies = [ - "agentrun-sdk[core]>=0.0.34", + "agentrun-inner-test[core]==0.0.200", "pyyaml>=6.0", "questionary>=2.0", ] diff --git a/src/agentrun_cli/_utils/agentruntime_yaml.py b/src/agentrun_cli/_utils/agentruntime_yaml.py index af8da29..5fe2271 100644 --- a/src/agentrun_cli/_utils/agentruntime_yaml.py +++ b/src/agentrun_cli/_utils/agentruntime_yaml.py @@ -35,7 +35,7 @@ import re from dataclasses import dataclass, field -from typing import Any +from typing import Any, cast import yaml @@ -246,7 +246,9 @@ def _parse_container(raw: dict) -> ParsedContainer: raise YamlSchemaError("spec.container.image is required and must be a string.") image_registry_type = raw.get("imageRegistryType") if image_registry_type is not None and image_registry_type not in ( - "ACR", "ACREE", "CUSTOM", + "ACR", + "ACREE", + "CUSTOM", ): raise YamlSchemaError( f"spec.container.imageRegistryType {image_registry_type!r} must be " @@ -379,7 +381,9 @@ def _parse_log(raw) -> ParsedLog | None: ) if project is None: return None - return 
ParsedLog(project=project, logstore=logstore) + # bool(project) == bool(logstore) was already enforced above, so when + # project is truthy logstore is too — narrow the type for mypy. + return ParsedLog(project=project, logstore=cast(str, logstore)) def _parse_env(raw) -> dict[str, str] | None: diff --git a/src/agentrun_cli/_utils/runtime_reconciler.py b/src/agentrun_cli/_utils/runtime_reconciler.py index b12db43..b6fc719 100644 --- a/src/agentrun_cli/_utils/runtime_reconciler.py +++ b/src/agentrun_cli/_utils/runtime_reconciler.py @@ -11,7 +11,7 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Literal +from typing import Any, Literal, cast from agentrun_cli._utils.agentruntime_yaml import ( ParsedAgentRuntime, @@ -114,7 +114,7 @@ def reconcile_endpoints( # to_create / to_update / noop create_inputs = to_endpoint_create_inputs( # Build a stub-parsed runtime carrying just .endpoints - _StubWithEndpoints(desired) + cast(ParsedAgentRuntime, _StubWithEndpoints(desired)) ) create_inputs_by_name = {ci.agent_runtime_endpoint_name: ci for ci in create_inputs} @@ -143,7 +143,7 @@ def reconcile_endpoints( # prune if prune: for name, current in current_by_name.items(): - if name in desired_names: + if name is None or name in desired_names: continue runtime.delete_endpoint(current.agent_runtime_endpoint_id) actions.append(EndpointAction(action="delete", name=name, endpoint=current)) diff --git a/src/agentrun_cli/commands/runtime/_helpers.py b/src/agentrun_cli/commands/runtime/_helpers.py index 42a340f..cfa732b 100644 --- a/src/agentrun_cli/commands/runtime/_helpers.py +++ b/src/agentrun_cli/commands/runtime/_helpers.py @@ -10,14 +10,15 @@ def ctx_cfg(ctx) -> tuple[str | None, str | None]: return obj.get("profile"), obj.get("region") -def parse_duration(value: str) -> int: +def parse_duration(value: str | int | None) -> int: """Parse ``10m`` / ``300s`` / ``1h`` / plain integer seconds → seconds.""" if isinstance(value, int): 
return value if value is None: return 0 - m = re.fullmatch(r"(\d+)\s*(s|sec|m|min|h|hr|hour)?", str(value).strip(), - re.IGNORECASE) + m = re.fullmatch( + r"(\d+)\s*(s|sec|m|min|h|hr|hour)?", str(value).strip(), re.IGNORECASE + ) if not m: raise ValueError(f"Invalid duration {value!r}") n = int(m.group(1)) diff --git a/src/agentrun_cli/commands/runtime/apply_cmd.py b/src/agentrun_cli/commands/runtime/apply_cmd.py index e95da0d..1aa30b8 100644 --- a/src/agentrun_cli/commands/runtime/apply_cmd.py +++ b/src/agentrun_cli/commands/runtime/apply_cmd.py @@ -43,6 +43,7 @@ def _lazy_sdk(): global AgentRuntime if AgentRuntime is None: from agentrun.agent_runtime import AgentRuntime as _AR + AgentRuntime = _AR return AgentRuntime @@ -74,19 +75,28 @@ def _progress(stream, parsed, runtime, elapsed): ), ) @click.option( - "-f", "--file", "file_path", required=True, + "-f", + "--file", + "file_path", + required=True, help="YAML file path (supports multi-document).", ) @click.option( - "--wait/--no-wait", default=True, show_default=True, + "--wait/--no-wait", + default=True, + show_default=True, help="Poll until the runtime + endpoints reach a final status.", ) @click.option( - "--timeout", default="10m", show_default=True, + "--timeout", + default="10m", + show_default=True, help="Polling timeout (e.g. 
600s, 10m, 1h).", ) @click.option( - "--prune-endpoints/--no-prune-endpoints", default=True, show_default=True, + "--prune-endpoints/--no-prune-endpoints", + default=True, + show_default=True, help="Delete endpoints that exist remotely but are absent from the YAML.", ) @click.pass_context @@ -108,35 +118,46 @@ def apply_cmd(ctx, file_path, wait, timeout, prune_endpoints): if wait: poll_until_final( - runtime, resource_kind="AgentRuntime", cfg=poll_cfg, + runtime, + resource_kind="AgentRuntime", + cfg=poll_cfg, on_tick=lambda r, e, p=parsed: _progress(sys.stderr, p, r, e), ) ep_actions = reconcile_endpoints( - runtime, desired=parsed.endpoints, prune=prune_endpoints, + runtime, + desired=parsed.endpoints, + prune=prune_endpoints, ) if wait: in_flight = [ - a.endpoint for a in ep_actions + a.endpoint + for a in ep_actions if a.action in ("create", "update") and a.endpoint is not None ] poll_many_parallel( - in_flight, resource_kind="AgentRuntimeEndpoint", - cfg=poll_cfg, concurrency=ENDPOINT_POLL_CONCURRENCY, + in_flight, + resource_kind="AgentRuntimeEndpoint", + cfg=poll_cfg, + concurrency=ENDPOINT_POLL_CONCURRENCY, on_tick=lambda r, e, p=parsed: _progress(sys.stderr, p, r, e), ) - results.append({ - "action": rt_res.action, - "runtime": serialize_runtime(runtime), - "endpoints": [ - {**serialize_endpoint(a.endpoint or _empty_ep(a.name)), - "action": a.action} - for a in ep_actions - ], - "elapsedSeconds": round(time.monotonic() - started, 3), - }) + results.append( + { + "action": rt_res.action, + "runtime": serialize_runtime(runtime), + "endpoints": [ + { + **serialize_endpoint(a.endpoint or _empty_ep(a.name)), + "action": a.action, + } + for a in ep_actions + ], + "elapsedSeconds": round(time.monotonic() - started, 3), + } + ) format_output(ctx, results, quiet_field="name") @@ -149,4 +170,5 @@ class _E: status_reason = None endpoint_public_url = None target_version = None + return _E() diff --git a/src/agentrun_cli/commands/runtime/crud_cmd.py 
b/src/agentrun_cli/commands/runtime/crud_cmd.py index d9cbea8..9447a02 100644 --- a/src/agentrun_cli/commands/runtime/crud_cmd.py +++ b/src/agentrun_cli/commands/runtime/crud_cmd.py @@ -17,6 +17,7 @@ def _lazy_sdk(): global AgentRuntime if AgentRuntime is None: from agentrun.agent_runtime import AgentRuntime as _AR + AgentRuntime = _AR return AgentRuntime @@ -31,19 +32,21 @@ def get_cmd(ctx, name): build_sdk_config(profile_name=profile, region=region) runtime = find_runtime_by_name(rt_cls, name) if runtime is None: - echo_error("ResourceNotFound", - f"AgentRuntime {name!r} not found.") + echo_error("ResourceNotFound", f"AgentRuntime {name!r} not found.") raise SystemExit(EXIT_NOT_FOUND) format_output(ctx, serialize_runtime(runtime), quiet_field="name") @click.command("list", help="List Agent Runtimes.") @click.option( - "--created-by-cli", is_flag=True, default=False, + "--created-by-cli", + is_flag=True, + default=False, help=f"Only show runtimes tagged with {SYSTEM_TAG_CLI!r}.", ) @click.option( - "--workspace", default=None, + "--workspace", + default=None, help="Restrict the listing to a workspace (by name).", ) @click.pass_context @@ -54,9 +57,11 @@ def list_cmd(ctx, created_by_cli, workspace): build_sdk_config(profile_name=profile, region=region) items = list(rt_cls.list_all()) if workspace is not None: - items = [r for r in items if - getattr(r, "workspace_name", None) == workspace] + items = [r for r in items if getattr(r, "workspace_name", None) == workspace] if created_by_cli: - items = [r for r in items if SYSTEM_TAG_CLI in - (getattr(r, "system_tags", None) or [])] + items = [ + r + for r in items + if SYSTEM_TAG_CLI in (getattr(r, "system_tags", None) or []) + ] format_output(ctx, [serialize_runtime(r) for r in items]) diff --git a/src/agentrun_cli/commands/runtime/delete_cmd.py b/src/agentrun_cli/commands/runtime/delete_cmd.py index 08931b8..4f21834 100644 --- a/src/agentrun_cli/commands/runtime/delete_cmd.py +++ 
b/src/agentrun_cli/commands/runtime/delete_cmd.py @@ -25,6 +25,7 @@ def _lazy_sdk(): global AgentRuntime if AgentRuntime is None: from agentrun.agent_runtime import AgentRuntime as _AR + AgentRuntime = _AR return AgentRuntime @@ -53,11 +54,15 @@ def _progress(parsed_name, runtime, elapsed): @click.argument("name") @click.option("--wait/--no-wait", default=True, show_default=True) @click.option( - "--timeout", default="5m", show_default=True, + "--timeout", + default="5m", + show_default=True, help="Polling timeout (e.g. 300s, 5m).", ) @click.option( - "--yes", is_flag=True, default=False, + "--yes", + is_flag=True, + default=False, help="Skip the interactive confirmation.", ) @click.pass_context @@ -75,11 +80,14 @@ def delete_cmd(ctx, name, wait, timeout, yes): runtime.delete() # SDK chains endpoint deletes internally if wait: poll_until_deleted( - runtime, resource_kind="AgentRuntime", + runtime, + resource_kind="AgentRuntime", is_not_found=_is_not_found, - cfg=PollConfig(timeout=float( - parse_duration(timeout) or DEFAULT_DELETE_TIMEOUT_SECONDS, - )), + cfg=PollConfig( + timeout=float( + parse_duration(timeout) or DEFAULT_DELETE_TIMEOUT_SECONDS, + ) + ), on_tick=lambda r, e: _progress(name, r, e), ) format_output( diff --git a/src/agentrun_cli/commands/runtime/render_cmd.py b/src/agentrun_cli/commands/runtime/render_cmd.py index 31ab641..215a31c 100644 --- a/src/agentrun_cli/commands/runtime/render_cmd.py +++ b/src/agentrun_cli/commands/runtime/render_cmd.py @@ -34,7 +34,10 @@ def _parse_file(path): ), ) @click.option( - "-f", "--file", "file_path", required=True, + "-f", + "--file", + "file_path", + required=True, help="YAML file path (supports multi-document).", ) @click.pass_context @@ -45,15 +48,19 @@ def render_cmd(ctx, file_path): for parsed in docs: rt_input = to_runtime_create_input(parsed) ep_inputs = to_endpoint_create_inputs(parsed) - results.append({ - "kind": "AgentRuntime", - "name": parsed.name, - "renderedCreateInput": ( - rt_input.model_dump() if 
hasattr(rt_input, "model_dump") else rt_input - ), - "renderedEndpoints": [ - ei.model_dump() if hasattr(ei, "model_dump") else ei - for ei in ep_inputs - ], - }) + results.append( + { + "kind": "AgentRuntime", + "name": parsed.name, + "renderedCreateInput": ( + rt_input.model_dump() + if hasattr(rt_input, "model_dump") + else rt_input + ), + "renderedEndpoints": [ + ei.model_dump() if hasattr(ei, "model_dump") else ei + for ei in ep_inputs + ], + } + ) format_output(ctx, results) diff --git a/src/agentrun_cli/commands/runtime/status_cmd.py b/src/agentrun_cli/commands/runtime/status_cmd.py index df89066..11e4474 100644 --- a/src/agentrun_cli/commands/runtime/status_cmd.py +++ b/src/agentrun_cli/commands/runtime/status_cmd.py @@ -25,6 +25,7 @@ def _lazy_sdk(): global AgentRuntime if AgentRuntime is None: from agentrun.agent_runtime import AgentRuntime as _AR + AgentRuntime = _AR return AgentRuntime @@ -34,10 +35,18 @@ def _lazy_sdk(): help="Show the status of an Agent Runtime (optionally wait for terminal).", ) @click.argument("name") -@click.option("--wait", is_flag=True, default=False, - help="Poll until the runtime reaches READY/*_FAILED.") -@click.option("--timeout", default="10m", show_default=True, - help="Polling timeout (only with --wait).") +@click.option( + "--wait", + is_flag=True, + default=False, + help="Poll until the runtime reaches READY/*_FAILED.", +) +@click.option( + "--timeout", + default="10m", + show_default=True, + help="Polling timeout (only with --wait).", +) @click.pass_context @handle_errors def status_cmd(ctx, name, wait, timeout): @@ -50,13 +59,18 @@ def status_cmd(ctx, name, wait, timeout): raise SystemExit(EXIT_NOT_FOUND) if wait: poll_until_final( - runtime, resource_kind="AgentRuntime", - cfg=PollConfig(timeout=float( - parse_duration(timeout) or DEFAULT_APPLY_TIMEOUT_SECONDS, - )), - on_tick=lambda r, e: sys.stderr.isatty() and sys.stderr.write( - f"[runtime {name}] status={getattr(r, 'status', None)} " - f"({e:.1f}s)\n" + runtime, 
+ resource_kind="AgentRuntime", + cfg=PollConfig( + timeout=float( + parse_duration(timeout) or DEFAULT_APPLY_TIMEOUT_SECONDS, + ) + ), + on_tick=lambda r, e: ( + sys.stderr.isatty() + and sys.stderr.write( + f"[runtime {name}] status={getattr(r, 'status', None)} ({e:.1f}s)\n" + ) ), ) format_output(ctx, serialize_runtime(runtime), quiet_field="name") diff --git a/tests/integration/test_runtime_cmd.py b/tests/integration/test_runtime_cmd.py index b5f0490..8bc2239 100644 --- a/tests/integration/test_runtime_cmd.py +++ b/tests/integration/test_runtime_cmd.py @@ -20,6 +20,7 @@ def _root(): """Build a root CLI that mounts only ``runtime`` — keeps the test independent of PR6's main.py wiring.""" + @click.group() @click.option("--profile", default=None) @click.option("--region", default=None) @@ -92,28 +93,33 @@ def test_render_invalid_yaml_exit_code_2(): runner = CliRunner() with runner.isolated_filesystem(): with open("bad.yaml", "w") as f: - f.write("apiVersion: wrong/v1\nkind: AgentRuntime\nmetadata: {name: x}\n" - "spec: {container: {image: i}}\n") + f.write( + "apiVersion: wrong/v1\nkind: AgentRuntime\nmetadata: {name: x}\n" + "spec: {container: {image: i}}\n" + ) result = runner.invoke(_root(), ["runtime", "render", "-f", "bad.yaml"]) assert result.exit_code == 2 def _make_runtime(name="my-agent", status="READY", rid="ar-1"): return SimpleNamespace( - agent_runtime_name=name, agent_runtime_id=rid, + agent_runtime_name=name, + agent_runtime_id=rid, agent_runtime_arn=f"acs:{rid}", agent_runtime_version="1", - status=status, status_reason=None, - created_at="t0", last_updated_at="t1", + status=status, + status_reason=None, + created_at="t0", + last_updated_at="t1", ) -def _make_endpoint(name="default", status="READY", eid="ep-1", - url="https://x/"): +def _make_endpoint(name="default", status="READY", eid="ep-1", url="https://x/"): e = SimpleNamespace( agent_runtime_endpoint_name=name, agent_runtime_endpoint_id=eid, - status=status, status_reason=None, + 
status=status, + status_reason=None, endpoint_public_url=url, target_version="LATEST", description=None, @@ -136,6 +142,7 @@ def test_apply_create_happy_path(monkeypatch): def _refresh(self=None, *a, **k): created.status = next(refresh_states, "READY") return created + created.refresh = _refresh fake_runtime_cls.list_all.return_value = [] @@ -173,9 +180,9 @@ def test_apply_update_path(monkeypatch): monkeypatch.setattr("time.sleep", lambda *_: None) existing = _make_runtime(status="UPDATING") refresh_states = iter(["UPDATING", "READY"]) - existing.refresh = lambda *a, **k: (setattr(existing, "status", - next(refresh_states, "READY")) - or existing) + existing.refresh = lambda *a, **k: ( + setattr(existing, "status", next(refresh_states, "READY")) or existing + ) existing.list_endpoints = MagicMock(return_value=[]) existing.create_endpoint = MagicMock(return_value=_make_endpoint()) @@ -184,8 +191,10 @@ def test_apply_update_path(monkeypatch): rt_cls.update_by_id.return_value = existing with ( - patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls), ): runner = CliRunner() @@ -208,8 +217,10 @@ def test_apply_runtime_failed_exits_5(monkeypatch): rt_cls.create.return_value = rt with ( - patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls), ): runner = CliRunner() @@ -222,9 +233,10 @@ def test_apply_runtime_failed_exits_5(monkeypatch): def test_apply_timeout_exits_6(monkeypatch): import itertools + monkeypatch.setattr("time.sleep", lambda *_: None) rt = _make_runtime(status="CREATING") - rt.refresh = lambda *a, **k: rt # never advances + rt.refresh 
= lambda *a, **k: rt # never advances # apply_cmd.started uses 1 tick; poll_until_final uses 1 for start + # >=2 for elapsed checks (first under timeout, second exceeds). Provide # an unbounded chain so any extra calls keep returning the timeout value. @@ -236,8 +248,10 @@ def test_apply_timeout_exits_6(monkeypatch): rt_cls.create.return_value = rt with ( - patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls), ): runner = CliRunner() @@ -245,7 +259,8 @@ def test_apply_timeout_exits_6(monkeypatch): with open("rt.yaml", "w") as f: f.write(VALID_YAML) result = runner.invoke( - _root(), ["runtime", "apply", "-f", "rt.yaml", "--timeout", "1s"], + _root(), + ["runtime", "apply", "-f", "rt.yaml", "--timeout", "1s"], ) assert result.exit_code == 6 @@ -255,8 +270,10 @@ def test_get_runtime(): rt_cls = MagicMock() rt_cls.list_all.return_value = [rt] with ( - patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke(_root(), ["runtime", "get", "my-agent"]) @@ -269,8 +286,10 @@ def test_get_runtime_not_found_exit_1(): rt_cls = MagicMock() rt_cls.list_all.return_value = [] with ( - patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke(_root(), ["runtime", "get", "missing"]) @@ -284,8 +303,10 @@ def test_list_runtimes(): _make_runtime("b", "CREATING", "ar-b"), ] with ( - 
patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke(_root(), ["runtime", "list"]) @@ -303,12 +324,15 @@ def test_list_runtimes_created_by_cli_filter(): other.system_tags = ["something-else"] rt_cls.list_all.return_value = [cli_runtime, other] with ( - patch("agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.crud_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.crud_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke( - _root(), ["runtime", "list", "--created-by-cli"], + _root(), + ["runtime", "list", "--created-by-cli"], ) assert result.exit_code == 0 out = json.loads(result.output) @@ -319,12 +343,15 @@ def test_delete_idempotent_when_missing(): rt_cls = MagicMock() rt_cls.list_all.return_value = [] with ( - patch("agentrun_cli.commands.runtime.delete_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.delete_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.delete_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke( - _root(), ["runtime", "delete", "missing", "--yes"], + _root(), + ["runtime", "delete", "missing", "--yes"], ) assert result.exit_code == 1 # ResourceNotFound @@ -339,6 +366,7 @@ def _refresh(*a, **k): raise next(states) except StopIteration: return rt + rt.refresh = _refresh rt.delete = MagicMock() @@ -348,8 +376,10 @@ def _refresh(*a, **k): # The SystemExit module needs an is_not_found predicate. The integration # test points the predicate at the simulated exception's message. 
with ( - patch("agentrun_cli.commands.runtime.delete_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.delete_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.delete_cmd.AgentRuntime", rt_cls), patch( "agentrun_cli.commands.runtime.delete_cmd._is_not_found", @@ -357,7 +387,8 @@ def _refresh(*a, **k): ), ): result = CliRunner().invoke( - _root(), ["runtime", "delete", "my-agent", "--yes"], + _root(), + ["runtime", "delete", "my-agent", "--yes"], ) assert result.exit_code == 0, result.output rt.delete.assert_called_once() @@ -368,8 +399,10 @@ def test_status_no_wait_returns_current(): rt_cls = MagicMock() rt_cls.list_all.return_value = [rt] with ( - patch("agentrun_cli.commands.runtime.status_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.status_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.status_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke(_root(), ["runtime", "status", "my-agent"]) @@ -382,17 +415,19 @@ def test_status_wait_until_ready(monkeypatch): monkeypatch.setattr("time.sleep", lambda *_: None) rt = _make_runtime(status="CREATING") states = iter(["CREATING", "READY"]) - rt.refresh = lambda *a, **k: (setattr(rt, "status", next(states, "READY")) - or rt) + rt.refresh = lambda *a, **k: setattr(rt, "status", next(states, "READY")) or rt rt_cls = MagicMock() rt_cls.list_all.return_value = [rt] with ( - patch("agentrun_cli.commands.runtime.status_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + "agentrun_cli.commands.runtime.status_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.status_cmd.AgentRuntime", rt_cls), ): result = CliRunner().invoke( - _root(), ["runtime", "status", "my-agent", "--wait"], + _root(), + ["runtime", "status", "my-agent", "--wait"], ) assert result.exit_code == 0 out = 
json.loads(result.output) diff --git a/tests/unit/test_runtime_apply_e2e.py b/tests/unit/test_runtime_apply_e2e.py index 13fd783..0ab3b63 100644 --- a/tests/unit/test_runtime_apply_e2e.py +++ b/tests/unit/test_runtime_apply_e2e.py @@ -20,23 +20,30 @@ def _runtime(name="my-agent", rid="ar-1", status="CREATING"): rt = SimpleNamespace( - agent_runtime_name=name, agent_runtime_id=rid, + agent_runtime_name=name, + agent_runtime_id=rid, agent_runtime_arn=f"acs:{rid}", agent_runtime_version="1", - status=status, status_reason=None, - created_at="t", last_updated_at="t", + status=status, + status_reason=None, + created_at="t", + last_updated_at="t", ) rt.list_endpoints = MagicMock(return_value=[]) - rt.create_endpoint = MagicMock(return_value=SimpleNamespace( - agent_runtime_endpoint_name="default", - agent_runtime_endpoint_id="ep-1", - status="READY", status_reason=None, - endpoint_public_url="https://x/", - target_version="LATEST", - description=None, routing_configuration=None, - disable_public_network_access=None, - refresh=lambda *a, **k: None, - )) + rt.create_endpoint = MagicMock( + return_value=SimpleNamespace( + agent_runtime_endpoint_name="default", + agent_runtime_endpoint_id="ep-1", + status="READY", + status_reason=None, + endpoint_public_url="https://x/", + target_version="LATEST", + description=None, + routing_configuration=None, + disable_public_network_access=None, + refresh=lambda *a, **k: None, + ) + ) return rt @@ -44,15 +51,16 @@ def test_apply_full_chain(monkeypatch): monkeypatch.setattr("time.sleep", lambda *_: None) rt = _runtime() states = iter(["CREATING", "CREATING", "READY"]) - rt.refresh = lambda *a, **k: (setattr(rt, "status", - next(states, "READY")) or rt) + rt.refresh = lambda *a, **k: setattr(rt, "status", next(states, "READY")) or rt rt_cls = MagicMock() rt_cls.list_all.return_value = [] rt_cls.create.return_value = rt with ( - patch("agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", - return_value=MagicMock()), + patch( + 
"agentrun_cli.commands.runtime.apply_cmd.build_sdk_config", + return_value=MagicMock(), + ), patch("agentrun_cli.commands.runtime.apply_cmd.AgentRuntime", rt_cls), ): runner = CliRunner() diff --git a/tests/unit/test_runtime_helpers.py b/tests/unit/test_runtime_helpers.py index a34b855..1d0d53b 100644 --- a/tests/unit/test_runtime_helpers.py +++ b/tests/unit/test_runtime_helpers.py @@ -65,6 +65,7 @@ def test_none(self): def test_enum_like(self): class StatusLike: value = "READY" + assert _coerce_status(StatusLike()) == "READY" def test_plain_string(self): diff --git a/tests/unit/test_runtime_state.py b/tests/unit/test_runtime_state.py index c556166..0da3868 100644 --- a/tests/unit/test_runtime_state.py +++ b/tests/unit/test_runtime_state.py @@ -87,9 +87,7 @@ class FakeNotFound(Exception): def test_poll_until_deleted(monkeypatch): monkeypatch.setattr("time.sleep", lambda *_: None) - res = SimpleNamespace( - status="DELETING", status_reason=None, agent_runtime_name="x" - ) + res = SimpleNamespace(status="DELETING", status_reason=None, agent_runtime_name="x") call_count = {"n": 0} def _refresh(*a, **k): @@ -186,9 +184,7 @@ def test_poll_until_final_on_tick_invoked(monkeypatch): def test_poll_until_deleted_on_tick_and_timeout(monkeypatch): """Exercise on_tick + timeout branches in poll_until_deleted.""" monkeypatch.setattr("time.sleep", lambda *_: None) - res = SimpleNamespace( - status="DELETING", status_reason=None, agent_runtime_name="x" - ) + res = SimpleNamespace(status="DELETING", status_reason=None, agent_runtime_name="x") res.refresh = lambda *a, **k: res # never raises fake_clock = iter([0.0, 11.0]) # passes timeout=10 on 2nd check monkeypatch.setattr("time.monotonic", lambda: next(fake_clock)) @@ -209,9 +205,7 @@ def test_poll_until_deleted_on_tick_and_timeout(monkeypatch): def test_poll_until_deleted_refresh_raises_other(monkeypatch): """Non-not-found exceptions from refresh propagate.""" monkeypatch.setattr("time.sleep", lambda *_: None) - res = 
SimpleNamespace( - status="DELETING", status_reason=None, agent_runtime_name="x" - ) + res = SimpleNamespace(status="DELETING", status_reason=None, agent_runtime_name="x") def _refresh(*a, **k): raise RuntimeError("boom") diff --git a/tests/unit/test_runtime_yaml.py b/tests/unit/test_runtime_yaml.py index 71fb562..6826b26 100644 --- a/tests/unit/test_runtime_yaml.py +++ b/tests/unit/test_runtime_yaml.py @@ -770,6 +770,7 @@ def test_endpoint_minimal_keeps_target_version_none(): def test_registry_password_not_in_repr(): from agentrun_cli._utils.agentruntime_yaml import ParsedRegistryAuth + auth = ParsedRegistryAuth(user_name="u", password="secret") # noqa: S106 rendered = repr(auth) assert "secret" not in rendered @@ -777,20 +778,26 @@ def test_registry_password_not_in_repr(): def test_image_registry_type_must_be_known(): - text = _doc_with(spec={ - "container": {"image": "img", "imageRegistryType": "acree"}, - }) + text = _doc_with( + spec={ + "container": {"image": "img", "imageRegistryType": "acree"}, + } + ) with pytest.raises(YamlSchemaError, match="imageRegistryType"): parse_yaml_text(text) def test_endpoint_routing_non_numeric_weight(): - text = _doc_with(spec={ - "container": {"image": "img"}, - "endpoints": [{ - "name": "x", - "routing": [{"version": "1", "weight": "abc"}], - }], - }) + text = _doc_with( + spec={ + "container": {"image": "img"}, + "endpoints": [ + { + "name": "x", + "routing": [{"version": "1", "weight": "abc"}], + } + ], + } + ) with pytest.raises(YamlSchemaError, match="weight"): parse_yaml_text(text) From 009807b0b44f851676310c05c4be7dc6e5df24f1 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Wed, 20 May 2026 10:30:40 +0800 Subject: [PATCH 6/8] docs(runtime): add bilingual YAML field reference, sample, CI gate notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three documentation gaps surfaced while landing the PR4-6 runtime work: 1. 
The YAML schema chapter in docs/{en,zh}/runtime.md only pointed at an external design doc (projects/.../runtime-cli-design.md), which is useless to end users reading the published reference. 2. There was no copy-pasteable starter spec to pair with `ar runtime apply`. 3. AGENTS.md only mentioned `make lint`; the actual CI gate also runs ruff format --check, mypy, and the 95%-coverage pytest job — local contributors had no signal that those were blocking. Changes: - docs/{en,zh}/runtime-yaml.md (new): field-level reference covering every key the parser accepts. Sections for metadata, container, registryConfig, resources, protocol, network, healthCheck, log, env, nas, ossMount, endpoints/scaling; CLI auto-injection rules (system_tags, artifact_type, default endpoint); exhaustive validation table mirroring agentruntime_yaml.py; runnable minimal / production (ACREE + private network + NAS + canary) / custom-registry examples; YAML -> SDK field map. EN/ZH mirrored 1:1 per AGENTS.md doc parity. diskSize is documented as MB (not GB, which the design doc had wrong). - docs/{en,zh}/runtime.md: replace the external design-doc pointer with a link to runtime-yaml.md. - docs/{en,zh}/index.md: add the YAML reference next to runtime.md in the command-groups table. - agentruntime.yaml (new, repo root): runnable starter spec mirroring superagent.yaml's placement. Renders cleanly via `ar runtime render -f agentruntime.yaml`. Commented-out blocks point at the common optional knobs (VPC, SLS log, scaling). - AGENTS.md: - "CI Lint Gate" section listing all four blocking checks (ruff check, ruff format --check, mypy, pytest --cov-fail-under=95) with the local commands and Make targets that reproduce CI. - Commands block updated with make format-check, `mypy src/agentrun_cli`, and `make coverage`. - Docs layout diagram updated to include runtime.md / runtime-yaml.md. 
Signed-off-by: Sodawyx Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Sodawyx --- AGENTS.md | 44 +++- agentruntime.yaml | 52 +++++ docs/en/index.md | 2 +- docs/en/runtime-yaml.md | 430 ++++++++++++++++++++++++++++++++++++++++ docs/en/runtime.md | 5 +- docs/zh/index.md | 2 +- docs/zh/runtime-yaml.md | 425 +++++++++++++++++++++++++++++++++++++++ docs/zh/runtime.md | 3 +- 8 files changed, 957 insertions(+), 6 deletions(-) create mode 100644 agentruntime.yaml create mode 100644 docs/en/runtime-yaml.md create mode 100644 docs/zh/runtime-yaml.md diff --git a/AGENTS.md b/AGENTS.md index 144eb58..aa8f6a7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,8 +14,12 @@ make dev # Create venv + install editable with dev deps make install # Install without dev deps # Development -make lint # Run ruff linter +make lint # Run ruff linter (ruff check) +make format-check # Verify code is ruff-formatted (no rewrite) +.venv/bin/ruff format src/ tests/ # Apply ruff formatting in place +.venv/bin/mypy src/agentrun_cli # Static type check (CI runs this verbatim) make test # Run all tests +make coverage # Run tests with the >=95% coverage gate (matches CI) .venv/bin/pytest tests/test_cli_basic.py -v # Run a single test file .venv/bin/pytest tests/test_cli_basic.py::TestConfigCommands::test_set_and_get -v # Single test @@ -41,6 +45,42 @@ make build-all # macOS + Linux (via Docker) **Error handling** (`_utils/error.py`): The `@handle_errors` decorator catches SDK exceptions by class name pattern (no hard import) and maps them to deterministic exit codes (0=success, 1=not found, 2=bad input, 3=auth, 4=server). Errors go to stderr as JSON. +## CI Lint Gate (must pass locally before pushing) + +The GitHub Actions `CI` workflow (`.github/workflows/ci.yml`) blocks merges on +**all four** of these checks, run in dedicated jobs against Python 3.10–3.13. +Run them locally before pushing — failing any one of them produces a red PR. 
+ +| CI job | Command (run from repo root) | Make target | +|---|---|---| +| Lint (ruff) — lint rules | `ruff check src/ tests/` | `make lint` | +| Lint (ruff) — format check | `ruff format --check src/ tests/` | `make format-check` | +| Type check (mypy) | `mypy src/agentrun_cli` | *(no make target — run directly)* | +| Test + coverage | `pytest tests/unit tests/integration --cov=agentrun_cli --cov-fail-under=95` | `make coverage` | + +Rules: + +- **Ruff format is non-negotiable.** CI runs `ruff format --check`, not + `ruff format` — it will not auto-fix. Run `ruff format src/ tests/` locally + (or `make format-check` to verify) before every commit. Configuration lives + in `pyproject.toml` under `[tool.ruff]`. +- **Mypy must stay green.** Mypy config is in `pyproject.toml` under + `[tool.mypy]` (`python_version = "3.10"`, `warn_unreachable = true`, + `ignore_missing_imports = true`). Prefer narrowing types (`cast`, explicit + annotations) over `# type: ignore`; suppressions need a `[code]` selector + and a one-line comment justifying them. +- **Coverage threshold is 95%.** Every code change must keep incremental + coverage at or above 95% — `make coverage` enforces it. A Claude Code hook + (`.claude/settings.json`) also runs this automatically after edits to + `src/` files. See [Integration Test Requirement](#integration-test-requirement) + for the test-coverage rules that feed this gate. +- **No `--no-verify` / `--no-gpg-sign` shortcuts.** If a hook or lint check + fails, fix the underlying issue and create a new commit. + +The CI workflow also runs a Smoke job on macOS + Windows (`make build` of the +PyInstaller binary) and a `pip-audit` security scan — both are advisory but +should not regress. + ## Testing Tests use `click.testing.CliRunner` and `unittest.mock.patch` to swap `CONFIG_FILE`/`CONFIG_DIR` with `tmp_path` fixtures, so no real `~/.agentrun/` is touched. Model commands that hit the SDK are not tested in the basic suite — they require credentials. 
@@ -83,6 +123,8 @@ docs/ │ ├── index.md # install / auth / global options / output / exit codes / group nav │ ├── config.md # one file per command group │ ├── model.md +│ ├── runtime.md +│ ├── runtime-yaml.md # detailed YAML field reference for `ar runtime apply` │ ├── sandbox.md │ ├── skill.md │ ├── super-agent.md diff --git a/agentruntime.yaml b/agentruntime.yaml new file mode 100644 index 0000000..6ea0e1b --- /dev/null +++ b/agentruntime.yaml @@ -0,0 +1,52 @@ +# Sample AgentRuntime spec for `ar runtime apply -f agentruntime.yaml`. +# Full field reference: docs/en/runtime-yaml.md (zh: docs/zh/runtime-yaml.md) +# +# Quick usage: +# ar runtime render -f agentruntime.yaml # dry-run, prints SDK input +# ar runtime apply -f agentruntime.yaml # create-or-update, waits to READY +# +# The CLI auto-injects system_tags=["x-agentrun-cli"] and artifact_type=Container. +# When spec.endpoints is omitted, a default endpoint (targetVersion=LATEST) is +# also injected — this sample defines an explicit one to make the shape clear. 
+ +apiVersion: agentrun/v1 +kind: AgentRuntime + +metadata: + name: my-agent # required, matches [a-z0-9-]{1,63} + description: "Example AgentRuntime" + # workspace: default # optional; mutually exclusive with workspaceId + +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 + # command: ["python", "app.py"] # optional, overrides image CMD/ENTRYPOINT + # port: 9000 # optional; spec.port below also works + + # ───── resources ───── + cpu: 2 # default 2 cores + memory: 4096 # default 4096 MB + # diskSize: 10240 # optional, MB (10 GiB) + + # ───── network (default PUBLIC; uncomment for VPC) ───── + # network: + # mode: PUBLIC_AND_PRIVATE + # vpcId: vpc-xxxxxxxx + # vswitchIds: [vsw-xxxxxxxx] + # securityGroupId: sg-xxxxxxxx + + # ───── log to SLS (project + logstore must be set together) ───── + # log: + # project: my-agent-logs + # logstore: runtime + + # ───── environment variables ───── + env: + LOG_LEVEL: info + + # ───── endpoints (omit to auto-inject `default` / LATEST) ───── + endpoints: + - name: default + targetVersion: LATEST + # scaling: + # minInstances: 1 diff --git a/docs/en/index.md b/docs/en/index.md index 8bd81e0..3554d24 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -199,5 +199,5 @@ Errors are written to stderr as JSON: | `sandbox` | `sb` | Sandboxes plus file, process, context, template and browser sub-groups | [sandbox.md](./sandbox.md) | | `tool` | | MCP and FunctionCall tools + sub-tool invocation | [tool.md](./tool.md) | | `skill` | | Platform skill packages + local scan/load/exec | [skill.md](./skill.md) | -| `runtime` | `rt` | Declarative Agent Runtime deploy (container mode) | [runtime.md](./runtime.md) | +| `runtime` | `rt` | Declarative Agent Runtime deploy (container mode) | [runtime.md](./runtime.md) · [YAML reference](./runtime-yaml.md) | | `super-agent` | `sa` | Quickstart REPL, declarative deploy, CRUD, conversations | [super-agent.md](./super-agent.md) | diff --git a/docs/en/runtime-yaml.md 
b/docs/en/runtime-yaml.md new file mode 100644 index 0000000..91f9a7f --- /dev/null +++ b/docs/en/runtime-yaml.md @@ -0,0 +1,430 @@ +**English** | [简体中文](../zh/runtime-yaml.md) + +# `ar runtime` YAML Reference + +This page is the field-level specification for the YAML consumed by +[`ar runtime apply`](./runtime.md#apply) and +[`ar runtime render`](./runtime.md#render). One document describes one Agent +Runtime; endpoints are embedded under `spec.endpoints`. The CLI is intentionally +container-only — `spec.code`, `metadata.tags`, and `metadata.systemTags` are +rejected (see [Validation rules](#validation-rules)). + +## Contents + +- [Document shape](#document-shape) +- [CLI auto-injection](#cli-auto-injection) +- [`metadata`](#metadata) +- [`spec.container`](#speccontainer) +- [`spec` resource & runtime knobs](#spec-resource--runtime-knobs) +- [`spec.protocol`](#specprotocol) +- [`spec.network`](#specnetwork) +- [`spec.healthCheck`](#spechealthcheck) +- [`spec.log`](#speclog) +- [`spec.env`](#specenv) +- [`spec.nas`](#specnas) +- [`spec.ossMount`](#specossmount) +- [`spec.endpoints`](#specendpoints) +- [Validation rules](#validation-rules) +- [Examples](#examples) +- [YAML → SDK field map](#yaml--sdk-field-map) + +## Document shape + +```yaml +apiVersion: agentrun/v1 # required, fixed string +kind: AgentRuntime # required, fixed string +metadata: {...} # see below +spec: {...} # see below +``` + +Multi-document YAML (`---` separators) is supported; each document is parsed and +applied independently in order. An empty stream is an error. + +## CLI auto-injection + +Two fields are managed by the CLI and **must not** appear in YAML: + +| Injected field | Value | Notes | +|---|---|---| +| `system_tags` | `["x-agentrun-cli"]` | The only label slot SDK 0.0.200 still exposes; powers `ar runtime list --created-by-cli`. | +| `artifact_type` | `Container` | This CLI only ships container-mode runtimes. 
| + +When `spec.endpoints` is **omitted entirely**, the CLI also injects: + +```yaml +endpoints: + - name: default + targetVersion: LATEST +``` + +`spec.endpoints: []` (explicitly empty) is honored — no endpoint is created and +existing ones are pruned (unless `--no-prune-endpoints`). + +## `metadata` + +| Key | Type | Required | Notes | +|---|---|---|---| +| `name` | string | ✓ | Must match `[a-z0-9-]{1,63}`. Becomes `agent_runtime_name`. | +| `description` | string | | Free text. | +| `workspace` | string | | Workspace **name**; mutually exclusive with `workspaceId`. Defaults to the account-level workspace when omitted. | +| `workspaceId` | string | | Workspace ID; mutually exclusive with `workspace`. | +| `tags` | — | ✗ | Rejected — SDK 0.0.200 removed user-facing tags. | +| `systemTags` | — | ✗ | Rejected — managed by the CLI. | + +## `spec.container` + +Required block. Defines the container image and registry credentials. + +| Key | Type | Required | Notes | +|---|---|---|---| +| `image` | string | ✓ | OCI image reference. | +| `command` | list<string> | | Overrides image `ENTRYPOINT`/`CMD`. | +| `port` | int | | Container listen port. If set, wins over `spec.port`. | +| `imageRegistryType` | enum | | One of `ACR`, `ACREE`, `CUSTOM`. | +| `acrInstanceId` | string | | Recommended when `imageRegistryType=ACREE`. | +| `registryConfig` | mapping | conditional | **Required** when `imageRegistryType=CUSTOM`; allowed (and parsed) otherwise. | + +### `spec.container.registryConfig` + +```yaml +registryConfig: + auth: + userName: + password: # sensitive — prefer env-var injection + cert: + insecure: + rootCaCertBase64: + network: + vpcId: + vSwitchId: + securityGroupId: +``` + +All three sub-blocks (`auth`, `cert`, `network`) are individually optional, but +`registryConfig` itself is mandatory under `CUSTOM`. + +## `spec` resource & runtime knobs + +| Key | Type | Default | Notes | +|---|---|---|---| +| `cpu` | float | `2` | Cores. | +| `memory` | int | `4096` | MB. 
| +| `port` | int | `9000` | Overridden by `spec.container.port` when both are set. | +| `diskSize` | int | | MB. | +| `enableSessionIsolation` | bool | | | +| `credentialName` | string | | Reference to a registered credential. | +| `executionRoleArn` | string | | RAM role ARN the runtime assumes. | +| `sessionConcurrencyLimitPerInstance` | int | | | +| `sessionIdleTimeoutSeconds` | int | | | + +## `spec.protocol` + +| Key | Type | Default | Notes | +|---|---|---|---| +| `type` | enum | `HTTP` | One of `HTTP`, `MCP`, `SUPER_AGENT`. | +| `settings` | list<ProtocolSetting> | | Advanced multi-route definitions. | + +`ProtocolSetting` fields (all optional, free-form strings unless noted): + +| Key | Notes | +|---|---| +| `type` | | +| `name` | | +| `path` | | +| `pathPrefix` | | +| `method` | | +| `requestContentType` | | +| `responseContentType` | | +| `headers` | | +| `inputBodyJsonSchema` | | +| `outputBodyJsonSchema` | | +| `a2aAgentCard` | | +| `a2aAgentCardUrl` | | +| `config` | | + +## `spec.network` + +| Key | Type | Required | Notes | +|---|---|---|---| +| `mode` | enum | | One of `PUBLIC` (default), `PRIVATE`, `PUBLIC_AND_PRIVATE`. | +| `vpcId` | string | conditional | **Required** when `mode ∈ {PRIVATE, PUBLIC_AND_PRIVATE}`. | +| `vswitchIds` | list<string> | | | +| `securityGroupId` | string | | | + +## `spec.healthCheck` + +| Key | Type | Notes | +|---|---|---| +| `httpGetUrl` | string | | +| `initialDelaySeconds` | int | | +| `periodSeconds` | int | | +| `timeoutSeconds` | int | | +| `failureThreshold` | int | | +| `successThreshold` | int | | + +## `spec.log` + +| Key | Type | Required | Notes | +|---|---|---|---| +| `project` | string | paired with `logstore` | SLS project. | +| `logstore` | string | paired with `project` | SLS logstore. | + +Either both keys are set, or the whole block is omitted; setting one without +the other is rejected. + +## `spec.env` + +Map of `string → string`. Non-string values are coerced to strings.
+ +```yaml +env: + LOG_LEVEL: info + HTTP_PROXY: http://proxy.internal:8080 +``` + +## `spec.nas` + +| Key | Type | Notes | +|---|---|---| +| `userId` | int | | +| `groupId` | int | | +| `mountPoints` | list<NasMountPoint> | Optional. | + +`NasMountPoint`: + +| Key | Type | Required | Notes | +|---|---|---|---| +| `serverAddr` | string | ✓ | | +| `mountDir` | string | ✓ | Absolute path inside the container. | +| `enableTLS` | bool | | | + +## `spec.ossMount` + +| Key | Type | Notes | +|---|---|---| +| `mountPoints` | list<OssMountPoint> | | + +`OssMountPoint`: + +| Key | Type | Required | Notes | +|---|---|---|---| +| `bucketName` | string | ✓ | | +| `mountDir` | string | ✓ | Absolute path inside the container. | +| `bucketPath` | string | | Sub-path inside the bucket. | +| `endpoint` | string | | OSS endpoint override. | +| `readOnly` | bool | | | + +## `spec.endpoints` + +Three shapes are allowed: + +| YAML | Behaviour | +|---|---| +| key absent | CLI injects `[{name: default, targetVersion: LATEST}]`. | +| `endpoints: []` | No endpoints are created. With `--prune-endpoints` (default), any existing endpoint is deleted. | +| `endpoints: [...]` | Each item is reconciled by name. | + +Per-endpoint fields: + +| Key | Type | Required | Notes | +|---|---|---|---| +| `name` | string | ✓ | Unique within the document. | +| `description` | string | | | +| `targetVersion` | string | | Defaults to `LATEST`. Mutually exclusive with `routing`. | +| `routing` | list<RoutingWeight> | | Multi-version traffic split. Mutually exclusive with `targetVersion`. Weights must sum to **exactly 100**. | +| `disablePublicNetworkAccess` | bool | | | +| `scaling` | mapping | | See below. 
| + +`RoutingWeight`: + +```yaml +routing: + - version: "2" + weight: 90 + - version: "3" + weight: 10 +``` + +### `spec.endpoints[i].scaling` + +| Key | Type | Notes | +|---|---|---| +| `minInstances` | int | | +| `scheduledPolicies` | list<ScheduledPolicy> | | + +`ScheduledPolicy`: + +| Key | Type | Notes | +|---|---|---| +| `name` | string | | +| `scheduleExpression` | string | Cron expression. | +| `startTime` | string | | +| `endTime` | string | | +| `target` | int | Must be `>= minInstances` when both are set. | +| `timeZone` | string | | + +## Validation rules + +All violations exit with code `2` ("bad input"). The list below is exhaustive +for the parser (`src/agentrun_cli/_utils/agentruntime_yaml.py`). + +| Rule | Trigger | +|---|---| +| `apiVersion != agentrun/v1` or `kind != AgentRuntime` | Unsupported document. | +| `metadata.name` missing or fails `[a-z0-9-]{1,63}` | | +| `spec.container` missing or not a mapping | | +| `spec.container.image` missing or empty | | +| `spec.container.imageRegistryType` not one of `ACR`, `ACREE`, `CUSTOM` | | +| `imageRegistryType=CUSTOM` but `registryConfig` missing | | +| `metadata.tags` present | SDK 0.0.200 removed the field. | +| `metadata.systemTags` present | Managed by the CLI. | +| `metadata.workspace` + `metadata.workspaceId` both set | | +| `spec.code` present | Container-only CLI.
| +| `spec.network.mode` is `PRIVATE`/`PUBLIC_AND_PRIVATE` without `vpcId` | | +| `spec.log.project` and `spec.log.logstore` not paired | | +| `spec.env` not a mapping, or non-string keys | | +| `spec.nas.mountPoints[*]` missing `serverAddr` or `mountDir` | | +| `spec.ossMount.mountPoints[*]` missing `bucketName` or `mountDir` | | +| `spec.endpoints` not a list, or `endpoints[*]` not a mapping | | +| `spec.endpoints[*].name` missing or duplicated | | +| Endpoint with both `targetVersion` and `routing` | | +| `routing` empty, items missing `version`/`weight`, non-numeric `weight`, or sum ≠ 100 | | +| `scaling.scheduledPolicies[*].target < scaling.minInstances` | | + +## Examples + +### Minimal + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: + name: my-agent +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 +``` + +After CLI auto-injection this is equivalent to: + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 + endpoints: + - name: default + targetVersion: LATEST +# system_tags=["x-agentrun-cli"], artifact_type=Container +``` + +### Production — ACREE + private network + NAS + canary endpoint + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: + name: my-agent + workspace: prod-ws +spec: + container: + image: registry-vpc.cn-hangzhou.cr.aliyuncs.com/my-ns/my-agent:v3 + command: ["python", "app.py"] + imageRegistryType: ACREE + acrInstanceId: cri-xxxxx + cpu: 4 + memory: 8192 + diskSize: 10240 # MB (10 GiB) + enableSessionIsolation: true + network: + mode: PUBLIC_AND_PRIVATE + vpcId: vpc-xxx + vswitchIds: [vsw-xxx] + securityGroupId: sg-xxx + log: + project: my-agent-logs + logstore: runtime + env: + LOG_LEVEL: info + nas: + userId: 1000 + groupId: 1000 + mountPoints: + - serverAddr: xxxx.nas.aliyuncs.com:/ + mountDir: /mnt/nas + enableTLS: true + endpoints: + - name: prod + targetVersion: 
LATEST + scaling: + minInstances: 2 + - name: canary + routing: + - {version: "2", weight: 90} + - {version: "3", weight: 10} + disablePublicNetworkAccess: true +``` + +### Custom registry + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.example.com/team/agent:v1 + imageRegistryType: CUSTOM + registryConfig: + auth: + userName: deploy-bot + password: ${REGISTRY_PASSWORD} # interpolate before piping into apply + cert: + insecure: false + network: + vpcId: vpc-xxx + vSwitchId: vsw-xxx + securityGroupId: sg-xxx +``` + +## YAML → SDK field map + +For users who need to cross-reference the SDK +(`agentrun.agent_runtime.model`): + +| YAML key | SDK field | +|---|---| +| `metadata.name` | `agent_runtime_name` | +| `metadata.description` | `description` | +| `metadata.workspace` | `workspace_name` | +| `metadata.workspaceId` | `workspace_id` | +| `spec.container.image` | `container_configuration.image` | +| `spec.container.command` | `container_configuration.command` | +| `spec.container.port` | `container_configuration.port` | +| `spec.container.imageRegistryType` | `container_configuration.image_registry_type` | +| `spec.container.acrInstanceId` | `container_configuration.acr_instance_id` | +| `spec.container.registryConfig.*` | `container_configuration.registry_config.*` | +| `spec.cpu / memory / port / diskSize` | `cpu / memory / port / disk_size` | +| `spec.enableSessionIsolation` | `enable_session_isolation` | +| `spec.protocol.type` | `protocol_configuration.type` | +| `spec.protocol.settings` | `protocol_configuration.protocol_settings` | +| `spec.network.{mode,vpcId,vswitchIds,securityGroupId}` | `network_configuration.{network_mode,vpc_id,vswitch_ids,security_group_id}` | +| `spec.healthCheck.*` | `health_check_configuration.*` | +| `spec.log.{project,logstore}` | `log_configuration.{project,logstore}` | +| `spec.env` | `environment_variables` | +| `spec.credentialName` | `credential_name` | 
+| `spec.executionRoleArn` | `execution_role_arn` | +| `spec.sessionConcurrencyLimitPerInstance` | `session_concurrency_limit_per_instance` | +| `spec.sessionIdleTimeoutSeconds` | `session_idle_timeout_seconds` | +| `spec.nas.*` | `nas_config.*` | +| `spec.ossMount.*` | `oss_mount_config.*` | +| `spec.endpoints[i].name` | `agent_runtime_endpoint_name` | +| `spec.endpoints[i].description` | `description` | +| `spec.endpoints[i].targetVersion` | `target_version` | +| `spec.endpoints[i].routing` | `routing_configuration.version_weights` | +| `spec.endpoints[i].disablePublicNetworkAccess` | `disable_public_network_access` | +| `spec.endpoints[i].scaling.*` | `scaling_config.*` | +| *(auto-injected)* `system_tags` | `system_tags = ["x-agentrun-cli"]` | +| *(auto-injected)* `artifact_type` | `artifact_type = "Container"` | diff --git a/docs/en/runtime.md b/docs/en/runtime.md index 8068b11..1eba32e 100644 --- a/docs/en/runtime.md +++ b/docs/en/runtime.md @@ -179,5 +179,6 @@ ar runtime status my-agent --wait --timeout 20m ## YAML schema -See `projects/agent-infra-build-runit/design/runtime-cli-design.md` §2 for the -full field list and validation rules. +See [**runtime-yaml.md**](./runtime-yaml.md) for the full field reference, +CLI auto-injection rules, validation table, and copy-pasteable examples +(minimal, production, and custom-registry). 
diff --git a/docs/zh/index.md b/docs/zh/index.md index bdba385..a8b4461 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -195,5 +195,5 @@ ar sandbox exec "$SANDBOX" --code "print('hello')" | `sandbox` | `sb` | 沙箱以及 file / process / context / template / browser 子组 | [sandbox.md](./sandbox.md) | | `tool` | | MCP 与 FunctionCall 工具 + 子工具调用 | [tool.md](./tool.md) | | `skill` | | 平台侧技能包 + 本地 scan / load / exec | [skill.md](./skill.md) | -| `runtime` | `rt` | 声明式 Agent Runtime 部署(容器模式) | [runtime.md](./runtime.md) | +| `runtime` | `rt` | 声明式 Agent Runtime 部署(容器模式) | [runtime.md](./runtime.md) · [YAML 参考](./runtime-yaml.md) | | `super-agent` | `sa` | 一键拉起 REPL、声明式部署、CRUD、会话管理 | [super-agent.md](./super-agent.md) | diff --git a/docs/zh/runtime-yaml.md b/docs/zh/runtime-yaml.md new file mode 100644 index 0000000..d707b32 --- /dev/null +++ b/docs/zh/runtime-yaml.md @@ -0,0 +1,425 @@ +[English](../en/runtime-yaml.md) | **简体中文** + +# `ar runtime` YAML 参考 + +本页是 [`ar runtime apply`](./runtime.md#apply) 与 [`ar runtime render`](./runtime.md#render) +所消费 YAML 的字段级规范。一份文档描述一个 Agent Runtime;endpoint 嵌入在 +`spec.endpoints` 下。CLI 仅支持容器模式 —— `spec.code`、`metadata.tags`、 +`metadata.systemTags` 一律拒绝(详见[校验规则](#校验规则))。 + +## 目录 + +- [文档结构](#文档结构) +- [CLI 自动注入](#cli-自动注入) +- [`metadata`](#metadata) +- [`spec.container`](#speccontainer) +- [`spec` 资源与运行时开关](#spec-资源与运行时开关) +- [`spec.protocol`](#specprotocol) +- [`spec.network`](#specnetwork) +- [`spec.healthCheck`](#spechealthcheck) +- [`spec.log`](#speclog) +- [`spec.env`](#specenv) +- [`spec.nas`](#specnas) +- [`spec.ossMount`](#specossmount) +- [`spec.endpoints`](#specendpoints) +- [校验规则](#校验规则) +- [示例](#示例) +- [YAML → SDK 字段映射](#yaml--sdk-字段映射) + +## 文档结构 + +```yaml +apiVersion: agentrun/v1 # 必填,固定值 +kind: AgentRuntime # 必填,固定值 +metadata: {...} # 见下文 +spec: {...} # 见下文 +``` + +支持多文档 YAML(`---` 分隔),每篇按顺序独立解析并依次 apply。空流报错。 + +## CLI 自动注入 + +下列字段由 CLI 管控,**不允许**在 YAML 中出现: + +| 注入字段 | 值 | 说明 | +|---|---|---| +| `system_tags` | 
`["x-agentrun-cli"]` | SDK 0.0.200 唯一可写入的标签位;`ar runtime list --created-by-cli` 依赖此标签。 | +| `artifact_type` | `Container` | 本 CLI 只交付容器模式 runtime。 | + +当 `spec.endpoints` **整段省略**时,CLI 还会注入: + +```yaml +endpoints: + - name: default + targetVersion: LATEST +``` + +显式写 `spec.endpoints: []` 时不会注入 —— 不创建任何 endpoint; +默认还会删除远端已存在的 endpoint(除非加 `--no-prune-endpoints`)。 + +## `metadata` + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `name` | string | ✓ | 必须匹配 `[a-z0-9-]{1,63}`。映射为 `agent_runtime_name`。 | +| `description` | string | | 自由文本。 | +| `workspace` | string | | 工作空间**名称**;与 `workspaceId` 互斥。省略时落到账号默认工作空间。 | +| `workspaceId` | string | | 工作空间 ID;与 `workspace` 互斥。 | +| `tags` | — | ✗ | 已被 SDK 0.0.200 移除,禁止写入。 | +| `systemTags` | — | ✗ | 由 CLI 管控,禁止写入。 | + +## `spec.container` + +必填块。定义容器镜像与凭证。 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `image` | string | ✓ | OCI 镜像引用。 | +| `command` | list<string> | | 覆盖镜像的 `ENTRYPOINT`/`CMD`。 | +| `port` | int | | 容器监听端口。若设置,则覆盖 `spec.port`。 | +| `imageRegistryType` | 枚举 | | `ACR`、`ACREE`、`CUSTOM` 之一。 | +| `acrInstanceId` | string | | `imageRegistryType=ACREE` 时建议设置。 | +| `registryConfig` | 映射 | 条件必填 | `imageRegistryType=CUSTOM` 时**必填**;其它情况允许且会被解析。 | + +### `spec.container.registryConfig` + +```yaml +registryConfig: + auth: + userName: + password: # 敏感,建议通过环境变量注入 + cert: + insecure: + rootCaCertBase64: + network: + vpcId: + vSwitchId: + securityGroupId: +``` + +三个子块(`auth`、`cert`、`network`)各自可选;但 `registryConfig` 本身在 +`CUSTOM` 下必填。 + +## `spec` 资源与运行时开关 + +| 字段 | 类型 | 默认 | 说明 | +|---|---|---|---| +| `cpu` | float | `2` | 核数。 | +| `memory` | int | `4096` | MB。 | +| `port` | int | `9000` | 与 `spec.container.port` 同时存在时以后者为准。 | +| `diskSize` | int | | MB。 | +| `enableSessionIsolation` | bool | | | +| `credentialName` | string | | 引用已注册凭证。 | +| `executionRoleArn` | string | | runtime 承担的 RAM 角色 ARN。 | +| `sessionConcurrencyLimitPerInstance` | int | | | +| `sessionIdleTimeoutSeconds` | int | | | + +## `spec.protocol` + +| 字段 | 类型 | 
默认 | 说明 | +|---|---|---|---| +| `type` | 枚举 | `HTTP` | `HTTP`、`MCP`、`SUPER_AGENT` 之一。 | +| `settings` | list<ProtocolSetting> | | 多路由进阶配置。 | + +`ProtocolSetting` 字段(除非另注,均为可选字符串): + +| 字段 | 说明 | +|---|---| +| `type` | | +| `name` | | +| `path` | | +| `pathPrefix` | | +| `method` | | +| `requestContentType` | | +| `responseContentType` | | +| `headers` | | +| `inputBodyJsonSchema` | | +| `outputBodyJsonSchema` | | +| `a2aAgentCard` | | +| `a2aAgentCardUrl` | | +| `config` | | + +## `spec.network` + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `mode` | 枚举 | | `PUBLIC`(默认)、`PRIVATE`、`PUBLIC_AND_PRIVATE` 之一。 | +| `vpcId` | string | 条件必填 | `mode ∈ {PRIVATE, PUBLIC_AND_PRIVATE}` 时**必填**。 | +| `vswitchIds` | list<string> | | | +| `securityGroupId` | string | | | + +## `spec.healthCheck` + +| 字段 | 类型 | 说明 | +|---|---|---| +| `httpGetUrl` | string | | +| `initialDelaySeconds` | int | | +| `periodSeconds` | int | | +| `timeoutSeconds` | int | | +| `failureThreshold` | int | | +| `successThreshold` | int | | + +## `spec.log` + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `project` | string | 与 `logstore` 成对 | SLS project。 | +| `logstore` | string | 与 `project` 成对 | SLS logstore。 | + +要么两个键同时出现,要么整段省略;只写一个键会被拒绝。 + +## `spec.env` + +`string → string` 的映射。非字符串值会被强制转字符串。 + +```yaml +env: + LOG_LEVEL: info + HTTP_PROXY: http://proxy.internal:8080 +``` + +## `spec.nas` + +| 字段 | 类型 | 说明 | +|---|---|---| +| `userId` | int | | +| `groupId` | int | | +| `mountPoints` | list<NasMountPoint> | 可选。 | + +`NasMountPoint`: + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `serverAddr` | string | ✓ | | +| `mountDir` | string | ✓ | 容器内绝对路径。 | +| `enableTLS` | bool | | | + +## `spec.ossMount` + +| 字段 | 类型 | 说明 | +|---|---|---| +| `mountPoints` | list<OssMountPoint> | | + +`OssMountPoint`: + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `bucketName` | string | ✓ | | +| `mountDir` | string | ✓ | 容器内绝对路径。 | +| `bucketPath` | string | | bucket 内子路径。 | +| `endpoint` | string | | OSS endpoint 覆盖。 | +| 
`readOnly` | bool | | | + +## `spec.endpoints` + +允许三种写法: + +| YAML | 行为 | +|---|---| +| 整段省略 | CLI 注入 `[{name: default, targetVersion: LATEST}]`。 | +| `endpoints: []` | 不创建任何 endpoint。`--prune-endpoints`(默认开)时会删除远端已存在的 endpoint。 | +| `endpoints: [...]` | 按 `name` 逐项 reconcile。 | + +每个 endpoint 字段: + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `name` | string | ✓ | 同一文档内唯一。 | +| `description` | string | | | +| `targetVersion` | string | | 默认 `LATEST`。与 `routing` 互斥。 | +| `routing` | list<RoutingWeight> | | 多版本流量分配。与 `targetVersion` 互斥。权重之和必须**正好 100**。 | +| `disablePublicNetworkAccess` | bool | | | +| `scaling` | 映射 | | 见下文。 | + +`RoutingWeight`: + +```yaml +routing: + - version: "2" + weight: 90 + - version: "3" + weight: 10 +``` + +### `spec.endpoints[i].scaling` + +| 字段 | 类型 | 说明 | +|---|---|---| +| `minInstances` | int | | +| `scheduledPolicies` | list<ScheduledPolicy> | | + +`ScheduledPolicy`: + +| 字段 | 类型 | 说明 | +|---|---|---| +| `name` | string | | +| `scheduleExpression` | string | Cron 表达式。 | +| `startTime` | string | | +| `endTime` | string | | +| `target` | int | 与 `minInstances` 同时存在时必须 `>= minInstances`。 | +| `timeZone` | string | | + +## 校验规则 + +所有违反均以退出码 `2`("bad input")退出。下表对解析器 +(`src/agentrun_cli/_utils/agentruntime_yaml.py`)是穷尽的。 + +| 规则 | 触发条件 | +|---|---| +| `apiVersion != agentrun/v1` 或 `kind != AgentRuntime` | 不支持的文档。 | +| `metadata.name` 缺失或不符合 `[a-z0-9-]{1,63}` | | +| `spec.container` 缺失或不是映射 | | +| `spec.container.image` 缺失或为空 | | +| `spec.container.imageRegistryType` 不是 `ACR`、`ACREE`、`CUSTOM` 之一 | | +| `imageRegistryType=CUSTOM` 但 `registryConfig` 缺失 | | +| 出现 `metadata.tags` | SDK 0.0.200 已移除该字段。 | +| 出现 `metadata.systemTags` | 由 CLI 管控。 | +| `metadata.workspace` 与 `metadata.workspaceId` 同时出现 | | +| 出现 `spec.code` | 本 CLI 仅支持 Container 模式。 | +| `spec.network.mode` 是 `PRIVATE`/`PUBLIC_AND_PRIVATE` 但缺 `vpcId` | | +| `spec.log.project` 与 `spec.log.logstore` 单边出现 | | +| `spec.env` 不是映射,或键不是字符串 | | +| `spec.nas.mountPoints[*]` 缺 `serverAddr` 或 
`mountDir` | | +| `spec.ossMount.mountPoints[*]` 缺 `bucketName` 或 `mountDir` | | +| `spec.endpoints` 不是列表,或 `endpoints[*]` 不是映射 | | +| `spec.endpoints[*].name` 缺失或重复 | | +| 同一 endpoint 同时设置 `targetVersion` 与 `routing` | | +| `routing` 为空、缺 `version`/`weight`、`weight` 非数字,或权重之和 ≠ 100 | | +| `scaling.scheduledPolicies[*].target < scaling.minInstances` | | + +## 示例 + +### 最小示例 + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: + name: my-agent +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 +``` + +经 CLI 自动注入后等价于: + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.cn-hangzhou.aliyuncs.com/my-ns/my-agent:v1 + endpoints: + - name: default + targetVersion: LATEST +# system_tags=["x-agentrun-cli"], artifact_type=Container +``` + +### 生产示例 —— ACREE + 私网 + NAS + 金丝雀 + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: + name: my-agent + workspace: prod-ws +spec: + container: + image: registry-vpc.cn-hangzhou.cr.aliyuncs.com/my-ns/my-agent:v3 + command: ["python", "app.py"] + imageRegistryType: ACREE + acrInstanceId: cri-xxxxx + cpu: 4 + memory: 8192 + diskSize: 10240 # MB(10 GiB) + enableSessionIsolation: true + network: + mode: PUBLIC_AND_PRIVATE + vpcId: vpc-xxx + vswitchIds: [vsw-xxx] + securityGroupId: sg-xxx + log: + project: my-agent-logs + logstore: runtime + env: + LOG_LEVEL: info + nas: + userId: 1000 + groupId: 1000 + mountPoints: + - serverAddr: xxxx.nas.aliyuncs.com:/ + mountDir: /mnt/nas + enableTLS: true + endpoints: + - name: prod + targetVersion: LATEST + scaling: + minInstances: 2 + - name: canary + routing: + - {version: "2", weight: 90} + - {version: "3", weight: 10} + disablePublicNetworkAccess: true +``` + +### 自建 registry + +```yaml +apiVersion: agentrun/v1 +kind: AgentRuntime +metadata: {name: my-agent} +spec: + container: + image: registry.example.com/team/agent:v1 + imageRegistryType: CUSTOM + registryConfig: + auth: 
+ userName: deploy-bot + password: ${REGISTRY_PASSWORD} # apply 前先做模板替换 + cert: + insecure: false + network: + vpcId: vpc-xxx + vSwitchId: vsw-xxx + securityGroupId: sg-xxx +``` + +## YAML → SDK 字段映射 + +需要与 SDK(`agentrun.agent_runtime.model`)交叉对照时: + +| YAML 键 | SDK 字段 | +|---|---| +| `metadata.name` | `agent_runtime_name` | +| `metadata.description` | `description` | +| `metadata.workspace` | `workspace_name` | +| `metadata.workspaceId` | `workspace_id` | +| `spec.container.image` | `container_configuration.image` | +| `spec.container.command` | `container_configuration.command` | +| `spec.container.port` | `container_configuration.port` | +| `spec.container.imageRegistryType` | `container_configuration.image_registry_type` | +| `spec.container.acrInstanceId` | `container_configuration.acr_instance_id` | +| `spec.container.registryConfig.*` | `container_configuration.registry_config.*` | +| `spec.cpu / memory / port / diskSize` | `cpu / memory / port / disk_size` | +| `spec.enableSessionIsolation` | `enable_session_isolation` | +| `spec.protocol.type` | `protocol_configuration.type` | +| `spec.protocol.settings` | `protocol_configuration.protocol_settings` | +| `spec.network.{mode,vpcId,vswitchIds,securityGroupId}` | `network_configuration.{network_mode,vpc_id,vswitch_ids,security_group_id}` | +| `spec.healthCheck.*` | `health_check_configuration.*` | +| `spec.log.{project,logstore}` | `log_configuration.{project,logstore}` | +| `spec.env` | `environment_variables` | +| `spec.credentialName` | `credential_name` | +| `spec.executionRoleArn` | `execution_role_arn` | +| `spec.sessionConcurrencyLimitPerInstance` | `session_concurrency_limit_per_instance` | +| `spec.sessionIdleTimeoutSeconds` | `session_idle_timeout_seconds` | +| `spec.nas.*` | `nas_config.*` | +| `spec.ossMount.*` | `oss_mount_config.*` | +| `spec.endpoints[i].name` | `agent_runtime_endpoint_name` | +| `spec.endpoints[i].description` | `description` | +| `spec.endpoints[i].targetVersion` | 
`target_version` | +| `spec.endpoints[i].routing` | `routing_configuration.version_weights` | +| `spec.endpoints[i].disablePublicNetworkAccess` | `disable_public_network_access` | +| `spec.endpoints[i].scaling.*` | `scaling_config.*` | +| *(自动注入)* `system_tags` | `system_tags = ["x-agentrun-cli"]` | +| *(自动注入)* `artifact_type` | `artifact_type = "Container"` | diff --git a/docs/zh/runtime.md b/docs/zh/runtime.md index 764a91f..fc0f505 100644 --- a/docs/zh/runtime.md +++ b/docs/zh/runtime.md @@ -176,4 +176,5 @@ ar runtime status my-agent --wait --timeout 20m ## YAML schema -完整字段列表与校验规则见 `projects/agent-infra-build-runit/design/runtime-cli-design.md` §2。 +完整字段参考、CLI 自动注入规则、校验表与可直接复用的示例(最小、生产、自建 registry) +见 [**runtime-yaml.md**](./runtime-yaml.md)。 From 4b96e87482ef3589adbc0b3ed0ebe65e6a961b19 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Wed, 20 May 2026 10:54:48 +0800 Subject: [PATCH 7/8] fix(runtime): inject CPU/memory/port defaults; skip endpoints on --no-wait; silence SDK validation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three problems surfaced when running the README's minimal example `ar runtime apply -f runtime.yaml`: 1. HTTP 400 "CPU is required; Memory is required; Port is required" The CLI passed cpu=null/memory=null/port=null through to the SDK even though the docs already promised default 2 cores / 4096 MB / 9000. Defaults were never actually applied. Add DEFAULT_CPU / DEFAULT_MEMORY_MB / DEFAULT_PORT in runtime_constants and inject them in to_runtime_create_input / to_runtime_update_input. `spec.container.port` keeps its documented precedence over `spec.port`, both fall back to DEFAULT_PORT. 2. HTTP 400 "runtime must be in READY status to create endpoints" under --no-wait apply_cmd unconditionally called reconcile_endpoints after reconcile_runtime — fine under --wait (we'd already polled runtime to READY), but under --no-wait the runtime is still CREATING and the backend rejects endpoint create. 
Gate reconcile_endpoints + poll_many_parallel on `wait`. Under --no-wait we just submit the runtime; an interactive run prints a stderr notice telling the user to re-apply once the runtime is READY (TTY-only so it doesn't pollute scripted JSON output). 3. SDK pydantic warning spam Every `list_all()` call deserializes every runtime in the workspace, and the SDK emits "validate type failed" WARNINGs whenever a server-side record doesn't match its current schema (other people's runtimes with codeConfiguration.language=java17, empty logConfiguration, etc.). A single apply emitted ~10 lines of noise. Install a logging.Filter on the `agentrun-logger` logger that drops exactly the "validate type failed" message. `--debug` removes the filter so debugging shows full logs. Docs: - runtime.md (en + zh) apply Options table: document the new --no-wait semantics; add a paragraph explaining the auto-injected resource defaults. Tests: - test_create_input_user_values_override_defaults — explicit values win. - test_create_input_container_port_wins_over_spec_port — precedence. - test_update_input_applies_same_defaults — symmetry with create. - Existing test_create_input_injects_system_tag_and_container_artifact now also asserts cpu=2.0 / memory=4096 / port=9000. - test_apply_create_happy_path tightened: under --no-wait, create_endpoint MUST NOT be called and endpoints list must be empty. - test_apply_update_path tightened: under --wait, create_endpoint IS called after the runtime reaches READY. Local gate: ruff + mypy clean, 525/525 tests pass, coverage 95.25%. 
Signed-off-by: Sodawyx Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Sodawyx --- docs/en/runtime.md | 6 +++- docs/zh/runtime.md | 5 ++- src/agentrun_cli/_utils/runtime_constants.py | 7 ++++ src/agentrun_cli/_utils/runtime_render.py | 25 ++++++++++---- .../commands/runtime/apply_cmd.py | 26 ++++++++++---- src/agentrun_cli/main.py | 27 +++++++++++++-- tests/integration/test_runtime_cmd.py | 6 ++++ tests/unit/test_runtime_render.py | 34 +++++++++++++++++++ 8 files changed, 119 insertions(+), 17 deletions(-) diff --git a/docs/en/runtime.md b/docs/en/runtime.md index 1eba32e..1862232 100644 --- a/docs/en/runtime.md +++ b/docs/en/runtime.md @@ -43,10 +43,14 @@ ar runtime apply -f FILE [--wait/--no-wait] [--timeout DURATION] | Flag | Type | Required | Default | Description | |------|------|----------|---------|-------------| | `-f`, `--file` | path | yes | | YAML file path (supports multi-document). | -| `--wait/--no-wait` | flag | no | `--wait` | Poll runtime + endpoints to final status. | +| `--wait/--no-wait` | flag | no | `--wait` | Poll runtime + endpoints to final status. Under `--no-wait` the runtime is submitted but **endpoints are not reconciled** — the backend rejects endpoint create/update while the runtime is still `CREATING`/`UPDATING`. Re-run apply once it reaches `READY`. | | `--timeout` | duration | no | `10m` | Polling timeout. Accepts `Ns`, `Nm`, `Nh`, or bare seconds. | | `--prune-endpoints/--no-prune-endpoints` | flag | no | `--prune-endpoints` | Delete remote endpoints absent from the YAML. | +The CLI injects sensible defaults for `cpu` (2 cores), `memory` (4096 MB) and +`port` (9000) when the YAML omits them — the backend rejects null values for +these three fields with HTTP 400. 
+ ### Examples ```bash diff --git a/docs/zh/runtime.md b/docs/zh/runtime.md index fc0f505..0f0fcaf 100644 --- a/docs/zh/runtime.md +++ b/docs/zh/runtime.md @@ -41,10 +41,13 @@ ar runtime apply -f FILE [--wait/--no-wait] [--timeout DURATION] | Flag | Type | Required | Default | Description | |------|------|----------|---------|-------------| | `-f`, `--file` | path | yes | | YAML 文件路径(支持多文档)。 | -| `--wait/--no-wait` | flag | no | `--wait` | 轮询 runtime + endpoints 到终态。 | +| `--wait/--no-wait` | flag | no | `--wait` | 轮询 runtime + endpoints 到终态。`--no-wait` 时仅提交 runtime 创建/更新,**不会 reconcile endpoint** —— 后端在 runtime 处于 `CREATING`/`UPDATING` 时会拒绝 endpoint create/update。等 runtime 到 `READY` 后再 apply 一次即可。 | | `--timeout` | duration | no | `10m` | 轮询超时。支持 `Ns` / `Nm` / `Nh` 或裸秒数。 | | `--prune-endpoints/--no-prune-endpoints` | flag | no | `--prune-endpoints` | 删除远端存在但 YAML 缺失的 endpoint。 | +YAML 中省略 `cpu` / `memory` / `port` 时,CLI 会自动注入合理默认值(2 核 / +4096 MB / 9000)—— 后端对这三个字段的 null 会回复 HTTP 400。 + ### Examples ```bash diff --git a/src/agentrun_cli/_utils/runtime_constants.py b/src/agentrun_cli/_utils/runtime_constants.py index daa0d1d..4fcbab2 100644 --- a/src/agentrun_cli/_utils/runtime_constants.py +++ b/src/agentrun_cli/_utils/runtime_constants.py @@ -14,6 +14,13 @@ DEFAULT_ENDPOINT_NAME = "default" DEFAULT_TARGET_VERSION = "LATEST" +# Resource defaults — the backend rejects CreateAgentRuntime with HTTP 400 +# "CPU is required; Memory is required; Port is required" when these are null. +# Injecting them in the render layer keeps the minimal YAML example runnable. 
+DEFAULT_CPU = 2.0 # cores +DEFAULT_MEMORY_MB = 4096 +DEFAULT_PORT = 9000 + POLL_INITIAL_INTERVAL = 3.0 # seconds POLL_MAX_INTERVAL = 10.0 # seconds (cap of exponential backoff) POLL_BACKOFF_FACTOR = 1.5 diff --git a/src/agentrun_cli/_utils/runtime_render.py b/src/agentrun_cli/_utils/runtime_render.py index a42b6da..10319aa 100644 --- a/src/agentrun_cli/_utils/runtime_render.py +++ b/src/agentrun_cli/_utils/runtime_render.py @@ -17,7 +17,10 @@ ) from agentrun_cli._utils.runtime_constants import ( ARTIFACT_TYPE_CONTAINER, + DEFAULT_CPU, DEFAULT_ENDPOINT_NAME, + DEFAULT_MEMORY_MB, + DEFAULT_PORT, DEFAULT_TARGET_VERSION, SYSTEM_TAG_CLI, ) @@ -119,6 +122,16 @@ def _build_container(p: ParsedContainer, m): ) +def _resolve_port(p: ParsedAgentRuntime) -> int: + """container.port > spec.port > DEFAULT_PORT — matches the documented + precedence and prevents the backend's 'Port is required' 400.""" + if p.container.port is not None: + return p.container.port + if p.port is not None: + return p.port + return DEFAULT_PORT + + def to_runtime_create_input(p: ParsedAgentRuntime): m = _sdk_models() return m["create_input"]( @@ -129,9 +142,9 @@ def to_runtime_create_input(p: ParsedAgentRuntime): artifact_type=ARTIFACT_TYPE_CONTAINER, system_tags=[SYSTEM_TAG_CLI], container_configuration=_build_container(p.container, m), - cpu=p.cpu, - memory=p.memory, - port=p.port, + cpu=p.cpu if p.cpu is not None else DEFAULT_CPU, + memory=p.memory if p.memory is not None else DEFAULT_MEMORY_MB, + port=_resolve_port(p), disk_size=p.disk_size, enable_session_isolation=p.enable_session_isolation, protocol_configuration=_build_protocol(p.protocol, m), @@ -157,9 +170,9 @@ def to_runtime_update_input(p: ParsedAgentRuntime): artifact_type=ARTIFACT_TYPE_CONTAINER, system_tags=[SYSTEM_TAG_CLI], container_configuration=_build_container(p.container, m), - cpu=p.cpu, - memory=p.memory, - port=p.port, + cpu=p.cpu if p.cpu is not None else DEFAULT_CPU, + memory=p.memory if p.memory is not None else 
DEFAULT_MEMORY_MB, + port=_resolve_port(p), disk_size=p.disk_size, enable_session_isolation=p.enable_session_isolation, protocol_configuration=_build_protocol(p.protocol, m), diff --git a/src/agentrun_cli/commands/runtime/apply_cmd.py b/src/agentrun_cli/commands/runtime/apply_cmd.py index 1aa30b8..b4fc30b 100644 --- a/src/agentrun_cli/commands/runtime/apply_cmd.py +++ b/src/agentrun_cli/commands/runtime/apply_cmd.py @@ -116,6 +116,7 @@ def apply_cmd(ctx, file_path, wait, timeout, prune_endpoints): rt_res = reconcile_runtime(parsed, client=runtime_cls) runtime = rt_res.runtime + ep_actions: list = [] if wait: poll_until_final( runtime, @@ -123,14 +124,19 @@ def apply_cmd(ctx, file_path, wait, timeout, prune_endpoints): cfg=poll_cfg, on_tick=lambda r, e, p=parsed: _progress(sys.stderr, p, r, e), ) + # Endpoint create/update is rejected by the backend with HTTP 400 + # ("runtime must be in READY status") whenever the runtime isn't + # READY yet — so we only reconcile endpoints after the runtime has + # reached a final status. Under --no-wait the runtime is still in + # CREATING/UPDATING when we return, so we skip endpoint + # reconciliation entirely and the user can re-run apply once the + # runtime is READY. 
+ ep_actions = reconcile_endpoints( + runtime, + desired=parsed.endpoints, + prune=prune_endpoints, + ) - ep_actions = reconcile_endpoints( - runtime, - desired=parsed.endpoints, - prune=prune_endpoints, - ) - - if wait: in_flight = [ a.endpoint for a in ep_actions @@ -143,6 +149,12 @@ def apply_cmd(ctx, file_path, wait, timeout, prune_endpoints): concurrency=ENDPOINT_POLL_CONCURRENCY, on_tick=lambda r, e, p=parsed: _progress(sys.stderr, p, r, e), ) + elif sys.stderr.isatty(): + sys.stderr.write( + f"[runtime {parsed.name}] --no-wait: runtime submitted; " + "endpoints will be reconciled on a subsequent apply once the " + "runtime reaches READY.\n" + ) results.append( { diff --git a/src/agentrun_cli/main.py b/src/agentrun_cli/main.py index 50fa743..de48631 100644 --- a/src/agentrun_cli/main.py +++ b/src/agentrun_cli/main.py @@ -10,6 +10,7 @@ agentrun super-agent run """ +import logging import os import click @@ -26,6 +27,24 @@ from agentrun_cli.commands.tool_cmd import tool_group +class _DropSdkValidationWarnings(logging.Filter): + """Drop the SDK's pydantic 'validate type failed' WARNINGs. + + They fire from ``agentrun.utils.model.from_object`` whenever the SDK + deserializes a server-side record whose shape doesn't match its current + pydantic schema (e.g. a runtime someone else created with + ``codeConfiguration.language=java17`` or with an empty ``logConfiguration``). + That noise is not actionable for the CLI user — a single ``ar runtime list`` + can emit a dozen of them. ``--debug`` re-enables full logging. 
+ """ + + def filter(self, record: logging.LogRecord) -> bool: + return "validate type failed" not in record.getMessage() + + +logging.getLogger("agentrun-logger").addFilter(_DropSdkValidationWarnings()) + + class AliasGroup(click.Group): """Click Group that supports hidden command aliases.""" @@ -95,9 +114,13 @@ def cli(ctx: click.Context, profile, region, output, debug): ctx.obj["output"] = output if debug: - import logging - logging.basicConfig(level=logging.DEBUG) + # In debug mode users want to see the SDK's validation warnings, so + # strip the filter we installed at import time. + sdk_logger = logging.getLogger("agentrun-logger") + for f in list(sdk_logger.filters): + if isinstance(f, _DropSdkValidationWarnings): + sdk_logger.removeFilter(f) # Register sub-command groups diff --git a/tests/integration/test_runtime_cmd.py b/tests/integration/test_runtime_cmd.py index 8bc2239..595ecf1 100644 --- a/tests/integration/test_runtime_cmd.py +++ b/tests/integration/test_runtime_cmd.py @@ -174,6 +174,10 @@ def _refresh(self=None, *a, **k): assert out[0]["action"] == "create" assert out[0]["runtime"]["name"] == "my-agent" fake_runtime_cls.create.assert_called_once() + # --no-wait must not touch endpoints — the backend rejects endpoint + # create while the runtime is CREATING/UPDATING. + created.create_endpoint.assert_not_called() + assert out[0]["endpoints"] == [] def test_apply_update_path(monkeypatch): @@ -205,6 +209,8 @@ def test_apply_update_path(monkeypatch): assert result.exit_code == 0, result.output out = json.loads(result.output) assert out[0]["action"] == "update" + # Default --wait path reconciles endpoints after runtime reaches READY. 
+ existing.create_endpoint.assert_called_once() def test_apply_runtime_failed_exits_5(monkeypatch): diff --git a/tests/unit/test_runtime_render.py b/tests/unit/test_runtime_render.py index ec438ad..c0d1a5b 100644 --- a/tests/unit/test_runtime_render.py +++ b/tests/unit/test_runtime_render.py @@ -53,6 +53,40 @@ def test_create_input_injects_system_tag_and_container_artifact(): assert inp.container_configuration.image == "img:v1" # code_configuration must not be set assert inp.code_configuration is None + # Defaults injected — backend rejects nulls for these three fields. + assert inp.cpu == 2.0 + assert inp.memory == 4096 + assert inp.port == 9000 + + +def test_create_input_user_values_override_defaults(): + p = ParsedAgentRuntime( + name="my-agent", + container=ParsedContainer(image="img:v1"), + cpu=4, + memory=16384, + port=8080, + ) + inp = to_runtime_create_input(p) + assert inp.cpu == 4 + assert inp.memory == 16384 + assert inp.port == 8080 + + +def test_create_input_container_port_wins_over_spec_port(): + p = ParsedAgentRuntime( + name="my-agent", + container=ParsedContainer(image="img:v1", port=7777), + port=9000, + ) + assert to_runtime_create_input(p).port == 7777 + + +def test_update_input_applies_same_defaults(): + upd = to_runtime_update_input(_minimal_parsed()) + assert upd.cpu == 2.0 + assert upd.memory == 4096 + assert upd.port == 9000 def test_endpoints_none_injects_default(): From 5bc9e8147a54af3b69499a75cee46044731c6ae8 Mon Sep 17 00:00:00 2001 From: Sodawyx Date: Sun, 24 May 2026 03:50:26 +0800 Subject: [PATCH 8/8] chore(pyproject.toml): Update agentrun dependency version This commit updates the `agentrun-sdk` dependency to version `0.0.37` or higher in the project configuration file. This ensures that we are using the latest stable release of the SDK which may include bug fixes and new features. 
Co-developed-by: Aone Copilot Signed-off-by: Sodawyx --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 26d9793..9b92733 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ ] dependencies = [ - "agentrun-inner-test[core]==0.0.200", + "agentrun-sdk[core]>=0.0.37", "pyyaml>=6.0", "questionary>=2.0", ]