Add gigachat3 parser v1

ajpqs · ajpqs · commit 4e43da1f6725 · 2026-01-29T15:06:20.000Z
diff --git a/docs/advanced_features/server_arguments.md b/docs/advanced_features/server_arguments.md
@@ -218,7 +218,7 @@ Please consult the documentation below and [server_args.py](https://github.com/s
 | `--file-storage-path` | The path of the file storage in backend. | `sglang_storage` | Type: str |
 | `--enable-cache-report` | Return number of cached tokens in usage.prompt_tokens_details for each openai request. | `False` | bool flag (set to enable) |
 | `--reasoning-parser` | Specify the parser for reasoning models. Supported parsers: [deepseek-r1, deepseek-v3, glm45, gpt-oss, kimi, qwen3, qwen3-thinking, step3]. | `None` | `deepseek-r1`, `deepseek-v3`, `glm45`, `gpt-oss`, `kimi`, `qwen3`, `qwen3-thinking`, `step3` |
-| `--tool-call-parser` | Specify the parser for handling tool-call interactions. Supported parsers: [deepseekv3, deepseekv31, glm, glm45, glm47, gpt-oss, kimi_k2, llama3, mistral, pythonic, qwen, qwen25, qwen3_coder, step3]. | `None` | `deepseekv3`, `deepseekv31`, `glm`, `glm45`, `glm47`, `gpt-oss`, `kimi_k2`, `llama3`, `mistral`, `pythonic`, `qwen`, `qwen25`, `qwen3_coder`, `step3` |
+| `--tool-call-parser` | Specify the parser for handling tool-call interactions. Supported parsers: [deepseekv3, deepseekv31, glm, glm45, glm47, gpt-oss, kimi_k2, llama3, mistral, pythonic, qwen, qwen25, qwen3_coder, step3, gigachat3]. | `None` | `deepseekv3`, `deepseekv31`, `glm`, `glm45`, `glm47`, `gpt-oss`, `kimi_k2`, `llama3`, `mistral`, `pythonic`, `qwen`, `qwen25`, `qwen3_coder`, `step3`, `gigachat3` |
 | `--tool-server` | Either 'demo' or a comma-separated list of tool server urls to use for the model. If not specified, no tool server will be used. | `None` | Type: str |
 | `--sampling-defaults` | Where to get default sampling parameters. 'openai' uses SGLang/OpenAI defaults (temperature=1.0, top_p=1.0, etc.). 'model' uses the model's generation_config.json to get the recommended sampling parameters if available. Default is 'model'. | `model` | `openai`, `model` |
 
diff --git a/python/sglang/srt/function_call/function_call_parser.py b/python/sglang/srt/function_call/function_call_parser.py
@@ -14,6 +14,7 @@
 from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
 from sglang.srt.function_call.deepseekv31_detector import DeepSeekV31Detector
 from sglang.srt.function_call.deepseekv32_detector import DeepSeekV32Detector
+from sglang.srt.function_call.gigachat3_detector import GigaChat3Detector
 from sglang.srt.function_call.glm4_moe_detector import Glm4MoeDetector
 from sglang.srt.function_call.glm47_moe_detector import Glm47MoeDetector
 from sglang.srt.function_call.gpt_oss_detector import GptOssDetector
@@ -66,6 +67,7 @@ class FunctionCallParser:
         "trinity": TrinityDetector,
         "interns1": InternlmDetector,
         "hermes": HermesDetector,
+        "gigachat3": GigaChat3Detector,
     }
 
     def __init__(self, tools: List[Tool], tool_call_parser: str):
diff --git a/python/sglang/srt/function_call/gigachat3_detector.py b/python/sglang/srt/function_call/gigachat3_detector.py
@@ -0,0 +1,234 @@
+import json
+import logging
+import re
+from typing import List
+
+from sglang.srt.entrypoints.openai.protocol import Tool
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import (
+    StreamingParseResult,
+    ToolCallItem,
+    _GetInfoFunc,
+)
+
+logger = logging.getLogger(__name__)
+
+# Captures everything after the "function call" role header (the JSON payload).
+REGEX_FUNCTION_CALL = re.compile(
+    r"function call<\|role_sep\|>\n(.*)",
+    re.DOTALL,
+)
+
+# Captures the text that precedes the first message separator.
+REGEX_CONTENT_PATTERN = re.compile(
+    r"^(.*?)<\|message_sep\|>",
+    re.DOTALL,
+)
+
+# Captures the value of the first "name" string field in the payload.
+NAME_REGEX = re.compile(
+    r'"name"\s*:\s*"([^"]*)"',
+    re.DOTALL,
+)
+
+# Captures everything after the first "arguments" key (may be incomplete JSON).
+ARGS_REGEX = re.compile(
+    r'"arguments"\s*:\s*(.*)',
+    re.DOTALL,
+)
+
+
+class GigaChat3Detector(BaseFormatDetector):
+    """
+    Detector for GigaChat3 tool calls.
+
+    The module-level patterns expect optional plain content terminated by
+    "<|message_sep|>", then the header "function call<|role_sep|>" and a
+    newline, then a JSON object with "name" and "arguments" keys. A trailing
+    "</s>" is tolerated. Streamed items always use tool_index 0.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        # True once the function-call header has been found in the buffer.
+        self.tool_started: bool = False
+        # True once the tool name has been emitted.
+        self.tool_name_sent: bool = False
+        # True once "<|message_sep|>" has been seen in a chunk.
+        self.end_content: bool = False
+        # Concatenation of every non-empty chunk seen by the streaming parser.
+        self._buffer: str = ""
+        self.prev_tool_call_arr: list[dict] = []
+
+    def has_tool_call(self, text: str) -> bool:
+        """Check if text contains a tool call marker"""
+        return "function call<|role_sep|>\n" in text
+
+    def detect_and_parse(
+        self,
+        text: str,
+        tools: List[Tool],
+    ) -> StreamingParseResult:
+        """
+        Non-streaming parsing of complete model output.
+
+        normal_text is the text before "<|message_sep|>", or the whole output
+        when there is no separator. A tool call is returned only when the
+        payload is a JSON object with a "name" key and a dict "arguments".
+        If the payload is not valid JSON, the whole output (without a trailing
+        "</s>") is returned as normal_text with no calls.
+        """
+        logger.debug("[GigaChat3] detect_and_parse: %s", text)
+        model_output = text
+        function_call = None
+        if model_output.rstrip().endswith("</s>"):
+            model_output = model_output[: model_output.rfind("</s>")]
+        m_func = REGEX_FUNCTION_CALL.search(model_output)
+        if m_func:
+            try:
+                function_call = json.loads(m_func.group(1), strict=False)
+            except json.JSONDecodeError as e:
+                logger.warning("[GigaChat3] JSON decode error: %s", e)
+                return StreamingParseResult(
+                    normal_text=model_output,
+                    calls=[],
+                )
+            if not (
+                isinstance(function_call, dict)
+                and "name" in function_call
+                and isinstance(function_call.get("arguments"), dict)
+            ):
+                function_call = None
+        # With DOTALL the pattern matches whenever the separator is present,
+        # so a missing match means there is no separator at all.
+        m_content = REGEX_CONTENT_PATTERN.search(model_output)
+        content = m_content.group(1) if m_content else model_output
+        if not function_call:
+            return StreamingParseResult(normal_text=content, calls=[])
+        match_result = {
+            "name": function_call["name"],
+            "arguments": function_call["arguments"],
+        }
+        calls = self.parse_base_json(match_result, tools)
+        return StreamingParseResult(normal_text=content, calls=calls)
+
+    def parse_streaming_increment(
+        self,
+        new_text: str,
+        tools: List[Tool],
+    ) -> StreamingParseResult:
+        """
+        Streaming parser for incremental text chunks.
+
+        Maintains state across calls. Before the tool call starts, chunk text
+        up to "<|message_sep|>" is returned as normal_text. Once the
+        function-call header is in the buffer, the tool name is emitted first
+        (with empty parameters); later calls emit only the not-yet-sent suffix
+        of the raw arguments string.
+        """
+        if not new_text:
+            return StreamingParseResult()
+        logger.debug("[GigaChat3] parse_streaming_increment: '%s'", new_text)
+        self._buffer += new_text
+        func_name = None
+        cur_args = None
+        m_func = REGEX_FUNCTION_CALL.search(self._buffer)
+        if not self.tool_started:
+            content = None
+            # Content is taken from the current chunk only. With DOTALL the
+            # pattern matches whenever the separator is in the chunk.
+            m_content = REGEX_CONTENT_PATTERN.search(new_text)
+            if m_content:
+                content = m_content.group(1)
+                self.end_content = True
+            elif not self.end_content:
+                content = new_text
+            if m_func:
+                self.tool_started = True
+                logger.debug("[GigaChat3] Tool call started")
+            # NOTE(review): returning here defers parsing of any tool-call
+            # payload already in the buffer until the next non-empty chunk.
+            if content:
+                return StreamingParseResult(normal_text=content)
+        if not m_func:
+            return StreamingParseResult()
+        json_tail = m_func.group(1).strip()
+        name_match = NAME_REGEX.search(json_tail)
+        if name_match:
+            func_name = name_match.group(1)
+        args_match = ARGS_REGEX.search(json_tail)
+        if args_match:
+            cur_args = args_match.group(1).strip()
+            if cur_args.endswith("</s>"):
+                # Strip again after dropping the marker: whitespace between the
+                # JSON and "</s>" would otherwise hide the outer closing brace
+                # from the check below and leak it into the streamed arguments.
+                cur_args = cur_args[: -len("</s>")].rstrip()
+            if cur_args.endswith("}"):
+                # The last "}" may close the enclosing call object rather than
+                # the arguments; drop it only if the remainder is valid JSON.
+                candidate = cur_args[:-1].strip()
+                try:
+                    json.loads(candidate, strict=False)
+                except json.JSONDecodeError:
+                    pass
+                else:
+                    cur_args = candidate
+        calls: List[ToolCallItem] = []
+        if not self.prev_tool_call_arr:
+            self.prev_tool_call_arr.append({})
+        if not self.tool_name_sent:
+            if not func_name:
+                return StreamingParseResult()
+            self.tool_name_sent = True
+            self.prev_tool_call_arr[0]["name"] = func_name
+            logger.debug("[GigaChat3] Sending tool name: %s", func_name)
+            calls.append(
+                ToolCallItem(
+                    tool_index=0,
+                    name=func_name,
+                    parameters="",
+                )
+            )
+            return StreamingParseResult(calls=calls)
+        if cur_args is None:
+            return StreamingParseResult()
+        prev_args = self.prev_tool_call_arr[0].get("arguments_str", "")
+        if not prev_args:
+            delta_args = cur_args
+        elif cur_args.startswith(prev_args):
+            delta_args = cur_args[len(prev_args) :]
+        else:
+            logger.warning(
+                "[GigaChat3] Arguments overlap mismatch. prev='%s...' cur='%s...'",
+                prev_args[:50],
+                cur_args[:50],
+            )
+            return StreamingParseResult()
+        if not delta_args:
+            return StreamingParseResult()
+        self.prev_tool_call_arr[0]["arguments_str"] = cur_args
+        try:
+            args_dict = json.loads(cur_args, strict=False)
+            self.prev_tool_call_arr[0]["arguments"] = args_dict
+        except json.JSONDecodeError:
+            self.prev_tool_call_arr[0]["arguments"] = {}
+        logger.debug("[GigaChat3] Sending args delta: '%s...'", delta_args[:100])
+        calls.append(
+            ToolCallItem(
+                tool_index=0,
+                name=None,
+                parameters=delta_args,
+            )
+        )
+        return StreamingParseResult(calls=calls)
+
+    def supports_structural_tag(self) -> bool:
+        """GigaChat3 does not use structural tags"""
+        return False
+
+    def structure_info(self) -> _GetInfoFunc:
+        """Not applicable for GigaChat3"""
+        raise NotImplementedError(
+            "GigaChat3Detector does not support structural_tag format."
+        )
diff --git a/test/registered/function_call/test_function_call_parser.py b/test/registered/function_call/test_function_call_parser.py