@@ -105,6 +105,7 @@ def __init__(self,
105105 assert img_detail in ['high' , 'low' ]
106106 self .img_detail = img_detail
107107 self .timeout = timeout
108+ self .o1_model = 'o1' in model or 'o3' in model
108109
109110 super ().__init__ (wait = wait , retry = retry , system_prompt = system_prompt , verbose = verbose , ** kwargs )
110111
@@ -185,17 +186,6 @@ def generate_inner(self, inputs, **kwargs) -> str:
185186 temperature = kwargs .pop ('temperature' , self .temperature )
186187 max_tokens = kwargs .pop ('max_tokens' , self .max_tokens )
187188
188- # context_window = GPT_context_window(self.model)
189- # new_max_tokens = min(max_tokens, context_window - self.get_token_len(inputs))
190- # if 0 < new_max_tokens <= 100 and new_max_tokens < max_tokens:
191- # self.logger.warning(
192- # 'Less than 100 tokens left, '
193- # 'may exceed the context window with some additional meta symbols. '
194- # )
195- # if new_max_tokens <= 0:
196- # return 0, self.fail_msg + 'Input string longer than context window. ', 'Length Exceeded. '
197- # max_tokens = new_max_tokens
198-
199189 # Will send request if use Azure, dk how to use openai client for it
200190 if self .use_azure :
201191 headers = {'Content-Type' : 'application/json' , 'api-key' : self .key }
@@ -206,10 +196,16 @@ def generate_inner(self, inputs, **kwargs) -> str:
206196 payload = dict (
207197 model = self .model ,
208198 messages = input_msgs ,
209- max_tokens = max_tokens ,
199+ # max_tokens=max_tokens,
210200 n = 1 ,
211201 temperature = temperature ,
212202 ** kwargs )
203+ if self .o1_model :
204+ payload ['max_completion_tokens' ] = max_tokens
205+ payload .pop ('temperature' )
206+ else :
207+ payload ['max_tokens' ] = max_tokens
208+
213209 response = requests .post (
214210 self .api_base ,
215211 headers = headers , data = json .dumps (payload ), timeout = self .timeout * 1.1 )