From d17e7f219c9179fe3ba25520add735fde3128a8a Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Thu, 4 Sep 2025 14:44:23 +0800 Subject: [PATCH 1/5] bump version to v0.10.0 --- docs/en/get_started/installation.md | 4 ++-- docs/zh_cn/get_started/installation.md | 4 ++-- lmdeploy/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md index efe32dc054..923da3448b 100644 --- a/docs/en/get_started/installation.md +++ b/docs/en/get_started/installation.md @@ -23,7 +23,7 @@ pip install lmdeploy The default prebuilt package is compiled on **CUDA 12**. If CUDA 11+ (>=11.3) is required, you can install lmdeploy by: ```shell -export LMDEPLOY_VERSION=0.9.2 +export LMDEPLOY_VERSION=0.10.0 export PYTHON_VERSION=310 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118 ``` @@ -51,7 +51,7 @@ DISABLE_TURBOMIND=1 pip install git+https://github.com/InternLM/lmdeploy.git If you prefer a specific version instead of the `main` branch of LMDeploy, you can specify it in your command: ```shell -pip install https://github.com/InternLM/lmdeploy/archive/refs/tags/v0.9.2.zip +pip install https://github.com/InternLM/lmdeploy/archive/refs/tags/v0.10.0.zip ``` If you want to build LMDeploy with support for Ascend, Cambricon, or MACA, install LMDeploy with the corresponding `LMDEPLOY_TARGET_DEVICE` environment variable. 
diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md index 755c80245d..749b0919a7 100644 --- a/docs/zh_cn/get_started/installation.md +++ b/docs/zh_cn/get_started/installation.md @@ -23,7 +23,7 @@ pip install lmdeploy 默认的预构建包是在 **CUDA 12** 上编译的。如果需要 CUDA 11+ (>=11.3),你可以使用以下命令安装 lmdeploy: ```shell -export LMDEPLOY_VERSION=0.9.2 +export LMDEPLOY_VERSION=0.10.0 export PYTHON_VERSION=310 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118 ``` @@ -51,7 +51,7 @@ DISABLE_TURBOMIND=1 pip install git+https://github.com/InternLM/lmdeploy.git 如果您希望使用特定版本,而不是 LMDeploy 的 `main` 分支,可以在命令行中指定: ```shell -pip install https://github.com/InternLM/lmdeploy/archive/refs/tags/v0.9.2.zip +pip install https://github.com/InternLM/lmdeploy/archive/refs/tags/v0.10.0.zip ``` 如果您希望构建支持昇腾、寒武纪或沐熙的 LMDeploy,请使用相应的 `LMDEPLOY_TARGET_DEVICE` 环境变量进行安装。 diff --git a/lmdeploy/version.py b/lmdeploy/version.py index ea8b3053b2..9f36352280 100644 --- a/lmdeploy/version.py +++ b/lmdeploy/version.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import Tuple -__version__ = '0.9.2' +__version__ = '0.10.0' short_version = __version__ From 9b4c3d1e87d89acba0b1ef775e45f3d462fe06a6 Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Thu, 4 Sep 2025 14:50:09 +0800 Subject: [PATCH 2/5] update readme --- README.md | 1 + README_zh-CN.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index d504176dd0..7754647086 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ ______________________________________________________________________
2025 +- \[2025/09\] TurboMind engine support MXFP4. The openai gpt-oss models can be deployed on NVIDIA GPU since V100! - \[2025/06\] Comprehensive inference optimization for FP8 MoE Models - \[2025/06\] DeepSeek PD Disaggregation deployment is now supported through integration with [DLSlime](https://github.com/DeepLink-org/DLSlime) and [Mooncake](https://github.com/kvcache-ai/Mooncake). Huge thanks to both teams! - \[2025/04\] Enhance DeepSeek inference performance by integration deepseek-ai techniques: FlashMLA, DeepGemm, DeepEP, MicroBatch and eplb diff --git a/README_zh-CN.md b/README_zh-CN.md index 788995f1e4..38a5f1742e 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -27,6 +27,7 @@ ______________________________________________________________________ 2025
+- 【2025年9月】TurboMind 引擎支持 MXFP4。OpenAI gpt-oss 模型可以 V100 及以上显卡上部署! - 【2025年6月】深度优化 FP8 MoE 模型推理 - 【2025年6月】集成[DLSlime](https://github.com/DeepLink-org/DLSlime)和[Mooncake](https://github.com/kvcache-ai/Mooncake),实现DeepSeek PD分离部署,向两个团队表示诚挚的感谢! - 【2025年4月】集成deepseek-ai组件FlashMLA、DeepGemm、DeepEP、MicroBatch、eplb等,提升DeepSeek推理性能 From 08b9e4f1131ffbd6902ae83dd4a3411ac0f2a2e3 Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Thu, 4 Sep 2025 21:30:58 +0800 Subject: [PATCH 3/5] update readme --- README.md | 2 +- README_ja.md | 4 ---- README_zh-CN.md | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7754647086..8ccdbb57e9 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ ______________________________________________________________________
2025 -- \[2025/09\] TurboMind engine support MXFP4. The openai gpt-oss models can be deployed on NVIDIA GPU since V100! +- \[2025/09\] TurboMind supports MXFP4 on NVIDIA GPUs starting from V100, achieving 1.5x the performance of vLLM on H800 for openai gpt-oss models! - \[2025/06\] Comprehensive inference optimization for FP8 MoE Models - \[2025/06\] DeepSeek PD Disaggregation deployment is now supported through integration with [DLSlime](https://github.com/DeepLink-org/DLSlime) and [Mooncake](https://github.com/kvcache-ai/Mooncake). Huge thanks to both teams! - \[2025/04\] Enhance DeepSeek inference performance by integration deepseek-ai techniques: FlashMLA, DeepGemm, DeepEP, MicroBatch and eplb diff --git a/README_ja.md b/README_ja.md index 009d6749ad..3537e84935 100644 --- a/README_ja.md +++ b/README_ja.md @@ -23,10 +23,6 @@ ______________________________________________________________________ ## 最新ニュース 🎉 -
-2025 -
-
2024 diff --git a/README_zh-CN.md b/README_zh-CN.md index 38a5f1742e..c017254b6c 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -27,7 +27,7 @@ ______________________________________________________________________ 2025
-- 【2025年9月】TurboMind 引擎支持 MXFP4。OpenAI gpt-oss 模型可以 V100 及以上显卡上部署! +- 【2025年9月】TurboMind 引擎支持 MXFP4,适用于 NVIDIA V100 及以上 GPU。在 H800 上推理 openai gpt-oss 模型,性能可达 vLLM 的 1.5倍! - 【2025年6月】深度优化 FP8 MoE 模型推理 - 【2025年6月】集成[DLSlime](https://github.com/DeepLink-org/DLSlime)和[Mooncake](https://github.com/kvcache-ai/Mooncake),实现DeepSeek PD分离部署,向两个团队表示诚挚的感谢! - 【2025年4月】集成deepseek-ai组件FlashMLA、DeepGemm、DeepEP、MicroBatch、eplb等,提升DeepSeek推理性能 From 6ac9679887820107ca9967f03acd24e44654e950 Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Mon, 8 Sep 2025 13:42:11 +0800 Subject: [PATCH 4/5] update supported models --- README.md | 1 + README_zh-CN.md | 1 + docs/en/supported_models/supported_models.md | 1 + docs/zh_cn/supported_models/supported_models.md | 1 + 4 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 8ccdbb57e9..d572b2ebf1 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
  • Phi-3.5-MoE (16x3.8B)
  • Phi-4-mini (3.8B)
  • MiniCPM3 (4B)
  • +
  • gpt-oss (20b - 120b)
  • diff --git a/README_zh-CN.md b/README_zh-CN.md index c017254b6c..5a82e32efb 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -151,6 +151,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
  • Phi-3.5-MoE (16x3.8B)
  • Phi-4-mini (3.8B)
  • MiniCPM3 (4B)
  • +
  • gpt-oss (20b - 120b)
  • diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md index 38f02fb1fa..cb437fc541 100644 --- a/docs/en/supported_models/supported_models.md +++ b/docs/en/supported_models/supported_models.md @@ -47,6 +47,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes | | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - | | Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No | +| gpt-oss | 20B,120B | LLM | Yes | Yes | Yes | Yes | "-" means not verified yet. diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md index aaebb1df1a..7c50be35fa 100644 --- a/docs/zh_cn/supported_models/supported_models.md +++ b/docs/zh_cn/supported_models/supported_models.md @@ -47,6 +47,7 @@ | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes | | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - | | Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No | +| gpt-oss | 20B,120B | LLM | Yes | Yes | Yes | Yes | “-” 表示还没有验证。 From d4f3a8903f387183370758a13d9a7ef343165879 Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Mon, 8 Sep 2025 13:46:06 +0800 Subject: [PATCH 5/5] note about 0.10.0 on cuda12.8 --- README.md | 4 ++-- README_zh-CN.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d572b2ebf1..6034655f86 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
  • Phi-3.5-MoE (16x3.8B)
  • Phi-4-mini (3.8B)
  • MiniCPM3 (4B)
  • -
  • gpt-oss (20b - 120b)
  • +
  • gpt-oss (20B, 120B)
  • @@ -206,7 +206,7 @@ conda activate lmdeploy pip install lmdeploy ``` -The default prebuilt package is compiled on **CUDA 12** since v0.3.0. +The default prebuilt package is compiled on **CUDA 12.8** since v0.10.0. For more information on installing on CUDA 11+ platform, or for instructions on building from source, please refer to the [installation guide](docs/en/get_started/installation.md). ## Offline Batch Inference diff --git a/README_zh-CN.md b/README_zh-CN.md index 5a82e32efb..e677ef2f7c 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -151,7 +151,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
  • Phi-3.5-MoE (16x3.8B)
  • Phi-4-mini (3.8B)
  • MiniCPM3 (4B)
  • -
  • gpt-oss (20b - 120b)
  • +
  • gpt-oss (20B, 120B)
  • @@ -207,7 +207,7 @@ conda activate lmdeploy pip install lmdeploy ``` -自 v0.3.0 起,LMDeploy 预编译包默认基于 CUDA 12 编译。如果需要在 CUDA 11+ 下安装 LMDeploy,或者源码安装 LMDeploy,请参考[安装文档](docs/zh_cn/get_started/installation.md) +自 v0.10.0 起,LMDeploy 预编译包默认基于 CUDA 12.8 编译。如果需要在 CUDA 11+ 下安装 LMDeploy,或者源码安装 LMDeploy,请参考[安装文档](docs/zh_cn/get_started/installation.md) ## 离线批处理
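PATCH 1/5 above bumps `__version__` in `lmdeploy/version.py` to `'0.10.0'`. A quick post-install sanity check for the bump can compare the installed version against the release tuple. The `parse_version_info` helper below is an illustrative sketch modeled on the common OpenMMLab `version.py` pattern (the patch only shows `__version__` and `short_version`, so this helper is an assumption, not necessarily lmdeploy's exact API):

```python
from typing import Tuple, Union

# Mirrors the string set by PATCH 1/5 in lmdeploy/version.py.
__version__ = '0.10.0'


def parse_version_info(version_str: str) -> Tuple[Union[int, str], ...]:
    """Split a version string such as '0.10.0' or '0.10.0rc1' into a
    comparable tuple, e.g. (0, 10, 0) or (0, 10, 0, 'rc1').

    NOTE: illustrative helper; handles only plain X.Y.Z and X.Y.Zrc N
    release strings, which is all this patch series uses.
    """
    parts = []
    for piece in version_str.split('.'):
        if piece.isdigit():
            parts.append(int(piece))
        elif 'rc' in piece:
            patch, rc = piece.split('rc')
            parts.append(int(patch))
            parts.append(f'rc{rc}')
    return tuple(parts)


# Tuple comparison makes version gating straightforward: the new release
# satisfies a >= (0, 10, 0) check, while the previous 0.9.2 does not.
assert parse_version_info(__version__) >= (0, 10, 0)
assert parse_version_info('0.9.2') < (0, 10, 0)
```

This kind of check is useful after installing the CUDA 11 wheel shown in the installation docs above, where `LMDEPLOY_VERSION` must match an actual release tag.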