From 2b77d8bc55f9b7057ad9d1f61714145ed42dec7c Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Thu, 29 Jan 2026 20:47:39 +0800
Subject: [PATCH 01/23] Use cache mount for genai docker (#4954)

---
 deploy/genai_vllm_server_docker/Dockerfile | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile
index 84aa4206fd..a7792851c7 100644
--- a/deploy/genai_vllm_server_docker/Dockerfile
+++ b/deploy/genai_vllm_server_docker/Dockerfile
@@ -4,17 +4,19 @@ RUN apt-get update \
     && apt-get install -y libgl1 \
     && rm -rf /var/lib/apt/lists/*
 
-ENV PIP_NO_CACHE_DIR=0
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONDONTWRITEBYTECODE=1
 
-RUN python -m pip install torch==2.8.0
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python -m pip install torch==2.8.0
 
 ARG PADDLEX_VERSION=">=3.3.6,<3.4"
-RUN python -m pip install "paddlex${PADDLEX_VERSION}"
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python -m pip install "paddlex${PADDLEX_VERSION}"
 
 ARG BUILD_FOR_SM120=false
-RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
         python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.11/flash_attn-2.8.3%2Bcu128torch2.8-cp310-cp310-linux_x86_64.whl; \
     else \
         python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \

From 9d39bc9691fcb9aa7949dba1e0fc0b41047041ef Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Thu, 29 Jan 2026 21:32:17 +0800
Subject: [PATCH 02/23] Fix HPS order bug (#4955)

---
 .../pipelines/OCR/server/model_repo/ocr/1/model.py | 14 +++++++++-----
 .../server/model_repo/layout-parsing/1/model.py    | 14 +++++++++-----
 .../server/model_repo/layout-parsing/1/model.py    | 14 +++++++++-----
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py
index 7a99bf9829..5601c51674 100644
--- a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py
+++ b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py
@@ -105,12 +105,16 @@ def run_batch(self, inputs, log_ids, batch_id):
 
                 ret = executor.map(self._preprocess, inputs_g, log_ids_g)
                 ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], []
+                ind_input_id_lst, ind_log_id_lst, ind_input_lst = [], [], []
                 for i, item in enumerate(ret):
                     if isinstance(item, tuple):
                         assert len(item) == 3, len(item)
                         ind_img_lsts.append(item[0])
                         ind_data_info_lst.append(item[1])
                         ind_visualize_enabled_lst.append(item[2])
+                        ind_input_id_lst.append(input_ids_g[i])
+                        ind_log_id_lst.append(log_ids_g[i])
+                        ind_input_lst.append(inputs_g[i])
                     else:
                         input_id = input_ids_g[i]
                         result_or_output_dic[input_id] = item
@@ -146,19 +150,19 @@ def run_batch(self, inputs, log_ids, batch_id):
                         ind_preds.append(preds[start_idx : start_idx + len(item)])
                         start_idx += len(item)
 
-                    for i, result in zip(
-                        input_ids_g,
+                    for input_id, result in zip(
+                        ind_input_id_lst,
                         executor.map(
                             self._postprocess,
                             ind_img_lsts,
                             ind_data_info_lst,
                             ind_visualize_enabled_lst,
                             ind_preds,
-                            log_ids_g,
-                            inputs_g,
+                            ind_log_id_lst,
+                            ind_input_lst,
                         ),
                     ):
-                        result_or_output_dic[i] = result
+                        result_or_output_dic[input_id] = result
 
             assert len(result_or_output_dic) == len(
                 inputs
diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py
index f7e8d9b56b..57dde5d42f 100644
--- a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py
+++ b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py
@@ -104,12 +104,16 @@ def run_batch(self, inputs, log_ids, batch_id):
 
                 ret = executor.map(self._preprocess, inputs_g, log_ids_g)
                 ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], []
+                ind_input_ids_lst, ind_log_ids_lst, ind_inputs_lst = [], [], []
                 for i, item in enumerate(ret):
                     if isinstance(item, tuple):
                         assert len(item) == 3, len(item)
                         ind_img_lsts.append(item[0])
                         ind_data_info_lst.append(item[1])
                         ind_visualize_enabled_lst.append(item[2])
+                        ind_input_ids_lst.append(input_ids_g[i])
+                        ind_log_ids_lst.append(log_ids_g[i])
+                        ind_inputs_lst.append(inputs_g[i])
                     else:
                         input_id = input_ids_g[i]
                         result_or_output_dic[input_id] = item
@@ -179,19 +183,19 @@ def run_batch(self, inputs, log_ids, batch_id):
                         ind_preds.append(preds[start_idx : start_idx + len(item)])
                         start_idx += len(item)
 
-                    for i, result in zip(
-                        input_ids_g,
+                    for input_id, result in zip(
+                        ind_input_ids_lst,
                         executor.map(
                             self._postprocess,
                             ind_img_lsts,
                             ind_data_info_lst,
                             ind_visualize_enabled_lst,
                             ind_preds,
-                            log_ids_g,
-                            inputs_g,
+                            ind_log_ids_lst,
+                            ind_inputs_lst,
                         ),
                     ):
-                        result_or_output_dic[i] = result
+                        result_or_output_dic[input_id] = result
 
             assert len(result_or_output_dic) == len(
                 inputs
diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py
index f0b0a64ac3..8047ba4249 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py
@@ -104,12 +104,16 @@ def run_batch(self, inputs, log_ids, batch_id):
 
                 ret = executor.map(self._preprocess, inputs_g, log_ids_g)
                 ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], []
+                ind_input_id_lst, ind_log_id_lst, ind_input_lst = [], [], []
                 for i, item in enumerate(ret):
                     if isinstance(item, tuple):
                         assert len(item) == 3, len(item)
                         ind_img_lsts.append(item[0])
                         ind_data_info_lst.append(item[1])
                         ind_visualize_enabled_lst.append(item[2])
+                        ind_input_id_lst.append(input_ids_g[i])
+                        ind_log_id_lst.append(log_ids_g[i])
+                        ind_input_lst.append(inputs_g[i])
                     else:
                         input_id = input_ids_g[i]
                         result_or_output_dic[input_id] = item
@@ -157,19 +161,19 @@ def run_batch(self, inputs, log_ids, batch_id):
                         ind_preds.append(preds[start_idx : start_idx + len(item)])
                         start_idx += len(item)
 
-                    for i, result in zip(
-                        input_ids_g,
+                    for input_id, result in zip(
+                        ind_input_id_lst,
                         executor.map(
                             self._postprocess,
                             ind_img_lsts,
                             ind_data_info_lst,
                             ind_visualize_enabled_lst,
                             ind_preds,
-                            log_ids_g,
-                            inputs_g,
+                            ind_log_id_lst,
+                            ind_input_lst,
                         ),
                     ):
-                        result_or_output_dic[i] = result
+                        result_or_output_dic[input_id] = result
 
             assert len(result_or_output_dic) == len(
                 inputs

From 966969ffb828a4bb53943701b696fd2ea99580da Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Fri, 30 Jan 2026 01:50:29 +0800
Subject: [PATCH 03/23] Fix transformers version (#4956)

---
 deploy/genai_vllm_server_docker/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile
index a7792851c7..f761cc3368 100644
--- a/deploy/genai_vllm_server_docker/Dockerfile
+++ b/deploy/genai_vllm_server_docker/Dockerfile
@@ -21,6 +21,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     else \
         python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
     fi \
+    && python -m pip install transformers==4.57.6 \
     && paddlex --install genai-vllm-server
 
 EXPOSE 8080

From 306430a46d18d8b20f9b604c0dc46800581da408 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Fri, 30 Jan 2026 01:50:09 +0800
Subject: [PATCH 04/23] Fix HPS and remove scipy from required deps (#4957)

---
 deploy/hps/server_env/cpu_version.txt | 2 +-
 deploy/hps/server_env/gpu_version.txt | 2 +-
 setup.py                              | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/deploy/hps/server_env/cpu_version.txt b/deploy/hps/server_env/cpu_version.txt
index e4737652ca..0b69c00c5f 100644
--- a/deploy/hps/server_env/cpu_version.txt
+++ b/deploy/hps/server_env/cpu_version.txt
@@ -1 +1 @@
-0.3.13
+0.3.14
diff --git a/deploy/hps/server_env/gpu_version.txt b/deploy/hps/server_env/gpu_version.txt
index 0b69c00c5f..9e29e10619 100644
--- a/deploy/hps/server_env/gpu_version.txt
+++ b/deploy/hps/server_env/gpu_version.txt
@@ -1 +1 @@
-0.3.14
+0.3.15
diff --git a/setup.py b/setup.py
index c2401ff4c1..7802fd0297 100644
--- a/setup.py
+++ b/setup.py
@@ -114,7 +114,6 @@
             # Currently `pypdfium2` is required by the image batch sampler
             "pypdfium2",
             "scikit-image",
-            "scipy",
         ],
         "multimodal": [
             "einops",

From 01f63a6a22d28f238faa742ef4ce13a69c90d209 Mon Sep 17 00:00:00 2001
From: changdazhou <142379845+changdazhou@users.noreply.github.com>
Date: Fri, 30 Jan 2026 13:38:58 +0800
Subject: [PATCH 05/23] [Cherry-Pick]bugfix: unexpected change of the constant
 IMAGE_LABELS (#4961)

* bugfix: unexpected change of the constant IMAGE_LABELS

* update doc
---
 docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md | 2 +-
 docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md    | 2 +-
 paddlex/inference/pipelines/paddleocr_vl/pipeline.py           | 2 +-
 paddlex/inference/pipelines/paddleocr_vl/result.py             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md
index bcac347f89..ac764c5ac4 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md
@@ -6,7 +6,7 @@ comments: true
 
 PaddleOCR-VL is a SOTA and resource-efficient model tailored for document parsing. Its core component is PaddleOCR-VL-0.9B, a compact yet powerful vision-language model (VLM) that integrates a NaViT-style dynamic resolution visual encoder with the ERNIE-4.5-0.3B language model to enable accurate element recognition. This innovative model efficiently supports 109 languages and excels in recognizing complex elements (e.g., text, tables, formulas, and charts), while maintaining minimal resource consumption. Through comprehensive evaluations on widely used public benchmarks and in-house benchmarks, PaddleOCR-VL achieves SOTA performance in both page-level document parsing and element-level recognition. It significantly outperforms existing solutions, exhibits strong competitiveness against top-tier VLMs, and delivers fast inference speeds. These strengths make it highly suitable for practical deployment in real-world scenarios.
 
-On January 29, 2026, we released PaddleOCR-VL-1.5. PaddleOCR-VL-1.5 not only significantly improved the accuracy on the OmniDocBench v1.5 evaluation set to 94.5%, but also innovatively supports irregular-shaped bounding box localization. As a result, PaddleOCR-VL-1.5 demonstrates outstanding performance in real-world scenarios such as Skew, Warping, Screen Photography, Illumination, and Scanning. In addition, the model has added new capabilities for seal (stamp) recognition and text detection and recognition, with key metrics continuing to lead the industry.
+**On January 29, 2026, we released PaddleOCR-VL-1.5. PaddleOCR-VL-1.5 not only significantly improved the accuracy on the OmniDocBench v1.5 evaluation set to 94.5%, but also innovatively supports irregular-shaped bounding box localization. As a result, PaddleOCR-VL-1.5 demonstrates outstanding performance in real-world scenarios such as Skew, Warping, Screen Photography, Illumination, and Scanning. In addition, the model has added new capabilities for seal (stamp) recognition and text detection and recognition, with key metrics continuing to lead the industry.**
 
 <img src="https://raw.githubusercontent.com/cuicheng01/PaddleX_doc_images/refs/heads/main/images/paddleocr_vl_1_5/paddleocr-vl-1.5_metrics.png"/>
 
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
index 1213104491..cee5b65fe5 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
@@ -6,7 +6,7 @@ comments: true
 
 PaddleOCR-VL 是一款先进、高效的文档解析模型，专为文档中的元素识别设计。其核心组件为 PaddleOCR-VL-0.9B，这是一种紧凑而强大的视觉语言模型（VLM），它由 NaViT 风格的动态分辨率视觉编码器与 ERNIE-4.5-0.3B 语言模型组成，能够实现精准的元素识别。该模型支持 109 种语言，并在识别复杂元素（如文本、表格、公式和图表）方面表现出色，同时保持极低的资源消耗。通过在广泛使用的公开基准与内部基准上的全面评测，PaddleOCR-VL 在页级级文档解析与元素级识别均达到 SOTA 表现。它显著优于现有的基于Pipeline方案和文档解析多模态方案以及先进的通用多模态大模型，并具备更快的推理速度。这些优势使其非常适合在真实场景中落地部署。
 
-2026年1月29日，我们发布了PaddleOCR-VL-1.5。PaddleOCR-VL-1.5不仅以94.5%精度大幅刷新了评测集OmniDocBench v1.5，更创新性地支持了异形框定位，使得PaddleOCR-VL-1.5 在扫描、倾斜、弯折、屏幕拍摄及复杂光照等真实场景中均表现优异。此外，模型还新增了印章识别与文本检测识别能力，关键指标持续领跑。
+**2026年1月29日，我们发布了PaddleOCR-VL-1.5。PaddleOCR-VL-1.5不仅以94.5%精度大幅刷新了评测集OmniDocBench v1.5，更创新性地支持了异形框定位，使得PaddleOCR-VL-1.5 在扫描、倾斜、弯折、屏幕拍摄及复杂光照等真实场景中均表现优异。此外，模型还新增了印章识别与文本检测识别能力，关键指标持续领跑。**
 
 <img src="https://raw.githubusercontent.com/cuicheng01/PaddleX_doc_images/refs/heads/main/images/paddleocr_vl_1_5/paddleocr-vl-1.5_metrics.png"/>
 
diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py
index ee1994d83d..5a48667437 100644
--- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py
+++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py
@@ -272,7 +272,7 @@ def get_layout_parsing_results(
         id2pixel_key_map = {}
         image_path_to_obj_map = {}
         vis_image_labels = IMAGE_LABELS + ["seal"]
-        image_labels = [] if use_ocr_for_image_block else IMAGE_LABELS
+        image_labels = [] if use_ocr_for_image_block else IMAGE_LABELS.copy()
         if not use_chart_recognition:
             image_labels += ["chart"]
             vis_image_labels += ["chart"]
diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py
index 963f9db79f..f50b18a7c2 100644
--- a/paddlex/inference/pipelines/paddleocr_vl/result.py
+++ b/paddlex/inference/pipelines/paddleocr_vl/result.py
@@ -268,7 +268,7 @@ def __init__(self, data) -> None:
             "markdown_ignore_labels", []
         )
         self.skip_order_labels = [
-            label for label in SKIP_ORDER_LABELS + markdown_ignore_labels
+            label for label in SKIP_ORDER_LABELS.copy() + markdown_ignore_labels
         ]
 
     def _to_img(self) -> dict[str, np.ndarray]:

From 363b50854ae2fce6db82f23c4919b68fc117c0a6 Mon Sep 17 00:00:00 2001
From: SuperNova <91192235+handsomecoderyang@users.noreply.github.com>
Date: Fri, 30 Jan 2026 13:40:14 +0800
Subject: [PATCH 06/23] [METAX] add ppdoclayv3 to METAX_GPU_WHITELIST (#4959)

Co-authored-by: duqiemng <1640472053@qq.com>
---
 paddlex/utils/custom_device_list.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddlex/utils/custom_device_list.py b/paddlex/utils/custom_device_list.py
index ec4a85495c..d48ea12b71 100755
--- a/paddlex/utils/custom_device_list.py
+++ b/paddlex/utils/custom_device_list.py
@@ -418,6 +418,7 @@
     "PP-OCRv4_mobile_rec",
     "PP-OCRv4_server_rec",
     "PP-DocLayoutV2",
+    "PP-DocLayoutV3",
     "PP-ShiTuV2_rec",
     "PP-ShiTuV2_det",
     "PP-OCRv5_mobile_det",

From d59a34495fd44f210b41cdd22e365f3e67105e9d Mon Sep 17 00:00:00 2001
From: zhang-prog <69562787+zhang-prog@users.noreply.github.com>
Date: Fri, 30 Jan 2026 14:26:03 +0800
Subject: [PATCH 07/23] vllm 0.10.2 needs transformers 4.x (#4963)

* vllm 0.10.2 needs transformers 4.x

* update
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7802fd0297..b02f516f2a 100644
--- a/setup.py
+++ b/setup.py
@@ -228,7 +228,7 @@
         "genai-vllm-server": [
             "einops",
             "torch == 2.8.0",
-            "transformers",
+            "transformers < 5.0.0",
             "uvloop",
             "vllm == 0.10.2",
         ],

From 622b60258a4821d97efd95e5cc7cee63a7ae68d5 Mon Sep 17 00:00:00 2001
From: Bobholamovic <mhlin425@whu.edu.cn>
Date: Fri, 30 Jan 2026 06:29:07 +0000
Subject: [PATCH 08/23] Bump version to 3.4.1

---
 paddlex/.version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlex/.version b/paddlex/.version
index 18091983f5..47b322c971 100644
--- a/paddlex/.version
+++ b/paddlex/.version
@@ -1 +1 @@
-3.4.0
+3.4.1

From c78fb958e47013d9c4abe80b11e53f71a2f82602 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Mon, 2 Feb 2026 11:29:39 +0800
Subject: [PATCH 09/23] Support setting PDF rendering scale factor (#4967)

---
 .../inference/common/batch_sampler/image_batch_sampler.py   | 3 ++-
 paddlex/inference/serving/infra/utils.py                    | 5 ++---
 paddlex/utils/flags.py                                      | 6 +++++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/paddlex/inference/common/batch_sampler/image_batch_sampler.py b/paddlex/inference/common/batch_sampler/image_batch_sampler.py
index dd78354fe7..c519765f69 100644
--- a/paddlex/inference/common/batch_sampler/image_batch_sampler.py
+++ b/paddlex/inference/common/batch_sampler/image_batch_sampler.py
@@ -20,6 +20,7 @@
 from ....utils import logging
 from ....utils.cache import CACHE_DIR
 from ....utils.download import download
+from ....utils.flags import PDF_RENDER_SCALE
 from ...utils.io import PDFReader
 from .base_batch_sampler import BaseBatchSampler, Batch
 
@@ -48,7 +49,7 @@ class ImageBatchSampler(BaseBatchSampler):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.pdf_reader = PDFReader()
+        self.pdf_reader = PDFReader(zoom=PDF_RENDER_SCALE)
 
     # XXX: auto download for url
     def _download_from_url(self, in_path):
diff --git a/paddlex/inference/serving/infra/utils.py b/paddlex/inference/serving/infra/utils.py
index b6b0211f98..d9e971261e 100644
--- a/paddlex/inference/serving/infra/utils.py
+++ b/paddlex/inference/serving/infra/utils.py
@@ -30,7 +30,7 @@
 from typing_extensions import Literal, ParamSpec, TypeAlias, assert_never
 
 from ....utils.deps import function_requires_deps, is_dep_available
-from ...utils.pdfium_lock import pdfium_lock
+from ...utils.pdfium_lock import PDF_RENDER_SCALE, pdfium_lock
 from .models import ImageInfo, PDFInfo, PDFPageInfo
 
 if is_dep_available("aiohttp"):
@@ -191,8 +191,7 @@ def read_pdf(
                 if max_num_imgs is not None and len(images) >= max_num_imgs:
                     page.close()
                     break
-                # TODO: Do not always use zoom=2.0
-                zoom = 2.0
+                zoom = PDF_RENDER_SCALE
                 deg = 0
                 image = page.render(scale=zoom, rotation=deg).to_numpy()
                 images.append(image)
diff --git a/paddlex/utils/flags.py b/paddlex/utils/flags.py
index 1fcf547335..726d3603fd 100644
--- a/paddlex/utils/flags.py
+++ b/paddlex/utils/flags.py
@@ -70,7 +70,9 @@ def get_flag_from_env_var(name, default, format_func=str):
     "PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", False
 )
 
-HUGGING_FACE_ENDPOINT = os.environ.get("PADDLE_PDX_HUGGING_FACE_ENDPOINT", "https://huggingface.co")
+HUGGING_FACE_ENDPOINT = os.environ.get(
+    "PADDLE_PDX_HUGGING_FACE_ENDPOINT", "https://huggingface.co"
+)
 
 # Inference Benchmark
 INFER_BENCHMARK = get_flag_from_env_var("PADDLE_PDX_INFER_BENCHMARK", False)
@@ -87,3 +89,5 @@ def get_flag_from_env_var(name, default, format_func=str):
 INFER_BENCHMARK_USE_CACHE_FOR_READ = get_flag_from_env_var(
     "PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ", False
 )
+
+PDF_RENDER_SCALE = get_flag_from_env_var("PADDLE_PDX_PDF_RENDER_SCALE", 2.0, float)

From 45989f0dc6d998ce66fe8d6cb61293355a75f429 Mon Sep 17 00:00:00 2001
From: Bvicii <98971614+scyyh11@users.noreply.github.com>
Date: Tue, 3 Feb 2026 20:00:55 -0800
Subject: [PATCH 10/23] Fix doc_vlm async cancellation (#4969) (#4971)

* fix(doc_vlm): cancel pending futures on batch request failure

When a batch of requests is sent to the VLM service and one fails,
the remaining pending futures are now properly cancelled to avoid
wasting VLM service resources.

* chore: remove test file and documentation for async cancellation fix
---
 paddlex/inference/models/doc_vlm/predictor.py | 215 +++++++++---------
 1 file changed, 111 insertions(+), 104 deletions(-)

diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py
index 9cd9341736..3d8ab2e5c7 100644
--- a/paddlex/inference/models/doc_vlm/predictor.py
+++ b/paddlex/inference/models/doc_vlm/predictor.py
@@ -415,118 +415,125 @@ def _genai_client_process(
         max_pixels,
     ):
         futures = []
-        for item in data:
-            image = item["image"]
-            if isinstance(image, str):
-                if image.startswith("http://") or image.startswith("https://"):
-                    image_url = image
-                else:
+        try:
+            for item in data:
+                image = item["image"]
+                if isinstance(image, str):
+                    if image.startswith("http://") or image.startswith("https://"):
+                        image_url = image
+                    else:
+                        from PIL import Image
+
+                        with Image.open(image) as img:
+                            img = img.convert("RGB")
+                            with io.BytesIO() as buf:
+                                img.save(buf, format="JPEG")
+                                image_url = "data:image/jpeg;base64," + base64.b64encode(
+                                    buf.getvalue()
+                                ).decode("ascii")
+                elif isinstance(image, np.ndarray):
+                    import cv2
                     from PIL import Image
 
-                    with Image.open(image) as img:
-                        img = img.convert("RGB")
-                        with io.BytesIO() as buf:
-                            img.save(buf, format="JPEG")
-                            image_url = "data:image/jpeg;base64," + base64.b64encode(
-                                buf.getvalue()
-                            ).decode("ascii")
-            elif isinstance(image, np.ndarray):
-                import cv2
-                from PIL import Image
-
-                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-                img = Image.fromarray(image)
-                with io.BytesIO() as buf:
-                    img.save(buf, format="JPEG")
-                    image_url = "data:image/jpeg;base64," + base64.b64encode(
-                        buf.getvalue()
-                    ).decode("ascii")
-            else:
-                raise TypeError(f"Not supported image type: {type(image)}")
-
-            if self._genai_client.backend == "fastdeploy-server":
-                kwargs = {
-                    "temperature": 1 if temperature is None else temperature,
-                    "top_p": 0 if top_p is None else top_p,
-                }
-            else:
-                kwargs = {
-                    "temperature": 0 if temperature is None else temperature,
-                }
-                if top_p is not None:
-                    kwargs["top_p"] = top_p
-
-            if self._genai_client.backend == "mlx-vlm-server":
-                max_tokens_name = "max_tokens"
-            else:
-                max_tokens_name = "max_completion_tokens"
-
-            if max_new_tokens is not None:
-                kwargs[max_tokens_name] = max_new_tokens
-            elif self.model_name in self.model_group["PaddleOCR-VL"]:
-                kwargs[max_tokens_name] = 8192
-
-            kwargs["extra_body"] = {}
-            if skip_special_tokens is not None:
-                if self._genai_client.backend in (
-                    "fastdeploy-server",
-                    "vllm-server",
-                    "sglang-server",
-                    "mlx-vlm-server",
-                ):
-                    kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
+                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                    img = Image.fromarray(image)
+                    with io.BytesIO() as buf:
+                        img.save(buf, format="JPEG")
+                        image_url = "data:image/jpeg;base64," + base64.b64encode(
+                            buf.getvalue()
+                        ).decode("ascii")
                 else:
-                    raise ValueError("Not supported")
+                    raise TypeError(f"Not supported image type: {type(image)}")
 
-            if repetition_penalty is not None:
-                kwargs["extra_body"]["repetition_penalty"] = repetition_penalty
-
-            if min_pixels is not None:
-                if self._genai_client.backend == "vllm-server":
-                    kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
-                        "extra_body"
-                    ].get("mm_processor_kwargs", {})
-                    kwargs["extra_body"]["mm_processor_kwargs"][
-                        "min_pixels"
-                    ] = min_pixels
+                if self._genai_client.backend == "fastdeploy-server":
+                    kwargs = {
+                        "temperature": 1 if temperature is None else temperature,
+                        "top_p": 0 if top_p is None else top_p,
+                    }
                 else:
-                    warnings.warn(
-                        f"{repr(self._genai_client.backend)} does not support `min_pixels`."
-                    )
+                    kwargs = {
+                        "temperature": 0 if temperature is None else temperature,
+                    }
+                    if top_p is not None:
+                        kwargs["top_p"] = top_p
 
-            if max_pixels is not None:
-                if self._genai_client.backend == "vllm-server":
-                    kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
-                        "extra_body"
-                    ].get("mm_processor_kwargs", {})
-                    kwargs["extra_body"]["mm_processor_kwargs"][
-                        "max_pixels"
-                    ] = max_pixels
+                if self._genai_client.backend == "mlx-vlm-server":
+                    max_tokens_name = "max_tokens"
                 else:
-                    warnings.warn(
-                        f"{repr(self._genai_client.backend)} does not support `max_pixels`."
-                    )
+                    max_tokens_name = "max_completion_tokens"
+
+                if max_new_tokens is not None:
+                    kwargs[max_tokens_name] = max_new_tokens
+                elif self.model_name in self.model_group["PaddleOCR-VL"]:
+                    kwargs[max_tokens_name] = 8192
+
+                kwargs["extra_body"] = {}
+                if skip_special_tokens is not None:
+                    if self._genai_client.backend in (
+                        "fastdeploy-server",
+                        "vllm-server",
+                        "sglang-server",
+                        "mlx-vlm-server",
+                    ):
+                        kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
+                    else:
+                        raise ValueError("Not supported")
+
+                if repetition_penalty is not None:
+                    kwargs["extra_body"]["repetition_penalty"] = repetition_penalty
 
-            future = self._genai_client.create_chat_completion(
-                [
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "image_url", "image_url": {"url": image_url}},
-                            {"type": "text", "text": item["query"]},
-                        ],
-                    }
-                ],
-                return_future=True,
-                timeout=600,
-                **kwargs,
-            )
+                if min_pixels is not None:
+                    if self._genai_client.backend == "vllm-server":
+                        kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
+                            "extra_body"
+                        ].get("mm_processor_kwargs", {})
+                        kwargs["extra_body"]["mm_processor_kwargs"][
+                            "min_pixels"
+                        ] = min_pixels
+                    else:
+                        warnings.warn(
+                            f"{repr(self._genai_client.backend)} does not support `min_pixels`."
+                        )
+
+                if max_pixels is not None:
+                    if self._genai_client.backend == "vllm-server":
+                        kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
+                            "extra_body"
+                        ].get("mm_processor_kwargs", {})
+                        kwargs["extra_body"]["mm_processor_kwargs"][
+                            "max_pixels"
+                        ] = max_pixels
+                    else:
+                        warnings.warn(
+                            f"{repr(self._genai_client.backend)} does not support `max_pixels`."
+                        )
+
+                future = self._genai_client.create_chat_completion(
+                    [
+                        {
+                            "role": "user",
+                            "content": [
+                                {"type": "image_url", "image_url": {"url": image_url}},
+                                {"type": "text", "text": item["query"]},
+                            ],
+                        }
+                    ],
+                    return_future=True,
+                    timeout=600,
+                    **kwargs,
+                )
 
-            futures.append(future)
+                futures.append(future)
 
-        results = []
-        for future in futures:
-            result = future.result()
-            results.append(result.choices[0].message.content)
+            results = []
+            for future in futures:
+                result = future.result()
+                results.append(result.choices[0].message.content)
 
-        return results
+            return results
+        except Exception:
+            # Cancel all pending futures to avoid wasting resources
+            for future in futures:
+                if not future.done():
+                    future.cancel()
+            raise

From 0a936ba7a37ce783684d28be8545e17a5bab69c5 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Fri, 6 Feb 2026 18:15:14 +0800
Subject: [PATCH 11/23] Fix typo (#4982)

---
 docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
index cee5b65fe5..960a0bbe51 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
@@ -1551,7 +1551,7 @@ INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
 <tr>
 <td><code>mergeTables</code></td>
 <td><code>boolean</code></td>
-<td>请参阅PaddleOCR-VL对象中 <code>restructure_pages</code> 方法的 <code>merge_table</code> 参数相关说明。仅当<code>restructurePages</code>为<code>true</code>时生效。</td>
+<td>请参阅PaddleOCR-VL对象中 <code>restructure_pages</code> 方法的 <code>merge_tables</code> 参数相关说明。仅当<code>restructurePages</code>为<code>true</code>时生效。</td>
 <td>否</td>
 </tr>
 <tr>

From f790efff558b4888349d61fdc5474c8a972b0f5d Mon Sep 17 00:00:00 2001
From: zhang-prog <69562787+zhang-prog@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:20:55 +0800
Subject: [PATCH 12/23] add llama.cpp support (#4983)

---
 paddlex/inference/models/common/genai.py      |  8 ++++-
 paddlex/inference/models/doc_vlm/predictor.py | 29 ++++++++++++-------
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/paddlex/inference/models/common/genai.py b/paddlex/inference/models/common/genai.py
index 9a6d2edbcf..d5a10d764f 100644
--- a/paddlex/inference/models/common/genai.py
+++ b/paddlex/inference/models/common/genai.py
@@ -29,12 +29,18 @@
     "vllm-server",
     "sglang-server",
     "mlx-vlm-server",
+    "llama-cpp-server",
 ]
 
 
 class GenAIConfig(BaseModel):
     backend: Literal[
-        "native", "fastdeploy-server", "vllm-server", "sglang-server", "mlx-vlm-server"
+        "native",
+        "fastdeploy-server",
+        "vllm-server",
+        "sglang-server",
+        "mlx-vlm-server",
+        "llama-cpp-server",
     ] = "native"
     server_url: Optional[str] = None
     max_concurrency: int = 200
diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py
index 3d8ab2e5c7..43cc173a0b 100644
--- a/paddlex/inference/models/doc_vlm/predictor.py
+++ b/paddlex/inference/models/doc_vlm/predictor.py
@@ -415,6 +415,10 @@ def _genai_client_process(
         max_pixels,
     ):
         futures = []
+        if self._genai_client.backend == "llama-cpp-server":
+            image_format = "PNG"
+        else:
+            image_format = "JPEG"
         try:
             for item in data:
                 image = item["image"]
@@ -427,10 +431,11 @@ def _genai_client_process(
                         with Image.open(image) as img:
                             img = img.convert("RGB")
                             with io.BytesIO() as buf:
-                                img.save(buf, format="JPEG")
-                                image_url = "data:image/jpeg;base64," + base64.b64encode(
-                                    buf.getvalue()
-                                ).decode("ascii")
+                                img.save(buf, format=image_format)
+                                image_url = (
+                                    f"data:image/{image_format.lower()};base64,"
+                                    + base64.b64encode(buf.getvalue()).decode("ascii")
+                                )
                 elif isinstance(image, np.ndarray):
                     import cv2
                     from PIL import Image
@@ -438,10 +443,11 @@ def _genai_client_process(
                     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                     img = Image.fromarray(image)
                     with io.BytesIO() as buf:
-                        img.save(buf, format="JPEG")
-                        image_url = "data:image/jpeg;base64," + base64.b64encode(
-                            buf.getvalue()
-                        ).decode("ascii")
+                        img.save(buf, format=image_format)
+                        image_url = (
+                            f"data:image/{image_format.lower()};base64,"
+                            + base64.b64encode(buf.getvalue()).decode("ascii")
+                        )
                 else:
                     raise TypeError(f"Not supported image type: {type(image)}")
 
@@ -457,7 +463,7 @@ def _genai_client_process(
                     if top_p is not None:
                         kwargs["top_p"] = top_p
 
-                if self._genai_client.backend == "mlx-vlm-server":
+                if self._genai_client.backend in ["mlx-vlm-server", "llama-cpp-server"]:
                     max_tokens_name = "max_tokens"
                 else:
                     max_tokens_name = "max_completion_tokens"
@@ -474,8 +480,11 @@ def _genai_client_process(
                         "vllm-server",
                         "sglang-server",
                         "mlx-vlm-server",
+                        "llama-cpp-server",
                     ):
-                        kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
+                        kwargs["extra_body"][
+                            "skip_special_tokens"
+                        ] = skip_special_tokens
                     else:
                         raise ValueError("Not supported")
 

From a10d7c5b50bb464a14ffdc9257bae67e791b4179 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Wed, 11 Feb 2026 12:37:35 +0800
Subject: [PATCH 13/23] Add Intel GPU config (#4992)

---
 .../genai/configs/paddleocr_vl_09b.py         | 27 ++++++++++++++-----
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/paddlex/inference/genai/configs/paddleocr_vl_09b.py b/paddlex/inference/genai/configs/paddleocr_vl_09b.py
index 5451bf6e25..9ecfde414a 100644
--- a/paddlex/inference/genai/configs/paddleocr_vl_09b.py
+++ b/paddlex/inference/genai/configs/paddleocr_vl_09b.py
@@ -45,13 +45,26 @@ def get_config(backend):
             cfg["max-concurrency"] = 2048
         return cfg
     elif backend == "vllm":
-        return {
-            "trust-remote-code": True,
-            "gpu-memory-utilization": 0.5,
-            "max-model-len": 16384,
-            "max-num-batched-tokens": 131072,
-            "api-server-count": 4,
-        }
+        require_deps("torch")
+
+        import torch
+
+        if torch.xpu.is_available():
+            return {
+                "trust-remote-code": True,
+                "max-num-batched-tokens": 16384,
+                "no-enable-prefix-caching": True,
+                "mm-processor-cache-gb": 0,
+                "enforce-eager": True,
+            }
+        else:
+            return {
+                "trust-remote-code": True,
+                "gpu-memory-utilization": 0.5,
+                "max-model-len": 16384,
+                "max-num-batched-tokens": 131072,
+                "api-server-count": 4,
+            }
     elif backend == "sglang":
         return {
             "trust-remote-code": True,

From 92a190e74d40ce2b2a8896ae0bff0385077a26d9 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Wed, 11 Feb 2026 12:31:08 +0800
Subject: [PATCH 14/23] Remove PaddleOCR-VL server page limit (#4991)

---
 .../sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml    | 4 ++++
 deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt             | 2 +-
 .../tutorials/ocr_pipelines/PP-DocTranslation.en.md           | 4 +---
 .../pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md | 4 +---
 paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml               | 4 ++++
 paddlex/configs/pipelines/PaddleOCR-VL.yaml                   | 4 ++++
 6 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
index ebf5804d29..900892f522 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
@@ -103,3 +103,7 @@ SubPipelines:
         module_name: image_unwarping
         model_name: UVDoc
         model_dir: null
+
+Serving:
+  extra:
+    max_num_input_imgs: null
diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt
index 0ea3a944b3..0d91a54c7d 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt
@@ -1 +1 @@
-0.2.0
+0.3.0
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md
index ae97468e26..5871215719 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md
@@ -1590,9 +1590,7 @@ The following is the API reference for basic Serving and examples of multilingua
 <tr>
 <td><code>file</code></td>
 <td><code>string</code></td>
-<td>The URL of an image file or PDF file accessible by the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.<br/>To remove the page limit, add the following configuration to the pipeline configuration file:<pre><code>Serving:
-  extra:
-    max_num_input_imgs: null</code></pre>
+<td>The URL of an image file or PDF file accessible by the server, or the Base64-encoded result of the content of the aforementioned file types.
 </td>
 <td>Yes</td>
 </tr>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md
index ac764c5ac4..e312cf733d 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md
@@ -1364,9 +1364,7 @@ Below are the API references for basic service-based deployment and examples of
 <tr>
 <td><code>file</code></td>
 <td><code>string</code></td>
-<td>The URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.<br/>To remove the page limit, add the following configuration to the production line configuration file:<pre> <code>Serving:
-  extra:
-    max_num_input_imgs: null</code></pre>
+<td>The URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types.
 </td>
 <td>Yes</td>
 </tr>
diff --git a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml
index 17aca18f1d..d0e197f8ea 100644
--- a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml
+++ b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml
@@ -79,3 +79,7 @@ SubPipelines:
         module_name: image_unwarping
         model_name: UVDoc
         model_dir: null
+
+Serving:
+  extra:
+    max_num_input_imgs: null
diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
index fdb52c7ede..37a4823cf0 100644
--- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml
+++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
@@ -103,3 +103,7 @@ SubPipelines:
         module_name: image_unwarping
         model_name: UVDoc
         model_dir: null
+
+Serving:
+  extra:
+    max_num_input_imgs: null

From 04476cb1808728f31460c86c60c5216e7077ec50 Mon Sep 17 00:00:00 2001
From: WILSON WEI <speedforcy@outlook.com>
Date: Thu, 12 Feb 2026 14:20:49 +0800
Subject: [PATCH 15/23] PaddleX Add ROCm 7.0 compatibility patches (#4990)
 (#4996)

* Use cache mount for genai docker (#4954)

* Fix HPS order bug (#4955)

* Fix transformers version (#4956)

* Fix HPS and remove scipy from required deps (#4957)

* [Cherry-Pick]bugfix: unexpected change of the constant IMAGE_LABELS (#4961)

* bugfix: unexpected change of the constant IMAGE_LABELS

* update doc

* [METAX] add ppdoclayv3 to METAX_GPU_WHITELIST (#4959)


* vllm 0.10.2 needs transformers 4.x (#4963)

* vllm 0.10.2 needs transformers 4.x

* update

* Bump version to 3.4.1

* Support setting PDF rendering scale factor (#4967)

* Fix/doc vlm async cancellation (#4969) (#4971)

* fix(doc_vlm): cancel pending futures on batch request failure

When a batch of requests is sent to the VLM service and one fails,
the remaining pending futures are now properly cancelled to avoid
wasting VLM service resources.

* chore: remove test file and documentation for async cancellation fix

* Fix typo (#4982)

* Revert "Fix typo (#4982)"

This reverts commit 0a936ba7a37ce783684d28be8545e17a5bab69c5.

* feat(ROCm): Add ROCm 7.0 compatibility patches

* version

---------

Co-authored-by: Lin Manhui <bob1998425@hotmail.com>
Co-authored-by: changdazhou <142379845+changdazhou@users.noreply.github.com>
Co-authored-by: SuperNova <91192235+handsomecoderyang@users.noreply.github.com>
Co-authored-by: duqiemng <1640472053@qq.com>
Co-authored-by: zhang-prog <69562787+zhang-prog@users.noreply.github.com>
Co-authored-by: Bobholamovic <mhlin425@whu.edu.cn>
Co-authored-by: Bvicii <98971614+scyyh11@users.noreply.github.com>
---
 .pre-commit-config.yaml                              |  1 +
 paddlex/inference/models/common/static_infer.py      | 12 +++++++++++-
 .../doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py   |  4 +++-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f480361043..ab2e0f7a07 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -72,3 +72,4 @@ repos:
         files: ^paddlex/.*\.py$
         additional_dependencies:
             - stdlib-list==0.10.0
+            - setuptools
diff --git a/paddlex/inference/models/common/static_infer.py b/paddlex/inference/models/common/static_infer.py
index 4e0556c829..6806a7718a 100644
--- a/paddlex/inference/models/common/static_infer.py
+++ b/paddlex/inference/models/common/static_infer.py
@@ -402,6 +402,10 @@ def _create(
                     config.enable_new_executor()
                 config.set_optimization_level(3)
                 config.delete_pass("matmul_add_act_fuse_pass")
+                # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes
+                if paddle.is_compiled_with_rocm():
+                    config.delete_pass("conv2d_add_act_fuse_pass")
+                    config.delete_pass("conv2d_add_fuse_pass")
             elif self._option.device_type == "npu":
                 config.enable_custom_device("npu", self._option.device_id)
                 if hasattr(config, "enable_new_ir"):
@@ -480,7 +484,9 @@ def _create(
                 if hasattr(config, "enable_new_executor"):
                     config.enable_new_executor()
                 config.set_optimization_level(3)
-
+                if paddle.is_compiled_with_rocm():
+                    config.delete_pass("conv2d_add_act_fuse_pass")
+                    config.delete_pass("conv2d_add_fuse_pass")
         config.enable_memory_optim()
         for del_p in self._option.delete_pass:
             config.delete_pass(del_p)
@@ -488,6 +494,10 @@ def _create(
         # Disable paddle inference logging
         if not DEBUG:
             config.disable_glog_info()
+        # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes
+        if paddle.is_compiled_with_rocm():
+            config.delete_pass("conv2d_add_act_fuse_pass")
+            config.delete_pass("conv2d_add_fuse_pass")
 
         predictor = paddle.inference.create_predictor(config)
 
diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py
index 93b61b6cc4..ab5a9cd87e 100644
--- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py
+++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py
@@ -65,7 +65,9 @@ class PaddleOCRVLForConditionalGeneration(Ernie4_5PretrainedModel):
     _tied_weights_keys = ["lm_head.weight"]
     config_class = PaddleOCRVLConfig
     _no_split_modules = ["Ernie4_5DecoderLayer", "SiglipEncoderLayer"]
-
+    # Keep the `visual` and `mlp_AR` modules in fp32 for ROCm stability (MIOpen's
+    # bf16 conv has bugs); this also improves precision for vision processing.
+    _keep_in_fp32_modules = ["visual", "mlp_AR"]
     base_model_prefix = ""
 
     def __init__(self, config):

From edb40225e105f874a1fd7f4466ed5a8ff4e80c5b Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Thu, 12 Feb 2026 17:24:03 +0800
Subject: [PATCH 16/23] [Feat] Support setting expiration for BOS URLs (#4993)

* Support setting expiration for BOS URLs

* Fix docs

* Fix bugs
---
 .github/workflows/deploy_docs.yml                         | 4 ++--
 .../sdk/pipelines/OCR/server/model_repo/ocr/1/model.py    | 4 ++++
 deploy/hps/sdk/pipelines/OCR/version.txt                  | 2 +-
 .../server/model_repo/chatocr-visual/1/model.py           | 4 ++++
 deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt     | 2 +-
 .../server/model_repo/chatocr-visual/1/model.py           | 4 ++++
 deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt     | 2 +-
 .../server/model_repo/doctrans-visual/1/model.py          | 8 +++++---
 deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt    | 2 +-
 .../server/model_repo/layout-parsing/1/model.py           | 5 +++++
 deploy/hps/sdk/pipelines/PP-StructureV3/version.txt       | 2 +-
 .../server/model_repo/layout-parsing/1/model.py           | 5 +++++
 deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt         | 2 +-
 .../server/model_repo/document-preprocessing/1/model.py   | 4 ++++
 deploy/hps/sdk/pipelines/doc_preprocessor/version.txt     | 2 +-
 .../server/model_repo/formula-recognition/1/model.py      | 4 ++++
 deploy/hps/sdk/pipelines/formula_recognition/version.txt  | 2 +-
 .../server/model_repo/layout-parsing/1/model.py           | 4 ++++
 deploy/hps/sdk/pipelines/layout_parsing/version.txt       | 2 +-
 .../server/model_repo/seal-recognition/1/model.py         | 4 ++++
 deploy/hps/sdk/pipelines/seal_recognition/version.txt     | 2 +-
 .../server/model_repo/table-recognition/1/model.py        | 4 ++++
 deploy/hps/sdk/pipelines/table_recognition/version.txt    | 2 +-
 .../server/model_repo/table-recognition/1/model.py        | 4 ++++
 deploy/hps/sdk/pipelines/table_recognition_v2/version.txt | 2 +-
 deploy/hps/server_env/paddlex-hps-server/pyproject.toml   | 2 +-
 .../src/paddlex_hps_server/app_common.py                  | 5 ++++-
 paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml           | 2 +-
 paddlex/configs/pipelines/PaddleOCR-VL.yaml               | 2 +-
 .../basic_serving/_pipeline_apps/_common/common.py        | 5 ++++-
 .../serving/basic_serving/_pipeline_apps/_common/ocr.py   | 4 ++++
 .../basic_serving/_pipeline_apps/doc_preprocessor.py      | 1 +
 .../basic_serving/_pipeline_apps/formula_recognition.py   | 1 +
 .../basic_serving/_pipeline_apps/layout_parsing.py        | 2 +-
 .../inference/serving/basic_serving/_pipeline_apps/ocr.py | 1 +
 .../serving/basic_serving/_pipeline_apps/paddleocr_vl.py  | 2 ++
 .../basic_serving/_pipeline_apps/pp_chatocrv3_doc.py      | 1 +
 .../basic_serving/_pipeline_apps/pp_chatocrv4_doc.py      | 1 +
 .../basic_serving/_pipeline_apps/pp_doctranslation.py     | 2 ++
 .../basic_serving/_pipeline_apps/pp_structurev3.py        | 2 ++
 .../basic_serving/_pipeline_apps/seal_recognition.py      | 1 +
 .../basic_serving/_pipeline_apps/table_recognition.py     | 1 +
 .../basic_serving/_pipeline_apps/table_recognition_v2.py  | 1 +
 paddlex/inference/serving/infra/storage.py                | 8 +++++---
 paddlex/inference/serving/infra/utils.py                  | 3 ++-
 45 files changed, 102 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml
index 209027caa4..e939baadcd 100644
--- a/.github/workflows/deploy_docs.yml
+++ b/.github/workflows/deploy_docs.yml
@@ -2,7 +2,7 @@ name: Develop Docs
 on:
   push:
     branches: #设置更新哪个分支会更新站点
-      - release/3.3
+      - release/3.4
 permissions:
   contents: write
 jobs:
@@ -27,5 +27,5 @@ jobs:
       - run: pip install mike mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2 mkdocs-git-authors-plugin mkdocs-static-i18n mkdocs-minify-plugin 
       - run: |
           git fetch origin gh-pages --depth=1
-          mike deploy --push --update-aliases 3.3 latest
+          mike deploy --push --update-aliases 3.4 latest
           mike set-default --push latest
diff --git a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py
index 5601c51674..1b094662e8 100644
--- a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py
+++ b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py
@@ -48,6 +48,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -59,6 +60,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -274,6 +277,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/OCR/version.txt b/deploy/hps/sdk/pipelines/OCR/version.txt
index 3a4036fb45..53a75d6735 100644
--- a/deploy/hps/sdk/pipelines/OCR/version.txt
+++ b/deploy/hps/sdk/pipelines/OCR/version.txt
@@ -1 +1 @@
-0.2.5
+0.2.6
diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py
index 7b2568a7a1..40f7684f15 100644
--- a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py
+++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -142,6 +145,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt
index d15723fbe8..1c09c74e22 100644
--- a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt
+++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt
@@ -1 +1 @@
-0.3.2
+0.3.3
diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py
index de0a16bdee..c563ca0823 100644
--- a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py
+++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -143,6 +146,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt
index 2b7c5ae018..17b2ccd9bf 100644
--- a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt
+++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt
@@ -1 +1 @@
-0.4.2
+0.4.3
diff --git a/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py
index 361ce50332..7243078c10 100644
--- a/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py
+++ b/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py
@@ -30,12 +30,10 @@
 class TritonPythonModel(BaseTritonPythonModel):
     def initialize(self, args):
         super().initialize(args)
-
-        self.pipeline.inintial_visual_predictor(self.pipeline.config)
-
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -47,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -151,6 +151,7 @@ def run(self, input, log_id):
                 filename_template=f"markdown_{i}/{{key}}",
                 file_storage=self.context["file_storage"],
                 return_urls=self.context["return_img_urls"],
+                url_expires_in=self.context["url_expires_in"],
                 max_img_size=self.context["max_output_img_size"],
             )
             md_flags = md_data["page_continuation_flags"]
@@ -165,6 +166,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt b/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt
index d917d3e26a..b1e80bb248 100644
--- a/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt
+++ b/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt
@@ -1 +1 @@
-0.1.2
+0.1.3
diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py
index 57dde5d42f..5183d7c62f 100644
--- a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py
+++ b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py
@@ -47,6 +47,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -58,6 +59,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -327,6 +330,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu
                 filename_template=f"markdown_{i}/{{key}}",
                 file_storage=self.context["file_storage"],
                 return_urls=self.context["return_img_urls"],
+                url_expires_in=self.context["url_expires_in"],
                 max_img_size=self.context["max_output_img_size"],
             )
             md_flags = md_data["page_continuation_flags"]
@@ -341,6 +345,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt b/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt
index c2c0004f0e..449d7e73a9 100644
--- a/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt
+++ b/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt
@@ -1 +1 @@
-0.3.5
+0.3.6
diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py
index 8047ba4249..6192a312d0 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py
@@ -47,6 +47,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -58,6 +59,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -305,6 +308,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu
                 filename_template=f"markdown_{i}/{{key}}",
                 file_storage=self.context["file_storage"],
                 return_urls=self.context["return_img_urls"],
+                url_expires_in=self.context["url_expires_in"],
                 max_img_size=self.context["max_output_img_size"],
             )
             if visualize_enabled:
@@ -318,6 +322,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt
index 0d91a54c7d..9e11b32fca 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt
@@ -1 +1 @@
-0.3.0
+0.3.1
diff --git a/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py b/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py
index 629dd34e1a..390ac1a1d1 100644
--- a/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py
+++ b/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -131,6 +134,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt b/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt
index 0c62199f16..ee1372d33a 100644
--- a/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt
+++ b/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt
@@ -1 +1 @@
-0.2.1
+0.2.2
diff --git a/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py b/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py
index 7af06c405a..0ccc35d1de 100644
--- a/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py
+++ b/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -128,6 +131,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/formula_recognition/version.txt b/deploy/hps/sdk/pipelines/formula_recognition/version.txt
index 0c62199f16..ee1372d33a 100644
--- a/deploy/hps/sdk/pipelines/formula_recognition/version.txt
+++ b/deploy/hps/sdk/pipelines/formula_recognition/version.txt
@@ -1 +1 @@
-0.2.1
+0.2.2
diff --git a/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py
index b4ba08c961..e96a2f6f55 100644
--- a/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py
+++ b/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -143,6 +146,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/layout_parsing/version.txt b/deploy/hps/sdk/pipelines/layout_parsing/version.txt
index 9e11b32fca..d15723fbe8 100644
--- a/deploy/hps/sdk/pipelines/layout_parsing/version.txt
+++ b/deploy/hps/sdk/pipelines/layout_parsing/version.txt
@@ -1 +1 @@
-0.3.1
+0.3.2
diff --git a/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py b/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py
index 4885f6a68c..ee41cd35b3 100644
--- a/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py
+++ b/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -134,6 +137,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/seal_recognition/version.txt b/deploy/hps/sdk/pipelines/seal_recognition/version.txt
index ee1372d33a..7179039691 100644
--- a/deploy/hps/sdk/pipelines/seal_recognition/version.txt
+++ b/deploy/hps/sdk/pipelines/seal_recognition/version.txt
@@ -1 +1 @@
-0.2.2
+0.2.3
diff --git a/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py b/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py
index c1624046bb..baaafe4d4e 100644
--- a/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py
+++ b/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -132,6 +135,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/table_recognition/version.txt b/deploy/hps/sdk/pipelines/table_recognition/version.txt
index 267577d47e..2b7c5ae018 100644
--- a/deploy/hps/sdk/pipelines/table_recognition/version.txt
+++ b/deploy/hps/sdk/pipelines/table_recognition/version.txt
@@ -1 +1 @@
-0.4.1
+0.4.2
diff --git a/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py b/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py
index 508981080b..552bf3b5ef 100644
--- a/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py
+++ b/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py
@@ -33,6 +33,7 @@ def initialize(self, args):
         self.context = {}
         self.context["file_storage"] = None
         self.context["return_img_urls"] = False
+        self.context["url_expires_in"] = -1
         self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS
         self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE
         if self.app_config.extra:
@@ -44,6 +45,8 @@ def initialize(self, args):
                 self.context["return_img_urls"] = self.app_config.extra[
                     "return_img_urls"
                 ]
+            if "url_expires_in" in self.app_config.extra:
+                self.context["url_expires_in"] = self.app_config.extra["url_expires_in"]
             if "max_num_input_imgs" in self.app_config.extra:
                 self.context["max_num_input_imgs"] = self.app_config.extra[
                     "max_num_input_imgs"
@@ -137,6 +140,7 @@ def run(self, input, log_id):
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=self.context["file_storage"],
                     return_urls=self.context["return_img_urls"],
+                    url_expires_in=self.context["url_expires_in"],
                     max_img_size=self.context["max_output_img_size"],
                 )
             else:
diff --git a/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt b/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt
index 267577d47e..2b7c5ae018 100644
--- a/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt
+++ b/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt
@@ -1 +1 @@
-0.4.1
+0.4.2
diff --git a/deploy/hps/server_env/paddlex-hps-server/pyproject.toml b/deploy/hps/server_env/paddlex-hps-server/pyproject.toml
index 5d85392841..f6dcad64a4 100644
--- a/deploy/hps/server_env/paddlex-hps-server/pyproject.toml
+++ b/deploy/hps/server_env/paddlex-hps-server/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "paddlex-hps-server"
-version = "0.4.0"
+version = "0.5.0"
 # `paddlex` is not included here
 dependencies = [
     "colorlog >= 6.9",
diff --git a/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py b/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py
index 14f699c3f1..6250706821 100644
--- a/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py
+++ b/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py
@@ -46,6 +46,7 @@ def postprocess_image(
     *,
     file_storage: Optional[Storage] = None,
     return_url: bool = False,
+    url_expires_in: int = -1,
     max_img_size: Optional[Tuple[int, int]] = None,
 ) -> str:
     if return_url:
@@ -71,7 +72,7 @@ def postprocess_image(
         file_storage.set(key, img_bytes)
         if return_url:
             assert isinstance(file_storage, SupportsGetURL)
-            return file_storage.get_url(key)
+            return file_storage.get_url(key, expires_in=url_expires_in)
     return utils.base64_encode(img_bytes)
 
 
@@ -81,6 +82,7 @@ def postprocess_images(
     filename_template: str = "{key}.jpg",
     file_storage: Optional[Storage] = None,
     return_urls: bool = False,
+    url_expires_in: int = -1,
     max_img_size: Optional[Tuple[int, int]] = None,
 ) -> Dict[str, str]:
     output_images: Dict[str, str] = {}
@@ -95,6 +97,7 @@ def postprocess_images(
             filename=filename_template.format(key=key),
             file_storage=file_storage,
             return_url=return_urls,
+            url_expires_in=url_expires_in,
             max_img_size=max_img_size,
         )
     return output_images
diff --git a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml
index d0e197f8ea..e49cef34e3 100644
--- a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml
+++ b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml
@@ -59,7 +59,7 @@ SubModules:
     module_name: vl_recognition
     model_name: PaddleOCR-VL-1.5-0.9B
     model_dir: null
-    batch_size: 4096
+    batch_size: -1
     genai_config:
       backend: native
 
diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
index 37a4823cf0..900892f522 100644
--- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml
+++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
@@ -83,7 +83,7 @@ SubModules:
     module_name: vl_recognition
     model_name: PaddleOCR-VL-0.9B
     model_dir: null
-    batch_size: 4096
+    batch_size: -1
     genai_config:
       backend: native
 
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py
index 82b8bf580a..87347fb034 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py
@@ -50,6 +50,7 @@ def postprocess_image(
     *,
     file_storage: Optional[Storage] = None,
     return_url: bool = False,
+    url_expires_in: int = -1,
     max_img_size: Optional[Tuple[int, int]] = None,
 ) -> str:
     if return_url:
@@ -75,7 +76,7 @@ def postprocess_image(
         file_storage.set(key, img_bytes)
         if return_url:
             assert isinstance(file_storage, SupportsGetURL)
-            return file_storage.get_url(key)
+            return file_storage.get_url(key, expires_in=url_expires_in)
     return serving_utils.base64_encode(img_bytes)
 
 
@@ -85,6 +86,7 @@ def postprocess_images(
     filename_template: str = "{key}.jpg",
     file_storage: Optional[Storage] = None,
     return_urls: bool = False,
+    url_expires_in: int = -1,
     max_img_size: Optional[Tuple[int, int]] = None,
 ) -> Dict[str, str]:
     output_images: Dict[str, str] = {}
@@ -99,6 +101,7 @@ def postprocess_images(
             filename=filename_template.format(key=key),
             file_storage=file_storage,
             return_url=return_urls,
+            url_expires_in=url_expires_in,
             max_img_size=max_img_size,
         )
     return output_images
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py
index 1d46eadb20..73473800d2 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py
@@ -31,6 +31,7 @@
 
 DEFAULT_MAX_NUM_INPUT_IMGS: Final[int] = 10
 DEFAULT_MAX_OUTPUT_IMG_SIZE: Final[Tuple[int, int]] = (2000, 2000)
+DEFAULT_URL_EXPIRES_IN: Final[int] = -1
 
 
 def update_app_context(app_context: AppContext) -> None:
@@ -49,6 +50,9 @@ def update_app_context(app_context: AppContext) -> None:
             raise TypeError(
                 f"`{type(file_storage).__name__}` does not support getting URLs."
             )
+    app_context.extra["url_expires_in"] = extra_cfg.get(
+        "url_expires_in", DEFAULT_URL_EXPIRES_IN
+    )
     app_context.extra["max_num_input_imgs"] = extra_cfg.get(
         "max_num_input_imgs", DEFAULT_MAX_NUM_INPUT_IMGS
     )
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py
index e9e837a50f..2bd38780cb 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py
@@ -80,6 +80,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py
index 786c426d91..8cc3343641 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py
@@ -75,6 +75,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py
index a778632cff..e35633e969 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py
@@ -91,7 +91,8 @@ async def _infer(
                     log_id,
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py
index 4020a4f48b..72e3382ed1 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py
@@ -81,6 +81,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py
index 1eaa219d86..f2f0a46450 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py
@@ -111,6 +111,7 @@ async def _infer(
                 filename_template=f"markdown_{i}/{{key}}",
                 file_storage=ctx.extra["file_storage"],
                 return_urls=ctx.extra["return_img_urls"],
+                url_expires_in=ctx.extra["url_expires_in"],
                 max_img_size=ctx.extra["max_output_img_size"],
             )
             if visualize_enabled:
@@ -125,6 +126,7 @@ async def _infer(
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py
index 8e74699f14..4e449bca0e 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py
@@ -92,6 +92,7 @@ async def _analyze_images(
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py
index ced0e6bb88..071a6a2f3d 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py
@@ -93,6 +93,7 @@ async def _analyze_images(
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py
index f4a6c88a13..232dae19aa 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py
@@ -100,6 +100,7 @@ async def _analyze_images(
                 filename_template=f"markdown_{i}/{{key}}",
                 file_storage=ctx.extra["file_storage"],
                 return_urls=ctx.extra["return_img_urls"],
+                url_expires_in=ctx.extra["url_expires_in"],
                 max_img_size=ctx.extra["max_output_img_size"],
             )
             md_flags = md_data["page_continuation_flags"]
@@ -115,6 +116,7 @@ async def _analyze_images(
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py
index 79e1e17ee6..b68ab3369a 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py
@@ -103,6 +103,7 @@ async def _infer(
                 filename_template=f"markdown_{i}/{{key}}",
                 file_storage=ctx.extra["file_storage"],
                 return_urls=ctx.extra["return_img_urls"],
+                url_expires_in=ctx.extra["url_expires_in"],
                 max_img_size=ctx.extra["max_output_img_size"],
             )
             md_flags = md_data["page_continuation_flags"]
@@ -118,6 +119,7 @@ async def _infer(
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py
index 012f94abb8..1c85b56d13 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py
@@ -81,6 +81,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py
index eecd45be99..9ca46ade4c 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py
@@ -79,6 +79,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py
index 288a415fca..8adef8afd7 100644
--- a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py
+++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py
@@ -84,6 +84,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]:
                     filename_template=f"{{key}}_{i}.jpg",
                     file_storage=ctx.extra["file_storage"],
                     return_urls=ctx.extra["return_img_urls"],
+                    url_expires_in=ctx.extra["url_expires_in"],
                     max_img_size=ctx.extra["max_output_img_size"],
                 )
             else:
diff --git a/paddlex/inference/serving/infra/storage.py b/paddlex/inference/serving/infra/storage.py
index 7a0cbbf929..ddda1b3838 100644
--- a/paddlex/inference/serving/infra/storage.py
+++ b/paddlex/inference/serving/infra/storage.py
@@ -65,7 +65,7 @@ class BOSConfig(BaseModel):
 
 @runtime_checkable
 class SupportsGetURL(Protocol):
-    def get_url(self, key: str) -> str: ...
+    def get_url(self, key: str, expires_in: int = -1) -> str: ...
 
 
 class Storage(metaclass=abc.ABCMeta):
@@ -156,10 +156,12 @@ def delete(self, key: str) -> None:
         key = self._get_full_key(key)
         self._client.delete_object(bucket_name=self._bucket_name, key=key)
 
-    def get_url(self, key: str) -> str:
+    def get_url(self, key: str, expires_in: int = -1) -> str:
         key = self._get_full_key(key)
         return self._client.generate_pre_signed_url(
-            self._bucket_name, key, expiration_in_seconds=-1
+            self._bucket_name,
+            key,
+            expiration_in_seconds=expires_in,
         ).decode("ascii")
 
     def _get_full_key(self, key: str) -> str:
diff --git a/paddlex/inference/serving/infra/utils.py b/paddlex/inference/serving/infra/utils.py
index d9e971261e..7c67567957 100644
--- a/paddlex/inference/serving/infra/utils.py
+++ b/paddlex/inference/serving/infra/utils.py
@@ -30,7 +30,8 @@
 from typing_extensions import Literal, ParamSpec, TypeAlias, assert_never
 
 from ....utils.deps import function_requires_deps, is_dep_available
-from ...utils.pdfium_lock import PDF_RENDER_SCALE, pdfium_lock
+from ....utils.flags import PDF_RENDER_SCALE
+from ...utils.pdfium_lock import pdfium_lock
 from .models import ImageInfo, PDFInfo, PDFPageInfo
 
 if is_dep_available("aiohttp"):

From 69e8d75bd0504d3371b491cfdca1076c711b607b Mon Sep 17 00:00:00 2001
From: changdazhou <142379845+changdazhou@users.noreply.github.com>
Date: Fri, 13 Feb 2026 14:22:34 +0800
Subject: [PATCH 17/23] add \n for seal rec && bugfix for text in table &&
 delete_pass by model_name (#4999)

---
 paddlex/inference/models/common/static_infer.py  |  4 +++-
 .../models/layout_analysis/processors.py         | 13 +++++++++----
 .../inference/pipelines/paddleocr_vl/result.py   |  2 +-
 .../inference/pipelines/paddleocr_vl/uilts.py    | 16 +++++++++-------
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/paddlex/inference/models/common/static_infer.py b/paddlex/inference/models/common/static_infer.py
index 6806a7718a..3b4de0518e 100644
--- a/paddlex/inference/models/common/static_infer.py
+++ b/paddlex/inference/models/common/static_infer.py
@@ -401,7 +401,9 @@ def _create(
                 if hasattr(config, "enable_new_executor"):
                     config.enable_new_executor()
                 config.set_optimization_level(3)
-                config.delete_pass("matmul_add_act_fuse_pass")
+                # TODO(changdazhou): use a black list instead
+                if self._model_name == "PP-DocLayoutV3":
+                    config.delete_pass("matmul_add_act_fuse_pass")
                 # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes
                 if paddle.is_compiled_with_rocm():
                     config.delete_pass("conv2d_add_act_fuse_pass")
diff --git a/paddlex/inference/models/layout_analysis/processors.py b/paddlex/inference/models/layout_analysis/processors.py
index d1672a2000..b7ebaf0992 100644
--- a/paddlex/inference/models/layout_analysis/processors.py
+++ b/paddlex/inference/models/layout_analysis/processors.py
@@ -595,10 +595,15 @@ def filter_boxes(
                         continue
                 box_area_i = calculate_bbox_area(boxes[i]["coordinate"])
                 box_area_j = calculate_bbox_area(boxes[j]["coordinate"])
-                if (
-                    boxes[i]["label"] == "image" or boxes[j]["label"] == "image"
-                ) and boxes[i]["label"] != boxes[j]["label"]:
-                    continue
+                labels = {boxes[i]["label"], boxes[j]["label"]}
+                if labels & {"image", "table", "seal", "chart"} and len(labels) > 1:
+                    if "table" not in labels or labels <= {
+                        "table",
+                        "image",
+                        "seal",
+                        "chart",
+                    }:
+                        continue
                 if box_area_i >= box_area_j:
                     dropped_indexes.add(j)
                 else:
diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py
index f50b18a7c2..dc7dc92c98 100644
--- a/paddlex/inference/pipelines/paddleocr_vl/result.py
+++ b/paddlex/inference/pipelines/paddleocr_vl/result.py
@@ -468,7 +468,7 @@ def _to_json(self, *args, **kwargs) -> dict[str, str]:
                     original_image_width=original_image_width,
                     show_ocr_content=True,
                 ),
-                remove_symbol=use_seal_recognition,
+                remove_symbol=not use_seal_recognition,
             )
 
             if self["model_settings"].get("use_chart_recognition", False):
diff --git a/paddlex/inference/pipelines/paddleocr_vl/uilts.py b/paddlex/inference/pipelines/paddleocr_vl/uilts.py
index 8a4009a473..c753ec9d32 100644
--- a/paddlex/inference/pipelines/paddleocr_vl/uilts.py
+++ b/paddlex/inference/pipelines/paddleocr_vl/uilts.py
@@ -123,13 +123,15 @@ def filter_overlap_boxes(
                         continue
                 box_area_i = calculate_bbox_area(boxes[i]["coordinate"])
                 box_area_j = calculate_bbox_area(boxes[j]["coordinate"])
-                if {boxes[i]["label"], boxes[j]["label"]} & {
-                    "image",
-                    "table",
-                    "seal",
-                    "chart",
-                } and boxes[i]["label"] != boxes[j]["label"]:
-                    continue
+                labels = {boxes[i]["label"], boxes[j]["label"]}
+                if labels & {"image", "table", "seal", "chart"} and len(labels) > 1:
+                    if "table" not in labels or labels <= {
+                        "table",
+                        "image",
+                        "seal",
+                        "chart",
+                    }:
+                        continue
                 if box_area_i >= box_area_j:
                     dropped_indexes.add(j)
                 else:

From f95d8734bd6d97eb2836f96866035ff8433a5a22 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Fri, 13 Feb 2026 14:27:19 +0800
Subject: [PATCH 18/23] Fix auto batch size for PaddleOCR-VL-1.5-0.9B (#5003)

---
 paddlex/inference/models/doc_vlm/predictor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py
index 43cc173a0b..0f97ebde46 100644
--- a/paddlex/inference/models/doc_vlm/predictor.py
+++ b/paddlex/inference/models/doc_vlm/predictor.py
@@ -167,7 +167,7 @@ def _build(self, **kwargs):
         return model, processor
 
     def _determine_batch_size(self):
-        if self._model_name == "PaddleOCR-VL-0.9B":
+        if self._model_name in ("PaddleOCR-VL-0.9B", "PaddleOCR-VL-1.5-0.9B"):
             batch_size = 1
             if not self._use_local_model:
                 batch_size = 4096

From c88d4c1f1c064edcd11da84c3ff38436a5682512 Mon Sep 17 00:00:00 2001
From: Bobholamovic <mhlin425@whu.edu.cn>
Date: Fri, 13 Feb 2026 06:32:17 +0000
Subject: [PATCH 19/23] Bump version to 3.4.2

---
 paddlex/.version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlex/.version b/paddlex/.version
index 47b322c971..4d9d11cf50 100644
--- a/paddlex/.version
+++ b/paddlex/.version
@@ -1 +1 @@
-3.4.1
+3.4.2

From e92d21f418d4ed0507fd53402dc1cb1f862c3f85 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Fri, 13 Feb 2026 14:53:23 +0800
Subject: [PATCH 20/23] Update HPS frozen deps (#5004)

---
 deploy/hps/server_env/requirements/cpu.txt     | 5 ++++-
 deploy/hps/server_env/requirements/gpu.txt     | 5 ++++-
 deploy/hps/server_env/scripts/remove_images.sh | 2 +-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt
index c24a8fc5af..a3acfe3055 100644
--- a/deploy/hps/server_env/requirements/cpu.txt
+++ b/deploy/hps/server_env/requirements/cpu.txt
@@ -161,7 +161,9 @@ langchain-core==0.2.43
 langchain-openai==0.1.25
     # via paddlex (../../../setup.py)
 langchain-text-splitters==0.2.4
-    # via langchain
+    # via
+    #   langchain
+    #   paddlex (../../../setup.py)
 langsmith==0.1.147
     # via
     #   langchain
@@ -345,6 +347,7 @@ scikit-learn==1.6.1
     # via paddlex (../../../setup.py)
 scipy==1.15.2
     # via
+    #   paddlex (../../../setup.py)
     #   scikit-image
     #   scikit-learn
 sentencepiece==0.2.1
diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt
index caa9a8fbc0..e43d89b38f 100644
--- a/deploy/hps/server_env/requirements/gpu.txt
+++ b/deploy/hps/server_env/requirements/gpu.txt
@@ -161,7 +161,9 @@ langchain-core==0.2.43
 langchain-openai==0.1.25
     # via paddlex (../../../setup.py)
 langchain-text-splitters==0.2.4
-    # via langchain
+    # via
+    #   langchain
+    #   paddlex (../../../setup.py)
 langsmith==0.1.147
     # via
     #   langchain
@@ -345,6 +347,7 @@ scikit-learn==1.6.1
     # via paddlex (../../../setup.py)
 scipy==1.15.2
     # via
+    #   paddlex (../../../setup.py)
     #   scikit-image
     #   scikit-learn
 sentencepiece==0.2.1
diff --git a/deploy/hps/server_env/scripts/remove_images.sh b/deploy/hps/server_env/scripts/remove_images.sh
index 2926504e3d..2b89b756f3 100755
--- a/deploy/hps/server_env/scripts/remove_images.sh
+++ b/deploy/hps/server_env/scripts/remove_images.sh
@@ -6,6 +6,6 @@ for device_type in 'gpu' 'cpu'; do
     version="$(cat "${device_type}_version.txt")"
     docker rmi \
         "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex${paddlex_version%.*}-${device_type}" \
-        "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${version}-${device_type}" \
+        "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${version}-paddlex${paddlex_version}-${device_type}" \
         "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}"
 done

From 41b695b2effe897b7aaaad7cbaf869b5939aaa30 Mon Sep 17 00:00:00 2001
From: zhang-prog <69562787+zhang-prog@users.noreply.github.com>
Date: Fri, 13 Feb 2026 17:32:02 +0800
Subject: [PATCH 21/23] update vlm batch_size (#5005)

---
 paddlex/inference/models/doc_vlm/predictor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py
index 0f97ebde46..3f983a8d3a 100644
--- a/paddlex/inference/models/doc_vlm/predictor.py
+++ b/paddlex/inference/models/doc_vlm/predictor.py
@@ -170,7 +170,7 @@ def _determine_batch_size(self):
         if self._model_name in ("PaddleOCR-VL-0.9B", "PaddleOCR-VL-1.5-0.9B"):
             batch_size = 1
             if not self._use_local_model:
-                batch_size = 4096
+                batch_size = 8192
             logging.debug(
                 f"The batch size of {self._model_name} is determined to be {batch_size}."
             )

From 901393a19aef820670758a0de302b00855b0e7da Mon Sep 17 00:00:00 2001
From: Netra Prasad Neupane <39429615+np-n@users.noreply.github.com>
Date: Wed, 25 Feb 2026 13:12:10 +0545
Subject: [PATCH 22/23] support modular langchain as well

---
 .../pipelines/components/retriever/base.py       | 16 +++++++++++++---
 setup.py                                         |  1 +
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py
index a348836836..b4f067951e 100644
--- a/paddlex/inference/pipelines/components/retriever/base.py
+++ b/paddlex/inference/pipelines/components/retriever/base.py
@@ -21,13 +21,23 @@
 from .....utils.deps import class_requires_deps, is_dep_available
 from .....utils.subclass_register import AutoRegisterABCMetaClass
 
-if is_dep_available("langchain"):
-    from langchain.docstore.document import Document
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
+
 if is_dep_available("langchain-community"):
     from langchain_community import vectorstores
     from langchain_community.vectorstores import FAISS
 
+# Document import compatibility
+try:
+    from langchain_core.documents import Document
+except ImportError:
+    from langchain.docstore.document import Document
+
+# Text splitter compatibility
+try:
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+except ImportError:
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+
 
 @class_requires_deps("langchain", "langchain-community")
 class BaseRetriever(ABC, metaclass=AutoRegisterABCMetaClass):
diff --git a/setup.py b/setup.py
index b02f516f2a..c5018ace36 100644
--- a/setup.py
+++ b/setup.py
@@ -44,6 +44,7 @@
     "langchain-community": ">= 0.2, < 1.0",
     "langchain-core": "",
     "langchain-openai": ">= 0.1, < 1.0",
+    "langchain_text_splitters": "",
     "lxml": "",
     "matplotlib": "",
     "modelscope": ">=1.28.0",

From b39f4308ba781fed72a0cbcd214429a5d9dd528b Mon Sep 17 00:00:00 2001
From: Netra Prasad Neupane <39429615+np-n@users.noreply.github.com>
Date: Wed, 25 Feb 2026 13:14:41 +0545
Subject: [PATCH 23/23] Remove upper version bounds on langchain dependencies

---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index c5018ace36..a3a43d9f88 100644
--- a/setup.py
+++ b/setup.py
@@ -40,10 +40,10 @@
     "jieba": "",
     "Jinja2": "",
     "joblib": "",
-    "langchain": ">= 0.2, < 1.0",
-    "langchain-community": ">= 0.2, < 1.0",
+    "langchain": ">= 0.2",
+    "langchain-community": ">= 0.2",
     "langchain-core": "",
-    "langchain-openai": ">= 0.1, < 1.0",
+    "langchain-openai": ">= 0.1",
     "langchain_text_splitters": "",
     "lxml": "",
     "matplotlib": "",