From 5d19fca6e269c847a69007046bf8248396a41f5b Mon Sep 17 00:00:00 2001 From: zhangyf Date: Thu, 29 Jan 2026 17:19:09 +0800 Subject: [PATCH 01/37] =?UTF-8?q?feat(storage):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E9=98=BF=E9=87=8C=E4=BA=91OSS=E5=AD=98=E5=82=A8=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在导出列表中增加OSSConfig和OSS类 - 新增OSSConfig配置模型,包含endpoint、access_key等字段 - 扩展FileStorageConfig联合类型以支持OSS配置 - 实现OSS存储类,继承Storage基类 - 添加OSS初始化方法,创建认证和Bucket对象 - 实现get、set、delete、get_url等存储操作方法 - 添加私有_get_full_key方法处理键前缀逻辑 - 在create_storage工厂函数中添加OSS实例创建分支 - 使用装饰器class_requires_deps确保oss2依赖可用 --- paddlex/inference/serving/infra/storage.py | 56 +++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/paddlex/inference/serving/infra/storage.py b/paddlex/inference/serving/infra/storage.py index 7a0cbbf929..9a4bced7b5 100644 --- a/paddlex/inference/serving/infra/storage.py +++ b/paddlex/inference/serving/infra/storage.py @@ -26,12 +26,14 @@ "InMemoryStorageConfig", "FileSystemStorageConfig", "BOSConfig", + "OSSConfig", "FileStorageConfig", "SupportsGetURL", "Storage", "InMemoryStorage", "FileSystemStorage", "BOS", + "OSS", "create_storage", ] @@ -57,8 +59,18 @@ class BOSConfig(BaseModel): type: Literal["bos"] = "bos" +class OSSConfig(BaseModel): + endpoint: str + access_key_id: SecretStr + access_key_secret: SecretStr + bucket_name: str + key_prefix: Optional[str] = None + + type: Literal["oss"] = "oss" + + FileStorageConfig = Annotated[ - Union[InMemoryStorageConfig, FileSystemStorageConfig, BOSConfig], + Union[InMemoryStorageConfig, FileSystemStorageConfig, BOSConfig, OSSConfig], Discriminator("type"), ] @@ -168,6 +180,46 @@ def _get_full_key(self, key: str) -> str: return key +@class_requires_deps("oss2") +class OSS(Storage): + def __init__(self, config: OSSConfig) -> None: + import oss2 + + super().__init__() + + # 创建认证对象 + auth = oss2.Auth( + 
config.access_key_id.get_secret_value(), + config.access_key_secret.get_secret_value() + ) + + # 创建Bucket对象 + self._bucket = oss2.Bucket(auth, config.endpoint, config.bucket_name) + self._key_prefix = config.key_prefix + + def get(self, key: str) -> bytes: + key = self._get_full_key(key) + return self._bucket.get_object(key).read() + + def set(self, key: str, value: bytes) -> None: + key = self._get_full_key(key) + self._bucket.put_object(key, value) + + def delete(self, key: str) -> None: + key = self._get_full_key(key) + self._bucket.delete_object(key) + + def get_url(self, key: str) -> str: + # 生成签名URL,有效期3600秒(1小时) + key = self._get_full_key(key) + return self._bucket.sign_url('GET', key, 3600) + + def _get_full_key(self, key: str) -> str: + if self._key_prefix: + return f"{self._key_prefix}/{key}" + return key + + def create_storage(dic: Dict[str, Any], /) -> Storage: config = TypeAdapter(FileStorageConfig).validate_python(dic) if config.type == "memory": @@ -176,5 +228,7 @@ def create_storage(dic: Dict[str, Any], /) -> Storage: return FileSystemStorage(config) elif config.type == "bos": return BOS(config) + elif config.type == "oss": + return OSS(config) else: assert_never(config) From f0ccfd4cf8f095c234844f5a5596d0e6d1160397 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 29 Jan 2026 20:47:39 +0800 Subject: [PATCH 02/37] Use cache mount for genai docker (#4954) --- deploy/genai_vllm_server_docker/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile index 84aa4206fd..a7792851c7 100644 --- a/deploy/genai_vllm_server_docker/Dockerfile +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -4,17 +4,19 @@ RUN apt-get update \ && apt-get install -y libgl1 \ && rm -rf /var/lib/apt/lists/* -ENV PIP_NO_CACHE_DIR=0 ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 -RUN python -m pip install torch==2.8.0 +RUN 
--mount=type=cache,target=/root/.cache/pip \ + python -m pip install torch==2.8.0 ARG PADDLEX_VERSION=">=3.3.6,<3.4" -RUN python -m pip install "paddlex${PADDLEX_VERSION}" +RUN --mount=type=cache,target=/root/.cache/pip \ + python -m pip install "paddlex${PADDLEX_VERSION}" ARG BUILD_FOR_SM120=false -RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ +RUN --mount=type=cache,target=/root/.cache/pip \ + if [ "${BUILD_FOR_SM120}" = 'true' ]; then \ python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.11/flash_attn-2.8.3%2Bcu128torch2.8-cp310-cp310-linux_x86_64.whl; \ else \ python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ From f34bfc265c37ccd70712c37c17d9e24f889ab4e2 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 29 Jan 2026 21:32:17 +0800 Subject: [PATCH 03/37] Fix HPS order bug (#4955) --- .../pipelines/OCR/server/model_repo/ocr/1/model.py | 14 +++++++++----- .../server/model_repo/layout-parsing/1/model.py | 14 +++++++++----- .../server/model_repo/layout-parsing/1/model.py | 14 +++++++++----- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py index 7a99bf9829..5601c51674 100644 --- a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py +++ b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py @@ -105,12 +105,16 @@ def run_batch(self, inputs, log_ids, batch_id): ret = executor.map(self._preprocess, inputs_g, log_ids_g) ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], [] + ind_input_id_lst, ind_log_id_lst, ind_input_lst = [], [], [] for i, item in enumerate(ret): if isinstance(item, tuple): assert len(item) == 3, len(item) ind_img_lsts.append(item[0]) ind_data_info_lst.append(item[1]) 
ind_visualize_enabled_lst.append(item[2]) + ind_input_id_lst.append(input_ids_g[i]) + ind_log_id_lst.append(log_ids_g[i]) + ind_input_lst.append(inputs_g[i]) else: input_id = input_ids_g[i] result_or_output_dic[input_id] = item @@ -146,19 +150,19 @@ def run_batch(self, inputs, log_ids, batch_id): ind_preds.append(preds[start_idx : start_idx + len(item)]) start_idx += len(item) - for i, result in zip( - input_ids_g, + for input_id, result in zip( + ind_input_id_lst, executor.map( self._postprocess, ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst, ind_preds, - log_ids_g, - inputs_g, + ind_log_id_lst, + ind_input_lst, ), ): - result_or_output_dic[i] = result + result_or_output_dic[input_id] = result assert len(result_or_output_dic) == len( inputs diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py index f7e8d9b56b..57dde5d42f 100644 --- a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py @@ -104,12 +104,16 @@ def run_batch(self, inputs, log_ids, batch_id): ret = executor.map(self._preprocess, inputs_g, log_ids_g) ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], [] + ind_input_ids_lst, ind_log_ids_lst, ind_inputs_lst = [], [], [] for i, item in enumerate(ret): if isinstance(item, tuple): assert len(item) == 3, len(item) ind_img_lsts.append(item[0]) ind_data_info_lst.append(item[1]) ind_visualize_enabled_lst.append(item[2]) + ind_input_ids_lst.append(input_ids_g[i]) + ind_log_ids_lst.append(log_ids_g[i]) + ind_inputs_lst.append(inputs_g[i]) else: input_id = input_ids_g[i] result_or_output_dic[input_id] = item @@ -179,19 +183,19 @@ def run_batch(self, inputs, log_ids, batch_id): ind_preds.append(preds[start_idx : start_idx + len(item)]) start_idx += len(item) - for i, result in zip( - 
input_ids_g, + for input_id, result in zip( + ind_input_ids_lst, executor.map( self._postprocess, ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst, ind_preds, - log_ids_g, - inputs_g, + ind_log_ids_lst, + ind_inputs_lst, ), ): - result_or_output_dic[i] = result + result_or_output_dic[input_id] = result assert len(result_or_output_dic) == len( inputs diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py index f0b0a64ac3..8047ba4249 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py @@ -104,12 +104,16 @@ def run_batch(self, inputs, log_ids, batch_id): ret = executor.map(self._preprocess, inputs_g, log_ids_g) ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst = [], [], [] + ind_input_id_lst, ind_log_id_lst, ind_input_lst = [], [], [] for i, item in enumerate(ret): if isinstance(item, tuple): assert len(item) == 3, len(item) ind_img_lsts.append(item[0]) ind_data_info_lst.append(item[1]) ind_visualize_enabled_lst.append(item[2]) + ind_input_id_lst.append(input_ids_g[i]) + ind_log_id_lst.append(log_ids_g[i]) + ind_input_lst.append(inputs_g[i]) else: input_id = input_ids_g[i] result_or_output_dic[input_id] = item @@ -157,19 +161,19 @@ def run_batch(self, inputs, log_ids, batch_id): ind_preds.append(preds[start_idx : start_idx + len(item)]) start_idx += len(item) - for i, result in zip( - input_ids_g, + for input_id, result in zip( + ind_input_id_lst, executor.map( self._postprocess, ind_img_lsts, ind_data_info_lst, ind_visualize_enabled_lst, ind_preds, - log_ids_g, - inputs_g, + ind_log_id_lst, + ind_input_lst, ), ): - result_or_output_dic[i] = result + result_or_output_dic[input_id] = result assert len(result_or_output_dic) == len( inputs From f53eaf72531bb409eac9981f720d8a3e4e42839f Mon Sep 17 
00:00:00 2001 From: Lin Manhui Date: Fri, 30 Jan 2026 01:50:09 +0800 Subject: [PATCH 04/37] Fix HPS and remove scipy from required deps (#4957) --- deploy/hps/server_env/cpu_version.txt | 2 +- deploy/hps/server_env/gpu_version.txt | 2 +- setup.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/deploy/hps/server_env/cpu_version.txt b/deploy/hps/server_env/cpu_version.txt index e4737652ca..0b69c00c5f 100644 --- a/deploy/hps/server_env/cpu_version.txt +++ b/deploy/hps/server_env/cpu_version.txt @@ -1 +1 @@ -0.3.13 +0.3.14 diff --git a/deploy/hps/server_env/gpu_version.txt b/deploy/hps/server_env/gpu_version.txt index 0b69c00c5f..9e29e10619 100644 --- a/deploy/hps/server_env/gpu_version.txt +++ b/deploy/hps/server_env/gpu_version.txt @@ -1 +1 @@ -0.3.14 +0.3.15 diff --git a/setup.py b/setup.py index c2401ff4c1..7802fd0297 100644 --- a/setup.py +++ b/setup.py @@ -114,7 +114,6 @@ # Currently `pypdfium2` is required by the image batch sampler "pypdfium2", "scikit-image", - "scipy", ], "multimodal": [ "einops", From 839241ed91cefcbb9fe85556c84a5c2126d587ea Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 30 Jan 2026 01:50:29 +0800 Subject: [PATCH 05/37] Fix transformers version (#4956) --- deploy/genai_vllm_server_docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile index a7792851c7..f761cc3368 100644 --- a/deploy/genai_vllm_server_docker/Dockerfile +++ b/deploy/genai_vllm_server_docker/Dockerfile @@ -21,6 +21,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ else \ python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \ fi \ + && python -m pip install transformers==4.57.6 \ && paddlex --install genai-vllm-server EXPOSE 8080 From a102e8d3994e6622801de41534b056183ee20b01 Mon Sep 17 00:00:00 2001 From: changdazhou 
<142379845+changdazhou@users.noreply.github.com> Date: Fri, 30 Jan 2026 13:39:04 +0800 Subject: [PATCH 06/37] bugfix: unexpected change of the constant IMAGE_LABELS (#4960) * bugfix: unexpected change of the constant IMAGE_LABELS * update doc --- docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md | 2 +- docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md | 2 +- paddlex/inference/pipelines/paddleocr_vl/pipeline.py | 2 +- paddlex/inference/pipelines/paddleocr_vl/result.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md index bcac347f89..ac764c5ac4 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md @@ -6,7 +6,7 @@ comments: true PaddleOCR-VL is a SOTA and resource-efficient model tailored for document parsing. Its core component is PaddleOCR-VL-0.9B, a compact yet powerful vision-language model (VLM) that integrates a NaViT-style dynamic resolution visual encoder with the ERNIE-4.5-0.3B language model to enable accurate element recognition. This innovative model efficiently supports 109 languages and excels in recognizing complex elements (e.g., text, tables, formulas, and charts), while maintaining minimal resource consumption. Through comprehensive evaluations on widely used public benchmarks and in-house benchmarks, PaddleOCR-VL achieves SOTA performance in both page-level document parsing and element-level recognition. It significantly outperforms existing solutions, exhibits strong competitiveness against top-tier VLMs, and delivers fast inference speeds. These strengths make it highly suitable for practical deployment in real-world scenarios. -On January 29, 2026, we released PaddleOCR-VL-1.5. 
PaddleOCR-VL-1.5 not only significantly improved the accuracy on the OmniDocBench v1.5 evaluation set to 94.5%, but also innovatively supports irregular-shaped bounding box localization. As a result, PaddleOCR-VL-1.5 demonstrates outstanding performance in real-world scenarios such as Skew, Warping, Screen Photography, Illumination, and Scanning. In addition, the model has added new capabilities for seal (stamp) recognition and text detection and recognition, with key metrics continuing to lead the industry. +**On January 29, 2026, we released PaddleOCR-VL-1.5. PaddleOCR-VL-1.5 not only significantly improved the accuracy on the OmniDocBench v1.5 evaluation set to 94.5%, but also innovatively supports irregular-shaped bounding box localization. As a result, PaddleOCR-VL-1.5 demonstrates outstanding performance in real-world scenarios such as Skew, Warping, Screen Photography, Illumination, and Scanning. In addition, the model has added new capabilities for seal (stamp) recognition and text detection and recognition, with key metrics continuing to lead the industry.** diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index 1213104491..cee5b65fe5 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -6,7 +6,7 @@ comments: true PaddleOCR-VL 是一款先进、高效的文档解析模型,专为文档中的元素识别设计。其核心组件为 PaddleOCR-VL-0.9B,这是一种紧凑而强大的视觉语言模型(VLM),它由 NaViT 风格的动态分辨率视觉编码器与 ERNIE-4.5-0.3B 语言模型组成,能够实现精准的元素识别。该模型支持 109 种语言,并在识别复杂元素(如文本、表格、公式和图表)方面表现出色,同时保持极低的资源消耗。通过在广泛使用的公开基准与内部基准上的全面评测,PaddleOCR-VL 在页级级文档解析与元素级识别均达到 SOTA 表现。它显著优于现有的基于Pipeline方案和文档解析多模态方案以及先进的通用多模态大模型,并具备更快的推理速度。这些优势使其非常适合在真实场景中落地部署。 -2026年1月29日,我们发布了PaddleOCR-VL-1.5。PaddleOCR-VL-1.5不仅以94.5%精度大幅刷新了评测集OmniDocBench v1.5,更创新性地支持了异形框定位,使得PaddleOCR-VL-1.5 在扫描、倾斜、弯折、屏幕拍摄及复杂光照等真实场景中均表现优异。此外,模型还新增了印章识别与文本检测识别能力,关键指标持续领跑。 
+**2026年1月29日,我们发布了PaddleOCR-VL-1.5。PaddleOCR-VL-1.5不仅以94.5%精度大幅刷新了评测集OmniDocBench v1.5,更创新性地支持了异形框定位,使得PaddleOCR-VL-1.5 在扫描、倾斜、弯折、屏幕拍摄及复杂光照等真实场景中均表现优异。此外,模型还新增了印章识别与文本检测识别能力,关键指标持续领跑。** diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index ee1994d83d..5a48667437 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -272,7 +272,7 @@ def get_layout_parsing_results( id2pixel_key_map = {} image_path_to_obj_map = {} vis_image_labels = IMAGE_LABELS + ["seal"] - image_labels = [] if use_ocr_for_image_block else IMAGE_LABELS + image_labels = [] if use_ocr_for_image_block else IMAGE_LABELS.copy() if not use_chart_recognition: image_labels += ["chart"] vis_image_labels += ["chart"] diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py index 963f9db79f..f50b18a7c2 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/result.py +++ b/paddlex/inference/pipelines/paddleocr_vl/result.py @@ -268,7 +268,7 @@ def __init__(self, data) -> None: "markdown_ignore_labels", [] ) self.skip_order_labels = [ - label for label in SKIP_ORDER_LABELS + markdown_ignore_labels + label for label in SKIP_ORDER_LABELS.copy() + markdown_ignore_labels ] def _to_img(self) -> dict[str, np.ndarray]: From bb4b1c153b348f7207b3784994a5bad8b14799b4 Mon Sep 17 00:00:00 2001 From: changdazhou <142379845+changdazhou@users.noreply.github.com> Date: Fri, 30 Jan 2026 13:40:19 +0800 Subject: [PATCH 07/37] [METAX] add ppdoclayoutv3 to METAX_GPU_WHITELIST (#4962) --- paddlex/utils/custom_device_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/paddlex/utils/custom_device_list.py b/paddlex/utils/custom_device_list.py index ec4a85495c..d48ea12b71 100755 --- a/paddlex/utils/custom_device_list.py +++ b/paddlex/utils/custom_device_list.py @@ -418,6 +418,7 @@ "PP-OCRv4_mobile_rec", "PP-OCRv4_server_rec", 
"PP-DocLayoutV2", + "PP-DocLayoutV3", "PP-ShiTuV2_rec", "PP-ShiTuV2_det", "PP-OCRv5_mobile_det", From f7f83b75a0b6800d94f6d994042fadcefb778046 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Fri, 30 Jan 2026 14:26:03 +0800 Subject: [PATCH 08/37] vllm 0.10.2 needs transformers 4.x (#4963) * vllm 0.10.2 needs transformers 4.x * update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7802fd0297..b02f516f2a 100644 --- a/setup.py +++ b/setup.py @@ -228,7 +228,7 @@ "genai-vllm-server": [ "einops", "torch == 2.8.0", - "transformers", + "transformers < 5.0.0", "uvloop", "vllm == 0.10.2", ], From 56ca189dfa3280c5894abe5899e51d0e0e95c93a Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Mon, 2 Feb 2026 11:29:39 +0800 Subject: [PATCH 09/37] Support setting PDF rendering scale factor (#4967) --- .../inference/common/batch_sampler/image_batch_sampler.py | 3 ++- paddlex/inference/serving/infra/utils.py | 5 ++--- paddlex/utils/flags.py | 6 +++++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/paddlex/inference/common/batch_sampler/image_batch_sampler.py b/paddlex/inference/common/batch_sampler/image_batch_sampler.py index dd78354fe7..c519765f69 100644 --- a/paddlex/inference/common/batch_sampler/image_batch_sampler.py +++ b/paddlex/inference/common/batch_sampler/image_batch_sampler.py @@ -20,6 +20,7 @@ from ....utils import logging from ....utils.cache import CACHE_DIR from ....utils.download import download +from ....utils.flags import PDF_RENDER_SCALE from ...utils.io import PDFReader from .base_batch_sampler import BaseBatchSampler, Batch @@ -48,7 +49,7 @@ class ImageBatchSampler(BaseBatchSampler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.pdf_reader = PDFReader() + self.pdf_reader = PDFReader(zoom=PDF_RENDER_SCALE) # XXX: auto download for url def _download_from_url(self, in_path): diff --git 
a/paddlex/inference/serving/infra/utils.py b/paddlex/inference/serving/infra/utils.py index b6b0211f98..d9e971261e 100644 --- a/paddlex/inference/serving/infra/utils.py +++ b/paddlex/inference/serving/infra/utils.py @@ -30,7 +30,7 @@ from typing_extensions import Literal, ParamSpec, TypeAlias, assert_never from ....utils.deps import function_requires_deps, is_dep_available -from ...utils.pdfium_lock import pdfium_lock +from ...utils.pdfium_lock import PDF_RENDER_SCALE, pdfium_lock from .models import ImageInfo, PDFInfo, PDFPageInfo if is_dep_available("aiohttp"): @@ -191,8 +191,7 @@ def read_pdf( if max_num_imgs is not None and len(images) >= max_num_imgs: page.close() break - # TODO: Do not always use zoom=2.0 - zoom = 2.0 + zoom = PDF_RENDER_SCALE deg = 0 image = page.render(scale=zoom, rotation=deg).to_numpy() images.append(image) diff --git a/paddlex/utils/flags.py b/paddlex/utils/flags.py index 1fcf547335..726d3603fd 100644 --- a/paddlex/utils/flags.py +++ b/paddlex/utils/flags.py @@ -70,7 +70,9 @@ def get_flag_from_env_var(name, default, format_func=str): "PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", False ) -HUGGING_FACE_ENDPOINT = os.environ.get("PADDLE_PDX_HUGGING_FACE_ENDPOINT", "https://huggingface.co") +HUGGING_FACE_ENDPOINT = os.environ.get( + "PADDLE_PDX_HUGGING_FACE_ENDPOINT", "https://huggingface.co" +) # Inference Benchmark INFER_BENCHMARK = get_flag_from_env_var("PADDLE_PDX_INFER_BENCHMARK", False) @@ -87,3 +89,5 @@ def get_flag_from_env_var(name, default, format_func=str): INFER_BENCHMARK_USE_CACHE_FOR_READ = get_flag_from_env_var( "PADDLE_PDX_INFER_BENCHMARK_USE_CACHE_FOR_READ", False ) + +PDF_RENDER_SCALE = get_flag_from_env_var("PADDLE_PDX_PDF_RENDER_SCALE", 2.0, float) From 2e70318e4cccb7f8dbf5ef896d95fa7c322e6298 Mon Sep 17 00:00:00 2001 From: lyn-zzz <978291632@qq.com> Date: Tue, 3 Feb 2026 11:52:41 +0800 Subject: [PATCH 10/37] fix: check if cropped image size is zero in table recognition v2 (#4937) * fix: check if cropped image size is zero in 
table recognition v2 * fix: remove redundant colon causing SyntaxError --------- Co-authored-by: Lin Manhui From 06223e38e03c58c9f83769bf9210f84af84e1ae7 Mon Sep 17 00:00:00 2001 From: Bvicii <98971614+scyyh11@users.noreply.github.com> Date: Tue, 3 Feb 2026 19:44:44 -0800 Subject: [PATCH 11/37] Fix/doc vlm async cancellation (#4969) * fix(doc_vlm): cancel pending futures on batch request failure When a batch of requests is sent to the VLM service and one fails, the remaining pending futures are now properly cancelled to avoid wasting VLM service resources. * chore: remove test file and documentation for async cancellation fix --- paddlex/inference/models/doc_vlm/predictor.py | 215 +++++++++--------- 1 file changed, 111 insertions(+), 104 deletions(-) diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 9cd9341736..3d8ab2e5c7 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -415,118 +415,125 @@ def _genai_client_process( max_pixels, ): futures = [] - for item in data: - image = item["image"] - if isinstance(image, str): - if image.startswith("http://") or image.startswith("https://"): - image_url = image - else: + try: + for item in data: + image = item["image"] + if isinstance(image, str): + if image.startswith("http://") or image.startswith("https://"): + image_url = image + else: + from PIL import Image + + with Image.open(image) as img: + img = img.convert("RGB") + with io.BytesIO() as buf: + img.save(buf, format="JPEG") + image_url = "data:image/jpeg;base64," + base64.b64encode( + buf.getvalue() + ).decode("ascii") + elif isinstance(image, np.ndarray): + import cv2 from PIL import Image - with Image.open(image) as img: - img = img.convert("RGB") - with io.BytesIO() as buf: - img.save(buf, format="JPEG") - image_url = "data:image/jpeg;base64," + base64.b64encode( - buf.getvalue() - ).decode("ascii") - elif isinstance(image, np.ndarray): - 
import cv2 - from PIL import Image - - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - img = Image.fromarray(image) - with io.BytesIO() as buf: - img.save(buf, format="JPEG") - image_url = "data:image/jpeg;base64," + base64.b64encode( - buf.getvalue() - ).decode("ascii") - else: - raise TypeError(f"Not supported image type: {type(image)}") - - if self._genai_client.backend == "fastdeploy-server": - kwargs = { - "temperature": 1 if temperature is None else temperature, - "top_p": 0 if top_p is None else top_p, - } - else: - kwargs = { - "temperature": 0 if temperature is None else temperature, - } - if top_p is not None: - kwargs["top_p"] = top_p - - if self._genai_client.backend == "mlx-vlm-server": - max_tokens_name = "max_tokens" - else: - max_tokens_name = "max_completion_tokens" - - if max_new_tokens is not None: - kwargs[max_tokens_name] = max_new_tokens - elif self.model_name in self.model_group["PaddleOCR-VL"]: - kwargs[max_tokens_name] = 8192 - - kwargs["extra_body"] = {} - if skip_special_tokens is not None: - if self._genai_client.backend in ( - "fastdeploy-server", - "vllm-server", - "sglang-server", - "mlx-vlm-server", - ): - kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + img = Image.fromarray(image) + with io.BytesIO() as buf: + img.save(buf, format="JPEG") + image_url = "data:image/jpeg;base64," + base64.b64encode( + buf.getvalue() + ).decode("ascii") else: - raise ValueError("Not supported") + raise TypeError(f"Not supported image type: {type(image)}") - if repetition_penalty is not None: - kwargs["extra_body"]["repetition_penalty"] = repetition_penalty - - if min_pixels is not None: - if self._genai_client.backend == "vllm-server": - kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ - "extra_body" - ].get("mm_processor_kwargs", {}) - kwargs["extra_body"]["mm_processor_kwargs"][ - "min_pixels" - ] = min_pixels + if self._genai_client.backend == "fastdeploy-server": + kwargs = 
{ + "temperature": 1 if temperature is None else temperature, + "top_p": 0 if top_p is None else top_p, + } else: - warnings.warn( - f"{repr(self._genai_client.backend)} does not support `min_pixels`." - ) + kwargs = { + "temperature": 0 if temperature is None else temperature, + } + if top_p is not None: + kwargs["top_p"] = top_p - if max_pixels is not None: - if self._genai_client.backend == "vllm-server": - kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ - "extra_body" - ].get("mm_processor_kwargs", {}) - kwargs["extra_body"]["mm_processor_kwargs"][ - "max_pixels" - ] = max_pixels + if self._genai_client.backend == "mlx-vlm-server": + max_tokens_name = "max_tokens" else: - warnings.warn( - f"{repr(self._genai_client.backend)} does not support `max_pixels`." - ) + max_tokens_name = "max_completion_tokens" + + if max_new_tokens is not None: + kwargs[max_tokens_name] = max_new_tokens + elif self.model_name in self.model_group["PaddleOCR-VL"]: + kwargs[max_tokens_name] = 8192 + + kwargs["extra_body"] = {} + if skip_special_tokens is not None: + if self._genai_client.backend in ( + "fastdeploy-server", + "vllm-server", + "sglang-server", + "mlx-vlm-server", + ): + kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens + else: + raise ValueError("Not supported") + + if repetition_penalty is not None: + kwargs["extra_body"]["repetition_penalty"] = repetition_penalty - future = self._genai_client.create_chat_completion( - [ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": image_url}}, - {"type": "text", "text": item["query"]}, - ], - } - ], - return_future=True, - timeout=600, - **kwargs, - ) + if min_pixels is not None: + if self._genai_client.backend == "vllm-server": + kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ + "extra_body" + ].get("mm_processor_kwargs", {}) + kwargs["extra_body"]["mm_processor_kwargs"][ + "min_pixels" + ] = min_pixels + else: + warnings.warn( + f"{repr(self._genai_client.backend)} does 
not support `min_pixels`." + ) + + if max_pixels is not None: + if self._genai_client.backend == "vllm-server": + kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[ + "extra_body" + ].get("mm_processor_kwargs", {}) + kwargs["extra_body"]["mm_processor_kwargs"][ + "max_pixels" + ] = max_pixels + else: + warnings.warn( + f"{repr(self._genai_client.backend)} does not support `max_pixels`." + ) + + future = self._genai_client.create_chat_completion( + [ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": image_url}}, + {"type": "text", "text": item["query"]}, + ], + } + ], + return_future=True, + timeout=600, + **kwargs, + ) - futures.append(future) + futures.append(future) - results = [] - for future in futures: - result = future.result() - results.append(result.choices[0].message.content) + results = [] + for future in futures: + result = future.result() + results.append(result.choices[0].message.content) - return results + return results + except Exception: + # Cancel all pending futures to avoid wasting resources + for future in futures: + if not future.done(): + future.cancel() + raise From a20fddc88abe8d835e2a47e6844780aee4dfb2a9 Mon Sep 17 00:00:00 2001 From: Dhouibi Iheb Date: Wed, 4 Feb 2026 11:16:44 +0100 Subject: [PATCH 12/37] Fix: Update langchain import to use langchain_core.documents (#4944) --- paddlex/inference/pipelines/components/retriever/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py index a348836836..ef146c1b7a 100644 --- a/paddlex/inference/pipelines/components/retriever/base.py +++ b/paddlex/inference/pipelines/components/retriever/base.py @@ -22,7 +22,7 @@ from .....utils.subclass_register import AutoRegisterABCMetaClass if is_dep_available("langchain"): - from langchain.docstore.document import Document + from langchain_core.documents import Document from 
langchain.text_splitter import RecursiveCharacterTextSplitter if is_dep_available("langchain-community"): from langchain_community import vectorstores From edeb50ef1628982d3a3159b8c57fc9a177b9f2a5 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 6 Feb 2026 18:15:14 +0800 Subject: [PATCH 13/37] Fix typo (#4982) --- docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md index cee5b65fe5..960a0bbe51 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md @@ -1551,7 +1551,7 @@ INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) mergeTables boolean -请参阅PaddleOCR-VL对象中 restructure_pages 方法的 merge_table 参数相关说明。仅当restructurePagestrue时生效。 +请参阅PaddleOCR-VL对象中 restructure_pages 方法的 merge_tables 参数相关说明。仅当restructurePagestrue时生效。 否 From fb23d613f33a18b858e54c3c4b9bcfbc07d9de3c Mon Sep 17 00:00:00 2001 From: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:08:26 +0800 Subject: [PATCH 14/37] Update Docker image for CI workflow (#4975) * Update Docker image for CI workflow * Disable model source check in CI workflow --- .github/workflows/xpu_ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/xpu_ci.yml b/.github/workflows/xpu_ci.yml index cac51fafe9..ed08004964 100644 --- a/.github/workflows/xpu_ci.yml +++ b/.github/workflows/xpu_ci.yml @@ -27,7 +27,7 @@ jobs: - name: Code Checkout env: - docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.3.0 + docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 run: | REPO="https://github.com/${{ github.repository }}.git" FULL_REPO="${{ github.repository }}" @@ -58,7 +58,7 @@ jobs: - name: 
Run CI unittest env: - docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.3.0 + docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 run: | runner_name="${{ runner.name }}" PARENT_DIR=$(dirname "$WORKSPACE") @@ -71,5 +71,6 @@ jobs: ${docker_image} /bin/bash -c " git config --global --add safe.directory /workspace/PaddleX cd PaddleX + export PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=true bash tests/run_xpu_ci.sh " From ac930a98c364aeab83e46cedd051d7f44a4d7ed1 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:20:55 +0800 Subject: [PATCH 15/37] add llama.cpp support (#4983) --- paddlex/inference/models/common/genai.py | 8 ++++- paddlex/inference/models/doc_vlm/predictor.py | 29 ++++++++++++------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/paddlex/inference/models/common/genai.py b/paddlex/inference/models/common/genai.py index 9a6d2edbcf..d5a10d764f 100644 --- a/paddlex/inference/models/common/genai.py +++ b/paddlex/inference/models/common/genai.py @@ -29,12 +29,18 @@ "vllm-server", "sglang-server", "mlx-vlm-server", + "llama-cpp-server", ] class GenAIConfig(BaseModel): backend: Literal[ - "native", "fastdeploy-server", "vllm-server", "sglang-server", "mlx-vlm-server" + "native", + "fastdeploy-server", + "vllm-server", + "sglang-server", + "mlx-vlm-server", + "llama-cpp-server", ] = "native" server_url: Optional[str] = None max_concurrency: int = 200 diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 3d8ab2e5c7..43cc173a0b 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -415,6 +415,10 @@ def _genai_client_process( max_pixels, ): futures = [] + if self._genai_client.backend == "llama-cpp-server": + image_format = "PNG" + else: + image_format = "JPEG" try: for item in data: image 
= item["image"] @@ -427,10 +431,11 @@ def _genai_client_process( with Image.open(image) as img: img = img.convert("RGB") with io.BytesIO() as buf: - img.save(buf, format="JPEG") - image_url = "data:image/jpeg;base64," + base64.b64encode( - buf.getvalue() - ).decode("ascii") + img.save(buf, format=image_format) + image_url = ( + f"data:image/{image_format.lower()};base64," + + base64.b64encode(buf.getvalue()).decode("ascii") + ) elif isinstance(image, np.ndarray): import cv2 from PIL import Image @@ -438,10 +443,11 @@ def _genai_client_process( image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) img = Image.fromarray(image) with io.BytesIO() as buf: - img.save(buf, format="JPEG") - image_url = "data:image/jpeg;base64," + base64.b64encode( - buf.getvalue() - ).decode("ascii") + img.save(buf, format=image_format) + image_url = ( + f"data:image/{image_format.lower()};base64," + + base64.b64encode(buf.getvalue()).decode("ascii") + ) else: raise TypeError(f"Not supported image type: {type(image)}") @@ -457,7 +463,7 @@ def _genai_client_process( if top_p is not None: kwargs["top_p"] = top_p - if self._genai_client.backend == "mlx-vlm-server": + if self._genai_client.backend in ["mlx-vlm-server", "llama-cpp-server"]: max_tokens_name = "max_tokens" else: max_tokens_name = "max_completion_tokens" @@ -474,8 +480,11 @@ def _genai_client_process( "vllm-server", "sglang-server", "mlx-vlm-server", + "llama-cpp-server", ): - kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens + kwargs["extra_body"][ + "skip_special_tokens" + ] = skip_special_tokens else: raise ValueError("Not supported") From e2b463e6afacf35732cf1c23dc35591e204487e6 Mon Sep 17 00:00:00 2001 From: norbbrog <110168034+norbbrog@users.noreply.github.com> Date: Tue, 10 Feb 2026 08:15:13 +0100 Subject: [PATCH 16/37] fixing langchain text splitter import (#4981) * fixing langchain text splitter import to be compatible with langchain v0 and v1 * updating check_imports.py with new explicit dependency * keeping 
alphabetical order --------- Co-authored-by: Lin Manhui --- .precommit/check_imports.py | 1 + paddlex/inference/pipelines/components/retriever/base.py | 4 ++-- setup.py | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.precommit/check_imports.py b/.precommit/check_imports.py index 7f9ec4a349..4de0856d20 100644 --- a/.precommit/check_imports.py +++ b/.precommit/check_imports.py @@ -56,6 +56,7 @@ "langchain_community": "langchain-community", "langchain_core": "langchain-core", "langchain_openai": "langchain-openai", + "langchain_text_splitters": "langchain-text-splitters", "lxml": "lxml", "matplotlib": "matplotlib", "modelscope": "modelscope", diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py index ef146c1b7a..8aa42ef7b5 100644 --- a/paddlex/inference/pipelines/components/retriever/base.py +++ b/paddlex/inference/pipelines/components/retriever/base.py @@ -23,13 +23,13 @@ if is_dep_available("langchain"): from langchain_core.documents import Document - from langchain.text_splitter import RecursiveCharacterTextSplitter + from langchain_text_splitters import RecursiveCharacterTextSplitter if is_dep_available("langchain-community"): from langchain_community import vectorstores from langchain_community.vectorstores import FAISS -@class_requires_deps("langchain", "langchain-community") +@class_requires_deps("langchain", "langchain-text-splitters", "langchain-community") class BaseRetriever(ABC, metaclass=AutoRegisterABCMetaClass): """Base Retriever""" diff --git a/setup.py b/setup.py index b02f516f2a..6d942d5ce8 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ "langchain-community": ">= 0.2, < 1.0", "langchain-core": "", "langchain-openai": ">= 0.1, < 1.0", + "langchain-text-splitters": ">= 0.2, < 1.0", "lxml": "", "matplotlib": "", "modelscope": ">=1.28.0", @@ -133,6 +134,7 @@ "imagesize", "langchain", "langchain-community", + "langchain-text-splitters", 
"langchain-core", "langchain-openai", "lxml", From b0be02f518461320b03f9f550aede5070742edc3 Mon Sep 17 00:00:00 2001 From: yang-521 <122206917+yang-521@users.noreply.github.com> Date: Tue, 10 Feb 2026 15:16:10 +0800 Subject: [PATCH 17/37] =?UTF-8?q?=E4=BF=AE=E5=A4=8DPNG=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=E7=A9=BA=E7=99=BD=E5=9B=BE=E5=83=8F=E5=87=BA=E7=8E=B0=E8=B6=85?= =?UTF-8?q?=E5=87=BA=E7=B4=A2=E5=BC=95=E8=8C=83=E5=9B=B4=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98=20(#4945)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 修复PNG格式空白图像出现超出索引范围的问题 PNG格式空白图像进行文本检测后打印会出现超出索引范围的错误,目前测试中jpg不会出现这样的错误,只有png出现。 最小复现代码 from paddlex import create_model from PIL import Image import numpy as np model = create_model(model_name="PP-OCRv3_mobile_det", model_dir='model/PP-OCRv3_mobile_det_infer') img = Image.open("0002.png") image_array = np.array(img) 输出 = model.predict(image_array, batch_size=1) 对于输出中的 res: print(res) * 注释修改为英文 --------- Co-authored-by: 学卿 <64625668+leo-q8@users.noreply.github.com> --- paddlex/inference/models/text_detection/processors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddlex/inference/models/text_detection/processors.py b/paddlex/inference/models/text_detection/processors.py index c43e7df6b4..23a38fdd65 100644 --- a/paddlex/inference/models/text_detection/processors.py +++ b/paddlex/inference/models/text_detection/processors.py @@ -253,6 +253,9 @@ def __call__(self, imgs): """apply""" def _norm(img): + # Check if the image is in 4-channel RGBA format. If so, convert it to RGB format. 
+ if img.shape[2] == 4: + img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB) if self.order == "chw": img = np.transpose(img, (2, 0, 1)) From 9cdf48e1d66336976c6a2666e24780491dbca9c4 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Wed, 11 Feb 2026 12:31:08 +0800 Subject: [PATCH 18/37] Remove PaddleOCR-VL server page limit (#4991) --- .../sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml | 4 ++++ deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt | 2 +- .../tutorials/ocr_pipelines/PP-DocTranslation.en.md | 4 +--- .../pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md | 4 +--- paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml | 4 ++++ paddlex/configs/pipelines/PaddleOCR-VL.yaml | 4 ++++ 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml index ebf5804d29..900892f522 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml @@ -103,3 +103,7 @@ SubPipelines: module_name: image_unwarping model_name: UVDoc model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt index 0ea3a944b3..0d91a54c7d 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt @@ -1 +1 @@ -0.2.0 +0.3.0 diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md index ae97468e26..5871215719 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PP-DocTranslation.en.md @@ -1590,9 +1590,7 @@ The following is the API reference for basic Serving and examples of multilingua file string -The URL of an image file or PDF file 
accessible by the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.
To remove the page limit, add the following configuration to the pipeline configuration file:
Serving:
-  extra:
-    max_num_input_imgs: null
+The URL of an image file or PDF file accessible by the server, or the Base64-encoded result of the content of the aforementioned file types. Yes diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md index ac764c5ac4..e312cf733d 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.en.md @@ -1364,9 +1364,7 @@ Below are the API references for basic service-based deployment and examples of file string -The URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types. By default, for PDF files with more than 10 pages, only the first 10 pages will be processed.
To remove the page limit, add the following configuration to the production line configuration file:
 Serving:
-  extra:
-    max_num_input_imgs: null
+The URL of an image file or PDF file accessible to the server, or the Base64-encoded result of the content of the aforementioned file types. Yes diff --git a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml index 17aca18f1d..d0e197f8ea 100644 --- a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml +++ b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml @@ -79,3 +79,7 @@ SubPipelines: module_name: image_unwarping model_name: UVDoc model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml index fdb52c7ede..37a4823cf0 100644 --- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml +++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml @@ -103,3 +103,7 @@ SubPipelines: module_name: image_unwarping model_name: UVDoc model_dir: null + +Serving: + extra: + max_num_input_imgs: null From fe7c1496981266ecf9f9cbb554696183f088223a Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Wed, 11 Feb 2026 12:37:35 +0800 Subject: [PATCH 19/37] Add Intel GPU config (#4992) --- .../genai/configs/paddleocr_vl_09b.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/paddlex/inference/genai/configs/paddleocr_vl_09b.py b/paddlex/inference/genai/configs/paddleocr_vl_09b.py index 5451bf6e25..9ecfde414a 100644 --- a/paddlex/inference/genai/configs/paddleocr_vl_09b.py +++ b/paddlex/inference/genai/configs/paddleocr_vl_09b.py @@ -45,13 +45,26 @@ def get_config(backend): cfg["max-concurrency"] = 2048 return cfg elif backend == "vllm": - return { - "trust-remote-code": True, - "gpu-memory-utilization": 0.5, - "max-model-len": 16384, - "max-num-batched-tokens": 131072, - "api-server-count": 4, - } + require_deps("torch") + + import torch + + if torch.xpu.is_available(): + return { + "trust-remote-code": True, + "max-num-batched-tokens": 16384, + "no-enable-prefix-caching": True, + "mm-processor-cache-gb": 0, + 
"enforce-eager": True, + } + else: + return { + "trust-remote-code": True, + "gpu-memory-utilization": 0.5, + "max-model-len": 16384, + "max-num-batched-tokens": 131072, + "api-server-count": 4, + } elif backend == "sglang": return { "trust-remote-code": True, From d59b2c449d31ad49e445459023110c5e145469a7 Mon Sep 17 00:00:00 2001 From: WILSON WEI Date: Wed, 11 Feb 2026 18:08:57 +0800 Subject: [PATCH 20/37] PaddleX Add ROCm 7.0 compatibility patches (#4990) * Use cache mount for genai docker (#4954) * Fix HPS order bug (#4955) * Fix transformers version (#4956) * Fix HPS and remove scipy from required deps (#4957) * [Cherry-Pick]bugfix: unexpected change of the constant IMAGE_LABELS (#4961) * bugfix: unexpected change of the constant IMAGE_LABELS * update doc * [METAX] add ppdoclayv3 to METAX_GPU_WHITELIST (#4959) Co-authored-by: duqiemng <1640472053@qq.com> * vllm 0.10.2 needs transformers 4.x (#4963) * vllm 0.10.2 needs transformers 4.x * update * Bump version to 3.4.1 * Support setting PDF rendering scale factor (#4967) * Fix/doc vlm async cancellation (#4969) (#4971) * fix(doc_vlm): cancel pending futures on batch request failure When a batch of requests is sent to the VLM service and one fails, the remaining pending futures are now properly cancelled to avoid wasting VLM service resources. * chore: remove test file and documentation for async cancellation fix * Fix typo (#4982) * Revert "Fix typo (#4982)" This reverts commit 0a936ba7a37ce783684d28be8545e17a5bab69c5. 
* feat(ROCm): Add ROCm 7.0 compatibility patches * version --------- Co-authored-by: Lin Manhui Co-authored-by: changdazhou <142379845+changdazhou@users.noreply.github.com> Co-authored-by: SuperNova <91192235+handsomecoderyang@users.noreply.github.com> Co-authored-by: duqiemng <1640472053@qq.com> Co-authored-by: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Co-authored-by: Bobholamovic Co-authored-by: Bvicii <98971614+scyyh11@users.noreply.github.com> --- .pre-commit-config.yaml | 1 + paddlex/inference/models/common/static_infer.py | 12 +++++++++++- .../doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py | 4 +++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f480361043..ab2e0f7a07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -72,3 +72,4 @@ repos: files: ^paddlex/.*\.py$ additional_dependencies: - stdlib-list==0.10.0 + - setuptools diff --git a/paddlex/inference/models/common/static_infer.py b/paddlex/inference/models/common/static_infer.py index 4e0556c829..6806a7718a 100644 --- a/paddlex/inference/models/common/static_infer.py +++ b/paddlex/inference/models/common/static_infer.py @@ -402,6 +402,10 @@ def _create( config.enable_new_executor() config.set_optimization_level(3) config.delete_pass("matmul_add_act_fuse_pass") + # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes + if paddle.is_compiled_with_rocm(): + config.delete_pass("conv2d_add_act_fuse_pass") + config.delete_pass("conv2d_add_fuse_pass") elif self._option.device_type == "npu": config.enable_custom_device("npu", self._option.device_id) if hasattr(config, "enable_new_ir"): @@ -480,7 +484,9 @@ def _create( if hasattr(config, "enable_new_executor"): config.enable_new_executor() config.set_optimization_level(3) - + if paddle.is_compiled_with_rocm(): + config.delete_pass("conv2d_add_act_fuse_pass") + config.delete_pass("conv2d_add_fuse_pass") config.enable_memory_optim() for 
del_p in self._option.delete_pass: config.delete_pass(del_p) @@ -488,6 +494,10 @@ def _create( # Disable paddle inference logging if not DEBUG: config.disable_glog_info() + # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes + if paddle.is_compiled_with_rocm(): + config.delete_pass("conv2d_add_act_fuse_pass") + config.delete_pass("conv2d_add_fuse_pass") predictor = paddle.inference.create_predictor(config) diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py index 93b61b6cc4..ab5a9cd87e 100644 --- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py +++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_paddleocr_vl.py @@ -65,7 +65,9 @@ class PaddleOCRVLForConditionalGeneration(Ernie4_5PretrainedModel): _tied_weights_keys = ["lm_head.weight"] config_class = PaddleOCRVLConfig _no_split_modules = ["Ernie4_5DecoderLayer", "SiglipEncoderLayer"] - + # Keep visual encoder in fp32 for ROCm stability (MIOpen bf16 conv has bugs) + # This also improves precision for vision processing + _keep_in_fp32_modules = ["visual", "mlp_AR"] base_model_prefix = "" def __init__(self, config): From 9a3f4dd629e3fb39faf2b37039b94114447d542a Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 12 Feb 2026 17:24:03 +0800 Subject: [PATCH 21/37] [Feat] Support setting expiration for BOS URLs (#4993) * Support setting expiration for BOS URLs * Fix docs * Fix bugs --- .github/workflows/deploy_docs.yml | 4 ++-- .../sdk/pipelines/OCR/server/model_repo/ocr/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/OCR/version.txt | 2 +- .../server/model_repo/chatocr-visual/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt | 2 +- .../server/model_repo/chatocr-visual/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt | 2 +- .../server/model_repo/doctrans-visual/1/model.py | 8 +++++--- 
deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt | 2 +- .../server/model_repo/layout-parsing/1/model.py | 5 +++++ deploy/hps/sdk/pipelines/PP-StructureV3/version.txt | 2 +- .../server/model_repo/layout-parsing/1/model.py | 5 +++++ deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt | 2 +- .../server/model_repo/document-preprocessing/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/doc_preprocessor/version.txt | 2 +- .../server/model_repo/formula-recognition/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/formula_recognition/version.txt | 2 +- .../server/model_repo/layout-parsing/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/layout_parsing/version.txt | 2 +- .../server/model_repo/seal-recognition/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/seal_recognition/version.txt | 2 +- .../server/model_repo/table-recognition/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/table_recognition/version.txt | 2 +- .../server/model_repo/table-recognition/1/model.py | 4 ++++ deploy/hps/sdk/pipelines/table_recognition_v2/version.txt | 2 +- deploy/hps/server_env/paddlex-hps-server/pyproject.toml | 2 +- .../src/paddlex_hps_server/app_common.py | 5 ++++- paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml | 2 +- paddlex/configs/pipelines/PaddleOCR-VL.yaml | 2 +- .../basic_serving/_pipeline_apps/_common/common.py | 5 ++++- .../serving/basic_serving/_pipeline_apps/_common/ocr.py | 4 ++++ .../basic_serving/_pipeline_apps/doc_preprocessor.py | 1 + .../basic_serving/_pipeline_apps/formula_recognition.py | 1 + .../basic_serving/_pipeline_apps/layout_parsing.py | 2 +- .../inference/serving/basic_serving/_pipeline_apps/ocr.py | 1 + .../serving/basic_serving/_pipeline_apps/paddleocr_vl.py | 2 ++ .../basic_serving/_pipeline_apps/pp_chatocrv3_doc.py | 1 + .../basic_serving/_pipeline_apps/pp_chatocrv4_doc.py | 1 + .../basic_serving/_pipeline_apps/pp_doctranslation.py | 2 ++ .../basic_serving/_pipeline_apps/pp_structurev3.py | 2 ++ .../basic_serving/_pipeline_apps/seal_recognition.py | 1 + 
.../basic_serving/_pipeline_apps/table_recognition.py | 1 + .../basic_serving/_pipeline_apps/table_recognition_v2.py | 1 + paddlex/inference/serving/infra/storage.py | 8 +++++--- paddlex/inference/serving/infra/utils.py | 3 ++- 45 files changed, 102 insertions(+), 27 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 209027caa4..e939baadcd 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -2,7 +2,7 @@ name: Develop Docs on: push: branches: #设置更新哪个分支会更新站点 - - release/3.3 + - release/3.4 permissions: contents: write jobs: @@ -27,5 +27,5 @@ jobs: - run: pip install mike mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2 mkdocs-git-authors-plugin mkdocs-static-i18n mkdocs-minify-plugin - run: | git fetch origin gh-pages --depth=1 - mike deploy --push --update-aliases 3.3 latest + mike deploy --push --update-aliases 3.4 latest mike set-default --push latest diff --git a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py index 5601c51674..1b094662e8 100644 --- a/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py +++ b/deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py @@ -48,6 +48,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -59,6 +60,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = 
self.app_config.extra[ "max_num_input_imgs" @@ -274,6 +277,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/OCR/version.txt b/deploy/hps/sdk/pipelines/OCR/version.txt index 3a4036fb45..53a75d6735 100644 --- a/deploy/hps/sdk/pipelines/OCR/version.txt +++ b/deploy/hps/sdk/pipelines/OCR/version.txt @@ -1 +1 @@ -0.2.5 +0.2.6 diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py index 7b2568a7a1..40f7684f15 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/server/model_repo/chatocr-visual/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -142,6 +145,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], 
max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt index d15723fbe8..1c09c74e22 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv3-doc/version.txt @@ -1 +1 @@ -0.3.2 +0.3.3 diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py index de0a16bdee..c563ca0823 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/server/model_repo/chatocr-visual/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -143,6 +146,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt index 2b7c5ae018..17b2ccd9bf 100644 --- a/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt +++ 
b/deploy/hps/sdk/pipelines/PP-ChatOCRv4-doc/version.txt @@ -1 +1 @@ -0.4.2 +0.4.3 diff --git a/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py b/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py index 361ce50332..7243078c10 100644 --- a/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-DocTranslation/server/model_repo/doctrans-visual/1/model.py @@ -30,12 +30,10 @@ class TritonPythonModel(BaseTritonPythonModel): def initialize(self, args): super().initialize(args) - - self.pipeline.inintial_visual_predictor(self.pipeline.config) - self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -47,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -151,6 +151,7 @@ def run(self, input, log_id): filename_template=f"markdown_{i}/{{key}}", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -165,6 +166,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git 
a/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt b/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt index d917d3e26a..b1e80bb248 100644 --- a/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt +++ b/deploy/hps/sdk/pipelines/PP-DocTranslation/version.txt @@ -1 +1 @@ -0.1.2 +0.1.3 diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py index 57dde5d42f..5183d7c62f 100644 --- a/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/PP-StructureV3/server/model_repo/layout-parsing/1/model.py @@ -47,6 +47,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -58,6 +59,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -327,6 +330,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"markdown_{i}/{{key}}", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -341,6 +345,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], 
return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt b/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt index c2c0004f0e..449d7e73a9 100644 --- a/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt +++ b/deploy/hps/sdk/pipelines/PP-StructureV3/version.txt @@ -1 +1 @@ -0.3.5 +0.3.6 diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py index 8047ba4249..6192a312d0 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/model_repo/layout-parsing/1/model.py @@ -47,6 +47,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -58,6 +59,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -305,6 +308,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, log_id, inpu filename_template=f"markdown_{i}/{{key}}", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) if visualize_enabled: @@ -318,6 +322,7 @@ def _postprocess(self, images, data_info, visualize_enabled, preds, 
log_id, inpu filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt index 0d91a54c7d..9e11b32fca 100644 --- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/version.txt @@ -1 +1 @@ -0.3.0 +0.3.1 diff --git a/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py b/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py index 629dd34e1a..390ac1a1d1 100644 --- a/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py +++ b/deploy/hps/sdk/pipelines/doc_preprocessor/server/model_repo/document-preprocessing/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -131,6 +134,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git 
a/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt b/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt index 0c62199f16..ee1372d33a 100644 --- a/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt +++ b/deploy/hps/sdk/pipelines/doc_preprocessor/version.txt @@ -1 +1 @@ -0.2.1 +0.2.2 diff --git a/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py b/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py index 7af06c405a..0ccc35d1de 100644 --- a/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/formula_recognition/server/model_repo/formula-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -128,6 +131,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/formula_recognition/version.txt b/deploy/hps/sdk/pipelines/formula_recognition/version.txt index 0c62199f16..ee1372d33a 100644 --- a/deploy/hps/sdk/pipelines/formula_recognition/version.txt +++ 
b/deploy/hps/sdk/pipelines/formula_recognition/version.txt @@ -1 +1 @@ -0.2.1 +0.2.2 diff --git a/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py b/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py index b4ba08c961..e96a2f6f55 100644 --- a/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py +++ b/deploy/hps/sdk/pipelines/layout_parsing/server/model_repo/layout-parsing/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -143,6 +146,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/layout_parsing/version.txt b/deploy/hps/sdk/pipelines/layout_parsing/version.txt index 9e11b32fca..d15723fbe8 100644 --- a/deploy/hps/sdk/pipelines/layout_parsing/version.txt +++ b/deploy/hps/sdk/pipelines/layout_parsing/version.txt @@ -1 +1 @@ -0.3.1 +0.3.2 diff --git a/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py b/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py index 4885f6a68c..ee41cd35b3 
100644 --- a/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/seal_recognition/server/model_repo/seal-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -134,6 +137,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/seal_recognition/version.txt b/deploy/hps/sdk/pipelines/seal_recognition/version.txt index ee1372d33a..7179039691 100644 --- a/deploy/hps/sdk/pipelines/seal_recognition/version.txt +++ b/deploy/hps/sdk/pipelines/seal_recognition/version.txt @@ -1 +1 @@ -0.2.2 +0.2.3 diff --git a/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py b/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py index c1624046bb..baaafe4d4e 100644 --- a/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/table_recognition/server/model_repo/table-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} 
self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -132,6 +135,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/table_recognition/version.txt b/deploy/hps/sdk/pipelines/table_recognition/version.txt index 267577d47e..2b7c5ae018 100644 --- a/deploy/hps/sdk/pipelines/table_recognition/version.txt +++ b/deploy/hps/sdk/pipelines/table_recognition/version.txt @@ -1 +1 @@ -0.4.1 +0.4.2 diff --git a/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py b/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py index 508981080b..552bf3b5ef 100644 --- a/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py +++ b/deploy/hps/sdk/pipelines/table_recognition_v2/server/model_repo/table-recognition/1/model.py @@ -33,6 +33,7 @@ def initialize(self, args): self.context = {} self.context["file_storage"] = None self.context["return_img_urls"] = False + self.context["url_expires_in"] = -1 self.context["max_num_input_imgs"] = _DEFAULT_MAX_NUM_INPUT_IMGS self.context["max_output_img_size"] = _DEFAULT_MAX_OUTPUT_IMG_SIZE if 
self.app_config.extra: @@ -44,6 +45,8 @@ def initialize(self, args): self.context["return_img_urls"] = self.app_config.extra[ "return_img_urls" ] + if "url_expires_in" in self.app_config.extra: + self.context["url_expires_in"] = self.app_config.extra["url_expires_in"] if "max_num_input_imgs" in self.app_config.extra: self.context["max_num_input_imgs"] = self.app_config.extra[ "max_num_input_imgs" @@ -137,6 +140,7 @@ def run(self, input, log_id): filename_template=f"{{key}}_{i}.jpg", file_storage=self.context["file_storage"], return_urls=self.context["return_img_urls"], + url_expires_in=self.context["url_expires_in"], max_img_size=self.context["max_output_img_size"], ) else: diff --git a/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt b/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt index 267577d47e..2b7c5ae018 100644 --- a/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt +++ b/deploy/hps/sdk/pipelines/table_recognition_v2/version.txt @@ -1 +1 @@ -0.4.1 +0.4.2 diff --git a/deploy/hps/server_env/paddlex-hps-server/pyproject.toml b/deploy/hps/server_env/paddlex-hps-server/pyproject.toml index 5d85392841..f6dcad64a4 100644 --- a/deploy/hps/server_env/paddlex-hps-server/pyproject.toml +++ b/deploy/hps/server_env/paddlex-hps-server/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "paddlex-hps-server" -version = "0.4.0" +version = "0.5.0" # `paddlex` is not included here dependencies = [ "colorlog >= 6.9", diff --git a/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py b/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py index 14f699c3f1..6250706821 100644 --- a/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py +++ b/deploy/hps/server_env/paddlex-hps-server/src/paddlex_hps_server/app_common.py @@ -46,6 +46,7 @@ def postprocess_image( *, file_storage: Optional[Storage] = None, return_url: bool = False, + url_expires_in: int 
= -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> str: if return_url: @@ -71,7 +72,7 @@ def postprocess_image( file_storage.set(key, img_bytes) if return_url: assert isinstance(file_storage, SupportsGetURL) - return file_storage.get_url(key) + return file_storage.get_url(key, expires_in=url_expires_in) return utils.base64_encode(img_bytes) @@ -81,6 +82,7 @@ def postprocess_images( filename_template: str = "{key}.jpg", file_storage: Optional[Storage] = None, return_urls: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> Dict[str, str]: output_images: Dict[str, str] = {} @@ -95,6 +97,7 @@ def postprocess_images( filename=filename_template.format(key=key), file_storage=file_storage, return_url=return_urls, + url_expires_in=url_expires_in, max_img_size=max_img_size, ) return output_images diff --git a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml index d0e197f8ea..e49cef34e3 100644 --- a/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml +++ b/paddlex/configs/pipelines/PaddleOCR-VL-1.5.yaml @@ -59,7 +59,7 @@ SubModules: module_name: vl_recognition model_name: PaddleOCR-VL-1.5-0.9B model_dir: null - batch_size: 4096 + batch_size: -1 genai_config: backend: native diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml index 37a4823cf0..900892f522 100644 --- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml +++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml @@ -83,7 +83,7 @@ SubModules: module_name: vl_recognition model_name: PaddleOCR-VL-0.9B model_dir: null - batch_size: 4096 + batch_size: -1 genai_config: backend: native diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py index 82b8bf580a..87347fb034 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +++ 
b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py @@ -50,6 +50,7 @@ def postprocess_image( *, file_storage: Optional[Storage] = None, return_url: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> str: if return_url: @@ -75,7 +76,7 @@ def postprocess_image( file_storage.set(key, img_bytes) if return_url: assert isinstance(file_storage, SupportsGetURL) - return file_storage.get_url(key) + return file_storage.get_url(key, expires_in=url_expires_in) return serving_utils.base64_encode(img_bytes) @@ -85,6 +86,7 @@ def postprocess_images( filename_template: str = "{key}.jpg", file_storage: Optional[Storage] = None, return_urls: bool = False, + url_expires_in: int = -1, max_img_size: Optional[Tuple[int, int]] = None, ) -> Dict[str, str]: output_images: Dict[str, str] = {} @@ -99,6 +101,7 @@ def postprocess_images( filename=filename_template.format(key=key), file_storage=file_storage, return_url=return_urls, + url_expires_in=url_expires_in, max_img_size=max_img_size, ) return output_images diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py index 1d46eadb20..73473800d2 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py @@ -31,6 +31,7 @@ DEFAULT_MAX_NUM_INPUT_IMGS: Final[int] = 10 DEFAULT_MAX_OUTPUT_IMG_SIZE: Final[Tuple[int, int]] = (2000, 2000) +DEFAULT_URL_EXPIRES_IN: Final[int] = -1 def update_app_context(app_context: AppContext) -> None: @@ -49,6 +50,9 @@ def update_app_context(app_context: AppContext) -> None: raise TypeError( f"`{type(file_storage).__name__}` does not support getting URLs." 
) + app_context.extra["url_expires_in"] = extra_cfg.get( + "url_expires_in", DEFAULT_URL_EXPIRES_IN + ) app_context.extra["max_num_input_imgs"] = extra_cfg.get( "max_num_input_imgs", DEFAULT_MAX_NUM_INPUT_IMGS ) diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py index e9e837a50f..2bd38780cb 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py @@ -80,6 +80,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py index 786c426d91..8cc3343641 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py @@ -75,6 +75,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py index a778632cff..e35633e969 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py @@ -91,7 +91,7 @@ async def _infer( log_id, 
filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], - return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py index 4020a4f48b..72e3382ed1 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py @@ -81,6 +81,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py index 1eaa219d86..f2f0a46450 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/paddleocr_vl.py @@ -111,6 +111,7 @@ async def _infer( filename_template=f"markdown_{i}/{{key}}", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) if visualize_enabled: @@ -125,6 +126,7 @@ async def _infer( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py index 8e74699f14..4e449bca0e 100644 --- 
a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py @@ -92,6 +92,7 @@ async def _analyze_images( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py index ced0e6bb88..071a6a2f3d 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py @@ -93,6 +93,7 @@ async def _analyze_images( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py index f4a6c88a13..232dae19aa 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py @@ -100,6 +100,7 @@ async def _analyze_images( filename_template=f"markdown_{i}/{{key}}", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -115,6 +116,7 @@ async def _analyze_images( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) 
else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py index 79e1e17ee6..b68ab3369a 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py @@ -103,6 +103,7 @@ async def _infer( filename_template=f"markdown_{i}/{{key}}", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) md_flags = md_data["page_continuation_flags"] @@ -118,6 +119,7 @@ async def _infer( filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py index 012f94abb8..1c85b56d13 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py @@ -81,6 +81,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py index eecd45be99..9ca46ade4c 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py @@ -79,6 +79,7 @@ async 
def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py index 288a415fca..8adef8afd7 100644 --- a/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +++ b/paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py @@ -84,6 +84,7 @@ async def _infer(request: InferRequest) -> AIStudioResultResponse[InferResult]: filename_template=f"{{key}}_{i}.jpg", file_storage=ctx.extra["file_storage"], return_urls=ctx.extra["return_img_urls"], + url_expires_in=ctx.extra["url_expires_in"], max_img_size=ctx.extra["max_output_img_size"], ) else: diff --git a/paddlex/inference/serving/infra/storage.py b/paddlex/inference/serving/infra/storage.py index 7a0cbbf929..ddda1b3838 100644 --- a/paddlex/inference/serving/infra/storage.py +++ b/paddlex/inference/serving/infra/storage.py @@ -65,7 +65,7 @@ class BOSConfig(BaseModel): @runtime_checkable class SupportsGetURL(Protocol): - def get_url(self, key: str) -> str: ... + def get_url(self, key: str, expires_in: int = -1) -> str: ... 
class Storage(metaclass=abc.ABCMeta): @@ -156,10 +156,12 @@ def delete(self, key: str) -> None: key = self._get_full_key(key) self._client.delete_object(bucket_name=self._bucket_name, key=key) - def get_url(self, key: str) -> str: + def get_url(self, key: str, expires_in: int = -1) -> str: key = self._get_full_key(key) return self._client.generate_pre_signed_url( - self._bucket_name, key, expiration_in_seconds=-1 + self._bucket_name, + key, + expiration_in_seconds=expires_in, ).decode("ascii") def _get_full_key(self, key: str) -> str: diff --git a/paddlex/inference/serving/infra/utils.py b/paddlex/inference/serving/infra/utils.py index d9e971261e..7c67567957 100644 --- a/paddlex/inference/serving/infra/utils.py +++ b/paddlex/inference/serving/infra/utils.py @@ -30,7 +30,8 @@ from typing_extensions import Literal, ParamSpec, TypeAlias, assert_never from ....utils.deps import function_requires_deps, is_dep_available -from ...utils.pdfium_lock import PDF_RENDER_SCALE, pdfium_lock +from ....utils.flags import PDF_RENDER_SCALE +from ...utils.pdfium_lock import pdfium_lock from .models import ImageInfo, PDFInfo, PDFPageInfo if is_dep_available("aiohttp"): From 3b04645c443c65beb1a683f715d33772f4bbbd07 Mon Sep 17 00:00:00 2001 From: changdazhou <142379845+changdazhou@users.noreply.github.com> Date: Fri, 13 Feb 2026 14:22:41 +0800 Subject: [PATCH 22/37] add \n for seal rec && bugfix for text in table && delete_pass by model_name (#4998) --- paddlex/inference/models/common/static_infer.py | 4 +++- .../models/layout_analysis/processors.py | 13 +++++++++---- .../inference/pipelines/paddleocr_vl/result.py | 2 +- .../inference/pipelines/paddleocr_vl/uilts.py | 16 +++++++++------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/paddlex/inference/models/common/static_infer.py b/paddlex/inference/models/common/static_infer.py index 6806a7718a..3b4de0518e 100644 --- a/paddlex/inference/models/common/static_infer.py +++ 
b/paddlex/inference/models/common/static_infer.py @@ -401,7 +401,9 @@ def _create( if hasattr(config, "enable_new_executor"): config.enable_new_executor() config.set_optimization_level(3) - config.delete_pass("matmul_add_act_fuse_pass") + # TODO(changdazhou): use a black list instead + if self._model_name == "PP-DocLayoutV3": + config.delete_pass("matmul_add_act_fuse_pass") # ROCm does not support fused_conv2d_add_act kernel, delete the fuse passes if paddle.is_compiled_with_rocm(): config.delete_pass("conv2d_add_act_fuse_pass") diff --git a/paddlex/inference/models/layout_analysis/processors.py b/paddlex/inference/models/layout_analysis/processors.py index d1672a2000..b7ebaf0992 100644 --- a/paddlex/inference/models/layout_analysis/processors.py +++ b/paddlex/inference/models/layout_analysis/processors.py @@ -595,10 +595,15 @@ def filter_boxes( continue box_area_i = calculate_bbox_area(boxes[i]["coordinate"]) box_area_j = calculate_bbox_area(boxes[j]["coordinate"]) - if ( - boxes[i]["label"] == "image" or boxes[j]["label"] == "image" - ) and boxes[i]["label"] != boxes[j]["label"]: - continue + labels = {boxes[i]["label"], boxes[j]["label"]} + if labels & {"image", "table", "seal", "chart"} and len(labels) > 1: + if "table" not in labels or labels <= { + "table", + "image", + "seal", + "chart", + }: + continue if box_area_i >= box_area_j: dropped_indexes.add(j) else: diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py index f50b18a7c2..dc7dc92c98 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/result.py +++ b/paddlex/inference/pipelines/paddleocr_vl/result.py @@ -468,7 +468,7 @@ def _to_json(self, *args, **kwargs) -> dict[str, str]: original_image_width=original_image_width, show_ocr_content=True, ), - remove_symbol=use_seal_recognition, + remove_symbol=not use_seal_recognition, ) if self["model_settings"].get("use_chart_recognition", False): diff --git 
a/paddlex/inference/pipelines/paddleocr_vl/uilts.py b/paddlex/inference/pipelines/paddleocr_vl/uilts.py index 8a4009a473..c753ec9d32 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/uilts.py +++ b/paddlex/inference/pipelines/paddleocr_vl/uilts.py @@ -123,13 +123,15 @@ def filter_overlap_boxes( continue box_area_i = calculate_bbox_area(boxes[i]["coordinate"]) box_area_j = calculate_bbox_area(boxes[j]["coordinate"]) - if {boxes[i]["label"], boxes[j]["label"]} & { - "image", - "table", - "seal", - "chart", - } and boxes[i]["label"] != boxes[j]["label"]: - continue + labels = {boxes[i]["label"], boxes[j]["label"]} + if labels & {"image", "table", "seal", "chart"} and len(labels) > 1: + if "table" not in labels or labels <= { + "table", + "image", + "seal", + "chart", + }: + continue if box_area_i >= box_area_j: dropped_indexes.add(j) else: From bfda368039e9b2ee3fff4f8cb83fa240c6dcf220 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 13 Feb 2026 14:27:19 +0800 Subject: [PATCH 23/37] Fix auto batch size for PaddleOCR-VL-1.5-0.9B (#5003) --- paddlex/inference/models/doc_vlm/predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 43cc173a0b..0f97ebde46 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -167,7 +167,7 @@ def _build(self, **kwargs): return model, processor def _determine_batch_size(self): - if self._model_name == "PaddleOCR-VL-0.9B": + if self._model_name in ("PaddleOCR-VL-0.9B", "PaddleOCR-VL-1.5-0.9B"): batch_size = 1 if not self._use_local_model: batch_size = 4096 From e63a51a20401f112b966f72705bfbc2466e39e9f Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 13 Feb 2026 14:53:23 +0800 Subject: [PATCH 24/37] Update HPS frozon deps (#5004) --- deploy/hps/server_env/requirements/cpu.txt | 5 ++++- deploy/hps/server_env/requirements/gpu.txt | 5 ++++- 
deploy/hps/server_env/scripts/remove_images.sh | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt index c24a8fc5af..a3acfe3055 100644 --- a/deploy/hps/server_env/requirements/cpu.txt +++ b/deploy/hps/server_env/requirements/cpu.txt @@ -161,7 +161,9 @@ langchain-core==0.2.43 langchain-openai==0.1.25 # via paddlex (../../../setup.py) langchain-text-splitters==0.2.4 - # via langchain + # via + # langchain + # paddlex (../../../setup.py) langsmith==0.1.147 # via # langchain @@ -345,6 +347,7 @@ scikit-learn==1.6.1 # via paddlex (../../../setup.py) scipy==1.15.2 # via + # paddlex (../../../setup.py) # scikit-image # scikit-learn sentencepiece==0.2.1 diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt index caa9a8fbc0..e43d89b38f 100644 --- a/deploy/hps/server_env/requirements/gpu.txt +++ b/deploy/hps/server_env/requirements/gpu.txt @@ -161,7 +161,9 @@ langchain-core==0.2.43 langchain-openai==0.1.25 # via paddlex (../../../setup.py) langchain-text-splitters==0.2.4 - # via langchain + # via + # langchain + # paddlex (../../../setup.py) langsmith==0.1.147 # via # langchain @@ -345,6 +347,7 @@ scikit-learn==1.6.1 # via paddlex (../../../setup.py) scipy==1.15.2 # via + # paddlex (../../../setup.py) # scikit-image # scikit-learn sentencepiece==0.2.1 diff --git a/deploy/hps/server_env/scripts/remove_images.sh b/deploy/hps/server_env/scripts/remove_images.sh index 2926504e3d..2b89b756f3 100755 --- a/deploy/hps/server_env/scripts/remove_images.sh +++ b/deploy/hps/server_env/scripts/remove_images.sh @@ -6,6 +6,6 @@ for device_type in 'gpu' 'cpu'; do version="$(cat "${device_type}_version.txt")" docker rmi \ "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex${paddlex_version%.*}-${device_type}" \ - "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${version}-${device_type}" \ + 
"ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${version}-paddlex${paddlex_version}-${device_type}" \ "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" done From 5bf095a46ee206a6ebbea5b978adb0d9b9254a16 Mon Sep 17 00:00:00 2001 From: zhang-prog <69562787+zhang-prog@users.noreply.github.com> Date: Fri, 13 Feb 2026 17:32:02 +0800 Subject: [PATCH 25/37] update vlm batch_size (#5005) --- paddlex/inference/models/doc_vlm/predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py index 0f97ebde46..3f983a8d3a 100644 --- a/paddlex/inference/models/doc_vlm/predictor.py +++ b/paddlex/inference/models/doc_vlm/predictor.py @@ -170,7 +170,7 @@ def _determine_batch_size(self): if self._model_name in ("PaddleOCR-VL-0.9B", "PaddleOCR-VL-1.5-0.9B"): batch_size = 1 if not self._use_local_model: - batch_size = 4096 + batch_size = 8192 logging.debug( f"The batch size of {self._model_name} is determined to be {batch_size}." 
) From 50f5932942dbd342f5a94e7fc1c0e6ef83cf73a3 Mon Sep 17 00:00:00 2001 From: onecatcn Date: Sat, 14 Feb 2026 20:42:02 +0800 Subject: [PATCH 26/37] add P800 document (#4995) * add P800 document * Update multi_devices_use_guide.en.md * Update multi_devices_use_guide.md * Update multi_devices_use_guide.md * Update multi_devices_use_guide.md * Update multi_devices_use_guide.en.md --- .../multi_devices_use_guide.en.md | 4 +- .../multi_devices_use_guide.md | 4 +- .../paddlepaddle_install_P800.en.md | 44 +++++++++++++++++++ .../paddlepaddle_install_P800.md | 44 +++++++++++++++++++ 4 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 docs/other_devices_support/paddlepaddle_install_P800.en.md create mode 100644 docs/other_devices_support/paddlepaddle_install_P800.md diff --git a/docs/other_devices_support/multi_devices_use_guide.en.md b/docs/other_devices_support/multi_devices_use_guide.en.md index a37706cb59..8bfd62b27e 100644 --- a/docs/other_devices_support/multi_devices_use_guide.en.md +++ b/docs/other_devices_support/multi_devices_use_guide.en.md @@ -14,7 +14,9 @@ Ascend NPU: [Ascend NPU PaddlePaddle Installation Guide](./paddlepaddle_install_ Cambricon MLU: [Cambricon MLU PaddlePaddle Installation Guide](./paddlepaddle_install_MLU.en.md) -Kunlun XPU: [Kunlun XPU PaddlePaddle Installation Guide](./paddlepaddle_install_XPU.en.md) +Kunlunxin 2: [Kunlunxin 2 PaddlePaddle Installation Guide](./paddlepaddle_install_XPU.en.md) + +Kunlunxin P800: [Kunlunxin P800 PaddlePaddle Installation Guide](./paddlepaddle_install_P800.en.md) Hygon DCU: [Hygon DCU PaddlePaddle Installation Guide](./paddlepaddle_install_DCU.en.md) diff --git a/docs/other_devices_support/multi_devices_use_guide.md b/docs/other_devices_support/multi_devices_use_guide.md index 238d761f10..6f249a2532 100644 --- a/docs/other_devices_support/multi_devices_use_guide.md +++ b/docs/other_devices_support/multi_devices_use_guide.md @@ -12,7 +12,9 @@ comments: true 昇腾 NPU:[昇腾 NPU 
飞桨安装教程](./paddlepaddle_install_NPU.md) -昆仑 XPU:[昆仑 XPU 飞桨安装教程](./paddlepaddle_install_XPU.md) +昆仑芯 XPU2:[昆仑芯二代 AI 芯片飞桨安装教程](./paddlepaddle_install_XPU.md) + +昆仑芯 P800: [昆仑芯 P800 飞桨安装教程](./paddlepaddle_install_P800.md) 寒武纪 MLU:[寒武纪 MLU 飞桨安装教程](./paddlepaddle_install_MLU.md) diff --git a/docs/other_devices_support/paddlepaddle_install_P800.en.md b/docs/other_devices_support/paddlepaddle_install_P800.en.md new file mode 100644 index 0000000000..a9c118525f --- /dev/null +++ b/docs/other_devices_support/paddlepaddle_install_P800.en.md @@ -0,0 +1,44 @@ +--- +comments: true +--- + +# Kunlunxin XPU PaddlePaddle Installation Tutorial + +Currently, PaddleX supports Kunlunxin P800. Considering environmental differences, we recommend using the Kunlunxin P800 development image officially released by PaddlePaddle, which is pre-installed with the Kunlunxin basic runtime environment library (XRE). + +## 1. Docker Environment Preparation +Pull the image. This image is only for the development environment and does not include a pre-compiled PaddlePaddle installation package. + +```bash +docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 +``` +Refer to the following command to start the container: + +```bash +docker run -it --name paddle-xpu-dev -v $(pwd):/work \ + -v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \ + -w=/work --shm-size=128G --network=host --privileged \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 /bin/bash +``` + +## 2. Install Paddle Package +Currently, Python3.10 wheel installation packages are provided. If you have a need for other Python versions, you can refer to the [PaddlePaddle official documentation](https://www.paddlepaddle.org.cn/en/install/quick) to compile and install them yourself. 
+ +Install the Python3.10 wheel installation package: + +```bash +python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/ # For X86 architecture +``` + +Verify the installation package. After installation, run the following command: + +```bash +python -c "import paddle; paddle.utils.run_check()" +``` + +The expected output is: + +``` +PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now. +``` diff --git a/docs/other_devices_support/paddlepaddle_install_P800.md b/docs/other_devices_support/paddlepaddle_install_P800.md new file mode 100644 index 0000000000..ddcbdfad5c --- /dev/null +++ b/docs/other_devices_support/paddlepaddle_install_P800.md @@ -0,0 +1,44 @@ +--- +comments: true +--- + +# 昆仑芯 P800 飞桨安装教程 + +当前 PaddleX 支持昆仑 P800 等芯片。考虑到环境差异性,我们推荐使用飞桨官方发布的昆仑芯 XPU 开发镜像,该镜像预装有昆仑基础运行环境库(XRE)。 + +## 1、docker环境准备 +拉取镜像,此镜像仅为开发环境,镜像中不包含预编译的飞桨安装包 + +``` +# 拉取镜像 +docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 +``` +参考如下命令启动容器 + +``` +# 参考如下命令,启动容器 +docker run -it --name paddle-xpu-dev -v $(pwd):/work \ + -v /usr/local/bin/xpu-smi:/usr/local/bin/xpu-smi \ + -w=/work --shm-size=128G --network=host --privileged \ + --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ + ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:xpu-ubuntu2204-x86_64-gcc123-py310 /bin/bash +``` +## 2、安装paddle包 +当前提供 Python3.10 的 wheel 安装包。如有其他 Python 版本需求,可以参考[飞桨官方文档](https://www.paddlepaddle.org.cn/install/quick)自行编译安装。 + +安装 Python3.10 的 wheel 安装包 + +``` +# 下载并安装 wheel 包 +python -m pip install --pre paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packages/nightly/xpu-p800/ +``` +验证安装包 安装完成之后,运行如下命令 + +``` +python -c "import paddle; paddle.utils.run_check()" +``` +预期得到如下输出结果 + +``` +PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now. 
+``` From 062a7827047bf4ad38d02185bac152a4fe1a420f Mon Sep 17 00:00:00 2001 From: onecatcn Date: Sat, 14 Feb 2026 20:42:39 +0800 Subject: [PATCH 27/37] Update mkdocs.yml to reflect kunlunxin docs changes (#5006) * Update mkdocs.yml * Update mkdocs.yml --- mkdocs.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 28ef9ea9d4..d49b7c2d44 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -195,7 +195,8 @@ plugins: 海光 DCU 飞桨安装教程: HYGON DCU PaddlePaddle Installation Guide 寒武纪 MLU 飞桨安装教程: Cambricon MLU PaddlePaddle Installation Guide 昇腾 NPU 飞桨安装教程: Ascend NPU PaddlePaddle Installation Guide - 昆仑 XPU 飞桨安装教程: Kunlun XPU PaddlePaddle Installation Guide + 昆仑芯 Kunlunxin 2 飞桨安装教程: Kunlunxin 2 PaddlePaddle Installation Guide + 昆仑芯 P800 飞桨安装教程: Kunlunxin P800 PaddlePaddle Installation Guide 燧原 GCU 飞桨安装教程: Enflame GCU PaddlePaddle Installation Guide 数据标注教程: Data Annotation Tutorials 计算机视觉: Computer Vision @@ -437,7 +438,8 @@ nav: - 海光 DCU 飞桨安装教程: other_devices_support/paddlepaddle_install_DCU.md - 寒武纪 MLU 飞桨安装教程: other_devices_support/paddlepaddle_install_MLU.md - 昇腾 NPU 飞桨安装教程: other_devices_support/paddlepaddle_install_NPU.md - - 昆仑 XPU 飞桨安装教程: other_devices_support/paddlepaddle_install_XPU.md + - 昆仑芯 Kunlunxin 2 飞桨安装教程: other_devices_support/paddlepaddle_install_XPU.md + - 昆仑芯 Kunlunxin P800 飞桨安装教程: other_devices_support/paddlepaddle_install_P800.md - 燧原 GCU 飞桨安装教程: other_devices_support/paddlepaddle_install_GCU.md - 数据标注教程: - 计算机视觉: From 2054f9411d4a089bbe5a5c3a73bd74de60a735a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=A6=E5=8D=BF?= <64625668+leo-q8@users.noreply.github.com> Date: Sat, 14 Feb 2026 20:47:52 +0800 Subject: [PATCH 28/37] support iluvatar_gpu for ppdet (#5002) Co-authored-by: cuicheng01 <45199522+cuicheng01@users.noreply.github.com> --- paddlex/repo_apis/PaddleDetection_api/object_det/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddlex/repo_apis/PaddleDetection_api/object_det/config.py 
b/paddlex/repo_apis/PaddleDetection_api/object_det/config.py index 8285bda439..3e37dbead4 100644 --- a/paddlex/repo_apis/PaddleDetection_api/object_det/config.py +++ b/paddlex/repo_apis/PaddleDetection_api/object_det/config.py @@ -297,6 +297,9 @@ def update_device(self, device_type: str): elif device_type.lower() == "metax_gpu": self["use_metax_gpu"] = True self["use_gpu"] = False + elif device_type.lower() == "iluvatar_gpu": + self["use_iluvatar_gpu"] = True + self["use_gpu"] = False else: assert device_type.lower() == "cpu" self["use_gpu"] = False From 4511e2ff1cec101ad1cad2ddaf17a21122e08ad1 Mon Sep 17 00:00:00 2001 From: Majid Ali Ansari Date: Tue, 24 Feb 2026 14:42:20 +0530 Subject: [PATCH 29/37] fix: add langchain compatibility shim for newer versions (0.1.x+) (#4997) * fix: add langchain compatibility shim for newer versions (fixes #17674) * Update copyright year in langchain_shim.py * Refactor langchain_shim import and initialization --------- Co-authored-by: Lin Manhui --- paddlex/__init__.py | 4 ++ paddlex/utils/langchain_shim.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 paddlex/utils/langchain_shim.py diff --git a/paddlex/__init__.py b/paddlex/__init__.py index 27f40e73ec..ad89c47164 100644 --- a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -15,6 +15,10 @@ import os import sys +from .utils.langchain_shim import apply_langchain_shim + +apply_langchain_shim() + _SPECIAL_MODS = ["paddle", "paddle_custom_device", "ultra_infer"] _loaded_special_mods = [] for mod in _SPECIAL_MODS: diff --git a/paddlex/utils/langchain_shim.py b/paddlex/utils/langchain_shim.py new file mode 100644 index 0000000000..f3ada62fbc --- /dev/null +++ b/paddlex/utils/langchain_shim.py @@ -0,0 +1,79 @@ +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import types + + +def apply_langchain_shim(): + """ + A compatibility shim for LangChain to handle breaking changes in newer versions. + Specifically addresses the removal of 'langchain.docstore' and relocation of + 'RecursiveCharacterTextSplitter'. + """ + # Check if langchain is installed + try: + import langchain + except ImportError: + return + + # Ensure langchain is treated as a package if it's a dummy module + if not hasattr(langchain, "__path__"): + langchain.__path__ = [] + + # Helper to create shim modules + def create_shim(name, parent, attr): + if not hasattr(parent, attr): + mod = types.ModuleType(name) + if not hasattr(mod, "__path__"): + mod.__path__ = [] + sys.modules[name] = mod + setattr(parent, attr, mod) + return getattr(parent, attr) + + # Shim for docstore and document + docstore = create_shim("langchain.docstore", langchain, "docstore") + document = create_shim("langchain.docstore.document", docstore, "document") + + if not hasattr(document, "Document"): + try: + from langchain_core.documents import Document as RealDocument + document.Document = RealDocument + except ImportError: + + class MockDocument: + + def __init__(self, page_content, metadata=None): + self.page_content = page_content + self.metadata = metadata or {} + + document.Document = MockDocument + + # Shim for text_splitter + text_splitter = create_shim("langchain.text_splitter", langchain, "text_splitter") + if not hasattr(text_splitter, "RecursiveCharacterTextSplitter"): + try: + from langchain_text_splitters import ( + RecursiveCharacterTextSplitter as 
RealSplitter, + ) + + text_splitter.RecursiveCharacterTextSplitter = RealSplitter + except ImportError: + + class MockSplitter: + + def __init__(self, *args, **kwargs): + pass + + text_splitter.RecursiveCharacterTextSplitter = MockSplitter From 914f5fbacec0da497bf900ee7d3139302f518a15 Mon Sep 17 00:00:00 2001 From: Liu Jiaxuan <85537209+liu-jiaxuan@users.noreply.github.com> Date: Tue, 24 Feb 2026 17:16:31 +0800 Subject: [PATCH 30/37] fix codes (#4984) Co-authored-by: Lin Manhui --- .../object_detection/modeling/rt_detr.py | 116 +++++++++++------- .../modeling/slanext.py | 51 +++++--- .../text_recognition/modeling/pp_ocrv5_rec.py | 82 ++++++++----- 3 files changed, 157 insertions(+), 92 deletions(-) diff --git a/paddlex/inference/models/object_detection/modeling/rt_detr.py b/paddlex/inference/models/object_detection/modeling/rt_detr.py index 9f15589214..ece6c1afa7 100644 --- a/paddlex/inference/models/object_detection/modeling/rt_detr.py +++ b/paddlex/inference/models/object_detection/modeling/rt_detr.py @@ -181,52 +181,78 @@ def __call__(self, head_out, im_shape, scale_factor, pad_shape): class RTDETRConfig(PretrainedConfig): def __init__( self, - backbone, - HybridEncoder, - RTDETRTransformer, - DINOHead, - DETRPostProcess, + arch, + return_idx, + freeze_stem_only, + freeze_at, + freeze_norm, + lr_mult_list, + hidden_dim, + use_encoder_idx, + num_encoder_layers, + el_d_model, + el_nhead, + el_dim_feedforward, + el_dropout, + el_activation, + expansion, + tf_num_queries, + tf_position_embed_type, + tf_feat_strides, + tf_num_levels, + tf_nhead, + tf_num_decoder_layers, + tf_backbone_feat_channels, + tf_dim_feedforward, + tf_dropout, + tf_activation, + tf_num_denoising, + tf_label_noise_ratio, + tf_box_noise_scale, + tf_learnt_init_query, + loss_coeff, + aux_loss, + use_vfl, + matcher_coeff, + num_top_queries, + use_focal_loss, + **kwargs, ): - if backbone["name"] == "PPHGNetV2": - self.arch = backbone["arch"] - self.return_idx = backbone["return_idx"] - 
self.freeze_stem_only = backbone["freeze_stem_only"] - self.freeze_at = backbone["freeze_at"] - self.freeze_norm = backbone["freeze_norm"] - self.lr_mult_list = backbone["lr_mult_list"] - else: - raise RuntimeError( - f"There is no dynamic graph implementation for backbone {backbone['name']}." - ) - self.hidden_dim = HybridEncoder["hidden_dim"] - self.use_encoder_idx = HybridEncoder["use_encoder_idx"] - self.num_encoder_layers = HybridEncoder["num_encoder_layers"] - self.el_d_model = HybridEncoder["encoder_layer"]["d_model"] - self.el_nhead = HybridEncoder["encoder_layer"]["nhead"] - self.el_dim_feedforward = HybridEncoder["encoder_layer"]["dim_feedforward"] - self.el_dropout = HybridEncoder["encoder_layer"]["dropout"] - self.el_activation = HybridEncoder["encoder_layer"]["activation"] - self.expansion = HybridEncoder["expansion"] - self.tf_num_queries = RTDETRTransformer["num_queries"] - self.tf_position_embed_type = RTDETRTransformer["position_embed_type"] - self.tf_feat_strides = RTDETRTransformer["feat_strides"] - self.tf_num_levels = RTDETRTransformer["num_levels"] - self.tf_nhead = RTDETRTransformer["nhead"] - self.tf_num_decoder_layers = RTDETRTransformer["num_decoder_layers"] - self.tf_backbone_feat_channels = RTDETRTransformer["backbone_feat_channels"] - self.tf_dim_feedforward = RTDETRTransformer["dim_feedforward"] - self.tf_dropout = RTDETRTransformer["dropout"] - self.tf_activation = RTDETRTransformer["activation"] - self.tf_num_denoising = RTDETRTransformer["num_denoising"] - self.tf_label_noise_ratio = RTDETRTransformer["label_noise_ratio"] - self.tf_box_noise_scale = RTDETRTransformer["box_noise_scale"] - self.tf_learnt_init_query = RTDETRTransformer["learnt_init_query"] - self.loss_coeff = DINOHead["loss"]["loss_coeff"] - self.aux_loss = DINOHead["loss"]["aux_loss"] - self.use_vfl = DINOHead["loss"]["use_vfl"] - self.matcher_coeff = DINOHead["loss"]["matcher"]["matcher_coeff"] - self.num_top_queries = DETRPostProcess["num_top_queries"] - 
self.use_focal_loss = DETRPostProcess["use_focal_loss"] + self.arch = arch + self.return_idx = return_idx + self.freeze_stem_only = freeze_stem_only + self.freeze_at = freeze_at + self.freeze_norm = freeze_norm + self.lr_mult_list = lr_mult_list + self.hidden_dim = hidden_dim + self.use_encoder_idx = use_encoder_idx + self.num_encoder_layers = num_encoder_layers + self.el_d_model = d_model + self.el_nhead = nhead + self.el_dim_feedforward = dim_feedforward + self.el_dropout = dropout + self.el_activation = activation + self.expansion = expansion + self.tf_num_queries = num_queries + self.tf_position_embed_type = position_embed_type + self.tf_feat_strides = feat_strides + self.tf_num_levels = num_levels + self.tf_nhead = nhead + self.tf_num_decoder_layers = num_decoder_layers + self.tf_backbone_feat_channels = backbone_feat_channels + self.tf_dim_feedforward = dim_feedforward + self.tf_dropout = dropout + self.tf_activation = activation + self.tf_num_denoising = num_denoising + self.tf_label_noise_ratio = label_noise_ratio + self.tf_box_noise_scale = box_noise_scale + self.tf_learnt_init_query = learnt_init_query + self.loss_coeff = loss_coeff + self.aux_loss = aux_loss + self.use_vfl = use_vfl + self.matcher_coeff = matcher_coeff + self.num_top_queries = num_top_queries + self.use_focal_loss = use_focal_loss self.tensor_parallel_degree = 1 diff --git a/paddlex/inference/models/table_structure_recognition/modeling/slanext.py b/paddlex/inference/models/table_structure_recognition/modeling/slanext.py index 884339f0fa..956a7d36c6 100644 --- a/paddlex/inference/models/table_structure_recognition/modeling/slanext.py +++ b/paddlex/inference/models/table_structure_recognition/modeling/slanext.py @@ -25,23 +25,26 @@ class SLANeXtConfig(PretrainedConfig): def __init__( self, - backbone, - SLAHead, + out_channels, + hidden_size, + max_text_length, + loc_reg_num, + image_size, + encoder_embed_dim, + encoder_depth, + encoder_num_heads, + encoder_global_attn_indexes, + **kwargs, 
): - if backbone["name"] == "Vary_VIT_B": - self.image_size = backbone["image_size"] - self.encoder_embed_dim = backbone["encoder_embed_dim"] - self.encoder_depth = backbone["encoder_depth"] - self.encoder_num_heads = backbone["encoder_num_heads"] - self.encoder_global_attn_indexes = backbone["encoder_global_attn_indexes"] - else: - raise RuntimeError( - f"There is no dynamic graph implementation for backbone {backbone['name']}." - ) - self.out_channels = SLAHead["out_channels"] - self.hidden_size = SLAHead["hidden_size"] - self.max_text_length = SLAHead["max_text_length"] - self.loc_reg_num = SLAHead["loc_reg_num"] + self.out_channels = out_channels + self.hidden_size = hidden_size + self.max_text_length = max_text_length + self.loc_reg_num = loc_reg_num + self.image_size = image_size + self.encoder_embed_dim = encoder_embed_dim + self.encoder_depth = encoder_depth + self.encoder_num_heads = encoder_num_heads + self.encoder_global_attn_indexes = encoder_global_attn_indexes self.tensor_parallel_degree = 1 @@ -76,11 +79,25 @@ def forward(self, x): return [x["loc_preds"], x["structure_probs"]] def get_transpose_weight_keys(self): - transpose_keys = ["mlp.lin2", "attn.qkv", "mlp.lin1"] + transpose_keys = [ + "mlp.lin2", + "attn.qkv", + "mlp.lin1", + "structure_attention_cell.score", + "attn.proj", + "i2h", + "h2h", + "structure_generator.0", + "structure_generator.1", + "loc_generator.0", + "loc_generator.1", + ] need_to_transpose = [] all_weight_keys = [] for name, param in self.backbone.named_parameters(): all_weight_keys.append("backbone." + name) + for name, param in self.head.named_parameters(): + all_weight_keys.append("head." 
+ name) for i in range(len(all_weight_keys)): for j in range(len(transpose_keys)): if (transpose_keys[j] in all_weight_keys[i]) and ( diff --git a/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py b/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py index 0c90123e6b..50ac8d1196 100644 --- a/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py +++ b/paddlex/inference/models/text_recognition/modeling/pp_ocrv5_rec.py @@ -27,34 +27,49 @@ class PPOCRV5RecConfig(PretrainedConfig): def __init__( self, - backbone, - MultiHead, + model_type, + scale: float = 0.95, + conv_kxk_num: int = 4, + lr_mult_list: list = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + lab_lr: float = 0.1, + net_config: dict | None = None, + text_rec: bool = True, + stem_channels: list = [3, 32, 48], + det: bool = False, + use_lab: bool = False, + use_last_conv: bool = True, + class_expand: int = 2048, + dropout_prob: float = 0.0, + class_num: int = 1000, + lr_mult_list: list = [1.0, 1.0, 1.0, 1.0, 1.0], + out_indices: list | None = None, + stage_config: dict | None = None, + head_list: list | None = None, + decode_list: dict | None = None, + **kwargs, ): - self.backbone_name = backbone["name"] - if self.backbone_name == "PPLCNetV3": - self.net_config = backbone["net_config"] - self.scale = backbone["scale"] - self.conv_kxk_num = backbone["conv_kxk_num"] - self.lr_mult_list = backbone["lr_mult_list"] - self.lab_lr = backbone["lab_lr"] - elif self.backbone_name == "PPHGNetV2": - self.text_rec = backbone["text_rec"] - self.stem_channels = backbone["stem_channels"] - self.stage_config = backbone["stage_config"] - self.det = backbone["det"] - self.use_lab = backbone["use_lab"] - self.use_last_conv = backbone["use_last_conv"] - self.class_expand = backbone["class_expand"] - self.dropout_prob = backbone["dropout_prob"] - self.class_num = backbone["class_num"] - self.lr_mult_list = backbone["lr_mult_list"] - self.out_indices = backbone["out_indices"] - else: - raise 
RuntimeError( - f"There is no dynamic graph implementation for backbone {backbone['name']}." - ) - self.head_list = MultiHead["head_list"] - self.decode_list = MultiHead["decode_list"] + self.model_type = model_type + if self.model_type == "pp_ocrv5_mobile_rec": + self.net_config = net_config + self.scale = scale + self.conv_kxk_num =conv_kxk_num + self.lr_mult_list = lr_mult_list + self.lab_lr = lab_lr + elif self.model_type == "pp_ocrv5_server_rec": + self.text_rec = text_rec + self.stem_channels = stem_channels + self.stage_config = stage_config + self.det = det + self.use_lab = use_lab + self.use_last_conv = use_last_conv + self.class_expand = class_expand + self.dropout_prob = dropout_prob + self.class_num = class_num + self.lr_mult_list = lr_mult_list + self.out_indices = out_indices + + self.head_list = head_list + self.decode_list = decode_list self.tensor_parallel_degree = 1 @@ -64,7 +79,7 @@ class PPOCRV5Rec(PretrainedModel): def __init__(self, config: PPOCRV5RecConfig): super().__init__(config) - if self.config.backbone_name == "PPLCNetV3": + if self.config.model_type == "pp_ocrv5_mobile_rec": self.backbone = PPLCNetV3( scale=self.config.scale, net_config=self.config.net_config, @@ -72,7 +87,7 @@ def __init__(self, config: PPOCRV5RecConfig): lr_mult_list=self.config.lr_mult_list, lab_lr=self.config.lab_lr, ) - elif self.config.backbone_name == "PPHGNetV2": + elif self.config.model_type == "pp_ocrv5_server_rec": self.backbone = PPHGNetV2( stage_config=self.config.stage_config, stem_channels=self.config.stem_channels, @@ -102,7 +117,14 @@ def forward(self, x): return [x.cpu().numpy()] def get_transpose_weight_keys(self): - transpose_keys = ["fc", "out_proj", "attn.qkv"] + transpose_keys = [ + "fc", + "out_proj", + "attn.qkv", + "mixer.qkv", + "cross_attn.kv", + "mixer.proj" + ] need_to_transpose = [] all_weight_keys = [] for name, param in self.head.named_parameters(): From cfba8bc39cccb505a7a41f49cee4a93c81f1da06 Mon Sep 17 00:00:00 2001 From: Bvicii 
<98971614+scyyh11@users.noreply.github.com> Date: Tue, 24 Feb 2026 01:17:39 -0800 Subject: [PATCH 31/37] fix: guard chart_recognition_model init with use_chart_recognition flag (#4986) (#5008) - Add `if self.use_chart_recognition` guard around chart_recognition_model initialization, consistent with other optional components - Fix copy-paste error in fallback error message: "block_region_detection_model" -> "chart_recognition_model" - Fix getattr in close() to use default None, preventing AttributeError when chart recognition is disabled Co-authored-by: Lin Manhui --- .../pipelines/layout_parsing/pipeline_v2.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py index 198013bfdf..bdf08a7878 100644 --- a/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +++ b/paddlex/inference/pipelines/layout_parsing/pipeline_v2.py @@ -92,7 +92,7 @@ def __init__( self.img_reader = ReadImage(format="BGR") def close(self): - if getattr(self, "chart_recognition_model"): + if getattr(self, "chart_recognition_model", None): self.chart_recognition_model.close() def inintial_predictor(self, config: dict) -> None: @@ -213,13 +213,14 @@ def inintial_predictor(self, config: dict) -> None: ) # TODO(gaotingquan): init the model at any time - chart_recognition_config = config.get("SubModules", {}).get( - "ChartRecognition", - {"model_config_error": "config error for block_region_detection_model!"}, - ) - self.chart_recognition_model = self.create_model( - chart_recognition_config, - ) + if self.use_chart_recognition: + chart_recognition_config = config.get("SubModules", {}).get( + "ChartRecognition", + {"model_config_error": "config error for chart_recognition_model!"}, + ) + self.chart_recognition_model = self.create_model( + chart_recognition_config, + ) self.markdown_ignore_labels = config.get( "markdown_ignore_labels", [ From 
3d5e3a0ade3e165e5049ef8b91d4e77014a40c0a Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 26 Feb 2026 11:12:52 +0800 Subject: [PATCH 32/37] Use git hash as image version (#5016) --- deploy/hps/server_env/cpu_version.txt | 1 - deploy/hps/server_env/gpu_version.txt | 1 - deploy/hps/server_env/scripts/remove_images.sh | 3 +-- deploy/hps/server_env/scripts/tag_and_push_images.sh | 3 +-- 4 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 deploy/hps/server_env/cpu_version.txt delete mode 100644 deploy/hps/server_env/gpu_version.txt diff --git a/deploy/hps/server_env/cpu_version.txt b/deploy/hps/server_env/cpu_version.txt deleted file mode 100644 index 0b69c00c5f..0000000000 --- a/deploy/hps/server_env/cpu_version.txt +++ /dev/null @@ -1 +0,0 @@ -0.3.14 diff --git a/deploy/hps/server_env/gpu_version.txt b/deploy/hps/server_env/gpu_version.txt deleted file mode 100644 index 9e29e10619..0000000000 --- a/deploy/hps/server_env/gpu_version.txt +++ /dev/null @@ -1 +0,0 @@ -0.3.15 diff --git a/deploy/hps/server_env/scripts/remove_images.sh b/deploy/hps/server_env/scripts/remove_images.sh index 2b89b756f3..5218df41b9 100755 --- a/deploy/hps/server_env/scripts/remove_images.sh +++ b/deploy/hps/server_env/scripts/remove_images.sh @@ -3,9 +3,8 @@ paddlex_version="$(cat ../../../paddlex/.version)" for device_type in 'gpu' 'cpu'; do - version="$(cat "${device_type}_version.txt")" docker rmi \ "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex${paddlex_version%.*}-${device_type}" \ - "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${version}-paddlex${paddlex_version}-${device_type}" \ + "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:$(git rev-parse --short HEAD)-${device_type}" \ "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" done diff --git a/deploy/hps/server_env/scripts/tag_and_push_images.sh b/deploy/hps/server_env/scripts/tag_and_push_images.sh index fc334a13d5..5dfbad5f15 100755 --- 
a/deploy/hps/server_env/scripts/tag_and_push_images.sh +++ b/deploy/hps/server_env/scripts/tag_and_push_images.sh @@ -3,9 +3,8 @@ paddlex_version="$(cat ../../../paddlex/.version)" for device_type in 'gpu' 'cpu'; do - version="$(cat "${device_type}_version.txt")" docker push "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" - for tag in "${version}-paddlex${paddlex_version}-${device_type}" "paddlex${paddlex_version%.*}-${device_type}"; do + for tag in "$(git rev-parse --short HEAD)-${device_type}" "paddlex${paddlex_version%.*}-${device_type}"; do docker tag "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:latest-${device_type}" "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${tag}" docker push "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:${tag}" done From 20442656a1f4b9619692e1cf829fbf48b7b6b11d Mon Sep 17 00:00:00 2001 From: Fabian Palmer Date: Thu, 26 Feb 2026 10:26:45 +0100 Subject: [PATCH 33/37] fix typo in error message (#5015) --- paddlex/inference/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddlex/inference/models/__init__.py b/paddlex/inference/models/__init__.py index 154cef49e8..a1e0e13495 100644 --- a/paddlex/inference/models/__init__.py +++ b/paddlex/inference/models/__init__.py @@ -71,7 +71,7 @@ def create_predictor( if model_dir is None: model_dir = official_models[model_name] else: - assert Path(model_dir).exists(), f"{model_dir} is not exists!" + assert Path(model_dir).exists(), f"{model_dir} does not exist!" model_dir = Path(model_dir) config = BasePredictor.load_config(model_dir) assert ( From 1ffc4a66345392d7314623f233cc8e5a8fd1b1d9 Mon Sep 17 00:00:00 2001 From: Bvicii <98971614+scyyh11@users.noreply.github.com> Date: Sat, 28 Feb 2026 03:47:09 -0800 Subject: [PATCH 34/37] Feature/hps paddleocr vl 1.5 (#5017) * Support HPS SDK assembly for PaddleOCR-VL-1.5 Add derived pipeline support to the HPS assembly scripts so that pipelines defined in PIPELINE_APP_ROUTER (e.g. 
PaddleOCR-VL-1.5) can automatically reuse the source pipeline's server/client/version while substituting the correct pipeline_config.yaml. - assemble.sh: mount name_mappings.py and pipeline configs into the Docker container; resolve paths relative to the script location - assemble.py: parse PIPELINE_APP_ROUTER via ast, include mapped pipelines in --all, copy from source dir and overwrite config - docs: add PaddleOCR-VL-1.5 SDK download link to serving docs * Fix archive naming for pipelines with dots in their name pathlib.Path.with_suffix() treats the dot in names like PaddleOCR-VL-1.5 as a file extension, producing incorrect archive names (e.g. paddlex_hps_PaddleOCR-VL-1.tar.gz). Use string concatenation instead to preserve the full SDK name. * Store HPS pipeline config locally and add ast parsing note - Create pipelines/PaddleOCR-VL-1.5/ with its own pipeline_config.yaml so HPS config can diverge from paddlex/configs independently - Remove _pipeline_configs volume mount from assemble.sh (no longer needed) - Remove PIPELINE_CONFIGS_DIR from assemble.py, read config from local pipeline directory instead - Add NOTE comment explaining why ast is used to parse PIPELINE_APP_ROUTER * Address PR review feedback - Move pipeline_config.yaml to server/ subdirectory to mirror source pipeline structure, enabling generic file-level merge - Use copytree with dirs_exist_ok to overlay mapped pipeline files on top of source, so any file can be overridden (not just config) - Sync config with latest: VLRecognition batch_size=-1, add Serving - Move ast NOTE from docstring to inline comment --- .../server/pipeline_config.yaml | 85 +++++++++++++++++++ deploy/hps/sdk/scripts/assemble.py | 42 ++++++++- deploy/hps/sdk/scripts/assemble.sh | 7 +- docs/pipeline_deploy/serving.en.md | 4 + docs/pipeline_deploy/serving.md | 4 + 5 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml diff --git 
a/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml new file mode 100644 index 0000000000..37c85317f0 --- /dev/null +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/server/pipeline_config.yaml @@ -0,0 +1,85 @@ + +pipeline_name: PaddleOCR-VL-1.5 + +batch_size: 64 + +use_queues: True + +use_doc_preprocessor: False +use_layout_detection: True +use_chart_recognition: False +use_seal_recognition: False +format_block_content: False +merge_layout_blocks: True +markdown_ignore_labels: + - number + - footnote + - header + - header_image + - footer + - footer_image + - aside_text + +SubModules: + LayoutDetection: + module_name: layout_detection + model_name: PP-DocLayoutV3 + model_dir: null + batch_size: 8 + threshold: 0.3 + layout_nms: True + layout_unclip_ratio: [1.0, 1.0] + layout_merge_bboxes_mode: + 0: "union" # abstract + 1: "union" # algorithm + 2: "union" # aside_text + 3: "large" # chart + 4: "union" # content + 5: "large" # display_formula + 6: "large" # doc_title + 7: "union" # figure_title + 8: "union" # footer + 9: "union" # footer + 10: "union" # footnote + 11: "union" # formula_number + 12: "union" # header + 13: "union" # header + 14: "union" # image + 15: "large" # inline_formula + 16: "union" # number + 17: "large" # paragraph_title + 18: "union" # reference + 19: "union" # reference_content + 20: "union" # seal + 21: "union" # table + 22: "union" # text + 23: "union" # text + 24: "union" # vision_footnote + VLRecognition: + module_name: vl_recognition + model_name: PaddleOCR-VL-1.5-0.9B + model_dir: null + batch_size: -1 + genai_config: + backend: native + +SubPipelines: + DocPreprocessor: + pipeline_name: doc_preprocessor + batch_size: 8 + use_doc_orientation_classify: True + use_doc_unwarping: True + SubModules: + DocOrientationClassify: + module_name: doc_text_orientation + model_name: PP-LCNet_x1_0_doc_ori + model_dir: null + batch_size: 8 + DocUnwarping: + 
module_name: image_unwarping + model_name: UVDoc + model_dir: null + +Serving: + extra: + max_num_input_imgs: null diff --git a/deploy/hps/sdk/scripts/assemble.py b/deploy/hps/sdk/scripts/assemble.py index 0354108334..85e40f0339 100755 --- a/deploy/hps/sdk/scripts/assemble.py +++ b/deploy/hps/sdk/scripts/assemble.py @@ -15,6 +15,7 @@ # limitations under the License. import argparse +import ast import pathlib import shutil import subprocess @@ -30,6 +31,25 @@ COMMON_DIR = BASE_DIR / "common" CLIENT_LIB_PATH = BASE_DIR / "paddlex-hps-client" OUTPUT_DIR = BASE_DIR / "output" +NAME_MAPPINGS_PATH = BASE_DIR / "_name_mappings.py" + + +def _load_pipeline_app_router(): + """Parse PIPELINE_APP_ROUTER from the mounted name_mappings.py file.""" + if not NAME_MAPPINGS_PATH.exists(): + return {} + source = NAME_MAPPINGS_PATH.read_text() + # NOTE: We use `ast` to extract the dict value without importing the module, + # because name_mappings.py may have dependencies that are not available in + # the build environment. `ast.parse` + `ast.literal_eval` safely evaluates + # the dict literal from the source code. 
+ tree = ast.parse(source) + for node in ast.iter_child_nodes(tree): + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and target.id == "PIPELINE_APP_ROUTER": + return ast.literal_eval(node.value) + return {} if __name__ == "__main__": @@ -53,6 +73,8 @@ ) sys.exit(2) + pipeline_app_router = _load_pipeline_app_router() + if args.all: pipeline_names = [p.name for p in PIPELINES_DIR.iterdir()] else: @@ -90,7 +112,20 @@ print("=" * 30) print(f"Pipeline: {pipeline_name}") pipeline_dir = PIPELINES_DIR / pipeline_name - if not pipeline_dir.exists(): + + mapped_pipeline_dir = None + if pipeline_name in pipeline_app_router: + source_name = pipeline_app_router[pipeline_name] + source_dir = PIPELINES_DIR / source_name + if not source_dir.exists(): + sys.exit( + f"Source pipeline directory {source_dir} not found" + f" for mapped pipeline {pipeline_name}" + ) + mapped_pipeline_dir = pipeline_dir + pipeline_dir = source_dir + print(f"Using source pipeline: {source_name}") + elif not pipeline_dir.exists(): sys.exit(f"{pipeline_dir} not found") tgt_name = TARGET_NAME_PATTERN.format(pipeline_name=pipeline_name) @@ -120,7 +155,10 @@ shutil.copy(pipeline_dir / "version.txt", tgt_dir / "version.txt") - arch_path = tgt_dir.with_suffix(ARCHIVE_SUFFIX) + if mapped_pipeline_dir is not None: + shutil.copytree(mapped_pipeline_dir, tgt_dir, dirs_exist_ok=True) + + arch_path = OUTPUT_DIR / (tgt_name + ARCHIVE_SUFFIX) print(f"Creating archive: {arch_path}") with tarfile.open(arch_path, "w:gz") as tar: tar.add(tgt_dir, arcname=tgt_dir.name) diff --git a/deploy/hps/sdk/scripts/assemble.sh b/deploy/hps/sdk/scripts/assemble.sh index 9926d45779..a679c37237 100755 --- a/deploy/hps/sdk/scripts/assemble.sh +++ b/deploy/hps/sdk/scripts/assemble.sh @@ -1,10 +1,15 @@ #!/usr/bin/env bash +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SDK_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +REPO_ROOT="$(cd "${SDK_DIR}/../../.." 
&& pwd)" + docker run \ -it \ -e OUID="$(id -u)" \ -e OGID="$(id -g)" \ - -v "$(pwd)":/workspace \ + -v "${SDK_DIR}":/workspace \ + -v "${REPO_ROOT}/paddlex/inference/serving/infra/name_mappings.py":/workspace/_name_mappings.py:ro \ -w /workspace \ --rm \ python:3.10 \ diff --git a/docs/pipeline_deploy/serving.en.md b/docs/pipeline_deploy/serving.en.md index 975cc995db..3cda0afb8b 100644 --- a/docs/pipeline_deploy/serving.en.md +++ b/docs/pipeline_deploy/serving.en.md @@ -258,6 +258,10 @@ Find the high-stability serving SDK corresponding to the pipeline in the table b PaddleOCR-VL paddlex_hps_PaddleOCR-VL_sdk.tar.gz + +PaddleOCR-VL-1.5 +paddlex_hps_PaddleOCR-VL-1.5_sdk.tar.gz + diff --git a/docs/pipeline_deploy/serving.md b/docs/pipeline_deploy/serving.md index a8e956e1cb..5749103ff5 100644 --- a/docs/pipeline_deploy/serving.md +++ b/docs/pipeline_deploy/serving.md @@ -258,6 +258,10 @@ paddlex --serve --pipeline image_classification --use_hpip PaddleOCR-VL paddlex_hps_PaddleOCR-VL_sdk.tar.gz + +PaddleOCR-VL-1.5 +paddlex_hps_PaddleOCR-VL-1.5_sdk.tar.gz + From 09e1ff189b0f383fe8eac79c22b871f1ccb39aba Mon Sep 17 00:00:00 2001 From: Bvicii <98971614+scyyh11@users.noreply.github.com> Date: Mon, 2 Mar 2026 00:08:55 -0800 Subject: [PATCH 35/37] Add independent version.txt for PaddleOCR-VL-1.5 HPS SDK (#5026) --- deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt new file mode 100644 index 0000000000..6e8bf73aa5 --- /dev/null +++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL-1.5/version.txt @@ -0,0 +1 @@ +0.1.0 From 6b397a6de22c209697db52d982fd55b4f3c29a11 Mon Sep 17 00:00:00 2001 From: albcunha Date: Mon, 2 Mar 2026 10:10:22 -0300 Subject: [PATCH 36/37] Fix: Integer overflow in `calculate_overlap_ratio` (`utils.py:248`) (#5020) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What it does Updates `calculate_overlap_ratio` in `utils.py` to use NumPy types and functions that prevent integer overflow. --- ## Problem `calculate_overlap_ratio` in `paddlex/inference/pipelines/layout_parsing/utils.py` triggers: RuntimeWarning: overflow encountered in scalar multiply at line 248: inter_area = inter_width * inter_height --- ## Root cause The issue originates at lines 237–238: bbox1 = np.array(bbox1) bbox2 = np.array(bbox2) Calling `np.array()` without specifying `dtype` preserves the input’s original type. When bounding boxes come from detection models as `int32` arrays: - All arithmetic remains in `int32` - `int32` max value ≈ 2.1 billion - Large box dimensions can overflow: 50000 × 50000 = 2.5 billion (exceeds int32 limit) This causes overflow and incorrect overlap ratios. --- ## Fix Two changes in `calculate_overlap_ratio`: 1) Cast inputs to `float64` Prevents overflow in all downstream arithmetic: bbox1 = np.array(bbox1, dtype=np.float64) bbox2 = np.array(bbox2, dtype=np.float64) 2) Use `np.multiply` with explicit dtype Extra safety on the exact line that overflows: inter_area = np.multiply(inter_width, inter_height, dtype=np.float64) --- ## Why `float64`? - `float64` is NumPy’s default float type - Supports values up to ~1.8 × 10^308 - The function returns a floating-point ratio (0.0–1.0) - `calculate_bbox_area` already uses `float` internally - Keeps all arithmetic consistent Why not `int64`? 
- `int64` would also prevent overflow - But values would still be implicitly upcast to float during division - Using `float64` from the start avoids mixed-type arithmetic --- ## Impact This function is used by: - `_get_minbox_if_overlap_by_ratio` - `remove_overlap_blocks` - `shrink_supplement_region_bbox` It is also imported directly by: xycut_enhanced/xycuts.py from ..utils import calculate_overlap_ratio The fix is fully backward-compatible: - No signature changes - No behavior changes - Same return type - Eliminates overflow warnings and incorrect ratios Co-authored-by: Lin Manhui --- paddlex/inference/pipelines/layout_parsing/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/paddlex/inference/pipelines/layout_parsing/utils.py b/paddlex/inference/pipelines/layout_parsing/utils.py index 7c83d00a19..1fce7e368e 100644 --- a/paddlex/inference/pipelines/layout_parsing/utils.py +++ b/paddlex/inference/pipelines/layout_parsing/utils.py @@ -234,8 +234,8 @@ def calculate_overlap_ratio( Returns: float: The overlap ratio value between the two bounding boxes """ - bbox1 = np.array(bbox1) - bbox2 = np.array(bbox2) + bbox1 = np.array(bbox1, dtype=np.float64) + bbox2 = np.array(bbox2, dtype=np.float64) x_min_inter = np.maximum(bbox1[0], bbox2[0]) y_min_inter = np.maximum(bbox1[1], bbox2[1]) @@ -245,7 +245,8 @@ def calculate_overlap_ratio( inter_width = np.maximum(0, x_max_inter - x_min_inter) inter_height = np.maximum(0, y_max_inter - y_min_inter) - inter_area = inter_width * inter_height + inter_area = np.multiply(inter_width, inter_height, dtype=np.float64) + bbox1_area = calculate_bbox_area(bbox1) bbox2_area = calculate_bbox_area(bbox2) From 29092094dc8f92c382fe0fcaf8761e1ff2d11139 Mon Sep 17 00:00:00 2001 From: Liu Jiaxuan <85537209+liu-jiaxuan@users.noreply.github.com> Date: Thu, 5 Mar 2026 20:47:16 +0800 Subject: [PATCH 37/37] [Fix] refine config of RT-DETR-L (#5036) * fix codes * refine config * refine codes * refine codes * refine codes 
--- .../object_detection/modeling/rt_detr.py | 158 +++++++++++------- 1 file changed, 93 insertions(+), 65 deletions(-) diff --git a/paddlex/inference/models/object_detection/modeling/rt_detr.py b/paddlex/inference/models/object_detection/modeling/rt_detr.py index ece6c1afa7..c8b3963b6b 100644 --- a/paddlex/inference/models/object_detection/modeling/rt_detr.py +++ b/paddlex/inference/models/object_detection/modeling/rt_detr.py @@ -181,77 +181,107 @@ def __call__(self, head_out, im_shape, scale_factor, pad_shape): class RTDETRConfig(PretrainedConfig): def __init__( self, - arch, - return_idx, - freeze_stem_only, - freeze_at, - freeze_norm, - lr_mult_list, - hidden_dim, - use_encoder_idx, - num_encoder_layers, - el_d_model, - el_nhead, - el_dim_feedforward, - el_dropout, - el_activation, - expansion, - tf_num_queries, - tf_position_embed_type, - tf_feat_strides, - tf_num_levels, - tf_nhead, - tf_num_decoder_layers, - tf_backbone_feat_channels, - tf_dim_feedforward, - tf_dropout, - tf_activation, - tf_num_denoising, - tf_label_noise_ratio, - tf_box_noise_scale, - tf_learnt_init_query, - loss_coeff, - aux_loss, - use_vfl, - matcher_coeff, - num_top_queries, - use_focal_loss, + initializer_range=0.01, + initializer_bias_prior_prob=None, + layer_norm_eps=1e-5, + batch_norm_eps=1e-5, + # backbone + backbone_config=None, + freeze_backbone_batch_norms=True, + # encoder HybridEncoder + encoder_hidden_dim=256, + encoder_in_channels=[512, 1024, 2048], + feat_strides=[8, 16, 32], + encoder_layers=1, + encoder_ffn_dim=1024, + encoder_attention_heads=8, + dropout=0.0, + activation_dropout=0.0, + encode_proj_layers=[2], + positional_encoding_temperature=10000, + encoder_activation_function="gelu", + activation_function="silu", + eval_size=None, + normalize_before=False, + hidden_expansion=1.0, + # decoder RTDetrTransformer + d_model=256, + num_queries=300, + decoder_in_channels=[256, 256, 256], + decoder_ffn_dim=1024, + num_feature_levels=3, + decoder_n_points=4, + 
 decoder_layers=6, + decoder_attention_heads=8, + decoder_activation_function="relu", + attention_dropout=0.0, + num_denoising=100, + label_noise_ratio=0.5, + box_noise_scale=1.0, + learn_initial_query=False, + anchor_image_size=None, + disable_custom_kernels=True, + with_box_refine=True, + is_encoder_decoder=True, + # Loss + matcher_alpha=0.25, + matcher_gamma=2.0, + matcher_class_cost=2.0, + matcher_bbox_cost=5.0, + matcher_giou_cost=2.0, + use_focal_loss=True, + auxiliary_loss=True, + focal_loss_alpha=0.75, + focal_loss_gamma=2.0, + weight_loss_vfl=1.0, + weight_loss_bbox=5.0, + weight_loss_giou=2.0, + eos_coefficient=1e-4, **kwargs, ): - self.arch = arch - self.return_idx = return_idx - self.freeze_stem_only = freeze_stem_only - self.freeze_at = freeze_at - self.freeze_norm = freeze_norm - self.lr_mult_list = lr_mult_list - self.hidden_dim = hidden_dim - self.use_encoder_idx = use_encoder_idx - self.num_encoder_layers = num_encoder_layers + if backbone_config["model_type"] != "hgnet_v2": + raise RuntimeError( + f"There is no dynamic graph implementation for backbone {backbone_config['model_type']!r}." 
+ ) + self.arch = backbone_config["arch"] + self.freeze_stem_only = backbone_config["freeze_stem_only"] + self.freeze_at = backbone_config["freeze_at"] + self.freeze_norm = backbone_config["freeze_norm"] + self.lr_mult_list = backbone_config["lr_mult_list"] + self.return_idx = backbone_config["return_idx"] + self.hidden_dim = encoder_hidden_dim + self.use_encoder_idx = encode_proj_layers + self.num_encoder_layers = encoder_layers self.el_d_model = d_model - self.el_nhead = nhead - self.el_dim_feedforward = dim_feedforward + self.el_nhead = encoder_attention_heads + self.el_dim_feedforward = encoder_ffn_dim self.el_dropout = dropout - self.el_activation = activation - self.expansion = expansion + self.el_activation = encoder_activation_function + self.expansion = hidden_expansion self.tf_num_queries = num_queries - self.tf_position_embed_type = position_embed_type self.tf_feat_strides = feat_strides - self.tf_num_levels = num_levels - self.tf_nhead = nhead - self.tf_num_decoder_layers = num_decoder_layers - self.tf_backbone_feat_channels = backbone_feat_channels - self.tf_dim_feedforward = dim_feedforward - self.tf_dropout = dropout - self.tf_activation = activation + self.tf_num_levels = num_feature_levels + self.tf_nhead = decoder_attention_heads + self.tf_num_decoder_layers = decoder_layers + self.tf_backbone_feat_channels = decoder_in_channels + self.tf_dim_feedforward = decoder_ffn_dim + self.tf_dropout = attention_dropout + self.tf_activation = decoder_activation_function self.tf_num_denoising = num_denoising self.tf_label_noise_ratio = label_noise_ratio self.tf_box_noise_scale = box_noise_scale - self.tf_learnt_init_query = learnt_init_query - self.loss_coeff = loss_coeff - self.aux_loss = aux_loss - self.use_vfl = use_vfl - self.matcher_coeff = matcher_coeff - self.num_top_queries = num_top_queries + self.tf_learnt_init_query = learn_initial_query + self.loss_coeff = { + "class": weight_loss_vfl, + "bbox": weight_loss_bbox, + "giou": weight_loss_giou + } + 
self.aux_loss = auxiliary_loss + self.matcher_coeff = { + "class": matcher_class_cost, + "bbox": matcher_bbox_cost, + "giou": matcher_giou_cost + } self.use_focal_loss = use_focal_loss self.tensor_parallel_degree = 1 @@ -286,7 +316,6 @@ def __init__(self, config: RTDETRConfig): ) self.transformer = RTDETRTransformer( num_queries=self.config.tf_num_queries, - position_embed_type=self.config.tf_position_embed_type, feat_strides=self.config.tf_feat_strides, backbone_feat_channels=self.config.tf_backbone_feat_channels, num_levels=self.config.tf_num_levels, @@ -304,14 +333,13 @@ def __init__(self, config: RTDETRConfig): loss=DINOLoss( loss_coeff=self.config.loss_coeff, aux_loss=self.config.aux_loss, - use_vfl=self.config.use_vfl, matcher=HungarianMatcher( matcher_coeff=self.config.matcher_coeff, ), ) ) self.post_process = DETRPostProcess( - num_top_queries=self.config.num_top_queries, + num_top_queries=self.config.tf_num_queries, use_focal_loss=self.config.use_focal_loss, )