Skip to content
Open
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2b77d8b
Use cache mount for genai docker (#4954)
Bobholamovic Jan 29, 2026
9d39bc9
Fix HPS order bug (#4955)
Bobholamovic Jan 29, 2026
966969f
Fix transformers version (#4956)
Bobholamovic Jan 29, 2026
306430a
Fix HPS and remove scipy from required deps (#4957)
Bobholamovic Jan 29, 2026
01f63a6
[Cherry-Pick]bugfix: unexpected change of the constant IMAGE_LABELS (…
changdazhou Jan 30, 2026
363b508
[METAX] add ppdoclayv3 to METAX_GPU_WHITELIST (#4959)
handsomecoderyang Jan 30, 2026
d59a344
vllm 0.10.2 needs transformers 4.x (#4963)
zhang-prog Jan 30, 2026
622b602
Bump version to 3.4.1
Bobholamovic Jan 30, 2026
c78fb95
Support setting PDF rendering scale factor (#4967)
Bobholamovic Feb 2, 2026
45989f0
Fix/doc vlm async cancellation (#4969) (#4971)
scyyh11 Feb 4, 2026
0a936ba
Fix typo (#4982)
Bobholamovic Feb 6, 2026
f790eff
add llama.cpp support (#4983)
zhang-prog Feb 9, 2026
a10d7c5
Add Intel GPU config (#4992)
Bobholamovic Feb 11, 2026
92a190e
Remove PaddleOCR-VL server page limit (#4991)
Bobholamovic Feb 11, 2026
04476cb
PaddleX Add ROCm 7.0 compatibility patches (#4990) (#4996)
M4jupitercannon Feb 12, 2026
edb4022
[Feat] Support setting expiration for BOS URLs (#4993)
Bobholamovic Feb 12, 2026
69e8d75
add \n for seal rec && bugfix for text in table && delete_pass by mod…
changdazhou Feb 13, 2026
f95d873
Fix auto batch size for PaddleOCR-VL-1.5-0.9B (#5003)
Bobholamovic Feb 13, 2026
c88d4c1
Bump version to 3.4.2
Bobholamovic Feb 13, 2026
e92d21f
Update HPS frozen deps (#5004)
Bobholamovic Feb 13, 2026
41b695b
update vlm batch_size (#5005)
zhang-prog Feb 13, 2026
901393a
support modular langchain as well
np-n Feb 25, 2026
b39f430
fix
np-n Feb 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix/doc vlm async cancellation (#4969) (#4971)
* fix(doc_vlm): cancel pending futures on batch request failure

When a batch of requests is sent to the VLM service and one fails,
the remaining pending futures are now properly cancelled to avoid
wasting VLM service resources.

* chore: remove test file and documentation for async cancellation fix
  • Loading branch information
scyyh11 authored Feb 4, 2026
commit 45989f0dc6d998ce66fe8d6cb61293355a75f429
215 changes: 111 additions & 104 deletions paddlex/inference/models/doc_vlm/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,118 +415,125 @@ def _genai_client_process(
max_pixels,
):
futures = []
for item in data:
image = item["image"]
if isinstance(image, str):
if image.startswith("http://") or image.startswith("https://"):
image_url = image
else:
try:
for item in data:
image = item["image"]
if isinstance(image, str):
if image.startswith("http://") or image.startswith("https://"):
image_url = image
else:
from PIL import Image

with Image.open(image) as img:
img = img.convert("RGB")
with io.BytesIO() as buf:
img.save(buf, format="JPEG")
image_url = "data:image/jpeg;base64," + base64.b64encode(
buf.getvalue()
).decode("ascii")
elif isinstance(image, np.ndarray):
import cv2
from PIL import Image

with Image.open(image) as img:
img = img.convert("RGB")
with io.BytesIO() as buf:
img.save(buf, format="JPEG")
image_url = "data:image/jpeg;base64," + base64.b64encode(
buf.getvalue()
).decode("ascii")
elif isinstance(image, np.ndarray):
import cv2
from PIL import Image

image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
img = Image.fromarray(image)
with io.BytesIO() as buf:
img.save(buf, format="JPEG")
image_url = "data:image/jpeg;base64," + base64.b64encode(
buf.getvalue()
).decode("ascii")
else:
raise TypeError(f"Not supported image type: {type(image)}")

if self._genai_client.backend == "fastdeploy-server":
kwargs = {
"temperature": 1 if temperature is None else temperature,
"top_p": 0 if top_p is None else top_p,
}
else:
kwargs = {
"temperature": 0 if temperature is None else temperature,
}
if top_p is not None:
kwargs["top_p"] = top_p

if self._genai_client.backend == "mlx-vlm-server":
max_tokens_name = "max_tokens"
else:
max_tokens_name = "max_completion_tokens"

if max_new_tokens is not None:
kwargs[max_tokens_name] = max_new_tokens
elif self.model_name in self.model_group["PaddleOCR-VL"]:
kwargs[max_tokens_name] = 8192

kwargs["extra_body"] = {}
if skip_special_tokens is not None:
if self._genai_client.backend in (
"fastdeploy-server",
"vllm-server",
"sglang-server",
"mlx-vlm-server",
):
kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
img = Image.fromarray(image)
with io.BytesIO() as buf:
img.save(buf, format="JPEG")
image_url = "data:image/jpeg;base64," + base64.b64encode(
buf.getvalue()
).decode("ascii")
else:
raise ValueError("Not supported")
raise TypeError(f"Not supported image type: {type(image)}")

if repetition_penalty is not None:
kwargs["extra_body"]["repetition_penalty"] = repetition_penalty

if min_pixels is not None:
if self._genai_client.backend == "vllm-server":
kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
"extra_body"
].get("mm_processor_kwargs", {})
kwargs["extra_body"]["mm_processor_kwargs"][
"min_pixels"
] = min_pixels
if self._genai_client.backend == "fastdeploy-server":
kwargs = {
"temperature": 1 if temperature is None else temperature,
"top_p": 0 if top_p is None else top_p,
}
else:
warnings.warn(
f"{repr(self._genai_client.backend)} does not support `min_pixels`."
)
kwargs = {
"temperature": 0 if temperature is None else temperature,
}
if top_p is not None:
kwargs["top_p"] = top_p

if max_pixels is not None:
if self._genai_client.backend == "vllm-server":
kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
"extra_body"
].get("mm_processor_kwargs", {})
kwargs["extra_body"]["mm_processor_kwargs"][
"max_pixels"
] = max_pixels
if self._genai_client.backend == "mlx-vlm-server":
max_tokens_name = "max_tokens"
else:
warnings.warn(
f"{repr(self._genai_client.backend)} does not support `max_pixels`."
)
max_tokens_name = "max_completion_tokens"

if max_new_tokens is not None:
kwargs[max_tokens_name] = max_new_tokens
elif self.model_name in self.model_group["PaddleOCR-VL"]:
kwargs[max_tokens_name] = 8192

kwargs["extra_body"] = {}
if skip_special_tokens is not None:
if self._genai_client.backend in (
"fastdeploy-server",
"vllm-server",
"sglang-server",
"mlx-vlm-server",
):
kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
else:
raise ValueError("Not supported")

if repetition_penalty is not None:
kwargs["extra_body"]["repetition_penalty"] = repetition_penalty

future = self._genai_client.create_chat_completion(
[
{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": image_url}},
{"type": "text", "text": item["query"]},
],
}
],
return_future=True,
timeout=600,
**kwargs,
)
if min_pixels is not None:
if self._genai_client.backend == "vllm-server":
kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
"extra_body"
].get("mm_processor_kwargs", {})
kwargs["extra_body"]["mm_processor_kwargs"][
"min_pixels"
] = min_pixels
else:
warnings.warn(
f"{repr(self._genai_client.backend)} does not support `min_pixels`."
)

if max_pixels is not None:
if self._genai_client.backend == "vllm-server":
kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
"extra_body"
].get("mm_processor_kwargs", {})
kwargs["extra_body"]["mm_processor_kwargs"][
"max_pixels"
] = max_pixels
else:
warnings.warn(
f"{repr(self._genai_client.backend)} does not support `max_pixels`."
)

future = self._genai_client.create_chat_completion(
[
{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": image_url}},
{"type": "text", "text": item["query"]},
],
}
],
return_future=True,
timeout=600,
**kwargs,
)

futures.append(future)
futures.append(future)

results = []
for future in futures:
result = future.result()
results.append(result.choices[0].message.content)
results = []
for future in futures:
result = future.result()
results.append(result.choices[0].message.content)

return results
return results
except Exception:
# Cancel all pending futures to avoid wasting resources
for future in futures:
if not future.done():
future.cancel()
raise
Loading