8 changes: 1 addition & 7 deletions paddlex/inference/genai/backends/vllm.py
@@ -13,11 +13,7 @@
 # limitations under the License.

 from ....utils import logging
-from ....utils.deps import (
-    get_dep_version,
-    is_genai_engine_plugin_available,
-    require_genai_engine_plugin,
-)
+from ....utils.deps import is_genai_engine_plugin_available, require_genai_engine_plugin
 from ..configs.utils import (
     backend_config_to_args,
     set_config_defaults,
@@ -29,8 +25,6 @@
 def register_models():
     from vllm import ModelRegistry

-    vllm_version = get_dep_version("vllm")
-
     if is_genai_engine_plugin_available("vllm-server"):
         for model_name in ALL_MODEL_INFO:
             if (
14 changes: 11 additions & 3 deletions paddlex/inference/genai/configs/paddleocr_vl_09b.py
@@ -13,7 +13,9 @@
 # limitations under the License.


-from ....utils.deps import require_deps
+from packaging.version import Version
+
+from ....utils.deps import get_dep_version, require_deps

 __all__ = ["get_config"]

@@ -49,17 +51,23 @@ def get_config(backend):

     import torch

+    tf_version = get_dep_version("transformers")
+    if Version(tf_version) >= Version("5.0.0"):
+        remote_code_key = "no-trust-remote-code"
+    else:
+        remote_code_key = "trust-remote-code"
+
     if torch.xpu.is_available():
         return {
-            "trust-remote-code": True,
+            remote_code_key: True,
             "max-num-batched-tokens": 16384,
             "no-enable-prefix-caching": True,
             "mm-processor-cache-gb": 0,
             "enforce-eager": True,
         }
     else:
         return {
-            "trust-remote-code": True,
+            remote_code_key: False,
             "gpu-memory-utilization": 0.5,
             "max-model-len": 16384,
             "max-num-batched-tokens": 131072,
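A minimal standalone sketch of the key selection added above, with a hypothetical helper name; the assumption (implied but not stated by the diff) is that transformers 5.0 renames the switch to an inverted "no-trust-remote-code" flag:

    from packaging.version import Version

    def _remote_code_key(tf_version: str) -> str:
        # transformers >= 5.0.0 is assumed to expose the inverted
        # "no-trust-remote-code" switch instead of "trust-remote-code".
        if Version(tf_version) >= Version("5.0.0"):
            return "no-trust-remote-code"
        return "trust-remote-code"

    assert _remote_code_key("5.0.0") == "no-trust-remote-code"
    assert _remote_code_key("4.57.0") == "trust-remote-code"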
2 changes: 1 addition & 1 deletion paddlex/paddlex_cli.py
@@ -368,7 +368,7 @@ def _install_genai_deps(plugin_types):
         sys.exit(1)

     for plugin_type in plugin_types:
-        if "vllm" in plugin_type or "sglang" in plugin_type:
+        if "sglang" in plugin_type:
             install_packages(["xformers"], constraints="required")
         if is_cuda_available():
             try:
7 changes: 7 additions & 0 deletions paddlex/utils/deps.py
@@ -280,6 +280,13 @@ def is_genai_engine_plugin_available(backend="any"):
         from .env import is_cuda_available

         if is_cuda_available():
+            if "vllm" in backend:
+                vllm_version = get_dep_version("vllm")
+                assert (
+                    vllm_version is not None
+                ), "Could not find vLLM. Please install it by running: 'paddlex --install genai-vllm-server'"
+                if Version(vllm_version) >= Version("0.12.0"):
+                    return True
             return is_dep_available("xformers") and is_dep_available("flash-attn")
         return True
     return False

Member (review comment on the assert above): The message "Could not find vLLM. Please install it by running: 'paddlex --install genai-vllm-server'" isn't really needed here, because this branch should in theory be unreachable (assert never), and the earlier check has already verified that genai-vllm-server is installed. Keeping the message may actually mislead users.
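A minimal sketch of what the reviewer is suggesting, under the assumption that the surrounding plugin check makes the None case unreachable (the helper name is hypothetical, not from the PR):

    from packaging.version import Version

    from paddlex.utils.deps import get_dep_version  # helper used by the diff

    def _vllm_is_new_enough() -> bool:
        # The caller has already verified the genai-vllm-server plugin, so a
        # missing version is "assert never" territory: a bare assert documents
        # that without suggesting an install command the user has already run.
        vllm_version = get_dep_version("vllm")
        assert vllm_version is not None
        return Version(vllm_version) >= Version("0.12.0")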
6 changes: 1 addition & 5 deletions setup.py
@@ -228,11 +228,7 @@
"transformers",
],
"genai-vllm-server": [
"einops",
"torch == 2.8.0",
"transformers < 5.0.0",
"uvloop",
"vllm == 0.10.2",
"vllm == 0.16.0",
],
"paddle2onnx": [
"paddle2onnx == 2.0.2rc3",