Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docker/Dockerfile_ascend_a2_300i
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
ARG ASCEND_DEVICE_TYPE=ascend_a2
ARG ASCEND_HUB=swr.cn-south-1.myhuaweicloud.com/ascendhub

FROM ${ASCEND_HUB}/cann:8.3.rc1.alpha002-910b-ubuntu22.04-py3.11 AS ascend_a2_base
FROM ${ASCEND_HUB}/cann:8.3.rc1.alpha002-310p-ubuntu22.04-py3.11 AS ascend_300i_base
FROM ${ASCEND_HUB}/cann:8.3.rc1-910b-ubuntu22.04-py3.11 AS ascend_a2_base
FROM ${ASCEND_HUB}/cann:8.3.rc1-310p-ubuntu22.04-py3.11 AS ascend_300i_base

FROM ${ASCEND_DEVICE_TYPE}_base AS builder
ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -23,6 +23,6 @@ ARG LMDEPLOY_TAG=main
RUN --mount=type=cache,target=/root/.cache \
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0rc1 torchvision==0.23.0 && \
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0 torchvision==0.23.0 && \
TORCH_DEVICE_BACKEND_AUTOLOAD=0 DEVICE=ascend pip install git+https://github.com/DeepLink-org/dlinfer.git@${DLINFER_TAG} && \
LMDEPLOY_TARGET_DEVICE=ascend pip install git+https://github.com/InternLM/lmdeploy.git@${LMDEPLOY_TAG}
4 changes: 2 additions & 2 deletions docker/Dockerfile_ascend_a3
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
ARG ASCEND_DEVICE_TYPE=ascend_a3
ARG ASCEND_HUB=swr.cn-south-1.myhuaweicloud.com/ascendhub

FROM ${ASCEND_HUB}/cann:8.3.rc1.alpha002-a3-openeuler24.03-py3.11 AS ascend_a3_base
FROM ${ASCEND_HUB}/cann:8.3.rc1-a3-openeuler24.03-py3.11 AS ascend_a3_base

FROM ${ASCEND_DEVICE_TYPE}_base AS builder
ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -22,6 +22,6 @@ ARG LMDEPLOY_TAG=main
RUN --mount=type=cache,target=/root/.cache \
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0rc1 torchvision==0.23.0 && \
pip install --no-cache-dir torch==2.8.0 torch-npu==2.8.0 torchvision==0.23.0 && \
TORCH_DEVICE_BACKEND_AUTOLOAD=0 DEVICE=ascend pip install git+https://github.com/DeepLink-org/dlinfer.git@${DLINFER_TAG} && \
LMDEPLOY_TARGET_DEVICE=ascend pip install git+https://github.com/InternLM/lmdeploy.git@${LMDEPLOY_TAG}
8 changes: 3 additions & 5 deletions lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def get_total_slots():
elif is_unpaged_prefill:
# prepare some params of unpaged_prefill attention stage.
q_start_loc_cpu, kv_seqlens_cpu = None, None
q_seqlens_cpu = step_context.q_seqlens.cpu()
q_seqlens_cpu = step_context.q_seqlens.cpu().to(torch.int32)
if SocVersion.is_Ascend910():
single_attention_mask = torch.logical_not(
torch.tril(
Expand Down Expand Up @@ -251,7 +251,7 @@ def get_total_slots():
step_context.block_offsets = step_context.block_offsets\
.repeat_interleave(step_context.q_seqlens, 0)
dynamo.mark_dynamic(step_context.block_offsets, [0, 1])
kv_seqlens = step_context.kv_seqlens.to(torch.int32)
kv_seqlens = step_context.kv_seqlens.cpu().to(torch.int32)
if not step_context.is_decoding:
if is_unpaged_prefill:
if SocVersion.is_Ascend910():
Expand All @@ -269,11 +269,9 @@ def get_total_slots():
else:
raise ValueError(f"dlinfer doesn't support {SocVersion.device_name()} device currently.")
kv_seqlens = kv_seqlens.repeat_interleave(step_context.q_seqlens, 0)
if not is_unpaged_prefill and AscendOpsBackend.enable_aclgraph():
kv_seqlens = kv_seqlens.cpu().tolist()
else:
if step_context.is_decoding:
kv_seqlens_cpu = step_context.kv_seqlens.cpu()
kv_seqlens_cpu = step_context.kv_seqlens.cpu().to(torch.int32)
elif is_unpaged_prefill:
pass
else:
Expand Down
4 changes: 2 additions & 2 deletions lmdeploy/pytorch/backends/dlinfer/moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ class DlinferSoftmaxTopKBuilder(SoftmaxTopKBuilder):
"""Dlinfer softmax topk implementation builder."""

@staticmethod
def build(top_k: int, dim: int = -1):
def build(top_k: int, dim: int = -1, n_groups: int = -1):
"""build."""
return DlinferSoftmaxTopKImpl(top_k, dim)
return DlinferSoftmaxTopKImpl(top_k, dim, n_groups)


class DlinferFusedMoEImpl(FusedMoEImpl):
Expand Down
3 changes: 1 addition & 2 deletions lmdeploy/pytorch/backends/dlinfer/rotary_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ def __init__(self, dim: int, base: int = 10000, scaling_factor: float = 1.0):
self.dim = dim
self.base = base
# yapf: disable
inv_freq = 1.0 / (self.base
** (torch.arange(0, self.dim, 2, dtype=torch.int64).float() / self.dim)).float().cuda()
inv_freq = 1.0 / (self.base**(torch.arange(0, self.dim, 2, dtype=torch.float, device='cuda') / self.dim))
# yapf: enable
self.register_buffer('inv_freq', inv_freq, persistent=False)

Expand Down
Loading