Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ jobs:
# Run CUDA backend Python tests
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="

# Run Qwen 3.5 MoE tests (quantize roundtrip + TurboQuant KV cache + sampler)
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py examples/models/qwen3_5_moe/test_turboquant.py examples/models/qwen3_5_moe/test_sampler.py -v -o "addopts="
# Run Qwen 3.5 MoE tests (quantize roundtrip + TurboQuant KV cache)
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py examples/models/qwen3_5_moe/test_turboquant.py -v -o "addopts="

export-model-cuda-artifact:
name: export-model-cuda-artifact
Expand Down
9 changes: 4 additions & 5 deletions backends/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ set(_aoti_cuda_shim_sources runtime/shims/memory.cpp
runtime/shims/cuda_guard.cpp
)

# Only build CUDA shims when CUDA language/toolchain is available.
# Only build int4mm shim when CUDA language/toolchain is available.
if(CMAKE_CUDA_COMPILER)
list(APPEND _aoti_cuda_shim_sources runtime/shims/int4mm.cu
runtime/shims/sort.cu runtime/shims/rand.cu
runtime/shims/sort.cu
)
endif()

Expand Down Expand Up @@ -152,8 +152,7 @@ endif()
# retention.
if(_cuda_is_msvc_toolchain)
target_link_libraries(
aoti_cuda_shims PRIVATE cuda_platform CUDA::cudart CUDA::curand
${CMAKE_DL_LIBS}
aoti_cuda_shims PRIVATE cuda_platform CUDA::cudart ${CMAKE_DL_LIBS}
)
# Link object library directly so symbols are pulled exactly once while
# avoiding duplicate static/object inclusion and interface leakage.
Expand All @@ -163,7 +162,7 @@ else()
aoti_cuda_shims
PRIVATE cuda_platform
PUBLIC -Wl,--whole-archive aoti_common_shims_slim -Wl,--no-whole-archive
CUDA::cudart CUDA::curand ${CMAKE_DL_LIBS}
CUDA::cudart ${CMAKE_DL_LIBS}
)
endif()

Expand Down
1 change: 0 additions & 1 deletion backends/cuda/cuda_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ def get_supported_fallback_kernels(cls) -> Dict[str, Any]:
return {
"at::_ops::_weight_int4pack_mm::call": None,
"at::_ops::sort_stable::call": None,
"aoti_torch_cuda_randint_low_out": None,
}

@classmethod
Expand Down
2 changes: 0 additions & 2 deletions backends/cuda/runtime/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ runtime.cxx_library(
"shims/cuda_guard.cpp",
"shims/int4mm.cu",
"shims/memory.cpp",
"shims/rand.cu",
"shims/sort.cu",
"shims/tensor_attribute.cpp",
],
Expand All @@ -42,7 +41,6 @@ runtime.cxx_library(
"shims/int4mm.cuh",
"shims/int4mm.h",
"shims/memory.h",
"shims/rand.h",
"shims/sort.h",
"shims/tensor_attribute.h",
"utils.h",
Expand Down
273 changes: 0 additions & 273 deletions backends/cuda/runtime/shims/rand.cu

This file was deleted.

Loading
Loading