3 changes: 3 additions & 0 deletions .github/workflows/manual-release.yml
@@ -87,6 +87,7 @@ jobs:
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
-DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)

@@ -141,6 +142,7 @@ jobs:
-DGGML_CPU_ALL_VARIANTS=ON ^
-DGGML_OPENMP=ON ^
-DLLAMA_CURL=OFF ^
+ -DLLAMA_OPENSSL=OFF ^
${{ env.CMAKE_ARGS }}
cmake --build build --config Release

@@ -356,6 +358,7 @@ jobs:
-DGGML_METAL_EMBED_LIBRARY=ON \
-DGGML_RPC=ON \
-DLLAMA_CURL=OFF \
+ -DLLAMA_OPENSSL=OFF \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

16 changes: 16 additions & 0 deletions common/common.cpp
@@ -3,6 +3,7 @@
#endif

#include "ggml.h"
#include "ggml-backend.h"
#include "gguf.h"

#include "common.h"
@@ -1094,6 +1095,21 @@ common_init_result::common_init_result(common_params & params) :
auto mparams = common_model_params_to_llama(params);
auto cparams = common_context_params_to_llama(params);

+ // If only iGPUs are available and user didn't explicitly set -ngl, default to 0 (CPU-only)
+ if (mparams.n_gpu_layers == -1) {
+ bool has_dgpu = false;
+ bool has_igpu = false;
+ for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
+ auto type = ggml_backend_dev_type(ggml_backend_dev_get(i));
+ if (type == GGML_BACKEND_DEVICE_TYPE_GPU) has_dgpu = true;
+ if (type == GGML_BACKEND_DEVICE_TYPE_IGPU) has_igpu = true;
+ }
+ if (has_igpu && !has_dgpu) {
+ LOG_INF("%s: only integrated GPU(s) detected, defaulting to -ngl 0 (CPU-only)\n", __func__);
+ mparams.n_gpu_layers = 0;
+ }
+ }

if (params.fit_params) {
LOG_INF("%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n", __func__);
llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
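The new block in common.cpp only runs when -ngl was left at its default of -1, so an explicit -ngl on the command line is never overridden. A minimal standalone sketch of the same device scan, using only the ggml_backend_dev_* calls that appear in the patch; the helper name cpu_only_by_default is illustrative and not part of the change:

#include "ggml-backend.h"

// Sketch only: true when the sole GPU devices ggml reports are integrated GPUs,
// i.e. the case where the patch keeps all layers on the CPU by default.
// The helper name is illustrative and does not exist in the patch.
static bool cpu_only_by_default() {
    bool has_dgpu = false;
    bool has_igpu = false;
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        switch (ggml_backend_dev_type(ggml_backend_dev_get(i))) {
            case GGML_BACKEND_DEVICE_TYPE_GPU:  has_dgpu = true; break; // discrete GPU
            case GGML_BACKEND_DEVICE_TYPE_IGPU: has_igpu = true; break; // integrated GPU
            default:                            break; // CPU/accelerator devices are ignored
        }
    }
    return has_igpu && !has_dgpu; // maps to mparams.n_gpu_layers = 0 in the patch
}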
20 changes: 18 additions & 2 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -15017,11 +15017,27 @@ static bool ggml_backend_vk_device_supports_buft(ggml_backend_dev_t dev, ggml_ba
return buft_ctx->device->idx == ctx->device;
}

+ static int64_t get_op_batch_size(const ggml_tensor * op) {
+ switch (op->op) {
+ case GGML_OP_GET_ROWS:
+ return 0;
+ case GGML_OP_MUL_MAT:
+ return op->ne[1];
+ case GGML_OP_MUL_MAT_ID:
+ case GGML_OP_ROPE:
+ case GGML_OP_ROPE_BACK:
+ return op->ne[2];
+ default:
+ return ggml_nrows(op);
+ }
+ }

static bool ggml_backend_vk_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
ggml_backend_vk_device_context * dev_ctx = (ggml_backend_vk_device_context *)dev->context;

- return (op->ne[1] >= dev_ctx->op_offload_min_batch_size && op->op != GGML_OP_GET_ROWS) ||
- (op->ne[2] >= dev_ctx->op_offload_min_batch_size && op->op == GGML_OP_MUL_MAT_ID);
+ return (op->op == GGML_OP_MUL_MAT ||
+ op->op == GGML_OP_MUL_MAT_ID) &&
+ get_op_batch_size(op) >= dev_ctx->op_offload_min_batch_size;
}

static ggml_backend_event_t ggml_backend_vk_device_event_new(ggml_backend_dev_t dev) {
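Within ggml_backend_vk_device_offload_op, the short-circuiting && means get_op_batch_size is only ever consulted for the two matmul ops, so GET_ROWS no longer needs an explicit exclusion and ops such as ROPE no longer qualify for offload at all. A sketch of the net effect, with min_batch standing in for dev_ctx->op_offload_min_batch_size; the free function would_offload is illustrative and not part of the patch:

#include "ggml.h"

// Sketch only: the new rule offloads just plain and indirect matrix multiplies,
// and only when their batch dimension reaches the device threshold.
static bool would_offload(const ggml_tensor * op, int64_t min_batch) {
    switch (op->op) {
        case GGML_OP_MUL_MAT:
            return op->ne[1] >= min_batch; // batch dimension of a plain matmul result
        case GGML_OP_MUL_MAT_ID:
            return op->ne[2] >= min_batch; // batch dimension of the indirect (MoE) matmul
        default:
            return false;                  // all other ops are never offloaded by this rule
    }
}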
4 changes: 2 additions & 2 deletions src/llama-model.cpp
@@ -7536,7 +7536,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
/* attn_type_v */ params.type_v,
/* attn_v_trans */ !cparams.flash_attn,
/* attn_swa_full */ params.swa_full,
- /* attn_kv_size */ cparams.n_ctx,
+ /* attn_kv_size */ cparams.n_ctx_seq,
/* attn_n_ubatch */ cparams.n_ubatch,
/* attn_n_pad */ 1,
/* recurrent_type_r */ GGML_TYPE_F32,
@@ -7553,7 +7553,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
/* attn_type_k */ params.type_k,
/* attn_type_v */ params.type_v,
/* attn_v_trans */ !cparams.flash_attn,
- /* attn_kv_size */ cparams.n_ctx,
+ /* attn_kv_size */ cparams.n_ctx_seq,
/* attn_n_pad */ 1,
/* attn_n_swa */ hparams.n_swa,
/* attn_swa_type */ hparams.swa_type,