Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 25 additions & 63 deletions clip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,64 +956,32 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
return hidden_states;
}

struct ggml_cgraph* build_graph(struct ggml_allocr* allocr, std::vector<int> tokens, bool return_pooled = false) {
struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
struct ggml_tensor* input_ids2 = NULL,
size_t max_token_idx = 0,
bool return_pooled = false) {
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);

struct ggml_tensor* input_ids = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, tokens.size());
ggml_allocr_alloc(allocr, input_ids);

if (!ggml_allocr_is_measure(allocr)) {
ggml_backend_tensor_set(input_ids, tokens.data(), 0, tokens.size() * ggml_element_size(input_ids));
}

struct ggml_tensor* input_ids2 = NULL;
size_t max_token_idx = 0;
if (version == VERSION_XL) {
input_ids2 = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, tokens.size());
ggml_allocr_alloc(allocr, input_ids2);

auto it = std::find(tokens.begin(), tokens.end(), EOS_TOKEN_ID);
if (it != tokens.end()) {
std::fill(std::next(it), tokens.end(), 0);
}

max_token_idx = std::min<size_t>(std::distance(tokens.begin(), it), tokens.size() - 1);

// for (int i = 0; i < tokens.size(); i++) {
// printf("%d ", tokens[i]);
// }
// printf("\n");

if (!ggml_allocr_is_measure(allocr)) {
ggml_backend_tensor_set(input_ids2, tokens.data(), 0, tokens.size() * ggml_element_size(input_ids2));
}
input_ids2 = to_backend(input_ids2);
if (!return_pooled) {
input_ids = to_backend(input_ids);
}

struct ggml_tensor* embeddings = NULL;

if (num_custom_embeddings > 0 && version != VERSION_XL) {
embeddings = ggml_new_tensor_2d(compute_ctx,
wtype,
text_model.hidden_size,
text_model.vocab_size + num_custom_embeddings /* custom placeholder */);
ggml_allocr_alloc(allocr, embeddings);
if (!ggml_allocr_is_measure(allocr)) {
// really bad, there is memory inflexibility (this is for host<->device memory conflicts)
auto token_embed_weight = text_model.get_token_embed_weight();
void* freeze_data = malloc(ggml_nbytes(token_embed_weight));
ggml_backend_tensor_get_and_sync(backend,
token_embed_weight,
freeze_data,
0,
ggml_nbytes(token_embed_weight));
ggml_backend_tensor_set(embeddings, freeze_data, 0, ggml_nbytes(token_embed_weight));
free(freeze_data);
// concatenate custom embeddings
ggml_backend_tensor_set(embeddings,
(const void*)token_embed_custom.data(),
ggml_nbytes(token_embed_weight),
num_custom_embeddings * text_model.hidden_size * ggml_type_size(wtype));
}
auto custom_embeddings = ggml_new_tensor_3d(compute_ctx,
wtype,
text_model.hidden_size,
1,
num_custom_embeddings);
set_backend_tensor_data(custom_embeddings, token_embed_custom.data());

auto token_embed_weight = text_model.get_token_embed_weight();
token_embed_weight = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]);
// concatenate custom embeddings
embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings);
embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]);
}

struct ggml_tensor* hidden_states = forward(compute_ctx, input_ids, input_ids2, embeddings, max_token_idx, return_pooled);
Expand All @@ -1024,12 +992,14 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
}

void compute(const int n_threads,
std::vector<int> tokens,
struct ggml_tensor* input_ids,
struct ggml_tensor* input_ids2,
size_t max_token_idx,
bool return_pooled,
ggml_tensor** output,
ggml_context* output_ctx = NULL) {
auto get_graph = [&]() -> struct ggml_cgraph* {
return build_graph(compute_allocr, tokens, return_pooled);
return build_graph(input_ids, input_ids2, max_token_idx, return_pooled);
};
GGMLModule::compute(get_graph, n_threads, true, output, output_ctx);
}
Expand Down Expand Up @@ -1143,8 +1113,7 @@ struct FrozenCLIPVisionEmbedder : public GGMLModule {
vision_model.get_param_tensors(tensors, prefix + "transformer.visual_model");
}

struct ggml_cgraph* build_graph(struct ggml_allocr* allocr,
struct ggml_tensor* pixel_values) {
struct ggml_cgraph* build_graph(struct ggml_tensor* pixel_values) {
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);

pixel_values = to_backend(pixel_values);
Expand All @@ -1156,19 +1125,12 @@ struct FrozenCLIPVisionEmbedder : public GGMLModule {
return gf;
}

void alloc_compute_buffer(ggml_context* work_ctx, ggml_tensor* pixel_values) {
auto get_graph = [&]() -> struct ggml_cgraph* {
return build_graph(compute_allocr, pixel_values);
};
GGMLModule::alloc_compute_buffer(get_graph);
}

void compute(const int n_threads,
ggml_tensor* pixel_values,
ggml_tensor** output,
ggml_context* output_ctx) {
auto get_graph = [&]() -> struct ggml_cgraph* {
return build_graph(compute_allocr, pixel_values);
return build_graph(pixel_values);
};
GGMLModule::compute(get_graph, n_threads, true, output, output_ctx);
}
Expand Down
36 changes: 18 additions & 18 deletions control.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ class ControlNetBlock : public GGMLBlock {

struct ggml_tensor* resblock_forward(std::string name,
struct ggml_context* ctx,
struct ggml_allocr* allocr,
struct ggml_tensor* x,
struct ggml_tensor* emb) {
auto block = std::dynamic_pointer_cast<ResBlock>(blocks[name]);
Expand All @@ -175,7 +174,6 @@ class ControlNetBlock : public GGMLBlock {

struct ggml_tensor* attention_layer_forward(std::string name,
struct ggml_context* ctx,
struct ggml_allocr* allocr,
struct ggml_tensor* x,
struct ggml_tensor* context) {
auto block = std::dynamic_pointer_cast<SpatialTransformer>(blocks[name]);
Expand All @@ -201,11 +199,10 @@ class ControlNetBlock : public GGMLBlock {
}

std::vector<struct ggml_tensor*> forward(struct ggml_context* ctx,
struct ggml_allocr* allocr,
struct ggml_tensor* x,
struct ggml_tensor* hint,
struct ggml_tensor* guided_hint,
std::vector<float> timesteps,
struct ggml_tensor* timesteps,
struct ggml_tensor* context,
struct ggml_tensor* y = NULL) {
// x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
Expand All @@ -231,7 +228,7 @@ class ControlNetBlock : public GGMLBlock {

auto middle_block_out = std::dynamic_pointer_cast<Conv2d>(blocks["middle_block_out.0"]);

auto t_emb = new_timestep_embedding(ctx, allocr, timesteps, model_channels); // [N, model_channels]
auto t_emb = ggml_nn_timestep_embedding(ctx, timesteps, model_channels); // [N, model_channels]

auto emb = time_embed_0->forward(ctx, t_emb);
emb = ggml_silu_inplace(ctx, emb);
Expand Down Expand Up @@ -272,10 +269,10 @@ class ControlNetBlock : public GGMLBlock {
for (int j = 0; j < num_res_blocks; j++) {
input_block_idx += 1;
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".0";
h = resblock_forward(name, ctx, allocr, h, emb); // [N, mult*model_channels, h, w]
h = resblock_forward(name, ctx, h, emb); // [N, mult*model_channels, h, w]
if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) {
std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1";
h = attention_layer_forward(name, ctx, allocr, h, context); // [N, mult*model_channels, h, w]
h = attention_layer_forward(name, ctx, h, context); // [N, mult*model_channels, h, w]
}

auto zero_conv = std::dynamic_pointer_cast<Conv2d>(blocks["zero_convs." + std::to_string(input_block_idx) + ".0"]);
Expand All @@ -299,9 +296,9 @@ class ControlNetBlock : public GGMLBlock {
// [N, 4*model_channels, h/8, w/8]

// middle_block
h = resblock_forward("middle_block.0", ctx, allocr, h, emb); // [N, 4*model_channels, h/8, w/8]
h = attention_layer_forward("middle_block.1", ctx, allocr, h, context); // [N, 4*model_channels, h/8, w/8]
h = resblock_forward("middle_block.2", ctx, allocr, h, emb); // [N, 4*model_channels, h/8, w/8]
h = resblock_forward("middle_block.0", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]
h = attention_layer_forward("middle_block.1", ctx, h, context); // [N, 4*model_channels, h/8, w/8]
h = resblock_forward("middle_block.2", ctx, h, emb); // [N, 4*model_channels, h/8, w/8]

// out
outs.push_back(middle_block_out->forward(ctx, h));
Expand Down Expand Up @@ -386,18 +383,22 @@ struct ControlNet : public GGMLModule {

struct ggml_cgraph* build_graph(struct ggml_tensor* x,
struct ggml_tensor* hint,
std::vector<float> timesteps,
struct ggml_tensor* timesteps,
struct ggml_tensor* context,
struct ggml_tensor* y = NULL) {
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, CONTROL_NET_GRAPH_SIZE, false);

x = to_backend(x);
hint = to_backend(hint);
context = to_backend(context);
y = to_backend(y);
x = to_backend(x);
if (guided_hint_cached) {
hint = NULL;
} else {
hint = to_backend(hint);
}
context = to_backend(context);
y = to_backend(y);
timesteps = to_backend(timesteps);

auto outs = control_net.forward(compute_ctx,
compute_allocr,
x,
hint,
guided_hint_cached ? guided_hint : NULL,
Expand All @@ -420,7 +421,7 @@ struct ControlNet : public GGMLModule {
void compute(int n_threads,
struct ggml_tensor* x,
struct ggml_tensor* hint,
std::vector<float> timesteps,
struct ggml_tensor* timesteps,
struct ggml_tensor* context,
struct ggml_tensor* y,
struct ggml_tensor** output = NULL,
Expand All @@ -434,7 +435,6 @@ struct ControlNet : public GGMLModule {
};

GGMLModule::compute(get_graph, n_threads, false, output, output_ctx);

guided_hint_cached = true;
}

Expand Down
2 changes: 1 addition & 1 deletion ggml
Submodule ggml updated from 9a5ce3 to 4212b7
Loading