Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set(TARGET_SRCS
server.cpp
httplib.h
json.hpp
tts_server_threading_osx.h
)
set(PUBLIC_ASSETS
index.html
Expand Down
5 changes: 3 additions & 2 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "audio_file.h"
#include "args.h"
#include "common.h"
#include "tts_server_threading_osx.h"

#include "index.html.hpp"

Expand Down Expand Up @@ -232,7 +233,7 @@ struct worker {
std::unordered_map<std::string, struct tts_runner *> runners;
std::string text_encoder_path;
std::atomic<bool> running = true;
std::thread * thread = nullptr;
tts_server_threading::native_thread * thread = nullptr;

int task_timeout;

Expand Down Expand Up @@ -813,7 +814,7 @@ int main(int argc, const char ** argv) {
init_worker(&model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
} else {
worker * w = new worker(tqueue, rmap, args.get_string_param("--text-encoder-path"), *args.get_int_param("--timeout"));
w->thread = new std::thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
w->thread = new tts_server_threading::native_thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
pool->push_back(w);
}
}
Expand Down
54 changes: 54 additions & 0 deletions examples/server/tts_server_threading_osx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#pragma once

// On macOS, threads other than the main thread are created with a reduced default stack size of 512KB. This is too small
// for large GGML graphs, whose nodes are traversed recursively. To address this we create the threads with pthreads
// directly, so that the stack size can be raised to match the Linux default.

#include <thread>

#if defined(__APPLE__)

#include <pthread.h>

#include <cstddef>
#include <functional>
#include <memory>
#include <system_error>
#include <utility>

namespace tts_server_threading {

/**
 * Minimal pthread-backed stand-in for std::thread that starts its thread with an enlarged stack.
 *
 * The thread is created through pthread_create() with the stack size attribute set to
 * THREAD_STACK_SIZE (8MB, equal to the Linux default) instead of the platform default.
 *
 * Differences from std::thread:
 *  - only construction, joinable() and join() are provided; the type is neither copyable nor movable;
 *  - no destructor is defined, so destroying a still-joinable native_thread does not call
 *    std::terminate() and does not join; callers are expected to call join() themselves.
 *
 * Like std::thread, failures to start or join the thread are reported with std::system_error.
 */
class native_thread {
    // Stack size requested for every thread started by this class.
    static constexpr std::size_t THREAD_STACK_SIZE = 8 * 1024 * 1024;

    // Type-erased callable handed to the new thread through pthread's void* argument.
    using task_type = std::function<void()>;

    pthread_t thread{};
    // True between a successful pthread_create() and the matching join().
    bool started = false;

    // pthread entry point: takes ownership of the heap-allocated task, runs it and frees it.
    static void * run_task(void * raw) {
        std::unique_ptr<task_type> task(static_cast<task_type *>(raw));
        (*task)();
        return nullptr;
    }

    // Converts a non-zero pthread error code into a std::system_error naming the failed call.
    static void check(int err, const char * what) {
        if (err != 0) {
            throw std::system_error(err, std::generic_category(), what);
        }
    }

public:
    /// Creates an object that does not represent a thread; joinable() returns false.
    native_thread() = default;
    native_thread(const native_thread &) = delete;
    native_thread & operator=(const native_thread &) = delete;

    /**
     * Starts a new thread that invokes `fun` with `args`.
     *
     * The callable and its arguments are bound with std::bind, so they are copied/moved into
     * storage owned by the new thread before this constructor returns.
     *
     * @throws std::system_error if the thread attributes cannot be set up or the thread cannot be created.
     */
    template <class Function, class... Args>
    explicit native_thread(Function && fun, Args &&... args) {
        // Owned here until pthread_create() succeeds, so a failed start cannot leak the task.
        auto task = std::make_unique<task_type>(
            std::bind(std::forward<Function>(fun), std::forward<Args>(args)...));

        pthread_attr_t attr;
        check(pthread_attr_init(&attr), "pthread_attr_init");

        const char * failed_call = "pthread_attr_setstacksize";
        int err = pthread_attr_setstacksize(&attr, THREAD_STACK_SIZE);
        if (err == 0) {
            failed_call = "pthread_create";
            err = pthread_create(&thread, &attr, &native_thread::run_task, task.get());
        }
        // The attribute object is only needed during creation; release it on every path.
        pthread_attr_destroy(&attr);
        check(err, failed_call);

        // The running thread now owns the task and deletes it in run_task().
        task.release();
        started = true;
    }

    /// Returns true if a thread was started by this object and has not been joined yet.
    bool joinable() const noexcept { return started; }

    /**
     * Blocks until the thread finishes.
     *
     * @throws std::system_error if the object is not joinable (never started or already joined)
     *         or if pthread_join() reports an error.
     */
    void join() {
        if (!started) {
            throw std::system_error(std::make_error_code(std::errc::invalid_argument),
                                    "native_thread::join: thread is not joinable");
        }
        check(pthread_join(thread, nullptr), "pthread_join");
        started = false;
    }
};

}  // namespace tts_server_threading

#else

// Outside of Apple platforms (the #else branch of the __APPLE__ check above) no custom thread
// wrapper is defined: native_thread is simply an alias for std::thread.
namespace tts_server_threading {
using native_thread = std::thread;
}

#endif