diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index 139d9a2..e9c60f0 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -12,6 +12,7 @@ set(TARGET_SRCS server.cpp httplib.h json.hpp + tts_server_threading_osx.h ) set(PUBLIC_ASSETS index.html diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 97854f6..92aae79 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -29,6 +29,7 @@ #include "audio_file.h" #include "args.h" #include "common.h" +#include "tts_server_threading_osx.h" #include "index.html.hpp" @@ -232,7 +233,7 @@ struct worker { std::unordered_map runners; std::string text_encoder_path; std::atomic running = true; - std::thread * thread = nullptr; + tts_server_threading::native_thread * thread = nullptr; int task_timeout; @@ -813,7 +814,7 @@ int main(int argc, const char ** argv) { init_worker(&model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w); } else { worker * w = new worker(tqueue, rmap, args.get_string_param("--text-encoder-path"), *args.get_int_param("--timeout")); - w->thread = new std::thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w); + w->thread = new tts_server_threading::native_thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w); pool->push_back(w); } } diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h new file mode 100644 index 0000000..22c2061 --- /dev/null +++ b/examples/server/tts_server_threading_osx.h @@ -0,0 +1,54 @@ +#pragma once + +// OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low +// for large GGML graphs in which graph nodes are traversed recursively. 
+// To address this we instead use pthreads so that the stack size can be increased in parity with Linux.
+
+#include <thread>
+
+#if defined(__APPLE__)
+#include <pthread.h>
+#include <functional>
+#include <memory>
+#include <system_error>
+
+namespace tts_server_threading {
+    // Minimal stand-in for std::thread that starts its thread through pthread_create() with an 8MB stack
+    // (the Linux default). Not copyable; a default-constructed instance owns no thread.
+    class native_thread {
+        pthread_t thread{};
+        bool started = false; // true while `thread` refers to a created thread that has not been joined yet
+        static constexpr size_t THREAD_STACK_SIZE = 8 * 1024 * 1024;
+    public:
+        native_thread() = default;
+        native_thread(const native_thread&) = delete;
+        native_thread& operator=(const native_thread&) = delete;
+        // Runs fun(args...) on a new thread. Like std::thread, throws std::system_error if it cannot be started.
+        template <typename Function, typename... Args>
+        explicit native_thread(Function&& fun, Args&&... args) {
+            auto task = new std::function<void()>(
+                std::bind(std::forward<Function>(fun), std::forward<Args>(args)...));
+            auto start_routine = [](void* ptr) -> void* {
+                // The new thread owns the callable and frees it once the call returns.
+                std::unique_ptr<std::function<void()>> owned(static_cast<std::function<void()>*>(ptr));
+                (*owned)();
+                return nullptr;
+            };
+            pthread_attr_t attr;
+            int err = pthread_attr_init(&attr);
+            if (err == 0) {
+                err = pthread_attr_setstacksize(&attr, THREAD_STACK_SIZE);
+                if (err == 0) err = pthread_create(&thread, &attr, start_routine, task);
+                pthread_attr_destroy(&attr); // the attributes are only needed while creating the thread
+            }
+            if (err != 0) { delete task; throw std::system_error(err, std::generic_category(), "native_thread"); }
+            started = true;
+        }
+        // Waits for the thread to finish; does nothing when no thread is owned or it was already joined.
+        void join() { if (started) { pthread_join(thread, nullptr); started = false; } }
+    };
+}
+
+#else
+namespace tts_server_threading { using native_thread = std::thread; }
+#endif