From 780bfe8b4ac218142f78dc96b6daa3ca811e54a4 Mon Sep 17 00:00:00 2001 From: Matthew Willett-Jeffries Date: Wed, 4 Jun 2025 15:51:32 -0400 Subject: [PATCH 1/4] fix for stack overflow on osx --- examples/server/CMakeLists.txt | 1 + examples/server/server.cpp | 5 +- examples/server/tts_server_threading_osx.h | 54 ++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 examples/server/tts_server_threading_osx.h diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index 139d9a2..e9c60f0 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -12,6 +12,7 @@ set(TARGET_SRCS server.cpp httplib.h json.hpp + tts_server_threading_osx.h ) set(PUBLIC_ASSETS index.html diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 97854f6..92aae79 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -29,6 +29,7 @@ #include "audio_file.h" #include "args.h" #include "common.h" +#include "tts_server_threading_osx.h" #include "index.html.hpp" @@ -232,7 +233,7 @@ struct worker { std::unordered_map runners; std::string text_encoder_path; std::atomic running = true; - std::thread * thread = nullptr; + tts_server_threading::native_thread * thread = nullptr; int task_timeout; @@ -813,7 +814,7 @@ int main(int argc, const char ** argv) { init_worker(&model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w); } else { worker * w = new worker(tqueue, rmap, args.get_string_param("--text-encoder-path"), *args.get_int_param("--timeout")); - w->thread = new std::thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w); + w->thread = new tts_server_threading::native_thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w); pool->push_back(w); } } diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h new file mode 100644 index 0000000..2bd9952 --- /dev/null +++ b/examples/server/tts_server_threading_osx.h @@ -0,0 +1,54 @@ +#pragma once + +// On OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low +// for large GGML graphs in which graph nodes are traversed recursively. To address this we instead use pthreads so that stack +// size can be increased in order to be in parity with linux. + +#include + +#if defined(__APPLE__) + +#include +#include + +using namespace std; + +namespace tts_server_threading { + // The implementation calls pthread_create() with the stack size parameter equal to the Linux 8MB default, on platforms that support it. + class native_thread { + pthread_t thread; + static constexpr size_t THREAD_STACK_SIZE = 8 * 1024 * 1024; + public: + native_thread() = default; + native_thread(const native_thread&) = delete; + template + explicit native_thread(Function&& fun, Args&&... args) { + auto func = new function( + std::bind(std::forward(fun), std::forward(args)...)); + + pthread_attr_t attr_storage, *attr = &attr_storage; + pthread_attr_init(attr); + pthread_attr_setstacksize(attr, THREAD_STACK_SIZE); + + auto start_routine = [](void* ptr) -> void* { + auto f = reinterpret_cast*>(ptr); + // Call the function + (*f)(); + delete f; + return nullptr; + }; + + pthread_create(&thread, attr, start_routine, func); + } + + void join() { pthread_join(thread, nullptr); } + }; +} + +#else + +namespace tts_server_threading { + using native_thread = thread; +} + +#endif From bbe276e507bc0882d7b1c674f9325045c6d7b7a6 Mon Sep 17 00:00:00 2001 From: Matthew Willett-Jeffries Date: Wed, 4 Jun 2025 16:07:36 -0400 Subject: [PATCH 2/4] fixed issue on linux --- examples/server/tts_server_threading_osx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h index 2bd9952..dc0d084 100644 --- a/examples/server/tts_server_threading_osx.h +++ b/examples/server/tts_server_threading_osx.h @@ -48,7 +48,7 @@ namespace tts_server_threading { #else namespace tts_server_threading { - using native_thread = thread; + using native_thread = std::thread; } #endif From 56b2a2c19dc02b5e1803906d1771ce5fa5cd6111 Mon Sep 17 00:00:00 2001 From: Matthew Willett-Jeffries Date: Wed, 4 Jun 2025 16:21:22 -0400 Subject: [PATCH 3/4] Update examples/server/tts_server_threading_osx.h --- examples/server/tts_server_threading_osx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h index dc0d084..4537328 100644 --- a/examples/server/tts_server_threading_osx.h +++ b/examples/server/tts_server_threading_osx.h @@ -1,6 +1,6 @@ #pragma once -// On OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low +// OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low // for large GGML graphs in which graph nodes are traversed recursively. To address this we instead use pthreads so that stack // size can be increased in order to be in parity with linux. From ed393b836ebb9ff58793ce6bd4a58155f5c75177 Mon Sep 17 00:00:00 2001 From: Matthew Willett-Jeffries Date: Wed, 4 Jun 2025 16:22:02 -0400 Subject: [PATCH 4/4] Update examples/server/tts_server_threading_osx.h --- examples/server/tts_server_threading_osx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h index 4537328..22c2061 100644 --- a/examples/server/tts_server_threading_osx.h +++ b/examples/server/tts_server_threading_osx.h @@ -2,7 +2,7 @@ // OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low // for large GGML graphs in which graph nodes are traversed recursively. To address this we instead use pthreads so that stack -// size can be increased in order to be in parity with linux. +// size can be increased in parity with linux. #include