mmwillet · mmwillet · Jun 5, 2025 · Jun 4, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
@@ -12,6 +12,7 @@ set(TARGET_SRCS
     server.cpp
     httplib.h
     json.hpp
+    tts_server_threading_osx.h
 )
 set(PUBLIC_ASSETS
     index.html

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -29,6 +29,7 @@
 #include "audio_file.h"
 #include "args.h"
 #include "common.h"
+#include "tts_server_threading_osx.h"
 
 #include "index.html.hpp"
 
@@ -232,7 +233,7 @@ struct worker {
     std::unordered_map<std::string, struct tts_runner *> runners;
     std::string text_encoder_path;
     std::atomic<bool> running = true;
-    std::thread * thread = nullptr;
+    tts_server_threading::native_thread * thread = nullptr;
 
     int task_timeout;
 
@@ -813,7 +814,7 @@ int main(int argc, const char ** argv) {
             init_worker(&model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
         } else {
             worker * w = new worker(tqueue, rmap, args.get_string_param("--text-encoder-path"), *args.get_int_param("--timeout"));
-            w->thread = new std::thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
+            w->thread = new tts_server_threading::native_thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
             pool->push_back(w);
         }
     }

diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h
@@ -0,0 +1,93 @@
+#pragma once
+
+// On macOS, threads other than the main thread are created with a reduced default stack size of 512KB.
+// That is too small for large GGML graphs, whose nodes are traversed recursively. To address this, worker
+// threads are created through pthreads on macOS so that the stack size can be raised to match Linux.
+
+#include <thread>
+
+#if defined(__APPLE__)
+
+#include <pthread.h>
+
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <system_error>
+#include <utility>
+
+namespace tts_server_threading {
+    // Minimal stand-in for std::thread that starts its thread with pthread_create() and requests an 8MB stack
+    // (the Linux default) instead of the 512KB default. Failures are reported with std::system_error; the
+    // object is not copyable. There is no destructor: callers must join() a started thread themselves.
+    class native_thread {
+        using task_type = std::function<void()>;
+
+        static constexpr std::size_t THREAD_STACK_SIZE = 8 * 1024 * 1024;
+
+        pthread_t thread{};
+        bool started = false; // true between a successful pthread_create() and a successful join()
+
+        // pthread entry point: takes ownership of the heap-allocated task, runs it, then frees it.
+        static void * run_task(void * raw) {
+            std::unique_ptr<task_type> task(static_cast<task_type *>(raw));
+            (*task)();
+            return nullptr;
+        }
+
+    public:
+        native_thread() = default;
+        native_thread(const native_thread &) = delete;
+        native_thread & operator=(const native_thread &) = delete;
+
+        // Starts a thread that runs fun(args...). The callable and its arguments are copied/moved into a
+        // heap-allocated std::bind wrapper. Throws std::system_error if the thread cannot be created.
+        template<class Function, class... Args>
+        explicit native_thread(Function && fun, Args &&... args) {
+            std::unique_ptr<task_type> task(
+                new task_type(std::bind(std::forward<Function>(fun), std::forward<Args>(args)...)));
+
+            pthread_attr_t attr;
+            int rc = pthread_attr_init(&attr);
+            if (rc != 0) {
+                throw std::system_error(rc, std::generic_category(), "pthread_attr_init failed");
+            }
+            // Best effort: if the larger stack is rejected, the thread is still created with the default size.
+            (void) pthread_attr_setstacksize(&attr, THREAD_STACK_SIZE);
+
+            rc = pthread_create(&thread, &attr, &native_thread::run_task, task.get());
+            pthread_attr_destroy(&attr);
+            if (rc != 0) {
+                // The task was never handed over, so the unique_ptr frees it while the exception propagates.
+                throw std::system_error(rc, std::generic_category(), "pthread_create failed");
+            }
+            (void) task.release(); // the new thread owns the task now (see run_task)
+            started = true;
+        }
+
+        // True if this object refers to a started thread that has not been joined yet.
+        bool joinable() const noexcept { return started; }
+
+        // Blocks until the thread finishes. Like std::thread::join(), throws std::system_error when there is
+        // no thread to join (default-constructed or already joined) instead of invoking undefined behavior.
+        void join() {
+            if (!started) {
+                throw std::system_error(std::make_error_code(std::errc::invalid_argument),
+                                        "native_thread::join: thread is not joinable");
+            }
+            const int rc = pthread_join(thread, nullptr);
+            if (rc != 0) {
+                throw std::system_error(rc, std::generic_category(), "pthread_join failed");
+            }
+            started = false;
+        }
+    };
+}
+
+#else
+
+namespace tts_server_threading {
+    using native_thread = std::thread;
+}
+
+#endif