From 780bfe8b4ac218142f78dc96b6daa3ca811e54a4 Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <mwillettjeffries2@gmail.com>
Date: Wed, 4 Jun 2025 15:51:32 -0400
Subject: [PATCH 1/4] fix for stack overflow on osx

---
 examples/server/CMakeLists.txt             |  1 +
 examples/server/server.cpp                 |  5 +-
 examples/server/tts_server_threading_osx.h | 54 ++++++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 examples/server/tts_server_threading_osx.h
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index 139d9a2..e9c60f0 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -12,6 +12,7 @@ set(TARGET_SRCS
     server.cpp
     httplib.h
     json.hpp
+    tts_server_threading_osx.h
 )
 set(PUBLIC_ASSETS
     index.html
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 97854f6..92aae79 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -29,6 +29,7 @@
 #include "audio_file.h"
 #include "args.h"
 #include "common.h"
+#include "tts_server_threading_osx.h"
 
 #include "index.html.hpp"
 
@@ -232,7 +233,7 @@ struct worker {
     std::unordered_map<std::string, struct tts_runner *> runners;
     std::string text_encoder_path;
     std::atomic<bool> running = true;
-    std::thread * thread = nullptr;
+    tts_server_threading::native_thread * thread = nullptr;
 
     int task_timeout;
 
@@ -813,7 +814,7 @@ int main(int argc, const char ** argv) {
             init_worker(&model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
         } else {
             worker * w = new worker(tqueue, rmap, args.get_string_param("--text-encoder-path"), *args.get_int_param("--timeout"));
-            w->thread = new std::thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
+            w->thread = new tts_server_threading::native_thread(init_worker, &model_map, *args.get_int_param("--n-threads"), !args.get_bool_param("--use-metal"), default_generation_config, w);
             pool->push_back(w);
         }
     }
diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h
new file mode 100644
index 0000000..2bd9952
--- /dev/null
+++ b/examples/server/tts_server_threading_osx.h
@@ -0,0 +1,54 @@
+#pragma once
+
+// On OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low 
+// for large GGML graphs in which graph nodes are traversed recursively. To address this we instead use pthreads so that stack
+// size can be increased in order to be in parity with linux.
+
+#include <thread>
+
+#if defined(__APPLE__)
+
+#include <pthread.h>
+#include <functional>
+
+using namespace std;
+
+namespace tts_server_threading {
+	// native_thread starts its thread with pthread_create() and an explicit 8MB stack, matching the Linux default.
+	class native_thread { // std::thread stand-in backed by pthread_create() so the stack size can be set explicitly
+	    pthread_t thread{};   // handle of the running thread; meaningful only while started is true
+	    bool started = false; // set after pthread_create() succeeds, cleared again by join()
+	    static constexpr size_t THREAD_STACK_SIZE = 8 * 1024 * 1024;
+	    static void* trampoline(void* ptr) { // thread entry point: runs the bound callable, then frees it
+	        auto task = static_cast<std::function<void()>*>(ptr);
+	        (*task)();
+	        delete task;
+	        return nullptr;
+	    }
+	public:
+	    native_thread() = default; // no thread is started; join() is then a no-op
+	    native_thread(const native_thread&) = delete;
+	    template<class Function, class... Args> // binds fun to args and runs it on a new thread; throws std::system_error on failure
+	    explicit native_thread(Function&& fun, Args&&... args) {
+	        auto task = new std::function<void()>(std::bind(std::forward<Function>(fun), std::forward<Args>(args)...));
+	        pthread_attr_t attr;
+	        int rc = pthread_attr_init(&attr); // pthread calls report failure through their return value
+	        if (rc == 0) {
+	            rc = pthread_attr_setstacksize(&attr, THREAD_STACK_SIZE);
+	            if (rc == 0) rc = pthread_create(&thread, &attr, &trampoline, task);
+	            pthread_attr_destroy(&attr); // the attribute object is not needed after pthread_create() returns
+	        }
+	        if (rc != 0) { delete task; throw std::system_error(rc, std::system_category(), "native_thread: failed to start thread"); }
+	        started = true; // from here on the new thread owns task and deletes it in trampoline()
+	    }
+	    void join() { if (started) { started = false; pthread_join(thread, nullptr); } } // waits for the thread, at most once
+	};
+}
+
+#else
+
+namespace tts_server_threading {
+	using native_thread = thread;
+}
+
+#endif

From bbe276e507bc0882d7b1c674f9325045c6d7b7a6 Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <mwillettjeffries2@gmail.com>
Date: Wed, 4 Jun 2025 16:07:36 -0400
Subject: [PATCH 2/4] Fix Linux build: qualify std::thread in native_thread alias

---
 examples/server/tts_server_threading_osx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h
index 2bd9952..dc0d084 100644
--- a/examples/server/tts_server_threading_osx.h
+++ b/examples/server/tts_server_threading_osx.h
@@ -48,7 +48,7 @@ namespace tts_server_threading {
 #else
 
 namespace tts_server_threading {
-	using native_thread = thread;
+	using native_thread = std::thread;
 }
 
 #endif

From 56b2a2c19dc02b5e1803906d1771ce5fa5cd6111 Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <mwillettjeffries2@gmail.com>
Date: Wed, 4 Jun 2025 16:21:22 -0400
Subject: [PATCH 3/4] Reword header comment in examples/server/tts_server_threading_osx.h

---
 examples/server/tts_server_threading_osx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h
index dc0d084..4537328 100644
--- a/examples/server/tts_server_threading_osx.h
+++ b/examples/server/tts_server_threading_osx.h
@@ -1,6 +1,6 @@
 #pragma once
 
-// On OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low 
+// OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low 
 // for large GGML graphs in which graph nodes are traversed recursively. To address this we instead use pthreads so that stack
 // size can be increased in order to be in parity with linux.
 

From ed393b836ebb9ff58793ce6bd4a58155f5c75177 Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <mwillettjeffries2@gmail.com>
Date: Wed, 4 Jun 2025 16:22:02 -0400
Subject: [PATCH 4/4] Tighten header comment in examples/server/tts_server_threading_osx.h

---
 examples/server/tts_server_threading_osx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/tts_server_threading_osx.h b/examples/server/tts_server_threading_osx.h
index 4537328..22c2061 100644
--- a/examples/server/tts_server_threading_osx.h
+++ b/examples/server/tts_server_threading_osx.h
@@ -2,7 +2,7 @@
 
 // OSX threads other than the main thread are created with a reduced stack size of 512KB by default, this is too low 
 // for large GGML graphs in which graph nodes are traversed recursively. To address this we instead use pthreads so that stack
-// size can be increased in order to be in parity with linux.
+// size can be increased to match Linux.
 
 #include <thread>