diff --git a/API-README.md b/API-README.md
new file mode 100644
index 0000000..c841961
--- /dev/null
+++ b/API-README.md
@@ -0,0 +1,36 @@
+# WotR-API-TextToSpeechMod
+By [lvaskys](https://github.com/lvaskys)
+
+This README is for the API implementation that adds the ability to use a backend API for TTS instead of the Windows TTS engine. Currently, [Auralis](https://github.com/astramind-ai/Auralis) (based on xttsv2) and [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI) are supported. I believe an NVIDIA GPU is required for both, but see their documentation for more information.
+
+## How to use
+
+This mod's main features are configured in a `settings.json` file that lives in the base mod folder. Comments are included to help guide your configuration. Of note is `speech_impl` which defines the implementation of the speech service to use, either `AuralisSpeech` or `KokoroSpeech` (this new implementation), or `WindowsSpeech` or `AppleSpeech` for the original implementation.
+
+The API service must be up and running for the mod to work. See the documentation for the API service you are using for more information on how to set it up. I used WSL to run Auralis, although it may now work on native Windows as well (I'm not sure). For Kokoro, I used the [docker-run](https://github.com/remsky/Kokoro-FastAPI?tab=readme-ov-file#get-started) instructions. I think Docker on Windows may require WSL to share the GPU with the container properly, so you may need to install it either way.
+
+Make sure the endpoint matches. If you are keeping the `settings.json` file as is, then for Auralis:
+```
+auralis.openai --host 127.0.0.1 --port 8000 --model AstraMindAI/xttsv2 --gpt_model AstraMindAI/xtts2-gpt --max_concurrency 4 --vllm_logging_level warn
+```
+or Kokoro:
+```
+docker run --gpus all -p 8000:8880 ghcr.io/remsky/kokoro-fastapi-gpu:v0.2.2
+```
+
+**Note: in order to use Auralis, you must provide a wav file for the server to use for one-shot voice cloning.** Currently, this file is expected to live in your base game directory, not your mod directory, although perhaps that can be fixed in the future. An example file you can use is [female_01.wav](samples/female_01.wav).
+
+## Other new features
+This supports cancelling playback with the controller cancel/B/Circle button. Specifically, it will cancel the current sentence or two-sentence chunk being played and continue with the next sentence. This allows for a kind of "fast-forward" effect if you don't feel like listening to the entire dialogue but still want to hear later portions, for example when your reading outpaces the speaker.
+
+## Multilingual Support
+This should theoretically be multi-lingual, but is untested. Both XTTS and Kokoro support multiple languages.
+
+## Linux/WINE Support
+I believe some people have wanted a version of this mod for Linux. I have not tested this on Linux/WINE, but Auralis or Kokoro should theoretically work.
+
+## Limitations/Broken Features
+These new features only support one speaker at the moment. Both voiced and narrator content will be spoken with the chosen voice. I may fully implement male/female/narrator as it was in the original mod, or maybe even character-specific voices. But as it stands for now, this is a good initial release and works fine for my own needs. However, the Windows/Apple implementations should work as intended.
+
+## Motivation and thoughts
+Windows natural TTS voices are pretty good, but lack proper cadence and emotion. Also, I did not want to pay for an API, but rather have it run locally. XTTS is excellent in that regard, and seems to pick up on cues without even feeding it any additional information. The sound quality is poorer, however, and it is a good bit slower, but still responsive enough for my needs. Kokoro is another TTS I heard about, and I decided to add it as another alternative. It's super fast, many times faster than realtime, and the quality is excellent. The cadence and emotion aren't super, though, and seem rather similar to Windows natural voices.
\ No newline at end of file
diff --git a/SpeechMod/Configuration/Settings/JsonSettingsSerializer.cs b/SpeechMod/Configuration/Settings/JsonSettingsSerializer.cs
new file mode 100644
index 0000000..080f38a
--- /dev/null
+++ b/SpeechMod/Configuration/Settings/JsonSettingsSerializer.cs
@@ -0,0 +1,82 @@
+using System;
+using System.IO;
+using System.Text;
+using Newtonsoft.Json;
+using Newtonsoft.Json.Linq;
+using Newtonsoft.Json.Serialization;
+
+namespace SpeechMod.Configuration.Settings
+{
+    /// <summary>
+    /// Handles serialization and deserialization of JsonSettings to and from JSON files.
+    /// </summary>
+    public static class JsonSettingsSerializer
+    {
+        private static readonly JsonSerializerSettings SerializerSettings = new JsonSerializerSettings
+        {
+            Formatting = Formatting.Indented,
+            ContractResolver = new CamelCasePropertyNamesContractResolver(),
+            NullValueHandling = NullValueHandling.Include,
+        };
+
+        
+        /// <summary>
+        /// Serializes the settings with the shared serializer options and writes them to the given path as
+        /// UTF-8, creating the file when it does not exist yet. Isn't used right now, but might be in the future.
+        /// </summary>
+        /// <param name="settings">The settings object to serialize</param>
+        /// <param name="filePath">Path to save the JSON file</param>
+        /// <returns>True if successful; false for a null/blank argument or when serializing or writing fails</returns>
+        public static bool SaveSettings(JsonSettings settings, string filePath)
+        {
+            if (settings == null || string.IsNullOrWhiteSpace(filePath)) // nothing valid to write
+            {
+                return false;
+            }
+            try
+            {
+                string json = JsonConvert.SerializeObject(settings, SerializerSettings);
+                File.WriteAllText(filePath, json, Encoding.UTF8);
+                Main.Logger?.Log($"Settings saved to {filePath}");
+                return true;
+            }
+            catch (Exception ex)
+            {
+                Main.Logger?.Error($"Failed to save settings: {ex.Message}");
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Loads JsonSettings from the specified JSON file. Never returns null.
+        /// </summary>
+        /// <param name="filePath">Path to the JSON file; when it is null or the file is missing, in-memory defaults are returned</param>
+        /// <returns>Loaded JsonSettings object, or a default instance if the file doesn't exist, is empty, or is invalid</returns>
+        public static JsonSettings LoadSettings(string filePath = null)
+        {
+            Main.Logger?.Log("Loading JSON settings...");
+
+            try
+            {
+                if (!File.Exists(filePath))
+                {
+                    // Nothing is written to disk here; the caller simply gets an in-memory default instance.
+                    Main.Logger?.Log($"Settings file not found at {filePath}, using defaults");
+                    return new JsonSettings();
+                }
+
+                string json = File.ReadAllText(filePath, Encoding.UTF8);
+                // DeserializeObject yields null for an empty file or a literal "null" document.
+                JsonSettings settings = JsonConvert.DeserializeObject<JsonSettings>(json, SerializerSettings);
+                Main.Logger?.Log($"Settings loaded from {filePath}");
+                return settings ?? new JsonSettings();
+            }
+            catch (Exception ex)
+            {
+                Main.Logger?.Error($"Failed to load settings: {ex.Message}");
+                return new JsonSettings(); // Return default settings on error
+            }
+        }
+
+    }
+}
diff --git a/SpeechMod/JsonSettings.cs b/SpeechMod/JsonSettings.cs
new file mode 100644
index 0000000..06841a2
--- /dev/null
+++ b/SpeechMod/JsonSettings.cs
@@ -0,0 +1,57 @@
+namespace SpeechMod;
+
+// Root settings object loaded from settings.json by JsonSettingsSerializer.LoadSettings; these defaults will normally be overwritten upon deserialization
+public class JsonSettings
+{
+    // Class name of the speech implementation that Main.SetSpeech instantiates (exact, case-sensitive match): AuralisSpeech, KokoroSpeech, AppleSpeech, WindowsSpeech
+    public string speech_impl = "AuralisSpeech";
+
+    public string endpoint = "http://127.0.0.1:8000/v1/audio/speech"; // URL of the TTS HTTP API; NOTE(review): presumably used by both API backends — confirm
+
+    // possibly create setting for audio file download location
+    //public string audio_file_download_location = Path.GetTempPath();
+
+    // Auralis-specific settings
+    public AuralisJsonSettings auralis_settings = new AuralisJsonSettings();
+
+    // Kokoro-specific settings
+    public KokoroJsonSettings kokoro_settings = new KokoroJsonSettings();
+
+}
+
+public class AuralisJsonSettings // options for the Auralis backend (the auralis_settings object of JsonSettings)
+{
+    public string path_to_voice_one_shot = "female_01.wav"; // wav file AuralisSpeech reads and sends base64-encoded as the request's voice sample
+    public string response_format = "wav"; // this field and those below through repetition_penalty are copied as-is into each Auralis request
+    public float speed = 1.0f;
+    public string model = "xttsv2";
+    public bool enhance_speech = true;
+    public bool sound_norm_refs = false;
+    public int max_ref_length = 60;
+    public int gpt_cond_len = 30;
+    public int gpt_cond_chunk_len = 4;
+    public float temperature = 0.75f;
+    public float top_p = 0.85f;
+    public int top_k = 50;
+    public float repetition_penalty = 5.0f;
+    public float length_penalty = 1.0f; // NOTE(review): this and the two fields below are presumably forwarded to the request as well — confirm in AuralisSpeech
+    public bool do_sample = true;
+    public string language = "auto";
+    
+}
+
+public class KokoroJsonSettings // options for the Kokoro backend (the kokoro_settings object of JsonSettings)
+{
+    public string voice = "af_heart"; // NOTE(review): presumably a Kokoro voice id — confirm against the server's voice list
+    public string model = "kokoro";
+    public float speed = 1.0f;
+    public string lang_code = "a";
+    public string response_format = "wav";
+
+    // Normalization options; NOTE(review): presumably forwarded to Kokoro-FastAPI's text normalization — confirm in KokoroSpeech
+    public bool normalize = true;
+    public bool unit_normalization = false;
+    public bool url_normalization = true;
+    public bool email_normalization = true;
+    public bool optional_pluralization_normalization = true;
+}
diff --git a/SpeechMod/Main.cs b/SpeechMod/Main.cs
index e5cdbfa..d2d3aaf 100644
--- a/SpeechMod/Main.cs
+++ b/SpeechMod/Main.cs
@@ -1,10 +1,13 @@
 ﻿using HarmonyLib;
+using Rewired;
 using SpeechMod.Configuration;
+using SpeechMod.Configuration.Settings;
 using SpeechMod.Keybinds;
 using SpeechMod.Unity;
 using SpeechMod.Voice;
 using System;
 using System.Collections.Generic;
+using System.IO;
 using System.Linq;
 using System.Reflection;
 using TMPro;
@@ -20,6 +23,7 @@ public static class Main
 {
     public static UnityModManager.ModEntry.ModLogger Logger;
     public static Settings Settings;
+    public static JsonSettings JsonSettings;
     public static bool Enabled;
 
     public static string[] FontStyleNames = Enum.GetNames(typeof(FontStyles));
@@ -45,6 +49,8 @@ private static bool Load(UnityModManager.ModEntry modEntry)
 
         Logger = modEntry.Logger;
 
+        JsonSettings = JsonSettingsSerializer.LoadSettings(Path.Combine(modEntry.Path, "settings.json"));
+
         if (!SetSpeech())
             return false;
 
@@ -69,11 +75,26 @@ private static bool Load(UnityModManager.ModEntry modEntry)
 
         PhoneticDictionary.LoadDictionary();
 
+        // ReInput.players.AllPlayers is indexed 0 = System, 1 = MainPlayer, so index 1 only exists
+        // when at least two players are present (the previous ">= 1" check did not guarantee that).
+        if (ReInput.players.allPlayerCount > 1)
+        {
+            Rewired.Player mainPlayer = ReInput.players.AllPlayers[1];
+            // Call doButtonWork whenever the "Decline" action is pressed (checked in the Update loop).
+            mainPlayer.AddInputEventDelegate(doButtonWork, UpdateLoopType.Update, InputActionEventType.ButtonJustPressed, "Decline");
+        }
+
         Debug.Log("Pathfinder: Wrath of the Righteous Speech Mod Initialized!");
         m_Loaded = true;
         return true;
     }
 
+    public static void doButtonWork(InputActionEventData data) // Rewired callback for the "Decline" button press
+    {
+        // Interrupts current speech and plays the next phrase (if any). OnToggle sets Speech to null
+        // while this delegate stays registered, so the call must tolerate a missing instance.
+        Speech?.NextPhrase();
+    }
+
     private static void SetUpSettings()
     {
         if (ModConfigurationManager.Instance.GroupedSettings.TryGetValue("main", out _))
@@ -132,19 +153,57 @@ private static bool SetAvailableVoices()
         return true;
     }
 
+    // TODO clean up UMM configuration to better show what speech implementation is being used
+    // and what can be changed in-game. I prefer the json way, anyway, so I'm not sure how much
+    // I will actually change this
     private static bool SetSpeech()
     {
+        // Dispose of existing speech instance if it exists
+        if (Speech is IDisposable disposableSpeech)
+        {
+            disposableSpeech.Dispose();
+        }
+
+        // keep the setting of uielements/config section the same for now (until maybe I change it)
+        // but use the json config for the speech implementation instantiation
+        try {
+            var className = JsonSettings.speech_impl;
+
+            Logger.Log("Setting speech impl...." + className);
+
+            Assembly assembly = Assembly.GetExecutingAssembly();
+            Type type = assembly.GetTypes()
+                .FirstOrDefault(t => t.Name.Equals(className, StringComparison.Ordinal));
+            
+            if (type == null)
+            {
+                throw new ArgumentException($"Class '{className}' not found in the current assembly.");
+            }
+
+            Speech = (ISpeech) Activator.CreateInstance(type);
+        }
+        catch (Exception e)
+        {
+            Logger.Critical($"Failed to instantiate speech implementation: {JsonSettings.speech_impl}");
+            Logger.Critical(e.ToString());
+            return false;
+        }
+        
         switch (Application.platform)
         {
             case RuntimePlatform.OSXPlayer:
-                Speech = new AppleSpeech();
+                //Speech = new AppleSpeech();
                 SpeechExtensions.AddUiElements<AppleVoiceUnity>(Constants.APPLE_VOICE_NAME);
                 break;
             case RuntimePlatform.WindowsPlayer:
-                Speech = new WindowsSpeech();
+                //Speech = new WindowsSpeech();
+                //Speech = new AuralisSpeech();
+                //Speech = new KokoroSpeech();
                 SpeechExtensions.AddUiElements<WindowsVoiceUnity>(Constants.WINDOWS_VOICE_NAME);
                 break;
             default:
+                // I'm not sure if this will ever run, as the Linux version does not exist.
+                // Those running Linux use wine of some sort, which I believe would still show as Windows
                 Logger.Critical($"SpeechMod is not supported on {Application.platform}!");
                 return false;
         }
@@ -154,6 +213,11 @@ private static bool SetSpeech()
 
     private static bool OnToggle(UnityModManager.ModEntry modEntry, bool value)
     {
+        if (!value && Speech is IDisposable disposableSpeech) // mod is being switched off and the speech backend is disposable
+        {
+            disposableSpeech.Dispose();
+            Speech = null; // NOTE(review): nothing visible here re-creates Speech when the mod is toggled back on — confirm
+        }
         Enabled = value;
         return true;
     }
diff --git a/SpeechMod/SpeechMod.csproj b/SpeechMod/SpeechMod.csproj
index 44be776..1337b4b 100644
--- a/SpeechMod/SpeechMod.csproj
+++ b/SpeechMod/SpeechMod.csproj
@@ -25,10 +25,14 @@
         <Reference Include="$(PathfinderWOTRInstallDir)\Wrath_Data\Managed\UnityModManager\UnityModManager.dll*" Publicize="true" Private="false" />
         <Reference Include="$(PathfinderWOTRInstallDir)\Wrath_Data\Managed\Owlcat.Runtime.UI.dll*" Publicize="true" Private="false" />
         <Reference Include="$(PathfinderWOTRInstallDir)\Wrath_Data\Managed\Assembly-CSharp.dll*" Publicize="true" Private="false" />
+
+        <Reference Include="$(PathfinderWOTRInstallDir)\Wrath_Data\Managed\Rewired_Core.dll*" Publicize="true" Private="false" />
+        <Reference Include="$(PathfinderWOTRInstallDir)\Wrath_Data\Managed\Rewired_Windows.dll*" Publicize="true" Private="false" />
     </ItemGroup>
     <ItemGroup>
         <None Include="Info.json" CopyToOutputDirectory="PreserveNewest" Link="%(Filename)%(Extension)" />
         <None Include="PhoneticDictionary.json" CopyToOutputDirectory="PreserveNewest" Link="%(Filename)%(Extension)" />
+		<None Include="settings.json" CopyToOutputDirectory="PreserveNewest" Link="%(Filename)%(Extension)" />
     </ItemGroup>
     <!--<ItemGroup Condition="'$(TargetFramework.TrimEnd(`0123456789`))' == 'net'">
         <PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies" Version="1.0.3" PrivateAssets="all" />
@@ -37,6 +41,11 @@
         <PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies" Version="1.0.2" PrivateAssets="all" />
         <PackageReference Include="BepInEx.AssemblyPublicizer.MSBuild" IncludeAssets="build; contentfiles" Version="0.4.2" PrivateAssets="all" />
         <PackageReference Include="MicroUtils.HarmonyAnalyzers" IncludeAssets="runtime; build; native; contentfiles; analyzers" Version="*-*" PrivateAssets="all" />
+        <PackageReference Include="NAudio" Version="2.2.1" />
+        <PackageReference Include="System.Net.Http.Json" Version="9.0.1" />
+    </ItemGroup>
+    <ItemGroup>
+      <Reference Include="System.Net.Http" />
     </ItemGroup>
     <ItemGroup>
       <None Update="Localization\enGB.json">
diff --git a/SpeechMod/Voice/APISpeech.cs b/SpeechMod/Voice/APISpeech.cs
new file mode 100644
index 0000000..2982680
--- /dev/null
+++ b/SpeechMod/Voice/APISpeech.cs
@@ -0,0 +1,434 @@
+using Newtonsoft.Json;
+using SpeechMod.Unity;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Net.Http;
+using System.Net.Http.Json;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using NAudio.Wave;
+using System.Threading;
+using SpeechMod.Voice.Models;
+
+namespace SpeechMod.Voice
+{
+    public abstract class APISpeech : ISpeech, System.IDisposable
+    {
+        public static HttpClient sharedHttpClient = new();
+        protected ConcurrentQueue<string> filesToPlay = new();
+        private readonly object _syncLock = new object();
+        private bool _isPlaying = false;
+        private IWavePlayer wavePlayer;
+        private WaveStream waveStream;
+        private EventHandler<StoppedEventArgs> playbackStoppedHandler;
+        private CancellationTokenSource _playbackCts;
+
+        public APISpeech() // starts the background playback loop, which runs until the token is cancelled
+        {
+            _playbackCts = new CancellationTokenSource(); // cancelled and disposed in Dispose()
+            Task.Run(() => doPlayback(_playbackCts.Token), _playbackCts.Token); // fire-and-forget: the returned Task is not kept
+        }
+
+        // Background playback loop, started once from the constructor via Task.Run.
+        // Each pass: under the lock, if nothing is flagged as playing, dequeue the next file and set _isPlaying;
+        // then play it synchronously, clear the flag, and repeat until the token is cancelled.
+        private void doPlayback(CancellationToken token)
+        {
+            while (!token.IsCancellationRequested)
+            {
+                bool shouldPlay = false;
+                string fileToPlay = null;
+
+                lock (_syncLock)
+                {
+                    if (!_isPlaying && filesToPlay.TryDequeue(out fileToPlay)) // Stop()/NextPhrase() raise _isPlaying to hold this off
+                    {
+                        _isPlaying = true;
+                        shouldPlay = true;
+                    }
+                }
+
+                if (shouldPlay && fileToPlay != null)
+                {
+                    try
+                    {
+                        PlayFile(fileToPlay); // blocks until playback has stopped
+                    }
+                    finally
+                    {
+                        lock (_syncLock)
+                        {
+                            _isPlaying = false; // cleared even if PlayFile throws
+                        }
+                    }
+                }
+                else
+                {
+                    // Only sleep if we have nothing to do
+                    Thread.Sleep(100); // poll interval in milliseconds
+                }
+            }
+        }
+
+        /// <summary>
+        /// Plays one audio file through NAudio and blocks until playback stops, either because the file
+        /// finished or because it was interrupted. Failures are logged and cleaned up, not rethrown.
+        /// </summary>
+        /// <param name="filePath">Path of the audio file to play; it is deleted once playback stops.</param>
+        private void PlayFile(string filePath)
+        {
+            try
+            {
+                string path = Path.GetFullPath(filePath);
+                Main.Logger?.Log("Playing audio file using NAudio");
+                Main.Logger?.Log(path);
+
+                // Dispose previous player and stream if they exist
+                DisposeWaveObjects();
+
+                // Work through locals from here on: DisposeWaveObjects nulls the wavePlayer/waveStream
+                // fields, possibly from another thread, so re-reading them could hit a null reference.
+                IWavePlayer player = new WaveOutEvent();
+                wavePlayer = player;
+                WaveStream reader = new AudioFileReader(path);
+                waveStream = reader;
+
+                // Cleanup for when playback stops (StopWavePlayer also invokes this handler directly)
+                var localFilePath = filePath; // Capture for the lambda
+                playbackStoppedHandler = (sender, args) =>
+                {
+                    Main.Logger?.Log("Playback finished");
+                    DisposeWaveObjects();
+                    DeleteFile(localFilePath);
+                };
+
+                using (var playbackWaitEvent = new ManualResetEventSlim(false))
+                {
+                    EventHandler<StoppedEventArgs> waitHandler = (s, e) => playbackWaitEvent.Set();
+
+                    // Attach both handlers BEFORE Play(). The wait handler used to be added after Play(),
+                    // so a clip that stopped immediately was missed and Wait() below never returned.
+                    player.PlaybackStopped += playbackStoppedHandler;
+                    player.PlaybackStopped += waitHandler;
+                    try
+                    {
+                        player.Init(reader);
+                        player.Play();
+                        playbackWaitEvent.Wait();
+                    }
+                    finally
+                    {
+                        player.PlaybackStopped -= waitHandler;
+                    }
+                }
+
+                Main.Logger?.Log("Done playing");
+            }
+            catch (Exception ex)
+            {
+                Main.Logger?.Log($"Error playing audio: {ex.Message}");
+                DisposeWaveObjects();
+                DeleteFile(filePath);
+            }
+        }
+        
+        /// <summary>Stops and releases the current player and stream, if any, and resets both fields to null.</summary>
+        private void DisposeWaveObjects()
+        {
+            lock (_syncLock)
+            {
+                IWavePlayer player = wavePlayer;
+                if (player is not null)
+                {
+                    // Detach the completion handler before stopping the device.
+                    if (playbackStoppedHandler is not null)
+                    {
+                        player.PlaybackStopped -= playbackStoppedHandler;
+                    }
+
+                    player.Stop();
+                    player.Dispose();
+                    wavePlayer = null;
+                }
+
+                waveStream?.Dispose();
+                waveStream = null;
+            }
+        }
+
+        private void DeleteFile(string filePath)
+        {
+            // Best effort: a throw here would escape PlayFile's catch block and end the doPlayback loop.
+            try { if (File.Exists(filePath)) { File.Delete(filePath); } }
+            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
+            { Main.Logger?.Warning($"Could not delete {filePath}: {ex.Message}"); }
+        }
+
+
+        // TODO maybe use UMM config for this, but for now is using settings.json
+        /// <summary>
+        /// Returns a single fixed entry, "APIVoice".
+        /// </summary>
+        public string[] GetAvailableVoices() => new string[] { "APIVoice" };
+
+        /// <summary>
+        /// Reports "Speaking" while a file is flagged as playing, otherwise "Ready".
+        /// </summary>
+        /// <returns>The status text, based on the flag as read under the sync lock.</returns>
+        public string GetStatusMessage()
+        {
+            bool playing;
+            lock (_syncLock)
+            {
+                // Snapshot the flag inside the lock; picking the string needs no locking.
+                playing = _isPlaying;
+            }
+
+            return playing ? "Speaking" : "Ready";
+        }
+
+        /// <summary>True while a file is flagged as playing or generated files are still queued.</summary>
+        public bool IsSpeaking()
+        {
+            bool busy;
+            lock (_syncLock) { busy = _isPlaying || !filesToPlay.IsEmpty; }
+            return busy;
+        }
+
+        // TODO actually implement delay? what is it used for?
+        /// <summary>Hands non-empty text to PrepareSpeechText; <paramref name="delay"/> is currently ignored.</summary>
+        public void Speak(string text, float delay = 0)
+        {
+            if (!string.IsNullOrEmpty(text))
+            {
+                PrepareSpeechText(text);
+                return;
+            }
+            Main.Logger?.Warning("No text to speak!");
+        }
+
+        /// <summary>Speaks the text through <see cref="Speak"/>; the voice type is not used yet.</summary>
+        /// <param name="text">Text to speak; null or empty only logs a warning.</param>
+        /// <param name="voiceType">Requested voice category (currently ignored).</param>
+        /// <param name="delay">Passed through to Speak unchanged.</param>
+        public void SpeakAs(string text, VoiceType voiceType, float delay = 0)
+        {
+            // TODO maybe implement gender specific/narrator voices later,
+            // e.g. by honoring Main.Settings.UseGenderSpecificVoices
+            if (string.IsNullOrEmpty(text))
+            {
+                Main.Logger?.Warning("No text to speak!");
+            }
+            else
+            {
+                Speak(text, delay);
+            }
+        }
+
+        /// <summary>
+        /// Speaks dialog text through <see cref="Speak"/>; no separate dialog or narrator voice exists yet.
+        /// </summary>
+        /// <param name="text">Text to speak; null or empty only logs a warning.</param>
+        /// <param name="delay">Passed through to Speak unchanged.</param>
+        public void SpeakDialog(string text, float delay = 0)
+        {
+            // TODO maybe implement gender specific/narrator voices later (Main.Settings.UseGenderSpecificVoices)
+            if (string.IsNullOrEmpty(text))
+            {
+                Main.Logger?.Warning("No text to speak!");
+            }
+            else
+            {
+                Speak(text, delay);
+            }
+        }
+
+        /// <summary>
+        /// Speaks preview text through <see cref="Speak"/> with a delay of 0; the voice type is not used yet.
+        /// </summary>
+        /// <param name="text">Text to speak; null or empty only logs a warning.</param>
+        /// <param name="voiceType">Requested voice category (currently ignored).</param>
+        public void SpeakPreview(string text, VoiceType voiceType)
+        {
+            // TODO maybe implement gender specific/narrator voices later (Main.Settings.UseGenderSpecificVoices)
+            if (string.IsNullOrEmpty(text))
+            {
+                Main.Logger?.Warning("No text to speak!");
+            }
+            else
+            {
+                Speak(text, 0);
+            }
+        }
+
+        public void Stop() // interrupts the current file (if any) and discards everything still queued
+        {
+            lock (_syncLock)
+            {
+                _isPlaying = true; // Prevent new playback: doPlayback only dequeues while this flag is false
+            }
+            
+            StopWavePlayer(); // if a player is active, this disposes it and deletes the file being played
+
+            // Clear the queue, deleting each pending audio file from disk
+            while (filesToPlay.TryDequeue(out var fileToRemove))
+            {
+                DeleteFile(fileToRemove);
+            }
+
+            lock (_syncLock)
+            {
+                _isPlaying = false; // lets the playback loop dequeue again
+            }
+        }
+
+        private void StopWavePlayer() // interrupts the file that is currently playing; does nothing when no player/stream is set
+        {
+            lock (_syncLock)
+            {
+                if (wavePlayer != null && waveStream != null)
+                {
+                    Main.Logger?.Log("Stopping audio playback");
+
+                    // Trigger the same handler that would occur naturally when playback stops
+                    // This will ensure the file is deleted and resources are disposed
+                    if (playbackStoppedHandler != null)
+                    {
+                        playbackStoppedHandler(wavePlayer, new StoppedEventArgs()); // invoked directly on the calling thread, with the lock held
+                    }
+                    else
+                    {
+                        // Fallback in case the handler is not set
+                        DisposeWaveObjects(); // takes _syncLock again; C# locks are re-entrant on the same thread
+                    }
+                }
+            }
+        }
+
+        public void NextPhrase() // skips the file being played but, unlike Stop(), leaves the queue untouched
+        {
+            lock (_syncLock)
+            {
+                // make sure no new playback starts
+                _isPlaying = true;
+            }
+            
+            StopWavePlayer();
+            
+            lock (_syncLock)
+            {
+                _isPlaying = false; // the playback loop may now dequeue the next file
+            }
+        }
+
+        protected abstract Task ProcessAndQueueFile(string item, int count); // per-backend hook; ProcessText awaits it once per phrase, passing the phrase text and its 0-based index
+       
+
+        // TODO add section to remove narrator, or later to split it into a separate voice
+        // TODO how do we know what's voiced yet?  (I think that gets handled elsewhere...if we're here, it needs ai voice)
+        /// <summary>
+        /// Cleans the text, splits it into phrases, and starts a background task that synthesizes and
+        /// queues each phrase in order. A phrase ends at the first sentence end at or after 20 words.
+        /// </summary>
+        /// <param name="text">Raw text to speak; markup tags are stripped first.</param>
+        /// <returns>The cleaned text (tags removed, PrepareText applied), before it is split up.</returns>
+        public string PrepareSpeechText(string text)
+        {
+#if DEBUG
+            Main.Logger?.Log("PrepareSpeechText: " + text);
+#endif
+            // getting rid of all tags?
+            // TODO might not need to anymore, I think they're never added anymore
+            text = Regex.Replace(text, "<[^>]+>", "");
+            text = text.PrepareText();
+
+            // One entry per line; a <silence/> tag still present here becomes "..." when it carries an
+            // msec attribute and is removed otherwise.
+            string[] textArr = text
+                .Split(new string[] { "\r\n", "\n" }, StringSplitOptions.None)
+                .Select(item => Regex.Replace(item, @"<silence(?:\s+msec=""(\d+)"")?\/>", match => match.Groups[1].Success ? "..." : ""))
+                .ToArray();
+
+            // each line is split up into more chunks: count up to 20 words, then round up to the nearest
+            // sentence. chunking like this makes more sense than trying to stream from the tts service
+            // because the quality of the output is much better when chunked in at least a few sentences
+            // rather than streaming. I think it might have to do with how much context the tts service has
+            // with a full sentence or two vs maybe only a few words at a time with streaming
+            List<string> phrases = new List<string>();
+            foreach (string line in textArr)
+            {
+                var phrase = new StringBuilder();
+                int wordCount = 0;
+                foreach (string word in line.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries))
+                {
+                    wordCount++;
+                    phrase.Append(' ').Append(word);
+                    char last = word[word.Length - 1]; // RemoveEmptyEntries guarantees at least one character
+                    bool endsSentence = last == '.' || last == '?' || last == '!';
+                    if (wordCount >= 20 && endsSentence)
+                    {
+                        phrases.Add(phrase.ToString());
+                        phrase.Clear();
+                        wordCount = 0;
+                    }
+                }
+
+                // add the last phrase if it didn't end on a sentence boundary, and check it's not empty
+                string remainder = phrase.ToString();
+                if (!string.IsNullOrWhiteSpace(remainder))
+                {
+                    phrases.Add(remainder);
+                }
+            }
+
+            string[] queuedPhrases = phrases.ToArray();
+            Task.Run(async () =>
+            {
+                try
+                {
+                    await ProcessText(queuedPhrases);
+                }
+                catch (Exception ex)
+                {
+                    // Nobody awaits this task, so without logging here a synthesis failure would go unnoticed.
+                    Main.Logger?.Error($"Failed to process speech text: {ex}");
+                }
+            });
+#if DEBUG
+            if (System.Reflection.Assembly.GetEntryAssembly() == null)
+                Main.Logger?.Warning("Invalid " + text);
+#endif
+            return text;
+        }
+
+        /// <summary>Calls Stop(), then awaits ProcessAndQueueFile for each phrase in order.</summary>
+        private async Task ProcessText(string[] text)
+        {
+            // this stop call may be redundant
+            Stop();
+
+            for (int index = 0; index < text.Length; index++)
+            {
+                await ProcessAndQueueFile(text[index], index);
+            }
+        }
+
+        // Implement IDisposable to properly clean up resources
+        public virtual void Dispose() // NOTE(review): not guarded against being called twice — confirm callers dispose only once
+        {
+            // Cancel ongoing playback operations
+            _playbackCts?.Cancel(); // the doPlayback loop exits at its next token check
+            
+            // Stop playback
+            Stop(); // also deletes the audio files still waiting in the queue
+            
+            // Dispose resources
+            _playbackCts?.Dispose();
+        }
+    }
+}
diff --git a/SpeechMod/Voice/AppleSpeech.cs b/SpeechMod/Voice/AppleSpeech.cs
index 042d40d..0267bc7 100644
--- a/SpeechMod/Voice/AppleSpeech.cs
+++ b/SpeechMod/Voice/AppleSpeech.cs
@@ -80,6 +80,11 @@ public void Stop()
         AppleVoiceUnity.Stop();
     }
 
+    /// <summary>
+    /// Skips the current phrase by forwarding to <c>AppleVoiceUnity.Stop()</c>, the same call Stop() makes.
+    /// </summary>
+    public void NextPhrase() => AppleVoiceUnity.Stop();
+
     public string[] GetAvailableVoices()
     {
         var arguments = "say -v '?' | awk '{\\$3=\\\"\\\"; printf \\\"%s;\\\", \\$1\\\"#\\\"\\$2}' | rev | cut -c 2- | rev";
diff --git a/SpeechMod/Voice/AuralisSpeech.cs b/SpeechMod/Voice/AuralisSpeech.cs
new file mode 100644
index 0000000..66cd7ab
--- /dev/null
+++ b/SpeechMod/Voice/AuralisSpeech.cs
@@ -0,0 +1,79 @@
+﻿using Newtonsoft.Json;
+using SpeechMod.Unity;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Net.Http;
+using System.Net.Http.Json;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using NAudio.Wave;
+using System.Threading;
+using SpeechMod.Voice.Models;
+
+namespace SpeechMod.Voice
+{
+    // Speech backend that sends each phrase to an Auralis HTTP endpoint.
+    public class AuralisSpeech : APISpeech
+    {
+        public AuralisSpeech() : base(){}
+
+        // Synthesizes one phrase: posts it to the configured endpoint, saves the
+        // returned audio to a temp file and enqueues the file path on filesToPlay.
+        // item is the phrase text; count (its index in the batch) is unused here.
+        protected override async Task ProcessAndQueueFile(string item, int count)
+        {
+            var settings = Main.JsonSettings.auralis_settings;
+            // The one-shot reference clip is sent inline, base64-encoded, with every request.
+            byte[] voiceBytes = File.ReadAllBytes(settings.path_to_voice_one_shot);
+
+            var reqItem = new AuralisRequestItem
+            {
+                voice = new string[] { Convert.ToBase64String(voiceBytes) },
+                input = item,
+                response_format = settings.response_format,
+                speed = settings.speed,
+                model = settings.model,
+                enhance_speech = settings.enhance_speech,
+                sound_norm_refs = settings.sound_norm_refs,
+                max_ref_length = settings.max_ref_length,
+                gpt_cond_len = settings.gpt_cond_len,
+                gpt_cond_chunk_len = settings.gpt_cond_chunk_len,
+                temperature = settings.temperature,
+                top_p = settings.top_p,
+                top_k = settings.top_k,
+                repetition_penalty = settings.repetition_penalty,
+                length_penalty = settings.length_penalty,
+                do_sample = settings.do_sample,
+                language = settings.language
+            };
+
+            string tempDir = Path.Combine(Path.GetTempPath(), "WotRSpeechMod");
+            string outputPath = Path.Combine(tempDir, $"audio_{Guid.NewGuid()}.wav");
+
+            using (var response = await sharedHttpClient.PostAsync(Main.JsonSettings.endpoint, JsonContent.Create(reqItem)))
+            {
+                if (!response.IsSuccessStatusCode)
+                {
+                    // An error body is not audio: log and skip rather than queue an unplayable file.
+                    Main.Logger?.Warning($"Auralis request failed: {(int)response.StatusCode} {response.ReasonPhrase}");
+                    return;
+                }
+                // CreateDirectory is a no-op when the folder already exists.
+                Directory.CreateDirectory(tempDir);
+                // Inner scope: the file must be flushed and closed before its path is
+                // published, otherwise the consumer of filesToPlay can race the writer.
+                using (FileStream stream = File.Create(outputPath))
+                {
+                    await response.Content.CopyToAsync(stream);
+                }
+            }
+
+            filesToPlay.Enqueue(outputPath);
+        }
+    }
+}
diff --git a/SpeechMod/Voice/ISpeech.cs b/SpeechMod/Voice/ISpeech.cs
index 41d8205..810de44 100644
--- a/SpeechMod/Voice/ISpeech.cs
+++ b/SpeechMod/Voice/ISpeech.cs
@@ -10,4 +10,5 @@ public interface ISpeech
     void SpeakAs(string text, VoiceType voiceType, float delay = 0f);
     void Speak(string text, float delay = 0f);
     void Stop();
+    void NextPhrase();
 }
\ No newline at end of file
diff --git a/SpeechMod/Voice/KokoroSpeech.cs b/SpeechMod/Voice/KokoroSpeech.cs
new file mode 100644
index 0000000..c2dc75b
--- /dev/null
+++ b/SpeechMod/Voice/KokoroSpeech.cs
@@ -0,0 +1,65 @@
+﻿using SpeechMod.Voice.Models;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Net.Http.Json;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace SpeechMod.Voice
+{
+    // Speech backend that sends each phrase to a Kokoro-FastAPI HTTP endpoint.
+    public class KokoroSpeech : APISpeech
+    {
+        public KokoroSpeech() : base() { }
+
+        // Synthesizes one phrase (item): posts it to the configured endpoint, saves the returned
+        // audio to a temp file and enqueues its path on filesToPlay. count is unused here.
+        protected override async Task ProcessAndQueueFile(string item, int count)
+        {
+            var settings = Main.JsonSettings.kokoro_settings;
+            var reqItem = new KokoroRequestItem
+            {
+                voice = settings.voice,
+                model = settings.model,
+                speed = settings.speed,
+                lang_code = settings.lang_code,
+                response_format = settings.response_format,
+                normalization_options = new NormalizationOptions
+                {
+                    normalize = settings.normalize,
+                    unit_normalization = settings.unit_normalization,
+                    url_normalization = settings.url_normalization,
+                    email_normalization = settings.email_normalization,
+                    optional_pluralization_normalization = settings.optional_pluralization_normalization
+                },
+                // non-configurable fields
+                input = item,
+                stream = true,
+                return_download_link = false
+            };
+
+            string tempDir = Path.Combine(Path.GetTempPath(), "WotRSpeechMod");
+            string outputPath = Path.Combine(tempDir, $"audio_{Guid.NewGuid()}.wav");
+
+            using (var response = await sharedHttpClient.PostAsync(Main.JsonSettings.endpoint, JsonContent.Create(reqItem)))
+            {
+                if (!response.IsSuccessStatusCode)
+                {
+                    // An error body is not audio: log and skip rather than queue an unplayable file.
+                    Main.Logger?.Warning($"Kokoro request failed: {(int)response.StatusCode} {response.ReasonPhrase}");
+                    return;
+                }
+                Directory.CreateDirectory(tempDir); // no-op when the folder already exists
+                // Inner scope: flush and close the file before its path is published to filesToPlay.
+                using (FileStream stream = File.Create(outputPath))
+                {
+                    await response.Content.CopyToAsync(stream);
+                }
+            }
+
+            filesToPlay.Enqueue(outputPath);
+        }
+    }
+}
diff --git a/SpeechMod/Voice/Models/AuralisRequestItem.cs b/SpeechMod/Voice/Models/AuralisRequestItem.cs
new file mode 100644
index 0000000..81d6e95
--- /dev/null
+++ b/SpeechMod/Voice/Models/AuralisRequestItem.cs
@@ -0,0 +1,36 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace SpeechMod.Voice.Models
+{
+    public class AuralisRequestItem // request body that AuralisSpeech serializes with JsonContent.Create and posts to the endpoint
+    {
+        public string model { get; set; }
+        public string[] voice { get; set; } // AuralisSpeech sends a single entry: the base64-encoded one-shot reference file
+        public string input { get; set; } // text of the phrase to synthesize
+        public string response_format { get; set; }
+        public float speed { get; set; }
+        // Every property except voice and input is copied unchanged from auralis_settings by AuralisSpeech.
+        public bool enhance_speech { get; set; }
+
+        public bool sound_norm_refs { get; set; }
+
+        public int max_ref_length { get; set; }
+        public int gpt_cond_len { get; set; }
+        public int gpt_cond_chunk_len { get; set; }
+        public float temperature { get; set; }
+
+        public float top_p { get; set; }
+        public float top_k { get; set; }
+        public float repetition_penalty { get; set; }
+        public float length_penalty { get; set; }
+
+        public bool do_sample { get; set; }
+
+        public string language { get; set; }
+        // NOTE(review): the lower-case property names presumably mirror the server's JSON field names - confirm before renaming.
+    }
+}
diff --git a/SpeechMod/Voice/Models/KokoroRequestItem.cs b/SpeechMod/Voice/Models/KokoroRequestItem.cs
new file mode 100644
index 0000000..a45353e
--- /dev/null
+++ b/SpeechMod/Voice/Models/KokoroRequestItem.cs
@@ -0,0 +1,22 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace SpeechMod.Voice.Models
+{
+    public class KokoroRequestItem // request body that KokoroSpeech serializes with JsonContent.Create and posts to the endpoint
+    {
+        public string model { get; set; }
+        public string voice { get; set; }
+        public string input { get; set; } // text of the phrase to synthesize
+        public string response_format { get; set; }
+        public float speed { get; set; }
+        public bool stream { get; set; } // KokoroSpeech always sets this to true
+        public bool return_download_link { get; set; } // KokoroSpeech always sets this to false
+        public string lang_code { get; set; }
+        public NormalizationOptions normalization_options { get; set; } // built by KokoroSpeech from the kokoro_settings flags
+        // NOTE(review): the lower-case property names presumably mirror the server's JSON field names - confirm before renaming.
+    }
+}
diff --git a/SpeechMod/Voice/Models/NormalizationOptions.cs b/SpeechMod/Voice/Models/NormalizationOptions.cs
new file mode 100644
index 0000000..b1483b1
--- /dev/null
+++ b/SpeechMod/Voice/Models/NormalizationOptions.cs
@@ -0,0 +1,17 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace SpeechMod.Voice.Models
+{
+    public class NormalizationOptions // nested in KokoroRequestItem.normalization_options
+    {
+        public bool normalize {  get; set; } // each flag is copied from the kokoro_settings entry of the same name
+        public bool unit_normalization { get; set; }
+        public bool url_normalization { get; set; }
+        public bool email_normalization { get; set; }
+        public bool optional_pluralization_normalization { get; set; }
+    } // NOTE(review): the effect of each flag is defined by the server, presumably Kokoro-FastAPI text normalization - verify there
+}
diff --git a/SpeechMod/Voice/WindowsSpeech.cs b/SpeechMod/Voice/WindowsSpeech.cs
index ded2a76..00ff5dd 100644
--- a/SpeechMod/Voice/WindowsSpeech.cs
+++ b/SpeechMod/Voice/WindowsSpeech.cs
@@ -217,6 +217,11 @@ public void Stop()
         WindowsVoiceUnity.Stop();
     }
 
+    // Skipping to the next phrase is implemented as a plain stop:
+    // it makes the same WindowsVoiceUnity.Stop() call that Stop() makes.
+    public void NextPhrase() =>
+        WindowsVoiceUnity.Stop();
+
     public string[] GetAvailableVoices()
     {
         return WindowsVoiceUnity.GetAvailableVoices();
diff --git a/SpeechMod/settings.json b/SpeechMod/settings.json
new file mode 100644
index 0000000..24d4347
--- /dev/null
+++ b/SpeechMod/settings.json
@@ -0,0 +1,73 @@
+{
+  // Implementation of Speech service to use. Current options are:
+  // AuralisSpeech  - A faster TTS implementation of xttsv2 (and theoretically others in the future) https://github.com/astramind-ai/Auralis
+  //                - The most natural sounding TTS, but the slowest. Can be fine-tuned.
+  // KokoroSpeech   - A very, very fast, somewhat natural-sounding TTS. Wrapper API at https://github.com/remsky/Kokoro-FastAPI
+  //                - Extremely fast, and comparable to WindowsSpeech's natural voices. Limited set of voices and currently cannot be fine-tuned.
+  // WindowsSpeech  - Builtin Windows TTS service
+  // AppleSpeech    - Builtin macOS TTS service
+  "speech_impl": "AuralisSpeech",
+  "endpoint": "http://127.0.0.1:8000/v1/audio/speech",
+
+  "auralis_settings": {
+    // path to file to use for one-shot speech cloning, required
+    // your base game directory is considered the working directory, otherwise you can provide an absolute path
+    "path_to_voice_one_shot": "female_01.wav",
+    "response_format": "wav",
+    "speed": 1.0,
+    "model": "xttsv2",
+    "enhance_speech": true,
+    "sound_norm_refs": false,
+    "max_ref_length": 60,
+    "gpt_cond_len": 30,
+    "gpt_cond_chunk_len": 4,
+    "temperature": 0.75,
+    "top_p": 0.85,
+    "top_k": 50,
+    "repetition_penalty": 5.0,
+    "length_penalty": 1.0,
+    "do_sample": true,
+    "language": "auto"
+  },
+
+  "kokoro_settings": {
+    // Available voices:
+    //af_alloy, af_aoede, af_bella, af_heart, af_jadzia, af_jessica, af_kore, af_nicole, af_nova, af_river, af_sarah, af_sky, af_v0, af_v0bella, af_v0irulan, af_v0nicole, af_v0sarah, af_v0sky, 
+    //am_adam, am_echo, am_eric, am_fenrir, am_liam, am_michael, am_onyx, am_puck, am_santa, am_v0adam, am_v0gurney, am_v0michael, 
+    //bf_alice, bf_emma, bf_lily, bf_v0emma, bf_v0isabella, 
+    //bm_daniel, bm_fable, bm_george, bm_lewis, bm_v0george, bm_v0lewis, 
+    //ef_dora, 
+    //em_alex, em_santa, 
+    //ff_siwis, 
+    //hf_alpha, hf_beta, 
+    //hm_omega, hm_psi, 
+    //if_sara, 
+    //im_nicola, 
+    //jf_alpha, jf_gongitsune, jf_nezumi, jf_tebukuro, 
+    //jm_kumo, 
+    //pf_dora, 
+    //pm_alex, pm_santa, 
+    //zf_xiaobei, zf_xiaoni, zf_xiaoxiao, zf_xiaoyi,
+    //zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang
+    "voice": "af_heart",
+    "model": "kokoro",
+    "speed": 1.0,
+    // Possible language codes:
+    //'a': 'American English'
+    //'b': 'British English'
+    //'e': 'es'
+    //'f': 'fr-fr'
+    //'h': 'hi'
+    //'i': 'it'
+    //'p': 'pt-br'
+    //'j': 'Japanese'
+    //'z': 'Mandarin Chinese'
+    "lang_code": "a",
+    "response_format": "wav",
+    "normalize": true,
+    "unit_normalization": false,
+    "url_normalization": true,
+    "email_normalization": true,
+    "optional_pluralization_normalization": true
+  }
+}
diff --git a/samples/female_01.wav b/samples/female_01.wav
new file mode 100644
index 0000000..6fd8a4e
Binary files /dev/null and b/samples/female_01.wav differ