Skip to content

Commit 68ef8e1

Browse files
authored
Refactor TTS C API (#3127)
This pull request refactors the C API for Text-to-Speech (TTS) in sherpa-onnx to make it more robust and maintainable. It adds null pointer checks for input parameters (`tts`, `text`, `config`, `prompt_text`, and `prompt_samples`), standardizes configuration handling by applying default values through `SHERPA_ONNX_OR`, takes `GenerationConfig` by `const` pointer, and wraps a callback function only when one is supplied, passing `nullptr` otherwise. Together, these changes make the TTS interface more stable and predictable.
1 parent 9787280 commit 68ef8e1

File tree

4 files changed

+192
-40
lines changed

4 files changed

+192
-40
lines changed

.github/workflows/c-api.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,41 @@ jobs:
7575
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
7676
fi
7777
78+
- name: Test PocketTTS
79+
shell: bash
80+
run: |
81+
name=pocket-tts-en-c-api
82+
gcc -o $name ./c-api-examples/$name.c \
83+
-I ./build/install/include \
84+
-L ./build/install/lib/ \
85+
-l sherpa-onnx-c-api \
86+
-l onnxruntime
87+
88+
ls -lh $name
89+
90+
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
91+
ldd ./$name
92+
echo "----"
93+
readelf -d ./$name
94+
fi
95+
96+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
97+
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
98+
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
99+
100+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
101+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
102+
103+
./$name
104+
105+
rm $name
106+
rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26
107+
108+
- uses: actions/upload-artifact@v4
109+
with:
110+
name: pocket-tts-wavs-${{ matrix.os }}
111+
path: ./generated-pocket-en.wav
112+
78113
- name: Test FunASR Nano
79114
shell: bash
80115
run: |

sherpa-onnx/c-api/c-api.cc

Lines changed: 154 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,30 +1378,40 @@ static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
13781378
}
13791379

13801380
static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
1381-
const SherpaOnnxOfflineTts *tts, const char *text, GenerationConfig *config,
1381+
const SherpaOnnxOfflineTts *tts, const char *text,
1382+
const GenerationConfig *config,
13821383
std::function<int32_t(const float *, int32_t, float)> callback) {
13831384
sherpa_onnx::GenerationConfig cfg;
1384-
if (config->reference_audio_len > 0 && config->reference_audio) {
1385+
if (config->reference_audio) {
1386+
if (config->reference_audio_len <= 0) {
1387+
SHERPA_ONNX_LOGE("Invalid reference audio len: %d",
1388+
config->reference_audio_len);
1389+
return nullptr;
1390+
}
1391+
13851392
cfg.reference_audio.assign(
13861393
config->reference_audio,
13871394
config->reference_audio + config->reference_audio_len);
13881395
}
1389-
if (config->silence_scale > 0) cfg.silence_scale = config->silence_scale;
1390-
if (config->speed > 0) cfg.speed = config->speed;
1396+
1397+
cfg.silence_scale = SHERPA_ONNX_OR(config->silence_scale, 0.2);
1398+
cfg.speed = SHERPA_ONNX_OR(config->speed, 1.0);
13911399
cfg.sid = config->sid;
1392-
if (config->reference_sample_rate > 0)
1393-
cfg.reference_sample_rate = config->reference_sample_rate;
1394-
if (config->reference_text)
1395-
cfg.reference_text = config->reference_text;
1396-
if (config->num_steps > 0) cfg.num_steps = config->num_steps;
1400+
1401+
cfg.reference_sample_rate = config->reference_sample_rate;
1402+
1403+
cfg.reference_text = SHERPA_ONNX_OR(config->reference_text, "");
1404+
cfg.num_steps = SHERPA_ONNX_OR(config->num_steps, 5);
1405+
13971406
if (config->extra) {
13981407
try {
13991408
auto json = nlohmann::json::parse(config->extra);
14001409
for (auto &[k, v] : json.items()) {
14011410
cfg.extra.insert_or_assign(std::string(k), v.dump());
14021411
}
14031412
} catch (const nlohmann::json::parse_error &e) {
1404-
SHERPA_ONNX_LOGE("Failed to parse extra JSON: %s", e.what());
1413+
SHERPA_ONNX_LOGE("Failed to parse extra JSON: '%s'", e.what());
1414+
SHERPA_ONNX_LOGE("Ignore the extra opt");
14051415
}
14061416
}
14071417

@@ -1426,50 +1436,120 @@ static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
14261436
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
14271437
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
14281438
float speed) {
1439+
if (!tts) {
1440+
SHERPA_ONNX_LOGE("tts is nullptr");
1441+
return nullptr;
1442+
}
1443+
1444+
if (!text) {
1445+
SHERPA_ONNX_LOGE("text is nullptr");
1446+
return nullptr;
1447+
}
1448+
14291449
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
14301450
}
14311451

14321452
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
14331453
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
14341454
SherpaOnnxGeneratedAudioCallback callback) {
1435-
auto wrapper = [callback](const float *samples, int32_t n,
1436-
float /*progress*/) {
1437-
return callback(samples, n);
1438-
};
1455+
if (!tts) {
1456+
SHERPA_ONNX_LOGE("tts is nullptr");
1457+
return nullptr;
1458+
}
1459+
1460+
if (!text) {
1461+
SHERPA_ONNX_LOGE("text is nullptr");
1462+
return nullptr;
1463+
}
1464+
1465+
if (callback) {
1466+
auto wrapper = [callback](const float *samples, int32_t n,
1467+
float /*progress*/) {
1468+
return callback(samples, n);
1469+
};
14391470

1440-
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
1471+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
1472+
std::move(wrapper));
1473+
} else {
1474+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
1475+
}
14411476
}
14421477

14431478
const SherpaOnnxGeneratedAudio *
14441479
SherpaOnnxOfflineTtsGenerateWithProgressCallback(
14451480
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
14461481
SherpaOnnxGeneratedAudioProgressCallback callback) {
1447-
auto wrapper = [callback](const float *samples, int32_t n, float progress) {
1448-
return callback(samples, n, progress);
1449-
};
1450-
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
1482+
if (!tts) {
1483+
SHERPA_ONNX_LOGE("tts is nullptr");
1484+
return nullptr;
1485+
}
1486+
1487+
if (!text) {
1488+
SHERPA_ONNX_LOGE("text is nullptr");
1489+
return nullptr;
1490+
}
1491+
1492+
if (callback) {
1493+
auto wrapper = [callback](const float *samples, int32_t n, float progress) {
1494+
return callback(samples, n, progress);
1495+
};
1496+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
1497+
std::move(wrapper));
1498+
} else {
1499+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
1500+
}
14511501
}
14521502

14531503
const SherpaOnnxGeneratedAudio *
14541504
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
14551505
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
14561506
SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
1457-
auto wrapper = [callback, arg](const float *samples, int32_t n,
1458-
float progress) {
1459-
return callback(samples, n, progress, arg);
1460-
};
1461-
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
1507+
if (!tts) {
1508+
SHERPA_ONNX_LOGE("tts is nullptr");
1509+
return nullptr;
1510+
}
1511+
1512+
if (!text) {
1513+
SHERPA_ONNX_LOGE("text is nullptr");
1514+
return nullptr;
1515+
}
1516+
1517+
if (callback) {
1518+
auto wrapper = [callback, arg](const float *samples, int32_t n,
1519+
float progress) {
1520+
return callback(samples, n, progress, arg);
1521+
};
1522+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
1523+
std::move(wrapper));
1524+
} else {
1525+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
1526+
}
14621527
}
14631528

14641529
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
14651530
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
14661531
SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
1467-
auto wrapper = [callback, arg](const float *samples, int32_t n,
1468-
float /*progress*/) {
1469-
return callback(samples, n, arg);
1470-
};
1532+
if (!tts) {
1533+
SHERPA_ONNX_LOGE("tts is nullptr");
1534+
return nullptr;
1535+
}
14711536

1472-
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
1537+
if (!text) {
1538+
SHERPA_ONNX_LOGE("text is nullptr");
1539+
return nullptr;
1540+
}
1541+
1542+
if (callback) {
1543+
auto wrapper = [callback, arg](const float *samples, int32_t n,
1544+
float /*progress*/) {
1545+
return callback(samples, n, arg);
1546+
};
1547+
1548+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
1549+
std::move(wrapper));
1550+
} else {
1551+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
1552+
}
14731553
}
14741554

14751555
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
@@ -1480,8 +1560,23 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
14801560
return nullptr;
14811561
}
14821562

1483-
std::string text_s = text ? text : "";
1484-
std::string ptext_s = prompt_text ? prompt_text : "";
1563+
if (!text) {
1564+
SHERPA_ONNX_LOGE("text is nullptr");
1565+
return nullptr;
1566+
}
1567+
1568+
if (!prompt_text) {
1569+
SHERPA_ONNX_LOGE("prompt_text is nullptr");
1570+
return nullptr;
1571+
}
1572+
1573+
if (!prompt_samples) {
1574+
SHERPA_ONNX_LOGE("prompt_samples is nullptr");
1575+
return nullptr;
1576+
}
1577+
1578+
std::string text_s = text;
1579+
std::string ptext_s = prompt_text;
14851580

14861581
std::vector<float> prompt_vec;
14871582
if (prompt_samples && n_prompt > 0) {
@@ -1509,15 +1604,34 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
15091604
}
15101605

15111606
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithConfig(
1512-
const SherpaOnnxOfflineTts *tts, const char *text, GenerationConfig *config,
1607+
const SherpaOnnxOfflineTts *tts, const char *text,
1608+
const GenerationConfig *config,
15131609
SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
1514-
auto wrapper = [callback, arg](const float *samples, int32_t n,
1515-
float progress) {
1516-
if (!callback) return 1;
1517-
return callback(samples, n, progress, arg);
1518-
};
1610+
if (!tts) {
1611+
return nullptr;
1612+
}
15191613

1520-
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, config, std::move(wrapper));
1614+
if (!text) {
1615+
SHERPA_ONNX_LOGE("text is nullptr");
1616+
return nullptr;
1617+
}
1618+
1619+
if (!config) {
1620+
SHERPA_ONNX_LOGE("config is nullptr");
1621+
return nullptr;
1622+
}
1623+
1624+
if (callback) {
1625+
auto wrapper = [callback, arg](const float *samples, int32_t n,
1626+
float progress) {
1627+
return callback(samples, n, progress, arg);
1628+
};
1629+
1630+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, config,
1631+
std::move(wrapper));
1632+
} else {
1633+
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, config, nullptr);
1634+
}
15211635
}
15221636

15231637
void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
@@ -1594,7 +1708,8 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
15941708
}
15951709

15961710
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithConfig(
1597-
const SherpaOnnxOfflineTts *tts, const char *text, GenerationConfig *config,
1711+
const SherpaOnnxOfflineTts *tts, const char *text,
1712+
const GenerationConfig *config,
15981713
SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
15991714
SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
16001715
return nullptr;

sherpa-onnx/c-api/c-api.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1228,7 +1228,8 @@ SHERPA_ONNX_API typedef struct GenerationConfig {
12281228
// returned pointer to avoid memory leak.
12291229
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *
12301230
SherpaOnnxOfflineTtsGenerateWithConfig(
1231-
const SherpaOnnxOfflineTts *tts, const char *text, GenerationConfig *config,
1231+
const SherpaOnnxOfflineTts *tts, const char *text,
1232+
const GenerationConfig *config,
12321233
SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg);
12331234

12341235
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(

sherpa-onnx/c-api/sherpa-onnx-symbols-c.exp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ _SherpaOnnxOfflineSpeechDenoiserRun
101101
_SherpaOnnxOfflineTtsGenerate
102102
_SherpaOnnxOfflineTtsGenerateWithCallback
103103
_SherpaOnnxOfflineTtsGenerateWithCallbackWithArg
104+
_SherpaOnnxOfflineTtsGenerateWithConfig
104105
_SherpaOnnxOfflineTtsGenerateWithProgressCallback
105106
_SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg
106107
_SherpaOnnxOfflineTtsGenerateWithZipvoice

0 commit comments

Comments
 (0)