Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Fix benchmarks.md HuggingFace link and add tdtCtc110m unit tests
Addresses review feedback:
1. Fix incorrect HuggingFace link in benchmarks.md
   - Was: parakeet-tdt-0.6b-v3-coreml (v3 model)
   - Now: parakeet-tdt-ctc-110m-coreml (correct 110M model)

2. Add comprehensive unit tests for tdtCtc110m model version:
   - Test hasFusedEncoder property (true for 110m)
   - Test encoderHiddenSize (512 vs 1024 for v2/v3)
   - Test blankId (1024, same as v2)
   - Test decoderLayers (1 vs 2 for v2/v3)
   - Test repo mapping (.parakeetTdtCtc110m)
   - Test usesSplitFrontend (false for fused model)
   - Test default cache directory structure
   - Test vocabulary filename (parakeet_vocab.json array format)
   - Test all model versions have required properties

3. Add ModelNames tests for parakeetTdtCtc110m repo:
   - Test repo properties (remotePath, name, folderName)
   - Test vocabulary uses array format
   - Test uses requiredModelsFused (3 files, no separate Encoder)
   - Test required model count (3 .mlmodelc files)
   - Test requiredModelsFused structure

All tests passing (27 AsrModelsTests + 18 ModelNamesTests = 45 tests)
  • Loading branch information
Alex-Wengg committed Mar 26, 2026
commit cc05622d4b12d2853a39e86481f36b0a59aba946
105 changes: 105 additions & 0 deletions Tests/FluidAudioTests/ASR/AsrModelsTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,109 @@ final class AsrModelsTests: XCTestCase {
"Model type \(modelType) should use CPU+ANE")
}
}

// MARK: - TDT-CTC-110M Model Version Tests

/// Checks `hasFusedEncoder` for each model version: true for tdtCtc110m,
/// false for v2 and v3.
func testTdtCtc110mHasFusedEncoder() {
    let fusedFlag110m = AsrModelVersion.tdtCtc110m.hasFusedEncoder
    let fusedFlagV2 = AsrModelVersion.v2.hasFusedEncoder
    let fusedFlagV3 = AsrModelVersion.v3.hasFusedEncoder

    // The 110M variant is expected to ship a fused preprocessor+encoder.
    XCTAssertTrue(fusedFlag110m)

    // v2 and v3 are expected to keep the encoder separate.
    XCTAssertFalse(fusedFlagV2)
    XCTAssertFalse(fusedFlagV3)
}

/// Checks the `encoderHiddenSize` value reported by each model version.
func testTdtCtc110mEncoderHiddenSize() {
    let hiddenSize110m = AsrModelVersion.tdtCtc110m.encoderHiddenSize
    let hiddenSizeV2 = AsrModelVersion.v2.encoderHiddenSize
    let hiddenSizeV3 = AsrModelVersion.v3.encoderHiddenSize

    // tdtCtc110m is expected to report a 512-dim encoder output.
    XCTAssertEqual(hiddenSize110m, 512)

    // v2 and v3 are expected to report a 1024-dim encoder output.
    XCTAssertEqual(hiddenSizeV2, 1024)
    XCTAssertEqual(hiddenSizeV3, 1024)
}

/// Checks the `blankId` value reported by each model version.
func testTdtCtc110mBlankId() {
    let blankId110m = AsrModelVersion.tdtCtc110m.blankId
    let blankIdV2 = AsrModelVersion.v2.blankId
    let blankIdV3 = AsrModelVersion.v3.blankId

    // tdtCtc110m and v2 are both expected to report blank ID 1024.
    XCTAssertEqual(blankId110m, 1024)
    XCTAssertEqual(blankIdV2, 1024)

    // v3 is expected to report blank ID 8192.
    XCTAssertEqual(blankIdV3, 8192)
}

/// Checks the `decoderLayers` value reported by each model version.
func testTdtCtc110mDecoderLayers() {
    let layers110m = AsrModelVersion.tdtCtc110m.decoderLayers
    let layersV2 = AsrModelVersion.v2.decoderLayers
    let layersV3 = AsrModelVersion.v3.decoderLayers

    // tdtCtc110m is expected to report a single decoder LSTM layer.
    XCTAssertEqual(layers110m, 1)

    // v2 and v3 are expected to report two decoder LSTM layers.
    XCTAssertEqual(layersV2, 2)
    XCTAssertEqual(layersV3, 2)
}

/// Checks that each model version maps to the expected `Repo` case.
func testTdtCtc110mRepo() {
    let repo110m = AsrModelVersion.tdtCtc110m.repo
    let repoV2 = AsrModelVersion.v2.repo
    let repoV3 = AsrModelVersion.v3.repo

    XCTAssertEqual(repo110m, Repo.parakeetTdtCtc110m)
    XCTAssertEqual(repoV2, Repo.parakeetV2)
    XCTAssertEqual(repoV3, Repo.parakeet)
}

/// Verifies the fused/split frontend expectation for each model version.
///
/// No `AsrModels` instance is constructed here; the check goes through
/// `AsrModelVersion.hasFusedEncoder` and negates it.
/// NOTE(review): the original comments state that `usesSplitFrontend` is the
/// inverse of `hasFusedEncoder` and that `MLModel` instances cannot be built
/// without model files; confirm both against the `AsrModels` implementation.
func testTdtCtc110mUsesSplitFrontend() {
    // tdtCtc110m has a fused frontend (no split). Assert the flag directly
    // instead of the former double negative `XCTAssertFalse(flag == false)`.
    XCTAssertTrue(
        AsrModelVersion.tdtCtc110m.hasFusedEncoder,
        "tdtCtc110m should report a fused encoder")

    // Test the inverse logic used in usesSplitFrontend
    let tdtCtc110mUsesSplit = !AsrModelVersion.tdtCtc110m.hasFusedEncoder
    XCTAssertFalse(tdtCtc110mUsesSplit, "tdtCtc110m should not use split frontend")

    // v2 and v3 use split frontend
    let v2UsesSplit = !AsrModelVersion.v2.hasFusedEncoder
    let v3UsesSplit = !AsrModelVersion.v3.hasFusedEncoder
    XCTAssertTrue(v2UsesSplit, "v2 should use split frontend")
    XCTAssertTrue(v3UsesSplit, "v3 should use split frontend")
}

/// Checks the shape of the default cache directory returned for tdtCtc110m.
func testTdtCtc110mDefaultCacheDirectory() {
    let directory = AsrModels.defaultCacheDirectory(for: .tdtCtc110m)
    let directoryPath = directory.path
    let expectedFolder = Repo.parakeetTdtCtc110m.folderName

    // It must be a file URL whose path is absolute (begins with "/").
    XCTAssertTrue(directory.isFileURL)
    XCTAssertTrue(directoryPath.first == "/")

    // The path must mention the repo folder plus the fixed path components.
    XCTAssertTrue(directoryPath.contains(expectedFolder))
    XCTAssertTrue(directoryPath.contains("FluidAudio"))
    XCTAssertTrue(directoryPath.contains("Models"))
}

/// Checks the array-format vocabulary filename constant.
func testTdtCtc110mVocabularyFilename() {
    let filename = ModelNames.ASR.vocabularyFileArray

    // Expected exact filename for the array-format vocabulary.
    XCTAssertEqual(filename, "parakeet_vocab.json")

    // Looser shape checks: a JSON file whose name mentions "vocab".
    let endsWithJson = filename.hasSuffix(".json")
    let mentionsVocab = filename.contains("vocab")
    XCTAssertTrue(endsWithJson)
    XCTAssertTrue(mentionsVocab)
}

/// Sanity-checks the basic properties of every listed model version.
///
/// Each assertion interpolates the version into its failure message so that a
/// failure inside the loop identifies which version broke.
/// NOTE(review): the version list is written out by hand; a newly added
/// `AsrModelVersion` case must be appended here to be covered.
func testAllModelVersionsHaveRequiredProperties() {
    let versions: [AsrModelVersion] = [.v2, .v3, .tdtCtc110m]

    for version in versions {
        // All versions should have valid repo
        XCTAssertNotNil(version.repo, "\(version) should have a repo")

        // All versions should have positive encoder hidden size
        XCTAssertGreaterThan(
            version.encoderHiddenSize, 0,
            "\(version) should have a positive encoder hidden size")

        // All versions should have positive blank ID
        XCTAssertGreaterThan(
            version.blankId, 0,
            "\(version) should have a positive blank ID")

        // All versions should have at least 1 decoder layer
        XCTAssertGreaterThan(
            version.decoderLayers, 0,
            "\(version) should have at least 1 decoder layer")
    }
}
}
69 changes: 69 additions & 0 deletions Tests/FluidAudioTests/ASR/ModelNamesTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,73 @@ final class ModelNamesTests: XCTestCase {
XCTAssertFalse(ModelNames.Qwen3ASR.requiredModels.isEmpty)
XCTAssertFalse(ModelNames.Qwen3ASR.requiredModelsFull.isEmpty)
}

// MARK: - TDT-CTC-110M Repo Tests

/// Checks the identifying strings exposed by `Repo.parakeetTdtCtc110m`.
func testParakeetTdtCtc110mRepoProperties() {
    let target = Repo.parakeetTdtCtc110m

    let expectedRemotePath = "FluidInference/parakeet-tdt-ctc-110m-coreml"
    let expectedName = "parakeet-tdt-ctc-110m-coreml"
    let expectedFolderName = "parakeet-tdt-ctc-110m"

    // Remote path is expected in owner/repo form.
    XCTAssertEqual(target.remotePath, expectedRemotePath)

    // Name is expected to be the repo slug, including the -coreml suffix.
    XCTAssertEqual(target.name, expectedName)

    // Folder name is expected to be the shorter local directory name.
    XCTAssertEqual(target.folderName, expectedFolderName)

    // No sub-path is expected for this repo.
    XCTAssertNil(target.subPath)
}

/// Checks that the tdtCtc110m repo resolves to the array-format vocabulary file.
func testParakeetTdtCtc110mVocabulary() {
    let resolvedFile = ModelNames.ASR.vocabulary(for: .parakeetTdtCtc110m)
    let arrayFormatFile = ModelNames.ASR.vocabularyFileArray

    // Must equal both the literal filename and the shared constant.
    XCTAssertEqual(resolvedFile, "parakeet_vocab.json")
    XCTAssertEqual(resolvedFile, arrayFormatFile)
}

/// Checks that the tdtCtc110m repo resolves to the fused required-model list.
func testParakeetTdtCtc110mUsesRequiredModelsFused() {
    let models = ModelNames.getRequiredModelNames(for: .parakeetTdtCtc110m, variant: nil)

    let resolvedSet = Set(models)
    let fusedSet = Set(ModelNames.ASR.requiredModelsFused)
    let regularSet = Set(ModelNames.ASR.requiredModels)

    // Same members as the fused list, and different from the regular list.
    XCTAssertEqual(resolvedSet, fusedSet)
    XCTAssertNotEqual(resolvedSet, regularSet)

    // Preprocessor is present; a standalone Encoder is not.
    let hasPreprocessor = models.contains("Preprocessor.mlmodelc")
    let hasStandaloneEncoder = models.contains("Encoder.mlmodelc")
    XCTAssertTrue(hasPreprocessor)
    XCTAssertFalse(hasStandaloneEncoder)
}

/// Checks how many required model names the tdtCtc110m repo resolves to.
func testParakeetTdtCtc110mRequiredModelCount() {
    // Expected entries: Preprocessor (fused), Decoder, JointDecision.
    // The original note says the vocabulary is handled separately from this list.
    let expectedCount = 3

    let resolvedCount = ModelNames.getRequiredModelNames(for: .parakeetTdtCtc110m, variant: nil).count

    XCTAssertEqual(
        resolvedCount,
        expectedCount,
        "tdtCtc110m should have 3 .mlmodelc files (fused preprocessor+encoder)")
}

/// Checks the membership and size of `ModelNames.ASR.requiredModelsFused`.
func testASRRequiredModelsFusedStructure() {
    let fused = ModelNames.ASR.requiredModelsFused
    let regularCount = ModelNames.ASR.requiredModels.count

    // The three core model bundles must be listed.
    let hasPreprocessor = fused.contains("Preprocessor.mlmodelc")
    let hasDecoder = fused.contains("Decoder.mlmodelc")
    let hasJointDecision = fused.contains("JointDecision.mlmodelc")
    XCTAssertTrue(hasPreprocessor)
    XCTAssertTrue(hasDecoder)
    XCTAssertTrue(hasJointDecision)

    // Neither the vocabulary file nor a standalone Encoder may be listed.
    let hasVocabulary = fused.contains("parakeet_vocab.json")
    let hasStandaloneEncoder = fused.contains("Encoder.mlmodelc")
    XCTAssertFalse(hasVocabulary)
    XCTAssertFalse(hasStandaloneEncoder)

    // The fused list must be exactly one entry shorter than the regular list.
    XCTAssertEqual(fused.count, regularCount - 1)
}
}
2 changes: 1 addition & 1 deletion benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ swift build -c release

- TDT (Token-and-Duration Transducer) decoder with CTC-constrained beam search
- Fused preprocessor+encoder reduces model load time and memory usage
- Models available at: [FluidInference/parakeet-tdt-0.6b-v3-coreml](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml)
- Models available at: [FluidInference/parakeet-tdt-ctc-110m-coreml](https://huggingface.co/FluidInference/parakeet-tdt-ctc-110m-coreml)
- iOS test app validates on-device performance with LibriSpeech ground truth

---
Expand Down
Loading