Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion TLSphinx.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,16 @@
attributes = {
LastSwiftMigration = 0720;
LastSwiftUpdateCheck = 0720;
LastUpgradeCheck = 0720;
LastUpgradeCheck = 0800;
ORGANIZATIONNAME = "Bruno Berisso";
TargetAttributes = {
9B5187AE1B18E1E80000E78D = {
CreatedOnToolsVersion = 6.3.2;
LastSwiftMigration = 0800;
};
9B5187B91B18E1E80000E78D = {
CreatedOnToolsVersion = 6.3.2;
LastSwiftMigration = 0800;
};
};
};
Expand Down Expand Up @@ -279,8 +281,10 @@
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
Expand Down Expand Up @@ -328,8 +332,10 @@
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
Expand All @@ -349,6 +355,7 @@
IPHONEOS_DEPLOYMENT_TARGET = 8.3;
MTL_ENABLE_DEBUG_INFO = NO;
SDKROOT = iphoneos;
SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule";
TARGETED_DEVICE_FAMILY = "1,2";
VALIDATE_PRODUCT = YES;
VERSIONING_SYSTEM = "apple-generic";
Expand All @@ -361,7 +368,9 @@
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ENABLE_MODULES = YES;
"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "";
DEFINES_MODULE = YES;
DEVELOPMENT_TEAM = "";
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
Expand All @@ -384,6 +393,7 @@
SKIP_INSTALL = YES;
SWIFT_INCLUDE_PATHS = Sphinx/include;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
SWIFT_VERSION = 3.0;
};
name = Debug;
};
Expand All @@ -392,7 +402,9 @@
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ENABLE_MODULES = YES;
"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "";
DEFINES_MODULE = YES;
DEVELOPMENT_TEAM = "";
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
Expand All @@ -414,12 +426,14 @@
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
SWIFT_INCLUDE_PATHS = Sphinx/include;
SWIFT_VERSION = 3.0;
};
name = Release;
};
9B5187C91B18E1E80000E78D /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
DEVELOPMENT_TEAM = "";
FRAMEWORK_SEARCH_PATHS = "$(inherited)";
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
Expand All @@ -438,12 +452,14 @@
);
PRODUCT_BUNDLE_IDENTIFIER = "com.tryolabs.$(PRODUCT_NAME:rfc1034identifier)";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_VERSION = 3.0;
};
name = Debug;
};
9B5187CA1B18E1E80000E78D /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
DEVELOPMENT_TEAM = "";
FRAMEWORK_SEARCH_PATHS = "$(inherited)";
HEADER_SEARCH_PATHS = (
"$(inherited)",
Expand All @@ -458,6 +474,7 @@
);
PRODUCT_BUNDLE_IDENTIFIER = "com.tryolabs.$(PRODUCT_NAME:rfc1034identifier)";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_VERSION = 3.0;
};
name = Release;
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "0720"
LastUpgradeVersion = "0800"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
Expand Down
12 changes: 6 additions & 6 deletions TLSphinx/Config.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
import Foundation
import Sphinx.Base

public class Config {
open class Config {

var cmdLnConf: COpaquePointer
private var cArgs: [UnsafeMutablePointer<Int8>]
var cmdLnConf: OpaquePointer?
fileprivate var cArgs: [UnsafeMutablePointer<Int8>?]

public init?(args: (String,String)...) {

// Create [UnsafeMutablePointer<Int8>].
cArgs = args.flatMap { (name, value) -> [UnsafeMutablePointer<Int8>] in
cArgs = args.flatMap { (name, value) -> [UnsafeMutablePointer<Int8>?] in
//strdup moves each string to the heap and returns an UnsafeMutablePointer<Int8>
return [strdup(name),strdup(value)]
}
Expand All @@ -38,7 +38,7 @@ public class Config {
}


public var showDebugInfo: Bool {
open var showDebugInfo: Bool {
get {
if cmdLnConf != nil {
return cmd_ln_str_r(cmdLnConf, "-logfn") == nil
Expand All @@ -56,4 +56,4 @@ public class Config {
}
}
}
}
}
86 changes: 43 additions & 43 deletions TLSphinx/Decoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@ import Sphinx


private enum SpeechStateEnum : CustomStringConvertible {
case Silence
case Speech
case Utterance
case silence
case speech
case utterance

var description: String {
get {
switch(self) {
case .Silence:
case .silence:
return "Silence"
case .Speech:
case .speech:
return "Speech"
case .Utterance:
case .utterance:
return "Utterance"
}
}
Expand All @@ -33,26 +33,26 @@ private enum SpeechStateEnum : CustomStringConvertible {

private extension AVAudioPCMBuffer {

func toNSDate() -> NSData {
func toNSDate() -> Data {
let channels = UnsafeBufferPointer(start: int16ChannelData, count: 1)
let ch0Data = NSData(bytes: channels[0], length:Int(frameCapacity * format.streamDescription.memory.mBytesPerFrame))
let ch0Data = Data(bytes: UnsafeMutablePointer<Int16>(channels[0]), count:Int(frameCapacity * format.streamDescription.pointee.mBytesPerFrame))
return ch0Data
}

}


public class Decoder {
open class Decoder {

private var psDecoder: COpaquePointer
private var engine: AVAudioEngine!
private var speechState: SpeechStateEnum
fileprivate var psDecoder: OpaquePointer?
fileprivate var engine: AVAudioEngine!
fileprivate var speechState: SpeechStateEnum

public var bufferSize: Int = 2048
open var bufferSize: Int = 2048

public init?(config: Config) {

speechState = .Silence
speechState = .silence

if config.cmdLnConf != nil{
psDecoder = ps_init(config.cmdLnConf)
Expand All @@ -72,61 +72,61 @@ public class Decoder {
assert(refCount == 0, "Can't free decoder, it's shared among instances")
}

private func process_raw(data: NSData) -> CInt {
fileprivate func process_raw(_ data: Data) -> CInt {
//Sphinx expects words of 2 bytes, but the NSFileHandle reads one byte at a time, so the length of the data for Sphinx is half of the real one.
let dataLenght = data.length / 2
let numberOfFrames = ps_process_raw(psDecoder, UnsafePointer(data.bytes), dataLenght, SFalse, SFalse)
let dataLenght = data.count / 2
let numberOfFrames = ps_process_raw(psDecoder, (data as NSData).bytes.bindMemory(to: int16.self, capacity: data.count), dataLenght, SFalse, SFalse)
let hasSpeech = in_speech()

switch (speechState) {
case .Silence where hasSpeech:
speechState = .Speech
case .Speech where !hasSpeech:
speechState = .Utterance
case .Utterance where !hasSpeech:
speechState = .Silence
case .silence where hasSpeech:
speechState = .speech
case .speech where !hasSpeech:
speechState = .utterance
case .utterance where !hasSpeech:
speechState = .silence
default:
break
}

return numberOfFrames
}

private func in_speech() -> Bool {
fileprivate func in_speech() -> Bool {
return ps_get_in_speech(psDecoder) == 1
}

private func start_utt() -> Bool {
fileprivate func start_utt() -> Bool {
return ps_start_utt(psDecoder) == 0
}

private func end_utt() -> Bool {
fileprivate func end_utt() -> Bool {
return ps_end_utt(psDecoder) == 0
}

private func get_hyp() -> Hypothesis? {
fileprivate func get_hyp() -> Hypothesis? {
var score: CInt = 0
let string: UnsafePointer<CChar> = ps_get_hyp(psDecoder, &score)

if let text = String.fromCString(string) {
if let text = String(validatingUTF8: string) {
return Hypothesis(text: text, score: Int(score))
} else {
return nil
}
}

private func hypotesisForSpeechAtPath (filePath: String) -> Hypothesis? {
fileprivate func hypotesisForSpeechAtPath (_ filePath: String) -> Hypothesis? {

if let fileHandle = NSFileHandle(forReadingAtPath: filePath) {
if let fileHandle = FileHandle(forReadingAtPath: filePath) {

start_utt()

let hypothesis = fileHandle.reduceChunks(bufferSize, initial: nil, reducer: { [unowned self] (data: NSData, partialHyp: Hypothesis?) -> Hypothesis? in
let hypothesis = fileHandle.reduceChunks(bufferSize, initial: nil, reducer: { [unowned self] (data: Data, partialHyp: Hypothesis?) -> Hypothesis? in

self.process_raw(data)

var resultantHyp = partialHyp
if self.speechState == .Utterance {
if self.speechState == .utterance {

self.end_utt()
resultantHyp = partialHyp + self.get_hyp()
Expand All @@ -140,7 +140,7 @@ public class Decoder {
fileHandle.closeFile()

//Process any pending speech
if speechState == .Speech {
if speechState == .speech {
return hypothesis + get_hyp()
} else {
return hypothesis
Expand All @@ -151,19 +151,19 @@ public class Decoder {
}
}

public func decodeSpeechAtPath (filePath: String, complete: (Hypothesis?) -> ()) {
open func decodeSpeechAtPath (_ filePath: String, complete: @escaping (Hypothesis?) -> ()) {

dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0)) {
DispatchQueue.global(priority: DispatchQueue.GlobalQueuePriority.default).async {

let hypothesis = self.hypotesisForSpeechAtPath(filePath)

dispatch_async(dispatch_get_main_queue()) {
DispatchQueue.main.async {
complete(hypothesis)
}
}
}

public func startDecodingSpeech (utteranceComplete: (Hypothesis?) -> ()) {
open func startDecodingSpeech (_ utteranceComplete: @escaping (Hypothesis?) -> ()) {

do {
try AVAudioSession.sharedInstance().setCategory(AVAudioSessionCategoryRecord)
Expand All @@ -179,20 +179,20 @@ public class Decoder {
return
}

let formatIn = AVAudioFormat(commonFormat: .PCMFormatInt16, sampleRate: 44100, channels: 1, interleaved: false)
let formatIn = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 44100, channels: 1, interleaved: false)
engine.connect(input, to: engine.outputNode, format: formatIn)

input.installTapOnBus(0, bufferSize: 4096, format: formatIn, block: { (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in
input.installTap(onBus: 0, bufferSize: 4096, format: formatIn, block: { (buffer: AVAudioPCMBuffer!, time: AVAudioTime!) -> Void in

let audioData = buffer.toNSDate()
self.process_raw(audioData)

if self.speechState == .Utterance {
if self.speechState == .utterance {

self.end_utt()
let hypothesis = self.get_hyp()

dispatch_async(dispatch_get_main_queue(), {
DispatchQueue.main.async(execute: {
utteranceComplete(hypothesis)
})

Expand All @@ -213,9 +213,9 @@ public class Decoder {
}
}

public func stopDecodingSpeech () {
open func stopDecodingSpeech () {
engine.stop()
engine.mainMixerNode.removeTapOnBus(0)
engine.mainMixerNode.removeTap(onBus: 0)
engine.reset()
engine = nil
}
Expand Down
10 changes: 5 additions & 5 deletions TLSphinx/Globals.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ import Foundation
// C-style integer flag with value 1.
// NOTE(review): presumably the "true" counterpart of SFalse for the Sphinx C API — no use is visible here; confirm against callers.
let STrue: CInt = 1
// C-style integer flag with value 0; passed as the flag arguments of ps_process_raw in Decoder.swift.
let SFalse: CInt = 0

extension NSFileHandle {
extension FileHandle {

func reduceChunks<T>(size: Int, initial: T, reducer: (NSData, T) -> T) -> T {
func reduceChunks<T>(_ size: Int, initial: T, reducer: (Data, T) -> T) -> T {

var reduceValue = initial
var chuckData = readDataOfLength(size)
var chuckData = readData(ofLength: size)

while chuckData.length > 0 {
while chuckData.count > 0 {
reduceValue = reducer(chuckData, reduceValue)
chuckData = readDataOfLength(size)
chuckData = readData(ofLength: size)
}

return reduceValue
Expand Down
Loading