Alignment for best path can be obtained.

ondrejklejch · ondrejklejch · commit 444e479dd5b5 · 2016-10-06T13:59:40.000+01:00
diff --git a/alex_asr/decoder.pyx b/alex_asr/decoder.pyx
@@ -22,6 +22,7 @@ cdef extern from "src/decoder.h" namespace "alex_asr":
         void FrameIn(unsigned char *frame, size_t frame_len) except +
         bool GetBestPath(vector[int] *v_out, float *lik) except +
         bool GetLattice(alex_asr.fst.libfst.LogVectorFst *fst_out, double *tot_lik) except +
+        bool GetTimeAlignment(vector[int] *words, vector[int] *times, vector[int] *durations) except +
         string GetWord(int word_id) except +
         void InputFinished() except +
         bool EndpointDetected() except +
@@ -126,6 +127,24 @@ cdef class Decoder:
         self.utt_decoded = 0
         return (lik, r)
 
+    def get_time_alignment(self):
+        """get_time_alignment(self)
+        Get time alignment of the current 1-best decoding hypothesis.
+        Entries whose word id is 0 are left out of all three lists.
+
+        Returns:
+            tuple: (list of word id's, list of start times, list of durations)
+        """
+        cdef vector[int] w
+        cdef vector[int] t
+        cdef vector[int] d
+        self.thisptr.GetTimeAlignment(address(w), address(t), address(d))
+        # One shared index list keeps the three outputs aligned and never reads
+        # w, t or d past its end should the vectors ever differ in length.
+        keep = [i for i in xrange(min(w.size(), t.size(), d.size())) if w[i] != 0]
+
+        return ([w[i] for i in keep], [t[i] for i in keep], [d[i] for i in keep])
+
     def get_word(self, word_id):
         """get_word(self, word_id)
         Get word string form given word id.
diff --git a/src/decoder.cc b/src/decoder.cc
@@ -2,6 +2,7 @@
 #include "src/utils.h"
 
 #include "online2/onlinebin-util.h"
+#include "lat/kaldi-lattice.h"
 
 using namespace kaldi;
 
@@ -218,6 +219,22 @@ namespace alex_asr {
         return ok;
     }
 
+    // Word-level time alignment of the single best path through the current lattice.
+    // Returns false without touching the outputs when no raw lattice can be obtained.
+    bool Decoder::GetTimeAlignment(std::vector<int> *words, std::vector<int> *times, std::vector<int> *lengths) {
+        Lattice lat;
+        // Bail out first: determinizing and searching a lattice that was never filled is wasted work.
+        if (!decoder_->GetRawLattice(&lat)) return false;
+
+        CompactLattice compact_lat;
+        BaseFloat lat_beam = config_->decoder_opts.lattice_beam;
+        DeterminizeLatticePhonePrunedWrapper(*trans_model_, &lat, lat_beam, &compact_lat, config_->decoder_opts.det_opts);
+
+        CompactLattice compact_best_path;
+        CompactLatticeShortestPath(compact_lat, &compact_best_path);
+        return CompactLatticeToWordAlignment(compact_best_path, words, times, lengths);
+    }
+
     string Decoder::GetWord(int word_id) {
         return words_->Find(word_id);
     }
diff --git a/src/decoder.h b/src/decoder.h
@@ -11,6 +11,8 @@
 #include "feat/online-feature.h"
 #include "matrix/matrix-lib.h"
 #include "util/common-utils.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
 #include "nnet2/online-nnet2-decodable.h"
 #include "online2/online-gmm-decodable.h"
 #include "online2/online-endpoint.h"
@@ -29,6 +31,7 @@ namespace alex_asr {
         void FrameIn(VectorBase<BaseFloat> *waveform_in);
         bool GetBestPath(std::vector<int> *v_out, BaseFloat *prob);
         bool GetLattice(fst::VectorFst<fst::LogArc> * out_fst, double *tot_lik, bool end_of_utt=true);
+        bool GetTimeAlignment(std::vector<int> *words, std::vector<int> *times, std::vector<int> *lengths);
         string GetWord(int word_id);
         void InputFinished();
         bool EndpointDetected();
diff --git a/test/test.py b/test/test.py
@@ -45,5 +45,11 @@ def word_ids_to_str_hyp(decoder, word_ids):
         for arc in state.arcs:
             print ('    %s' % decoder.get_word(arc.ilabel))
 
+    print ('Resulting time alignment:')
+    words, times, durations = decoder.get_time_alignment()
+    words = word_ids_to_str_hyp(decoder, words).split()
 
+    for (word, time, duration) in zip(words, times, durations):
+        if word != "<eps>":
+            print (word, time, duration)