Вы находитесь на странице: 1из 5

## OpenDCD - Dynamic WFST Speech Decoder

Basic execution

```bash
# Synopsis: options first, then four positional arguments.
./dcd-recog [options] <trans_model_rs> <fst_rs> <feat_rs> <out_rs>
# Example: the FST argument names two FSTs separated by a comma.
./dcd-recog arcs.far la.C.det.L.fst,G.fst ark:- recog.far
```

Four types of **decoder\_type**

- **hmm\_lattice**: ``HMMTransitionModel, Decodable, StdArc, Lattice``


- **hmm\_lattice\_kaldi**: ``HMMTransitionModel, Decodable, KaldiLatticeArc, Lattice``
- **hmm\_simple**: ``HMMTransitionModel, Decodable, StdArc, SimpleLattice``
- **generic\_lattice**: ``GenericTransitionModel, Decodable, StdArc, Lattice``

#### DecoderMainEntry

```cpp
DecoderMainEntryBase* runner = FindEntry(“hmm_lattice”)
// runner = DecodeMainEntry<HMMTransitionModel, Lattice, StdArc>

struct DecodeMainEntry: public DecodeMainEntryBase {


typedef CLevelDecoder<StdFst, HMMTransitionModel, Lattice> D;

virtual int Run(ParseOptions &po, SearchOptions *opts, const string


&word_symbols_file) {
// CLevelDecoderMain<HMMTransitionModel, Lattice, StdArc>(po,
opts, ...)
return CLevelDecoderMain<T, L, B>(po, opts, word_symbols_file);
}
};

```

#### CLevelDecoderMain

```cpp
// Decoding driver: reads the transition model, the FST cascade and the word
// symbols, then decodes every utterance supplied by the feature reader.
//
//   po                - parsed command line; positional arguments 1-4 are read
//                       as the transition model, FSTs, features and output.
//   opts              - search options; opts->wordsyms is filled in here.
//   word_symbols_file - symbol table file handed to SymbolTable::ReadText.
template<class TransModel, class L, class B>
int CLevelDecoderMain(ParseOptions &po, SearchOptions *opts,
                      const string &word_symbols_file) {
  typedef typename TransModel::FrontEnd FrontEnd;
  typedef CLevelDecoder<StdArc, TransModel, L> Decoder;

  string trans_model_rs = po.GetArgs(1);
  string fst_rs = po.GetArgs(2);
  string feat_rs = po.GetArgs(3);
  string out_ws = po.GetArgs(4);

  // The decoded results are in the FAR format.
  FarWriter<B>* farwriter = FarWriter<B>::Create(out_ws, fst::FAR_DEFAULT);

  Cascade<StdArc>* cascade = 0;
  TransModel* trans_model = 0;
  SymbolTable* wordsyms = 0;
  // OpenFst Cascade Fst for CL.fst and G.fst, cascade would do dynamic
  // expansion for you.
  cascade = Cascade<StdArc>::Read(fst_rs);
  // HMMTransitionModel::ReadFsts
  trans_model = TransModel::ReadFsts(trans_model_rs, opts->trans_scale);

  wordsyms = SymbolTable::ReadText(word_symbols_file);
  opts->wordsyms = wordsyms;

  SequentialBaseFloatMatrixReader feature_reader(feat_rs);

  // NOTE(review): the declarations of `fst` and `decoder` are not part of
  // this excerpt.

  // Iterate over all the input utterances.
  int num = 0;
  for (; !feature_reader.Done(); feature_reader.Next(), ++num) {
    // Every opts->fst_reset_period utterances the cascade is rebuilt and a
    // fresh decoder is created on top of it.
    if (num % opts->fst_reset_period == 0) {
      fst = cascade->Rebuild();
      decoder = new Decoder(fst, trans_model, *opts);
    }

    const Matrix<float>& features = feature_reader.Value();
    FrontEnd* frontend = new FrontEnd(features, 1.0);
    VectorFst<B> ofst;
    VectorFst<B> lattice;
    trans_model->SetInput(frontend, *opts);

    // CLevelDecoder->Decode(trans_model, *opts, &ofst, ...)
    // opts->source = key
    // trans_model->SetInput(frontend, *opts)
    // A lattice is only requested when opts->gen_lattice is set.
    float cost = decoder->Decode(trans_model, *opts, &ofst,
                                 opts->gen_lattice ? &lattice : 0);
    // NOTE(review): writing the result through `farwriter` and releasing
    // `frontend` are not shown in this excerpt.
  }

  // Presumably 0 signals success to the caller (Run) - confirm.
  return 0;
}
```

**CLevelDecoder::Decode()**

```cpp
// Decodes the input currently attached to trans_model.
//
// After BeginDecode(), the loop expands active states, active arcs and
// epsilon arcs, then advances the transition model, until the model reports
// Done(). The value returned by EndDecode() is handed back to the caller.
//
//   trans_model - transition model driving the loop; also stored in
//                 trans_model_.
//   opts        - search options supplied by the caller.
//   ofst        - output FST passed on to EndDecode().
//   lfst        - optional second output FST passed on to EndDecode();
//                 may be 0.
template <class ARC>
float Decode(TransModel* trans_model, const SearchOptions& opts,
             VectorFst<ARC>* ofst, VectorFst<ARC>* lfst = 0) {
  // Bad Taste
  trans_model_ = trans_model;

  if (!BeginDecode()) { ... }

  while (!trans_model->Done()) {
    ExpandActiveStates();
    ExpandActiveArcs();
    ExpandEpsilonArcs();
    trans_model->Next();
  }

  float best_cost = EndDecode(ofst, lfst, search_opts_.nbest);
  return best_cost;
}
```

**CLevelDecoder::BeginDecode()**

```cpp
bool BeginDecode() {
// fst_ is indeed a cascade of CL.fst and G.fst
// fst_->Start() will combine two start states from each.
int s = fst_->Start();
LatticeState ls = lattice_->CreateStartState(s);
Token token(ls, 0);

SearchState* ss = FindSearchState(s);
ss->Activate(token, time_, &active_states_);
ExpandEpsilonArcs();
return !active_states_.empty();
}
```

**CLevelDecoder::FindSearchState()**

```cpp
// Returns the SearchState cached for `state`, creating, initialising and
// caching a new one on first use.
virtual SearchState* FindSearchState(int state) {
  // Already cached: hand back the stored entry.
  if (search_hash_.find(state) != search_hash_.end())
    return search_hash_[state];

  // First visit: allocate, initialise, then remember it under `state`.
  SearchState* ss = AllocSearchState();
  ss->Init(*fst_, state, *trans_model_, search_opts_);
  search_hash_[state] = ss;
  return ss;
}
```
**TokenTpl**

```cpp
// Token carried through the search, parameterised on the lattice type L.
template <class L>
class TokenTpl {
  LatticeState tb_;         // lattice state; presumably the traceback handle - confirm
  float cost_;              // cost associated with this token
  unsigned int alignment_;  // NOTE(review): meaning not visible in this excerpt
};
```

**SearchState**

```cpp
// A state of the search graph as seen by the decoder (abridged).
class SearchState {
// Arcs of this state, stored as SearchArc entries.
typename VectorHelper<SearchArc>::Vector arcs_;
// Presumably the epsilon arcs of this state - confirm against the full source.
typename VectorHelper<SearchArc>::Vector eps_arc_;

// Token stored for this state.
Token token_;
...
};
```

**ExpandEpsilonArcs()**

```cpp
// Expands epsilon arcs starting from the currently active states.
//
// Active states that have epsilon arcs are queued; each queued state whose
// cost is below the current threshold is expanded. The threshold starts at
// kMaxCost and is tightened to best + beam whenever an expansion returns a
// cost lower than the best seen so far.
void ExpandEpsilonArcs() {
  EpsQueue q;
  float best = kMaxCost;
  float threshold = kMaxCost;

  // Queue every active state that has epsilon arcs and track the lowest cost
  // among all active states.
  for (int i = 0; i != active_states_.size(); ++i) {
    SearchState *ss = active_states_[i];
    best = min(best, ss->Cost());
    if (ss->NumEpsilons()) {
      ss->AddToEpsQueue();
      q.push_back(ss);
    }
  }

  // Process the queue; states at or above the threshold are dropped without
  // being expanded.
  while (!q.empty()) {
    SearchState *ss = q.front();
    q.pop_front();
    ss->RemoveFromEpsQueue();
    if (ss->Cost() < threshold) {
      search_state_.EpsilonExpanded(ss->StateId());
      // With prune_eps set the expansion is bounded by best + beam,
      // otherwise it is unbounded.
      const float bound =
          search_opts_.prune_eps ? best + search_opts_.beam : kMaxCost;
      float f = ss->ExpandEpsilonArcs(&active_states_, &q, this, bound,
                                      search_opts_);
      if (f < best) {
        best = f;
        threshold = best + search_opts_.beam;
      }
    }
  }
}
```

**ExpandActiveArcs()**

```cpp
// Runs the three arc phases in order: arc expansion, band pruning and list
// compaction.
void ExpandActiveArcs() {
  // Expansion is invoked with 0 and the current number of active arcs.
  ArcExpandResults expansion_summary(
      ExpandActiveArcs_ArcExpansion(0, active_arcs_.size()));
  ExpandActiveArcs_BandPruning();
  ExpandActiveArcs_ListCompaction();
}
```

Trace route

```cpp
// src/bin/dcd-recog.cc
runner->Run(po, &opts, word_symbols);
// Abridged driver: loads the cascade, the transition model and the word
// symbols, then decodes once per entry of the feature reader.
int CLevelDecoderMain(ParseOptions &po, SearchOptions *opts, ...) {
Cascade<StdArc>* cascade = 0;
TransModel* trans_model = 0;
SymbolTable* wordsyms = 0;

cascade = Cascade<StdArc>::Read(fst_rs);
trans_model = TransModel::ReadFsts(trans_model_rs, opts->trans_scale);
wordsyms = SymbolTable::ReadText(word_symbols_file);
opts->wordsyms = wordsyms;
for (; !feature_reader.Done(); feature_reader.Next(), ++num) {
// A new decoder is created whenever num is a multiple of
// opts->fst_reset_period.
if (num % opts->fst_reset_period == 0) {
decoder = new Decoder(fst, trans_model, *opts);
}
// Attach this utterance's features to the transition model before decoding.
FrontEnd* frontend = new FrontEnd(features, 1.0f);
trans_model->SetInput(frontend, *opts);
// The lattice output is passed only when opts->gen_lattice is set.
decoder->Decode(trans_model, *opts, &ofst,
opts->gen_lattice ? &lattice : 0);
}
}
// src/include/dcd/clevel-decoder.h
float Decode(TransModel* trans_model, const SearchOptions& opts,
VectorFst<ARC>* ofst, VectorFst<ARC>* lfst = 0) {
trans_model_ = trans_model;
if (!BeginDecode())
...

bool BeginDecode() {
// return impl_->Start();
int s = fst_->Start();
if (s == kNoStateId)
return false;

LatticeState ls = lattice_->CreateStartState(s);
Token token(ls, 0);
// State* ls = NewState(-1, state);
// ls->forward_cost_ = 0.0f;
SearchState* ss = FindSearchState(s);
ss->Activate(token, time_, &active_states_);

```

SearchState represents a state in the search graph.

Every time we visit a state, we cache it in a hash map
(OpenDCD use rdest

Вам также может понравиться