Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/k2-fsa/sherpa
Browse files Browse the repository at this point in the history
  • Loading branch information
shaynemei committed Aug 31, 2023
2 parents 57d7b94 + ac59664 commit e62b912
Show file tree
Hide file tree
Showing 23 changed files with 519 additions and 38 deletions.
8 changes: 8 additions & 0 deletions sherpa/bin/offline_transducer_asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ def add_model_args(parser: argparse.ArgumentParser):
help="Feature dimension of the model",
)

parser.add_argument(
"--use-bbpe",
type=str2bool,
default=False,
help="Whether the model to be used is trained with bbpe",
)


def add_decoding_args(parser: argparse.ArgumentParser):
parser.add_argument(
Expand Down Expand Up @@ -413,6 +420,7 @@ def create_recognizer(args) -> sherpa.OfflineRecognizer:
use_gpu=args.use_gpu,
num_active_paths=args.num_active_paths,
context_score=args.context_score,
use_bbpe=args.use_bbpe,
feat_config=feat_config,
decoding_method=args.decoding_method,
fast_beam_search_config=fast_beam_search_config,
Expand Down
8 changes: 8 additions & 0 deletions sherpa/bin/offline_transducer_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ def add_model_args(parser: argparse.ArgumentParser):
help="Feature dimension of the model",
)

parser.add_argument(
"--use-bbpe",
type=sherpa.str2bool,
default=False,
help="Whether the model to be used is trained with bbpe",
)


def add_decoding_args(parser: argparse.ArgumentParser):
parser.add_argument(
Expand Down Expand Up @@ -645,6 +652,7 @@ def create_recognizer(args) -> sherpa.OfflineRecognizer:
tokens=args.tokens,
use_gpu=args.use_gpu,
num_active_paths=args.num_active_paths,
use_bbpe=args.use_bbpe,
feat_config=feat_config,
decoding_method=args.decoding_method,
fast_beam_search_config=fast_beam_search_config,
Expand Down
8 changes: 8 additions & 0 deletions sherpa/bin/online_transducer_asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,13 @@ def add_model_args(parser: argparse.ArgumentParser):
help="Feature dimension of the model",
)

parser.add_argument(
"--use-bbpe",
type=str2bool,
default=False,
help="Whether the model to be used is trained with bbpe",
)


def add_decoding_args(parser: argparse.ArgumentParser):
parser.add_argument(
Expand Down Expand Up @@ -402,6 +409,7 @@ def create_recognizer(args) -> sherpa.OnlineRecognizer:
use_gpu=args.use_gpu,
num_active_paths=args.num_active_paths,
context_score=args.context_score,
use_bbpe=args.use_bbpe,
feat_config=feat_config,
decoding_method=args.decoding_method,
fast_beam_search_config=fast_beam_search_config,
Expand Down
8 changes: 8 additions & 0 deletions sherpa/bin/streaming_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,13 @@ def add_model_args(parser: argparse.ArgumentParser):
help="Feature dimension of the model",
)

parser.add_argument(
"--use-bbpe",
type=sherpa.str2bool,
default=False,
help="Whether the model to be used is trained with bbpe",
)


def add_decoding_args(parser: argparse.ArgumentParser):
parser.add_argument(
Expand Down Expand Up @@ -413,6 +420,7 @@ def create_recognizer(args) -> sherpa.OnlineRecognizer:
tokens=args.tokens,
use_gpu=args.use_gpu,
num_active_paths=args.num_active_paths,
use_bbpe=args.use_bbpe,
temperature=args.temperature,
feat_config=feat_config,
decoding_method=args.decoding_method,
Expand Down
6 changes: 6 additions & 0 deletions sherpa/cpp_api/feature-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ void FeatureConfig::Register(ParseOptions *po) {
fbank_opts.mel_opts.num_bins = 80;
RegisterMelBanksOptions(po, &fbank_opts.mel_opts);

fbank_opts.mel_opts.high_freq = -400;
fbank_opts.frame_opts.remove_dc_offset = true;
fbank_opts.frame_opts.round_to_power_of_two = true;
fbank_opts.energy_floor = 1e-10;
fbank_opts.frame_opts.snip_edges = false;
fbank_opts.frame_opts.samp_freq = 16000;
po->Register("normalize-samples", &normalize_samples,
"true to use samples in the range [-1, 1]. "
"false to use samples in the range [-32768, 32767]. "
Expand Down
13 changes: 10 additions & 3 deletions sherpa/cpp_api/offline-recognizer-transducer-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "sherpa/cpp_api/feature-config.h"
#include "sherpa/cpp_api/offline-recognizer-impl.h"
#include "sherpa/csrc/byte_util.h"
#include "sherpa/csrc/context-graph.h"
#include "sherpa/csrc/offline-conformer-transducer-model.h"
#include "sherpa/csrc/offline-transducer-decoder.h"
Expand All @@ -25,7 +26,7 @@ namespace sherpa {

static OfflineRecognitionResult Convert(
const OfflineTransducerDecoderResult &src, const SymbolTable &sym_table,
int32_t frame_shift_ms, int32_t subsampling_factor) {
int32_t frame_shift_ms, int32_t subsampling_factor, bool use_bbpe) {
OfflineRecognitionResult r;
r.tokens.reserve(src.tokens.size());
r.timestamps.reserve(src.timestamps.size());
Expand All @@ -37,6 +38,12 @@ static OfflineRecognitionResult Convert(

r.tokens.push_back(std::move(sym));
}

if (use_bbpe) {
auto bu = GetByteUtil();
text = bu->Decode(text);
}

r.text = std::move(text);

float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
Expand Down Expand Up @@ -69,7 +76,7 @@ class OfflineRecognizerTransducerImpl : public OfflineRecognizerImpl {
std::make_unique<OfflineTransducerGreedySearchDecoder>(model_.get());
} else if (config.decoding_method == "modified_beam_search") {
decoder_ = std::make_unique<OfflineTransducerModifiedBeamSearchDecoder>(
model_.get(), config.num_active_paths);
model_.get(), config.num_active_paths, config.temperature);
} else if (config.decoding_method == "fast_beam_search") {
config.fast_beam_search_config.Validate();

Expand Down Expand Up @@ -133,7 +140,7 @@ class OfflineRecognizerTransducerImpl : public OfflineRecognizerImpl {
auto ans =
Convert(results[i], symbol_table_,
config_.feat_config.fbank_opts.frame_opts.frame_shift_ms,
model_->SubsamplingFactor());
model_->SubsamplingFactor(), config_.use_bbpe);

ss[i]->SetResult(ans);
}
Expand Down
8 changes: 8 additions & 0 deletions sherpa/cpp_api/offline-recognizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ void OfflineRecognizerConfig::Register(ParseOptions *po) {
"The bonus score for each token in context word/phrase. "
"Used only when decoding_method is modified_beam_search");

po->Register("use-bbpe", &use_bbpe,
"true if the model to use is trained with byte level bpe, "
"The byte level bpe modeling unit is mainly used on CJK "
"languages or multilingual datasets, it can further break "
"the multi-byte unicode characters into byte sequence and "
"then train some kind of sub-char bpes.");

po->Register("temperature", &temperature,
"Softmax temperature,. "
"Used only when decoding_method is modified_beam_search.");
Expand Down Expand Up @@ -155,6 +162,7 @@ std::string OfflineRecognizerConfig::ToString() const {
os << "decoding_method=\"" << decoding_method << "\", ";
os << "num_active_paths=" << num_active_paths << ", ";
os << "context_score=" << context_score << ", ";
os << "use_bbpe=" << (use_bbpe ? "True" : "False") << ", ";
os << "temperature=" << temperature << ")";

return os.str();
Expand Down
3 changes: 3 additions & 0 deletions sherpa/cpp_api/offline-recognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ struct OfflineRecognizerConfig {
/// used only for modified_beam_search
float context_score = 1.5;

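// True if the model was trained with byte-level BPE (bbpe). When set, the
// recognized text is passed through the byte util's Decode() before it is
// stored in the result.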
bool use_bbpe = false;

// temperature for the softmax in the joiner
float temperature = 1.0;

Expand Down
20 changes: 18 additions & 2 deletions sherpa/cpp_api/online-recognizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <utility>

#include "nlohmann/json.hpp"
#include "sherpa/csrc/byte_util.h"
#include "sherpa/csrc/file-utils.h"
#include "sherpa/csrc/log.h"
#include "sherpa/csrc/online-conformer-transducer-model.h"
Expand Down Expand Up @@ -114,6 +115,13 @@ void OnlineRecognizerConfig::Register(ParseOptions *po) {
"pruned_transducer_stateless7_streaming in icefall."
"Number of frames before subsampling during decoding.");

po->Register("use-bbpe", &use_bbpe,
"true if the model to use is trained with byte level bpe, "
"The byte level bpe modeling unit is mainly used on CJK "
"languages or multilingual datasets, it can further break "
"the multi-byte unicode characters into byte sequence and "
"then train some kind of sub-char bpes.");

po->Register("temperature", &temperature,
"Softmax temperature,. "
"Used only when decoding_method is modified_beam_search.");
Expand Down Expand Up @@ -177,14 +185,16 @@ std::string OnlineRecognizerConfig::ToString() const {
os << "left_context=" << left_context << ", ";
os << "right_context=" << right_context << ", ";
os << "chunk_size=" << chunk_size << ", ";
os << "use_bbpe=" << (use_bbpe ? "True" : "False") << ", ";
os << "temperature=" << temperature << ")";
return os.str();
}

static OnlineRecognitionResult Convert(const OnlineTransducerDecoderResult &src,
const SymbolTable &sym_table,
int32_t frame_shift_ms,
int32_t subsampling_factor) {
int32_t subsampling_factor,
bool use_bbpe) {
OnlineRecognitionResult r;
r.tokens.reserve(src.tokens.size());
r.timestamps.reserve(src.timestamps.size());
Expand All @@ -196,6 +206,12 @@ static OnlineRecognitionResult Convert(const OnlineTransducerDecoderResult &src,

r.tokens.push_back(std::move(sym));
}

if (use_bbpe) {
auto bu = GetByteUtil();
text = bu->Decode(text);
}

r.text = std::move(text);

float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
Expand Down Expand Up @@ -440,7 +456,7 @@ class OnlineRecognizer::OnlineRecognizerImpl {

auto ans = Convert(r, symbol_table_,
config_.feat_config.fbank_opts.frame_opts.frame_shift_ms,
model_->SubsamplingFactor());
model_->SubsamplingFactor(), config_.use_bbpe);

ans.is_final = is_final;
ans.segment = s->GetWavSegment();
Expand Down
3 changes: 3 additions & 0 deletions sherpa/cpp_api/online-recognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ struct OnlineRecognizerConfig {
// In number of frames after subsampling
int32_t chunk_size = 12;

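// True if the model was trained with byte-level BPE (bbpe). When set, the
// recognized text is passed through the byte util's Decode() before it is
// stored in the result.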
bool use_bbpe = false;

// temperature for the softmax in the joiner
float temperature = 1.0;

Expand Down
2 changes: 2 additions & 0 deletions sherpa/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Please sort the filenames alphabetically
set(sherpa_srcs
byte_util.cc
context-graph.cc
fbank-features.cc
file-utils.cc
Expand Down Expand Up @@ -66,6 +67,7 @@ if(SHERPA_ENABLE_TESTS)
# test-offline-conformer-transducer-model.cc
# test-online-conv-emformer-transducer-model.cc

test-byte-util.cc
test-context-graph.cc
test-hypothesis.cc
test-log.cc
Expand Down
Loading

0 comments on commit e62b912

Please sign in to comment.