Add streaming Paraformer for Cantonese. (#548)

k2-fsa · Mar 1, 2024 · 0b0f176 · 0b0f176
1 parent 8e82146
commit 0b0f176
Show file tree

Hide file tree

Showing 5 changed files with 168 additions and 2 deletions.
diff --git a/...rmer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt b/...rmer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt
@@ -0,0 +1,8 @@
+/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:361 ./build/bin/sherpa-onnx --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav 
+
+OnlineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OnlineModelConfig(transducer=OnlineTransducerModelConfig(encoder="", decoder="", joiner=""), paraformer=OnlineParaformerModelConfig(encoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx", decoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx"), wenet_ctc=OnlineWenetCtcModelConfig(model="", chunk_size=16, num_left_chunks=4), zipformer2_ctc=OnlineZipformer2CtcModelConfig(model=""), tokens="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=1, debug=False, provider="cpu", model_type=""), lm_config=OnlineLMConfig(model="", scale=0.5), endpoint_config=EndpointConfig(rule1=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=2.4, min_utterance_length=0), rule2=EndpointRule(must_contain_nonsilence=True, min_trailing_silence=1.2, min_utterance_length=0), rule3=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=0, min_utterance_length=20)), enable_endpoint=True, max_active_paths=4, hotwords_score=1.5, hotwords_file="", decoding_method="greedy_search", blank_penalty=0)
+./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav
+Elapsed seconds: 0.84, Real time factor (RTF): 0.14
+有无人知道湾仔活道系点去
+{ "text": "有无人知道湾仔活道系点去", "tokens": [ "有", "无", "人", "知", "道", "湾", "仔", "活", "道", "系", "点", "去" ], "timestamps": [  ], "ys_probs": [  ], "lm_probs": [  ], "context_scores": [  ], "segment": 0, "start_time": 0.00, "is_final": false}
+
diff --git a/...araformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt b/...araformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt
@@ -0,0 +1,8 @@
+/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:361 ./build/bin/sherpa-onnx --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav 
+
+OnlineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OnlineModelConfig(transducer=OnlineTransducerModelConfig(encoder="", decoder="", joiner=""), paraformer=OnlineParaformerModelConfig(encoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx", decoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx"), wenet_ctc=OnlineWenetCtcModelConfig(model="", chunk_size=16, num_left_chunks=4), zipformer2_ctc=OnlineZipformer2CtcModelConfig(model=""), tokens="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=1, debug=False, provider="cpu", model_type=""), lm_config=OnlineLMConfig(model="", scale=0.5), endpoint_config=EndpointConfig(rule1=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=2.4, min_utterance_length=0), rule2=EndpointRule(must_contain_nonsilence=True, min_trailing_silence=1.2, min_utterance_length=0), rule3=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=0, min_utterance_length=20)), enable_endpoint=True, max_active_paths=4, hotwords_score=1.5, hotwords_file="", decoding_method="greedy_search", blank_penalty=0)
+./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav
+Elapsed seconds: 0.98, Real time factor (RTF): 0.16
+有无人知道湾仔活道系点去
+{ "text": "有无人知道湾仔活道系点去", "tokens": [ "有", "无", "人", "知", "道", "湾", "仔", "活", "道", "系", "点", "去" ], "timestamps": [  ], "ys_probs": [  ], "lm_probs": [  ], "context_scores": [  ], "segment": 0, "start_time": 0.00, "is_final": false}
+
diff --git a/docs/source/onnx/pretrained_models/online-paraformer/paraformer-models.rst b/docs/source/onnx/pretrained_models/online-paraformer/paraformer-models.rst
@@ -136,3 +136,131 @@ Real-time speech recognition from a microphone
    If your system is Linux (including embedded Linux), you can also use
    :ref:`sherpa-onnx-alsa` to do real-time speech recognition with your
    microphone if ``sherpa-onnx-microphone`` does not work for you.
+
+.. _sherpa_onnx_online_paraformer_trilingual_zh_yue_en:
+
+csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en (Chinese + Cantonese + English)
+-------------------------------------------------------------------------------------------------------
+
+This model is converted from
+
+`<https://modelscope.cn/models/dengcunqin/speech_paraformer-large_asr_nat-zh-cantonese-en-16k-vocab8501-online/files>`_
+
+You can find the conversion code after downloading and unzipping the model.
+
+In the following, we describe how to download it and use it with `sherpa-onnx`_.
+
+Download the model
+~~~~~~~~~~~~~~~~~~
+
+Please use the following commands to download it.
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
+  tar xvf sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
+
+Please check that the file sizes of the pre-trained models are correct. See
+the file sizes of ``*.onnx`` files below.
+
+.. code-block:: bash
+
+  sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en fangjun$ ls -lh *.onnx
+  -rw-r--r--  1 fangjun  staff    69M Feb 29 19:44 decoder.int8.onnx
+  -rw-r--r--  1 fangjun  staff   218M Feb 29 19:44 decoder.onnx
+  -rw-r--r--  1 fangjun  staff   159M Feb 29 19:44 encoder.int8.onnx
+  -rw-r--r--  1 fangjun  staff   607M Feb 29 19:44 encoder.onnx
+
+Decode a single wave file
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. hint::
+
+   It supports decoding only wave files of a single channel with 16-bit
+   encoded samples, while the sampling rate does not need to be 16 kHz.
+
+fp32
+^^^^
+
+The following code shows how to use ``fp32`` models to decode a wave file:
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  ./build/bin/sherpa-onnx \
+    --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt \
+    --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.onnx \
+    --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.onnx \
+    ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav
+
+.. note::
+
+   Please use ``./build/bin/Release/sherpa-onnx.exe`` for Windows.
+
+.. caution::
+
+   If you use Windows and get encoding issues, please run:
+
+      .. code-block:: bash
+
+          CHCP 65001
+
+   in your commandline.
+
+You should see the following output:
+
+.. literalinclude:: ./code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt
+
+int8
+^^^^
+
+The following code shows how to use ``int8`` models to decode a wave file:
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  ./build/bin/sherpa-onnx \
+    --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt \
+    --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx \
+    --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx \
+    ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav
+
+.. note::
+
+   Please use ``./build/bin/Release/sherpa-onnx.exe`` for Windows.
+
+.. caution::
+
+   If you use Windows and get encoding issues, please run:
+
+      .. code-block:: bash
+
+          CHCP 65001
+
+   in your commandline.
+
+You should see the following output:
+
+.. literalinclude:: ./code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt
+
+Real-time speech recognition from a microphone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  ./build/bin/sherpa-onnx-microphone \
+    --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt \
+    --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx \
+    --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx
+
+.. hint::
+
+   If your system is Linux (including embedded Linux), you can also use
+   :ref:`sherpa-onnx-alsa` to do real-time speech recognition with your
+   microphone if ``sherpa-onnx-microphone`` does not work for you.
diff --git a/docs/source/onnx/wasm/hf-spaces.rst b/docs/source/onnx/wasm/hf-spaces.rst
@@ -3,7 +3,7 @@
 Huggingface Spaces (WebAssembly)
 ================================
 
-We provide three `Huggingface`_ spaces so that you can try real-time
+We provide four `Huggingface`_ spaces so that you can try real-time
 speech recognition with `WebAssembly`_ in your browser.
 
 English only (Zipformer)
@@ -27,6 +27,7 @@ English only (Zipformer)
    The script for building this space can be found at
    `<https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml>`_
 
+
 Chinese + English (Zipformer)
 -----------------------------
 
@@ -59,7 +60,7 @@ Chinese + English (Paraformer)
 
     `<https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer/summary>`_
 
-.. figure:: ./pic/wasm-hf-zh-en-zipformer.png
+.. figure:: ./pic/wasm-hf-zh-en-paraformer.png
    :alt: start page of wasm
    :width: 800
    :target: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
@@ -68,3 +69,24 @@ Chinese + English (Paraformer)
 
    The script for building this space can be found at
    `<https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml>`_
+
+Chinese + English + Cantonese (Paraformer)
+------------------------------------------
+
+`<https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer>`_
+
+.. hint::
+
+   If you don't have access to `Huggingface`_, please visit the following mirror:
+
+    `<https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer/summary>`_
+
+.. figure:: ./pic/wasm-hf-zh-yue-en-paraformer.png
+   :alt: start page of wasm
+   :width: 800
+   :target: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
+
+.. note::
+
+   The script for building this space can be found at
+   `<https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml>`_
diff --git a/docs/source/onnx/wasm/pic/wasm-hf-zh-yue-en-paraformer.png b/docs/source/onnx/wasm/pic/wasm-hf-zh-yue-en-paraformer.png