diff --git a/docs/source/onnx/pretrained_models/online-paraformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt b/docs/source/onnx/pretrained_models/online-paraformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt new file mode 100644 index 000000000..812b066f8 --- /dev/null +++ b/docs/source/onnx/pretrained_models/online-paraformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt @@ -0,0 +1,8 @@ +/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:361 ./build/bin/sherpa-onnx --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav + +OnlineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OnlineModelConfig(transducer=OnlineTransducerModelConfig(encoder="", decoder="", joiner=""), paraformer=OnlineParaformerModelConfig(encoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx", decoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx"), wenet_ctc=OnlineWenetCtcModelConfig(model="", chunk_size=16, num_left_chunks=4), zipformer2_ctc=OnlineZipformer2CtcModelConfig(model=""), tokens="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=1, debug=False, provider="cpu", model_type=""), lm_config=OnlineLMConfig(model="", scale=0.5), endpoint_config=EndpointConfig(rule1=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=2.4, min_utterance_length=0), rule2=EndpointRule(must_contain_nonsilence=True, min_trailing_silence=1.2, min_utterance_length=0), 
rule3=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=0, min_utterance_length=20)), enable_endpoint=True, max_active_paths=4, hotwords_score=1.5, hotwords_file="", decoding_method="greedy_search", blank_penalty=0) +./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav +Elapsed seconds: 0.84, Real time factor (RTF): 0.14 +有无人知道湾仔活道系点去 +{ "text": "有无人知道湾仔活道系点去", "tokens": [ "有", "无", "人", "知", "道", "湾", "仔", "活", "道", "系", "点", "去" ], "timestamps": [ ], "ys_probs": [ ], "lm_probs": [ ], "context_scores": [ ], "segment": 0, "start_time": 0.00, "is_final": false} + diff --git a/docs/source/onnx/pretrained_models/online-paraformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt b/docs/source/onnx/pretrained_models/online-paraformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt new file mode 100644 index 000000000..1da84b806 --- /dev/null +++ b/docs/source/onnx/pretrained_models/online-paraformer/code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt @@ -0,0 +1,8 @@ +/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:361 ./build/bin/sherpa-onnx --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav + +OnlineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OnlineModelConfig(transducer=OnlineTransducerModelConfig(encoder="", decoder="", joiner=""), paraformer=OnlineParaformerModelConfig(encoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx", 
decoder="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx"), wenet_ctc=OnlineWenetCtcModelConfig(model="", chunk_size=16, num_left_chunks=4), zipformer2_ctc=OnlineZipformer2CtcModelConfig(model=""), tokens="./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=1, debug=False, provider="cpu", model_type=""), lm_config=OnlineLMConfig(model="", scale=0.5), endpoint_config=EndpointConfig(rule1=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=2.4, min_utterance_length=0), rule2=EndpointRule(must_contain_nonsilence=True, min_trailing_silence=1.2, min_utterance_length=0), rule3=EndpointRule(must_contain_nonsilence=False, min_trailing_silence=0, min_utterance_length=20)), enable_endpoint=True, max_active_paths=4, hotwords_score=1.5, hotwords_file="", decoding_method="greedy_search", blank_penalty=0) +./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav +Elapsed seconds: 0.98, Real time factor (RTF): 0.16 +有无人知道湾仔活道系点去 +{ "text": "有无人知道湾仔活道系点去", "tokens": [ "有", "无", "人", "知", "道", "湾", "仔", "活", "道", "系", "点", "去" ], "timestamps": [ ], "ys_probs": [ ], "lm_probs": [ ], "context_scores": [ ], "segment": 0, "start_time": 0.00, "is_final": false} + diff --git a/docs/source/onnx/pretrained_models/online-paraformer/paraformer-models.rst b/docs/source/onnx/pretrained_models/online-paraformer/paraformer-models.rst index ce97d012c..a67f356e2 100644 --- a/docs/source/onnx/pretrained_models/online-paraformer/paraformer-models.rst +++ b/docs/source/onnx/pretrained_models/online-paraformer/paraformer-models.rst @@ -136,3 +136,131 @@ Real-time speech recognition from a microphone If your system is Linux (including embedded Linux), you can also use :ref:`sherpa-onnx-alsa` to do real-time speech recognition with your microphone if ``sherpa-onnx-microphone`` does not work for you. + +.. 
_sherpa_onnx_online_paraformer_trilingual_zh_yue_en: + +csukuangfj/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en (Chinese + Cantonese + English) +------------------------------------------------------------------------------------------------------- + +This model is converted from + +``_ + +You can find the conversion code after downloading and unzipping the model. + +In the following, we describe how to download it and use it with `sherpa-onnx`_. + +Download the model +~~~~~~~~~~~~~~~~~~ + +Please use the following commands to download it. + +.. code-block:: bash + + cd /path/to/sherpa-onnx + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2 + tar xvf sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2 + +Please check that the file sizes of the pre-trained models are correct. See +the file sizes of ``*.onnx`` files below. + +.. code-block:: bash + + sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en fangjun$ ls -lh *.onnx + -rw-r--r-- 1 fangjun staff 69M Feb 29 19:44 decoder.int8.onnx + -rw-r--r-- 1 fangjun staff 218M Feb 29 19:44 decoder.onnx + -rw-r--r-- 1 fangjun staff 159M Feb 29 19:44 encoder.int8.onnx + -rw-r--r-- 1 fangjun staff 607M Feb 29 19:44 encoder.onnx + +Decode a single wave file +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. hint:: + + It supports decoding only wave files of a single channel with 16-bit + encoded samples, while the sampling rate does not need to be 16 kHz. + +fp32 +^^^^ + +The following code shows how to use ``fp32`` models to decode a wave file: + +.. 
code-block:: bash + + cd /path/to/sherpa-onnx + + ./build/bin/sherpa-onnx \ + --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt \ + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.onnx \ + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.onnx \ + ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav + +.. note:: + + Please use ``./build/bin/Release/sherpa-onnx.exe`` for Windows. + +.. caution:: + + If you use Windows and get encoding issues, please run: + + .. code-block:: bash + + CHCP 65001 + + in your commandline. + +You should see the following output: + +.. literalinclude:: ./code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.txt + +int8 +^^^^ + +The following code shows how to use ``int8`` models to decode a wave file: + +.. code-block:: bash + + cd /path/to/sherpa-onnx + + ./build/bin/sherpa-onnx \ + --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt \ + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx \ + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx \ + ./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav + +.. note:: + + Please use ``./build/bin/Release/sherpa-onnx.exe`` for Windows. + +.. caution:: + + If you use Windows and get encoding issues, please run: + + .. code-block:: bash + + CHCP 65001 + + in your commandline. + +You should see the following output: + +.. literalinclude:: ./code-paraformer/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.int8.txt + +Real-time speech recognition from a microphone +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: bash + + cd /path/to/sherpa-onnx + + ./build/bin/sherpa-onnx-microphone \ + --tokens=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt \ + --paraformer-encoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx \ + --paraformer-decoder=./sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx + +.. hint:: + + If your system is Linux (including embedded Linux), you can also use + :ref:`sherpa-onnx-alsa` to do real-time speech recognition with your + microphone if ``sherpa-onnx-microphone`` does not work for you. diff --git a/docs/source/onnx/wasm/hf-spaces.rst b/docs/source/onnx/wasm/hf-spaces.rst index 19794108b..96077691d 100644 --- a/docs/source/onnx/wasm/hf-spaces.rst +++ b/docs/source/onnx/wasm/hf-spaces.rst @@ -3,7 +3,7 @@ Huggingface Spaces (WebAssembly) ================================ -We provide three `Huggingface`_ spaces so that you can try real-time +We provide four `Huggingface`_ spaces so that you can try real-time speech recognition with `WebAssembly`_ in your browser. English only (Zipformer) @@ -27,6 +27,7 @@ English only (Zipformer) The script for building this space can be found at ``_ + Chinese + English (Zipformer) ----------------------------- @@ -59,7 +60,7 @@ Chinese + English (Paraformer) ``_ -.. figure:: ./pic/wasm-hf-zh-en-zipformer.png +.. figure:: ./pic/wasm-hf-zh-en-paraformer.png :alt: start page of wasm :width: 800 :target: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer @@ -68,3 +69,24 @@ Chinese + English (Paraformer) The script for building this space can be found at ``_ + +Chinese + English + Cantonese (Paraformer) +------------------------------------------ + +``_ + +.. hint:: + + If you don't have access to `Huggingface`_, please visit the following mirror: + + ``_ + +.. 
figure:: ./pic/wasm-hf-zh-yue-en-paraformer.png + :alt: start page of wasm + :width: 800 + :target: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer + +.. note:: + + The script for building this space can be found at + ``_ diff --git a/docs/source/onnx/wasm/pic/wasm-hf-zh-yue-en-paraformer.png b/docs/source/onnx/wasm/pic/wasm-hf-zh-yue-en-paraformer.png new file mode 100644 index 000000000..ea53103a9 Binary files /dev/null and b/docs/source/onnx/wasm/pic/wasm-hf-zh-yue-en-paraformer.png differ