Update Model Info in zipformer-transducer-models.rst (#477)

* Update zipformer-transducer-models.rst * minor fixes
k2-fsa · Oct 9, 2023 · 5b44be9 · 5b44be9
1 parent 115b856
commit 5b44be9
Show file tree

Hide file tree

Showing 3 changed files with 164 additions and 1 deletion.
diff --git a/...s/offline-transducer/code-zipformer/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2-int8.txt b/...s/offline-transducer/code-zipformer/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2-int8.txt
@@ -0,0 +1,24 @@
+/Users/runner/work/sherpa-onnx/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:361 sherpa-onnx-offline --tokens=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt --encoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.int8.onnx --decoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.int8.onnx --joiner=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.int8.onnx ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/0.wav ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/8k.wav 
+
+OfflineRecognizerConfig(feat_config=OfflineFeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.int8.onnx", decoder_filename="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.int8.onnx", joiner_filename="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.int8.onnx"), paraformer=OfflineParaformerModelConfig(model=""), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe"), tdnn=OfflineTdnnModelConfig(model=""), tokens="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type=""), lm_config=OfflineLMConfig(model="", scale=0.5), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5)
+Creating recognizer ...
+Started
+/Users/runner/work/sherpa-onnx/sherpa-onnx/sherpa-onnx/csrc/offline-stream.cc:AcceptWaveformImpl:117 Creating a resampler:
+   in_sample_rate: 8000
+   output_sample_rate: 16000
+
+Done!
+
+./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/0.wav
+{"text":" 对我做了介绍那么我想说的是大家如果对我的研究感兴趣","timestamps":"[0.00, 0.16, 0.40, 0.60, 0.84, 1.08, 1.60, 1.72, 1.88, 2.04, 2.28, 2.44, 2.60, 2.96, 3.12, 3.32, 3.40, 3.60, 3.76, 3.84, 4.00, 4.16, 4.32, 4.52, 4.56]","tokens":[" 对","我","做","了","介","绍","那","么","我","想","说","的","是","大","家","如","果","对","我","的","研","究","感","兴","趣"]}
+----
+./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav
+{"text":" 重点想谈三个问题首先就是这一轮全球金融动<0xE8><0x8D><0xA1>的表现","timestamps":"[0.00, 0.12, 0.48, 0.68, 0.92, 1.12, 1.28, 1.48, 1.80, 2.04, 2.40, 2.56, 2.76, 2.96, 3.08, 3.32, 3.48, 3.68, 3.84, 4.00, 4.20, 4.24, 4.28, 4.40, 4.60, 4.84]","tokens":[" 重","点","想","谈","三","个","问","题","首","先","就","是","这","一","轮","全","球","金","融","动","<0xE8>","<0x8D>","<0xA1>","的","表","现"]}
+----
+./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/8k.wav
+{"text":" 深入地分析这一次全球金融动<0xE8><0x8D><0xA1>背后的根源","timestamps":"[0.00, 0.04, 0.24, 0.52, 0.76, 1.00, 1.40, 1.64, 1.80, 2.12, 2.36, 2.64, 2.80, 3.04, 3.16, 3.20, 3.24, 3.44, 3.64, 3.76, 3.96, 4.20]","tokens":[" ","深","入","地","分","析","这","一","次","全","球","金","融","动","<0xE8>","<0x8D>","<0xA1>","背","后","的","根","源"]}
+----
+num threads: 2
+decoding method: greedy_search
+Elapsed seconds: 0.305 s
+Real time factor (RTF): 0.305 / 15.289 = 0.020
diff --git a/...models/offline-transducer/code-zipformer/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2.txt b/...models/offline-transducer/code-zipformer/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2.txt
@@ -0,0 +1,24 @@
+/Users/runner/work/sherpa-onnx/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:361 sherpa-onnx-offline --tokens=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt --encoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.onnx --decoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.onnx --joiner=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.onnx ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/0.wav ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/8k.wav 
+
+OfflineRecognizerConfig(feat_config=OfflineFeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.onnx", decoder_filename="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.onnx", joiner_filename="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.onnx"), paraformer=OfflineParaformerModelConfig(model=""), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe"), tdnn=OfflineTdnnModelConfig(model=""), tokens="./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type=""), lm_config=OfflineLMConfig(model="", scale=0.5), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5)
+Creating recognizer ...
+Started
+/Users/runner/work/sherpa-onnx/sherpa-onnx/sherpa-onnx/csrc/offline-stream.cc:AcceptWaveformImpl:117 Creating a resampler:
+   in_sample_rate: 8000
+   output_sample_rate: 16000
+
+Done!
+
+./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/0.wav
+{"text":" 对我做了介绍那么我想说的是大家如果对我的研究感兴趣","timestamps":"[0.00, 0.16, 0.40, 0.60, 0.84, 1.08, 1.60, 1.72, 1.88, 2.04, 2.24, 2.44, 2.60, 2.96, 3.12, 3.32, 3.40, 3.60, 3.72, 3.84, 4.00, 4.16, 4.32, 4.52, 4.68]","tokens":[" 对","我","做","了","介","绍","那","么","我","想","说","的","是","大","家","如","果","对","我","的","研","究","感","兴","趣"]}
+----
+./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav
+{"text":" 重点想谈三个问题首先就是这一轮全球金融动<0xE8><0x8D><0xA1>的表现","timestamps":"[0.00, 0.12, 0.48, 0.68, 0.92, 1.12, 1.28, 1.48, 1.80, 2.04, 2.40, 2.56, 2.76, 2.96, 3.08, 3.32, 3.48, 3.68, 3.84, 4.00, 4.20, 4.24, 4.28, 4.40, 4.60, 4.84]","tokens":[" 重","点","想","谈","三","个","问","题","首","先","就","是","这","一","轮","全","球","金","融","动","<0xE8>","<0x8D>","<0xA1>","的","表","现"]}
+----
+./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/8k.wav
+{"text":" 深入地分析这一次全球金融动<0xE8><0x8D><0xA1>背后的根源","timestamps":"[0.00, 0.04, 0.24, 0.52, 0.76, 1.00, 1.40, 1.64, 1.80, 2.12, 2.32, 2.64, 2.80, 3.00, 3.20, 3.24, 3.28, 3.44, 3.64, 3.76, 3.96, 4.20]","tokens":[" ","深","入","地","分","析","这","一","次","全","球","金","融","动","<0xE8>","<0x8D>","<0xA1>","背","后","的","根","源"]}
+----
+num threads: 2
+decoding method: greedy_search
+Elapsed seconds: 0.362 s
+Real time factor (RTF): 0.362 / 15.289 = 0.024
diff --git a/...ource/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.rst b/...ource/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.rst
@@ -5,9 +5,124 @@ Zipformer-transducer-based Models
 
 .. hint::
 
-   Please refer to :ref:`install_sherpa_onnx` to install `sherpa-onnx`_
+   Please refer to :ref:`install_sherpa_onnx` to install `sherpa-onnx`_
    before you read this section.
 
+zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2 (Chinese)
+------------------------------------------------------------
+
+This model is from
+
+`<https://huggingface.co/zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2>`_
+
+which supports Chinese, as it is trained on all the datasets used in the `multi_zh-hans <https://github.com/k2-fsa/icefall/tree/master/egs/multi_zh-hans/ASR/>`_ recipe.
+
+If you are interested in how the model is trained, please refer to
+`<https://github.com/k2-fsa/icefall/pull/1238>`_.
+
+In the following, we describe how to download it and use it with `sherpa-onnx`_.
+
+Download the model
+~~~~~~~~~~~~~~~~~~
+
+Please use the following commands to download it.
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2
+  cd sherpa-onnx-zipformer-multi-zh-hans-2023-9-2
+
+  git lfs pull --include "*.onnx"
+
+Please check that the file sizes of the pre-trained models are correct. See
+the file sizes of ``*.onnx`` files below.
+
+.. code-block:: bash
+
+  sherpa-onnx-zipformer-multi-zh-hans-2023-9-2 zengruijin$ ls -lh *.onnx
+  -rw-rw-r--@ 1 zengruijin  staff   1.2M Sep 18 07:04 decoder-epoch-20-avg-1.int8.onnx
+  -rw-rw-r--@ 1 zengruijin  staff   4.9M Sep 18 07:04 decoder-epoch-20-avg-1.onnx
+  -rw-rw-r--@ 1 zengruijin  staff    66M Sep 18 07:04 encoder-epoch-20-avg-1.int8.onnx
+  -rw-rw-r--@ 1 zengruijin  staff   248M Sep 18 07:05 encoder-epoch-20-avg-1.onnx
+  -rw-rw-r--@ 1 zengruijin  staff   1.0M Sep 18 07:05 joiner-epoch-20-avg-1.int8.onnx
+  -rw-rw-r--@ 1 zengruijin  staff   3.9M Sep 18 07:05 joiner-epoch-20-avg-1.onnx
+
+Decode wave files
+~~~~~~~~~~~~~~~~~
+
+.. hint::
+
+   It supports decoding only wave files of a single channel with 16-bit
+   encoded samples, while the sampling rate does not need to be 16 kHz.
+
+fp32
+^^^^
+
+The following code shows how to use ``fp32`` models to decode wave files:
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  ./build/bin/sherpa-onnx-offline \
+    --tokens=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt \
+    --encoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.onnx \
+    --decoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.onnx \
+    --joiner=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.onnx \
+    ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/0.wav \
+    ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav \
+    ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/8k.wav
+
+.. note::
+
+   Please use ``./build/bin/Release/sherpa-onnx-offline.exe`` for Windows.
+
+You should see the following output:
+
+.. literalinclude:: ./code-zipformer/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2.txt
+
+int8
+^^^^
+
+The following code shows how to use ``int8`` models to decode wave files:
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  ./build/bin/sherpa-onnx-offline \
+    --tokens=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt \
+    --encoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.int8.onnx \
+    --decoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.int8.onnx \
+    --joiner=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.int8.onnx \
+    ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/0.wav \
+    ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav \
+    ./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/8k.wav
+
+.. note::
+
+   Please use ``./build/bin/Release/sherpa-onnx-offline.exe`` for Windows.
+
+You should see the following output:
+
+.. literalinclude:: ./code-zipformer/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2-int8.txt
+
+Speech recognition from a microphone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+  cd /path/to/sherpa-onnx
+
+  ./build/bin/sherpa-onnx-microphone-offline \
+    --tokens=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/tokens.txt \
+    --encoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/encoder-epoch-20-avg-1.onnx \
+    --decoder=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/decoder-epoch-20-avg-1.onnx \
+    --joiner=./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/joiner-epoch-20-avg-1.onnx
+
+
 yfyeung/icefall-asr-cv-corpus-13.0-2023-03-09-en-pruned-transducer-stateless7-2023-04-17 (English)
 --------------------------------------------------------------------------------------------------