Skip to content

Commit

Permalink
Merge branch 'master' of github.com:alphacep/vosk-api
Browse files Browse the repository at this point in the history
  • Loading branch information
nshmyrev committed Aug 31, 2021
2 parents e2af710 + 6b1b620 commit 7b3ea0b
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 61 deletions.
91 changes: 61 additions & 30 deletions go/example/test_simple.go
Original file line number Diff line number Diff line change
@@ -1,36 +1,67 @@
package test_simple
package main

import (
"flag"
"os"
"github.com/alphacep/vosk-api/go"
"bufio"
"flag"
"fmt"
"io"
"log"
"os"

vosk "github.com/alphacep/vosk-api/go"
)

// main transcribes the audio file named by the -f flag and prints the
// recognizer output to stdout.
//
// It loads the speech model from the "model" directory and the speaker model
// from "model-spk", builds a recognizer with a speaker model attached, and
// streams the file to it in 4096-byte chunks. A result is printed each time
// AcceptWaveform returns a non-zero status, and the final result is printed
// once the file is exhausted.
func main() {
	var filename string
	flag.StringVar(&filename, "f", "", "file to transcribe")
	flag.Parse()

	model, err := vosk.NewModel("model")
	if err != nil {
		log.Fatal(err)
	}

	spkModel, err := vosk.NewSpkModel("model-spk")
	if err != nil {
		log.Fatal(err)
	}

	sampleRate := 16000.0
	// NewRecognizer takes (model, sampleRate) only; the constructor that also
	// accepts a speaker model is NewRecognizerSpk.
	rec, err := vosk.NewRecognizerSpk(model, sampleRate, spkModel)
	if err != nil {
		log.Fatal(err)
	}
	rec.SetWords(1)

	file, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	reader := bufio.NewReader(file)
	buf := make([]byte, 4096)

	for {
		n, err := reader.Read(buf)
		// Hand over only the bytes actually read: a short read (typically the
		// last one) would otherwise replay stale data left in buf. Per the
		// io.Reader contract the n bytes are processed before err is examined.
		if n > 0 && rec.AcceptWaveform(buf[:n]) != 0 {
			fmt.Println(string(rec.Result()))
		}
		if err != nil {
			if err != io.EOF {
				log.Fatal(err)
			}
			break
		}
	}

	fmt.Println(string(rec.FinalResult()))
}
140 changes: 109 additions & 31 deletions go/vosk.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,133 @@ import "C"

// VoskModel contains a reference to the C VoskModel.
//
// The model field holds the pointer returned by C.vosk_model_new (see
// NewModel) and is the handle passed to the other model-level C calls.
type VoskModel struct {
	model *C.struct_VoskModel
}

// NewModel creates a new VoskModel instance from the model located at
// modelPath.
//
// The path is copied into a NUL-terminated C buffer that is released when the
// call returns, so no C memory is leaked per invocation.
//
// NOTE(review): the pointer returned by vosk_model_new is not inspected and
// the returned error is always nil — confirm how a failed load should be
// reported to callers.
func NewModel(modelPath string) (*VoskModel, error) {
	// C.CBytes returns an unsafe.Pointer that C.free accepts directly; the
	// explicit trailing zero byte makes the copy a valid C string.
	cpath := C.CBytes(append([]byte(modelPath), 0))
	defer C.free(cpath)

	internal := C.vosk_model_new((*C.char)(cpath))
	return &VoskModel{model: internal}, nil
}

// freeModel hands the C model referenced by model to vosk_model_free.
func freeModel(model *VoskModel) {
	handle := model.model
	C.vosk_model_free(handle)
}

// FindWord checks if a word can be recognized by the model.
// Returns the word symbol if the word exists inside the model or
// -1 otherwise.
//
// word does not need to carry a trailing zero byte: a NUL-terminated copy is
// made before it is handed to the C side, and the caller's slice is never
// modified.
func (m *VoskModel) FindWord(word []byte) int {
	// vosk_model_find_word receives only a char pointer (no length), so the
	// buffer must be NUL-terminated. make zero-fills, leaving the final byte 0.
	terminated := make([]byte, len(word)+1)
	copy(terminated, word)

	cbuf := C.CBytes(terminated)
	defer C.free(cbuf)

	return int(C.vosk_model_find_word(m.model, (*C.char)(cbuf)))
}

// VoskSpkModel contains a reference to the C VoskSpkModel.
//
// The spkModel field holds the pointer returned by C.vosk_spk_model_new (see
// NewSpkModel).
type VoskSpkModel struct {
	spkModel *C.struct_VoskSpkModel
}

// NewSpkModel creates a new VoskSpkModel instance from the speaker model
// located at spkModelPath.
//
// The path is copied into a NUL-terminated C buffer that is released when the
// call returns, so no C memory is leaked per invocation.
//
// NOTE(review): the pointer returned by vosk_spk_model_new is not inspected
// and the returned error is always nil — confirm how a failed load should be
// reported to callers.
func NewSpkModel(spkModelPath string) (*VoskSpkModel, error) {
	// C.CBytes returns an unsafe.Pointer that C.free accepts directly; the
	// explicit trailing zero byte makes the copy a valid C string.
	cpath := C.CBytes(append([]byte(spkModelPath), 0))
	defer C.free(cpath)

	internal := C.vosk_spk_model_new((*C.char)(cpath))
	return &VoskSpkModel{spkModel: internal}, nil
}

// freeSpkModel hands the C speaker model referenced by model to
// vosk_spk_model_free.
func freeSpkModel(model *VoskSpkModel) {
	handle := model.spkModel
	C.vosk_spk_model_free(handle)
}

// VoskRecognizer contains a reference to the C VoskRecognizer.
//
// The rec field holds the pointer returned by one of the vosk_recognizer_new*
// C constructors and is the handle every recognizer method passes to C.
type VoskRecognizer struct {
	rec *C.struct_VoskRecognizer
}

func VoskFinalResult(recognizer *VoskRecognizer, buffer []byte) string {
cbuf := C.CBytes(buffer)
defer C.free(cbuf)
_ = C.vosk_recognizer_accept_waveform(recognizer.rec, (*C.char)(cbuf), C.int(len(buffer)))
result := C.GoString(C.vosk_recognizer_final_result(recognizer.rec))
return result
func freeRecognizer(recognizer *VoskRecognizer) {
C.vosk_recognizer_free(recognizer.rec)
}

// NewModel creates a new VoskModel instance
func NewModel(modelPath string) (*VoskModel, error) {
var internal *C.struct_VoskModel
internal = C.vosk_model_new(C.CString(modelPath))
model := &VoskModel{model: internal}
return model, nil
// NewRecognizer creates a new VoskRecognizer instance
func NewRecognizer(model *VoskModel, sampleRate float64) (*VoskRecognizer, error) {
internal := C.vosk_recognizer_new(model.model, C.float(sampleRate))
rec := &VoskRecognizer{rec: internal}
return rec, nil
}

// NewRecognizer creates a new VoskRecognizer instance
func NewRecognizer(model *VoskModel) (*VoskRecognizer, error) {
var internal *C.struct_VoskRecognizer
internal = C.vosk_recognizer_new(model.model, 16000.0)
rec := &VoskRecognizer{rec: internal}
return rec, nil
// NewRecognizerSpk creates a new VoskRecognizer instance with a speaker model.
func NewRecognizerSpk(model *VoskModel, sampleRate float64, spkModel *VoskSpkModel) (*VoskRecognizer, error) {
internal := C.vosk_recognizer_new_spk(model.model, C.float(sampleRate), spkModel.spkModel)
rec := &VoskRecognizer{rec: internal}
return rec, nil
}

func freeModel(model *VoskModel) {
C.vosk_model_free(model.model)
// NewRecognizerGrm creates a new VoskRecognizer instance with the phrase list.
func NewRecognizerGrm(model *VoskModel, sampleRate float64, grammer []byte) (*VoskRecognizer, error) {
cbuf := C.CBytes(grammer)
defer C.free(cbuf)
internal := C.vosk_recognizer_new_grm(model.model, C.float(sampleRate), (*C.char)(cbuf))
rec := &VoskRecognizer{rec: internal}
return rec, nil
}

func freeRecognizer(recognizer *VoskRecognizer) {
C.vosk_recognizer_free(recognizer.rec)
// SetSpkModel adds a speaker model to an already initialized recognizer.
func (r *VoskRecognizer) SetSpkModel(spkModel *VoskSpkModel) {
C.vosk_recognizer_set_spk_model(r.rec, spkModel.spkModel)
}

// NewSpkModel creates a new VoskSpkModel instance
func NewSpkModel(spkModelPath string) (*VoskSpkModel, error) {
var internal *C.struct_VoskSpkModel
internal = C.vosk_spk_model_new(C.CString(spkModelPath))
spkModel := &VoskSpkModel{spkModel: internal}
return spkModel, nil
// SetMaxAlternatives configures the recognizer to output n-best results.
func (r *VoskRecognizer) SetMaxAlternatives(maxAlternatives int) {
C.vosk_recognizer_set_max_alternatives(r.rec, C.int(maxAlternatives))
}

// SetWords enables words with times in the output.
//
// words is converted to a C int and forwarded to vosk_recognizer_set_words.
func (r *VoskRecognizer) SetWords(words int) {
	flag := C.int(words)
	C.vosk_recognizer_set_words(r.rec, flag)
}

// AcceptWaveform accepts and processes a new chunk of the voice data.
//
// buffer is copied into C memory for the duration of the call and the copy is
// freed before returning. The integer status reported by
// vosk_recognizer_accept_waveform is returned unchanged.
func (r *VoskRecognizer) AcceptWaveform(buffer []byte) int {
	size := C.int(len(buffer))

	data := C.CBytes(buffer)
	defer C.free(data)

	return int(C.vosk_recognizer_accept_waveform(r.rec, (*C.char)(data), size))
}

// Result returns a speech recognition result.
//
// The C string produced by vosk_recognizer_result is copied into Go memory
// and returned as a byte slice.
func (r *VoskRecognizer) Result() []byte {
	text := C.GoString(C.vosk_recognizer_result(r.rec))
	return []byte(text)
}

// PartialResult returns a partial speech recognition result.
//
// The C string produced by vosk_recognizer_partial_result is copied into Go
// memory and returned as a byte slice.
func (r *VoskRecognizer) PartialResult() []byte {
	// Must query the partial-result entry point; calling
	// vosk_recognizer_result here would duplicate Result instead.
	return []byte(C.GoString(C.vosk_recognizer_partial_result(r.rec)))
}

// FinalResult returns a speech recognition result. Same as Result, but
// doesn't wait for silence.
//
// The C string produced by vosk_recognizer_final_result is copied into Go
// memory and returned as a byte slice.
func (r *VoskRecognizer) FinalResult() []byte {
	text := C.GoString(C.vosk_recognizer_final_result(r.rec))
	return []byte(text)
}

// Reset resets the recognizer by passing its C handle to
// vosk_recognizer_reset.
func (r *VoskRecognizer) Reset() {
	handle := r.rec
	C.vosk_recognizer_reset(handle)
}

// SetLogLevel sets the log level for Kaldi messages.
//
// logLevel is converted to a C int and forwarded to vosk_set_log_level.
func SetLogLevel(logLevel int) {
	level := C.int(logLevel)
	C.vosk_set_log_level(level)
}

// GPUInit automatically selects a CUDA device and allows multithreading.
//
// It takes no arguments, returns nothing, and is a direct call to the C
// function vosk_gpu_init.
func GPUInit() {
C.vosk_gpu_init()
}

// GPUThreadInit inits CUDA device in a multi-threaded environment.
//
// It takes no arguments, returns nothing, and is a direct call to the C
// function vosk_gpu_thread_init.
func GPUThreadInit() {
C.vosk_gpu_thread_init()
}

0 comments on commit 7b3ea0b

Please sign in to comment.