Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

swift/objc: adding sessionId and overall scores #2642

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,10 @@ - (void)pronunciationAssessFromMicrophone {

[pronunicationConfig enableProsodyAssessment];

[speechRecognizer addSessionStartedEventHandler: ^ (SPXRecognizer *sender, SPXSessionEventArgs *eventArgs) {
NSLog(@"SESSION ID: %@", eventArgs.sessionId);
}];

[pronunicationConfig applyToRecognizer:speechRecognizer];
[self updateRecognitionStatusText:(@"Assessing...")];
[self->recorder record];
Expand Down Expand Up @@ -798,15 +802,19 @@ - (void)pronunciationAssessFromFile {

[pronunicationConfig enableProsodyAssessment];

[speechRecognizer addSessionStartedEventHandler: ^ (SPXRecognizer *sender, SPXSessionEventArgs *eventArgs) {
NSLog(@"SESSION ID: %@", eventArgs.sessionId);
}];

[pronunicationConfig applyToRecognizer:speechRecognizer];
[self updateRecognitionStatusText:(@"Assessing...")];

// connect callbacks
__block double sumAccuracy = 0;
__block double sumProsody = 0;
__block double sumFluency = 0;
__block int sumWords = 0;
__block int countProsody = 0;
NSMutableArray *recognizedWords = [NSMutableArray array];

[speechRecognizer addRecognizedEventHandler: ^ (SPXSpeechRecognizer *recognizer, SPXSpeechRecognitionEventArgs *eventArgs) {
NSLog(@"Received final result event. SessionId: %@, recognition result:%@. Status %ld. offset %llu duration %llu resultid:%@", eventArgs.sessionId, eventArgs.result.text, (long)eventArgs.result.reason, eventArgs.result.offset, eventArgs.result.duration, eventArgs.result.resultId);
SPXPronunciationAssessmentResult *pronunciationResult = [[SPXPronunciationAssessmentResult alloc]init:eventArgs.result];
Expand All @@ -816,9 +824,9 @@ - (void)pronunciationAssessFromFile {
[self updateRecognitionResultText:resultText];
NSArray *words = [eventArgs.result.text componentsSeparatedByString:@" "];
NSUInteger wordCount = [words count];
sumAccuracy += pronunciationResult.accuracyScore * wordCount;
sumFluency += pronunciationResult.fluencyScore * wordCount;
sumWords += wordCount;

[recognizedWords addObjectsFromArray:pronunciationResult.words];
}];

__block bool end = false;
Expand All @@ -844,9 +852,58 @@ - (void)pronunciationAssessFromFile {
[speechRecognizer stopContinuousRecognition];

if (sumWords > 0) {
// Overall accuracy and fluency scores are the weighted average of scores of all sentences.
NSString *resultText = [NSString stringWithFormat:@"Assessment finished. \nOverall accuracy score: %.2f, prosody score: %.2f, fluency score: %.2f.", sumAccuracy / sumWords, sumProsody / countProsody, sumFluency / sumWords];
// Accuracy score
double totalAccurayScore = 0;
int accuracyCount = 0;
int validCount = 0;
double durationSum = 0.0;

for (SPXWordLevelTimingResult *word in recognizedWords) {
if (![word.errorType isEqualToString:@"Insertion"]) {
totalAccurayScore += word.accuracyScore;
accuracyCount += 1;
}

if ([word.errorType isEqualToString:@"None"]) {
durationSum += word.duration + 0.01;
validCount += 1;
}

}
double accurayScore = (accuracyCount > 0) ? (totalAccurayScore) / accuracyCount : NAN;

// Fluency score
SPXWordLevelTimingResult *firstWord = [recognizedWords firstObject];
double startOffset = firstWord.offset;

SPXWordLevelTimingResult *lastWord = [recognizedWords lastObject];
double endOffset = lastWord.offset + lastWord.duration + 0.01;

double fluencyScore = durationSum / (endOffset - startOffset) * 100.0;

// Completeness score
double completenessScore = (double)validCount / (double)accuracyCount * 100.0;
if (completenessScore > 100) {
completenessScore = 100;
}

// Prosody score
double prosodyScore = sumProsody / countProsody;

double minScore = MIN(accurayScore, MIN(prosodyScore, MIN(completenessScore, fluencyScore)));

// Pronunciation score
double pronunciationScore = 0.2 * (accurayScore + prosodyScore + completenessScore + fluencyScore) + 0.2 * minScore;

// Overall scores.
NSString *resultText = [NSString stringWithFormat:@"Assessment finished. \nOverall accuracy score: %.2f, prosody score: %.2f, fluency score: %.2f, completeness score: %.2f, pronunciation score: %.2f", accurayScore, prosodyScore, fluencyScore, completenessScore, pronunciationScore];
[self updateRecognitionResultText:resultText];

for (NSInteger idx = 0; idx < recognizedWords.count; idx++) {
SPXWordLevelTimingResult *word = recognizedWords[idx];
NSLog(@" %ld: word: %@\taccuracy score: %.2f\terror type: %@",
(long)(idx + 1), word.word, word.accuracyScore, word.errorType);
}
}
}

Expand Down Expand Up @@ -891,6 +948,11 @@ - (void)pronunciationAssessFromStream {

[pronAssessmentConfig enableProsodyAssessment];

dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
[speechRecognizer addSessionStartedEventHandler: ^ (SPXRecognizer *sender, SPXSessionEventArgs *eventArgs) {
NSLog(@"SESSION ID: %@", eventArgs.sessionId);
}];

[pronAssessmentConfig applyToRecognizer:speechRecognizer error:nil];

[audioInputStream write:audioData];
Expand Down Expand Up @@ -924,7 +986,9 @@ - (void)pronunciationAssessFromStream {
NSDate *endTime = [NSDate date];
double timeCost = [endTime timeIntervalSinceDate:startTime] * 1000;
NSLog(@"Time cost: %fms", timeCost);
dispatch_semaphore_signal(semaphore);
}];
dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
}

/*
Expand Down Expand Up @@ -955,6 +1019,11 @@ - (void)pronunciationAssessConfiguredWithJson {

[pronAssessmentConfig enableProsodyAssessment];

dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
[speechRecognizer addSessionStartedEventHandler: ^ (SPXRecognizer *sender, SPXSessionEventArgs *eventArgs) {
NSLog(@"SESSION ID: %@", eventArgs.sessionId);
}];

[pronAssessmentConfig applyToRecognizer:speechRecognizer error:nil];

[self updateRecognitionResultText:@"Analysising"];
Expand All @@ -981,7 +1050,9 @@ - (void)pronunciationAssessConfiguredWithJson {
NSString *finalResult = [NSString stringWithString:mResult];
NSLog(@"%@", finalResult);
[self updateRecognitionResultText:finalResult];
dispatch_semaphore_signal(semaphore);
}];
dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
}

/*
Expand Down Expand Up @@ -1017,6 +1088,10 @@ - (void)pronunciationAssessWithContentAssessment {
[self updateRecognitionResultText:(@"Speech Recognition Error")];
return;
}

[speechRecognizer addSessionStartedEventHandler: ^ (SPXRecognizer *sender, SPXSessionEventArgs *eventArgs) {
NSLog(@"SESSION ID: %@", eventArgs.sessionId);
}];

// Create pronunciation assessment config, set grading system, granularity
SPXPronunciationAssessmentConfiguration *pronunicationConfig =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
archiveVersion = 1;
classes = {
};
objectVersion = 51;
objectVersion = 54;
objects = {

/* Begin PBXBuildFile section */
Expand All @@ -15,6 +15,9 @@
3CB230BA2692E546009AD484 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 3CB230B92692E546009AD484 /* Assets.xcassets */; };
3CB230BD2692E546009AD484 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 3CB230BB2692E546009AD484 /* LaunchScreen.storyboard */; };
3CB230E526943123009AD484 /* pronunciation_assessment.wav in Resources */ = {isa = PBXBuildFile; fileRef = 3CB230E426943123009AD484 /* pronunciation_assessment.wav */; };
520398BD2CCB91FC00FD9FF6 /* zhcn_short_dummy_sample.wav in Resources */ = {isa = PBXBuildFile; fileRef = 520398BA2CCB91FC00FD9FF6 /* zhcn_short_dummy_sample.wav */; };
520398BE2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.txt in Resources */ = {isa = PBXBuildFile; fileRef = 520398BB2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.txt */; };
520398BF2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.wav in Resources */ = {isa = PBXBuildFile; fileRef = 520398BC2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.wav */; };
52CF43DF2AEBC4D200227EF3 /* pronunciation_assessment_fall.wav in Resources */ = {isa = PBXBuildFile; fileRef = 52CF43DE2AEBC4D200227EF3 /* pronunciation_assessment_fall.wav */; };
52FC64FE29CC4CB2000C8918 /* whatstheweatherlike.wav in Resources */ = {isa = PBXBuildFile; fileRef = 52FC64FD29CC4CB2000C8918 /* whatstheweatherlike.wav */; };
/* End PBXBuildFile section */
Expand All @@ -29,6 +32,9 @@
3CB230BC2692E546009AD484 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
3CB230BE2692E546009AD484 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
3CB230E426943123009AD484 /* pronunciation_assessment.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = pronunciation_assessment.wav; sourceTree = "<group>"; };
520398BA2CCB91FC00FD9FF6 /* zhcn_short_dummy_sample.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = zhcn_short_dummy_sample.wav; sourceTree = "<group>"; };
520398BB2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = zhcn_continuous_mode_sample.txt; sourceTree = "<group>"; };
520398BC2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = zhcn_continuous_mode_sample.wav; sourceTree = "<group>"; };
52CF43DE2AEBC4D200227EF3 /* pronunciation_assessment_fall.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = pronunciation_assessment_fall.wav; sourceTree = "<group>"; };
52FC64FD29CC4CB2000C8918 /* whatstheweatherlike.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = whatstheweatherlike.wav; sourceTree = "<group>"; };
7216C25C7CBBBBCE69D14199 /* Pods-speech-samples.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-speech-samples.debug.xcconfig"; path = "Target Support Files/Pods-speech-samples/Pods-speech-samples.debug.xcconfig"; sourceTree = "<group>"; };
Expand Down Expand Up @@ -59,6 +65,9 @@
3CB230A42692E52F009AD484 = {
isa = PBXGroup;
children = (
520398BB2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.txt */,
520398BC2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.wav */,
520398BA2CCB91FC00FD9FF6 /* zhcn_short_dummy_sample.wav */,
52CF43DE2AEBC4D200227EF3 /* pronunciation_assessment_fall.wav */,
52FC64FD29CC4CB2000C8918 /* whatstheweatherlike.wav */,
3CB230E426943123009AD484 /* pronunciation_assessment.wav */,
Expand Down Expand Up @@ -160,7 +169,10 @@
buildActionMask = 2147483647;
files = (
3CB230BD2692E546009AD484 /* LaunchScreen.storyboard in Resources */,
520398BD2CCB91FC00FD9FF6 /* zhcn_short_dummy_sample.wav in Resources */,
3CB230BA2692E546009AD484 /* Assets.xcassets in Resources */,
520398BE2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.txt in Resources */,
520398BF2CCB91FC00FD9FF6 /* zhcn_continuous_mode_sample.wav in Resources */,
52FC64FE29CC4CB2000C8918 /* whatstheweatherlike.wav in Resources */,
52CF43DF2AEBC4D200227EF3 /* pronunciation_assessment_fall.wav in Resources */,
3CB230E526943123009AD484 /* pronunciation_assessment.wav in Resources */,
Expand Down
Loading