From 5ef9091f286589b510ee16996e8646ee7dff84c3 Mon Sep 17 00:00:00 2001
From: Michael Welles
Date: Sat, 3 Oct 2020 21:30:44 -0400
Subject: [PATCH] Tests and fixes for panics when concurrent calls are made to FindNormalizedFold()

---
Review notes (text between the "---" line and the first "diff --git" line is
commentary, not part of the commit message):

- fuzzy.go: the four package-level transformer variables become functions that
  are evaluated on every call, so the transform.Chain-based transformers are
  built per call instead of being one instance shared by all callers.
- fuzzy_test.go: the concurrency test starts exactly `procs` goroutines, the
  same number of values it receives from `done`. Starting `procs + 1`
  (`i <= procs`) left one goroutine blocked forever on its send to the
  unbuffered channel after the test returned.
- fuzzy_test.go: the added lines are gofmt-clean (`cnt := 0`, `<-done`). The
  number of added lines is unchanged, so the hunk header and diffstat still
  hold; the post-image blob id on the fuzzy_test.go index line predates this
  adjustment.

 fuzzy/fuzzy.go      | 52 ++++++++++++++++++++++++++++-----------------
 fuzzy/fuzzy_test.go | 22 +++++++++++++++++++
 2 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/fuzzy/fuzzy.go b/fuzzy/fuzzy.go
index f5f3b33..8e3ca49 100644
--- a/fuzzy/fuzzy.go
+++ b/fuzzy/fuzzy.go
@@ -12,10 +12,22 @@ import (
 	"golang.org/x/text/unicode/norm"
 )
 
-var foldTransformer = unicodeFoldTransformer{}
-var noopTransformer = transform.Nop
-var normalizeTransformer = transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
-var normalizeFoldTransformer = transform.Chain(normalizeTransformer, foldTransformer)
+func noopTransformer() transform.Transformer {
+	return transform.Nop
+}
+
+func foldTransformer() transform.Transformer {
+	return unicodeFoldTransformer{}
+}
+
+func normalizeTransformer() transform.Transformer {
+	return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
+}
+
+func normalizedFoldTransformer() transform.Transformer {
+	return transform.Chain(normalizeTransformer(), foldTransformer())
+}
+
 
 // Match returns true if source matches target using a fuzzy-searching
 // algorithm. Note that it doesn't implement Levenshtein distance (see
@@ -23,22 +35,22 @@ var normalizeFoldTransformer = transform.Chain(normalizeTransformer, foldTransfo
 // approximation. The method will return true only if each character in the
 // source can be found in the target and occurs after the preceding matches.
 func Match(source, target string) bool {
-	return match(source, target, noopTransformer)
+	return match(source, target, noopTransformer())
 }
 
 // MatchFold is a case-insensitive version of Match.
 func MatchFold(source, target string) bool {
-	return match(source, target, foldTransformer)
+	return match(source, target, foldTransformer())
 }
 
 // MatchNormalized is a unicode-normalized version of Match.
 func MatchNormalized(source, target string) bool {
-	return match(source, target, normalizeTransformer)
+	return match(source, target, normalizeTransformer())
 }
 
 // MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match.
 func MatchNormalizedFold(source, target string) bool {
-	return match(source, target, normalizeFoldTransformer)
+	return match(source, target, normalizedFoldTransformer())
 }
 
 func match(source, target string, transformer transform.Transformer) bool {
@@ -71,22 +83,22 @@ Outer:
 
 // Find will return a list of strings in targets that fuzzy matches source.
 func Find(source string, targets []string) []string {
-	return find(source, targets, noopTransformer)
+	return find(source, targets, noopTransformer())
 }
 
 // FindFold is a case-insensitive version of Find.
 func FindFold(source string, targets []string) []string {
-	return find(source, targets, foldTransformer)
+	return find(source, targets, foldTransformer())
 }
 
 // FindNormalized is a unicode-normalized version of Find.
 func FindNormalized(source string, targets []string) []string {
-	return find(source, targets, normalizeTransformer)
+	return find(source, targets, normalizeTransformer())
 }
 
 // FindNormalizedFold is a unicode-normalized and case-insensitive version of Find.
 func FindNormalizedFold(source string, targets []string) []string {
-	return find(source, targets, normalizeFoldTransformer)
+	return find(source, targets, normalizedFoldTransformer())
 }
 
 func find(source string, targets []string, transformer transform.Transformer) []string {
@@ -108,22 +120,22 @@ func find(source string, targets []string, transformer transform.Transformer) []
 // the Levenshtein calculation, only deletions need be considered, required
 // additions and substitutions would fail the match test.
 func RankMatch(source, target string) int {
-	return rank(source, target, noopTransformer)
+	return rank(source, target, noopTransformer())
 }
 
 // RankMatchFold is a case-insensitive version of RankMatch.
 func RankMatchFold(source, target string) int {
-	return rank(source, target, foldTransformer)
+	return rank(source, target, foldTransformer())
 }
 
 // RankMatchNormalized is a unicode-normalized version of RankMatch.
 func RankMatchNormalized(source, target string) int {
-	return rank(source, target, normalizeTransformer)
+	return rank(source, target, normalizeTransformer())
 }
 
 // RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch.
 func RankMatchNormalizedFold(source, target string) int {
-	return rank(source, target, normalizeFoldTransformer)
+	return rank(source, target, normalizedFoldTransformer())
 }
 
 func rank(source, target string, transformer transform.Transformer) int {
@@ -164,22 +176,22 @@ Outer:
 // RankFind is similar to Find, except it will also rank all matches using
 // Levenshtein distance.
 func RankFind(source string, targets []string) Ranks {
-	return rankFind(source, targets, noopTransformer)
+	return rankFind(source, targets, noopTransformer())
 }
 
 // RankFindFold is a case-insensitive version of RankFind.
 func RankFindFold(source string, targets []string) Ranks {
-	return rankFind(source, targets, foldTransformer)
+	return rankFind(source, targets, foldTransformer())
 }
 
 // RankFindNormalized is a unicode-normalized version of RankFind.
 func RankFindNormalized(source string, targets []string) Ranks {
-	return rankFind(source, targets, normalizeTransformer)
+	return rankFind(source, targets, normalizeTransformer())
 }
 
 // RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind.
 func RankFindNormalizedFold(source string, targets []string) Ranks {
-	return rankFind(source, targets, normalizeFoldTransformer)
+	return rankFind(source, targets, normalizedFoldTransformer())
 }
 
 func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
diff --git a/fuzzy/fuzzy_test.go b/fuzzy/fuzzy_test.go
index 3a32330..f4f9525 100644
--- a/fuzzy/fuzzy_test.go
+++ b/fuzzy/fuzzy_test.go
@@ -212,6 +212,28 @@ func TestRankMatchNormalizedFold(t *testing.T) {
 	}
 }
 
+func TestRankMatchNormalizedFoldConcurrent(t *testing.T) {
+	target := strings.Split("Lorem ipsum dolor sit amet, consectetur adipiscing elit", " ")
+	source := "ips"
+	procs := 10
+	iter := 10
+	type empty struct{}
+	done := make(chan empty)
+	for i := 0; i < procs; i++ {
+		go func() {
+			for n := 0; n < iter; n++ {
+				_ = RankFindNormalizedFold(source, target)
+			}
+			done <- empty{}
+		}()
+	}
+	cnt := 0
+	for i := 0; i < procs; i++ {
+		<-done
+		cnt++
+	}
+}
+
 func TestRankFind(t *testing.T) {
 	target := []string{"cartwheel", "foobar", "wheel", "baz"}
 	wanted := []Rank{