Skip to content

Commit

Permalink
In RegExp (yarr), do not apply u_tolower/u_toupper to non-ASCII UCHAR_ALPHABETIC chars
Browse files Browse the repository at this point in the history

Signed-off-by: Seonghyun Kim <[email protected]>
  • Loading branch information
ksh8281 committed Jul 10, 2024
1 parent 03daa6b commit bbac89d
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions third_party/yarr/YarrInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2193,8 +2193,22 @@ class ByteCompiler {
{
if (m_pattern.ignoreCase()) {
#if defined(ENABLE_ICU)
char32_t lo = u_tolower(ch);
char32_t hi = u_toupper(ch);
char32_t lo;
char32_t hi;
if (ch < 128) {
lo = tolower(ch);
hi = toupper(ch);
} else {
// if ch is ALPHABETIC like latin or greek, we should not apply u_tolower or u_toupper (print('iI\u0130'.replace(/\u0130/gi, '#')))
auto v = u_getIntPropertyValue(ch, UProperty::UCHAR_ALPHABETIC);
if (v) {
lo = ch;
hi = ch;
} else {
lo = u_tolower(ch);
hi = u_toupper(ch);
}
}
#else
char32_t lo = tolower(ch);
char32_t hi = toupper(ch);
Expand Down

0 comments on commit bbac89d

Please sign in to comment.