Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix buffer overflow in text reading #1302

Merged
merged 1 commit into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 6 additions & 62 deletions build/android/escargot/src/main/cpp/JNIGlobals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,60 +35,6 @@ static void gcCallback(void* data)
}
}

// Per-length correction constants: after the raw bytes of an N-byte sequence
// are accumulated with 6-bit shifts, subtracting entry [N - 1] removes the
// contribution of the lead/continuation marker bits. Only the first four
// entries are reachable from readUTF8Sequence.
static const char32_t offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, static_cast<char32_t>(0xFA082080UL), static_cast<char32_t>(0x82082080UL) };

// Decodes one UTF-8 sequence starting at `sequence`.
//
// On success: `valid` is set to true, `charlen` to the number of bytes in the
// sequence (1-4), `sequence` is advanced past it and the decoded code point is
// returned.
// On failure (bad lead byte or bad continuation byte): `valid` is set to
// false, `charlen` to 1, `sequence` is advanced by exactly one byte and
// static_cast<char32_t>(-1) is returned.
//
// Only the lead byte and the continuation-byte markers are checked; overlong
// forms and surrogate code points are not rejected.
//
// The function has no end-of-buffer parameter, so the caller must guarantee
// that the bytes announced by the lead byte are readable. Continuation bytes
// are inspected one at a time and only as far as the lead byte requires, so a
// NUL terminator (which is never a continuation byte) stops the scan and
// NUL-terminated input is never read past its terminator.
static char32_t readUTF8Sequence(const char*& sequence, bool& valid, int& charlen)
{
    const unsigned char lead = static_cast<unsigned char>(*sequence);

    // Expected sequence length according to the lead byte alone; 0 = invalid lead.
    unsigned length;
    if ((lead & 0x80) == 0) {
        length = 1;
    } else if ((lead & 0xE0) == 0xC0) {
        length = 2;
    } else if ((lead & 0xF0) == 0xE0) {
        length = 3;
    } else if ((lead & 0xF8) == 0xF0) {
        length = 4;
    } else {
        length = 0;
    }

    // Check continuation bytes lazily: stop at the first mismatch so that no
    // byte beyond it is ever touched.
    bool wellFormed = (length != 0);
    for (unsigned i = 1; wellFormed && i < length; i++) {
        wellFormed = (static_cast<unsigned char>(sequence[i]) & 0xC0) == 0x80;
    }

    if (!wellFormed) {
        valid = false;
        charlen = 1; // exactly one byte is skipped
        sequence++;
        return static_cast<char32_t>(-1);
    }

    valid = true;
    charlen = static_cast<int>(length);

    char32_t ch = 0;
    for (unsigned i = 0; i < length; i++) {
        ch = (ch << 6) + static_cast<unsigned char>(*sequence++);
    }
    return ch - offsetsFromUTF8[length - 1];
}

static OptionalRef<StringRef> builtinHelperFileRead(OptionalRef<ExecutionStateRef> state, const char* fileName, const char* builtinName)
{
FILE* fp = fopen(fileName, "r");
Expand All @@ -101,24 +47,22 @@ static OptionalRef<StringRef> builtinHelperFileRead(OptionalRef<ExecutionStateRe
size_t readLen;
while ((readLen = fread(buf, 1, sizeof buf, fp))) {
if (!hasNonLatin1Content) {
const char* source = buf;
int charlen;
bool valid;
while (source < buf + readLen) {
char32_t ch = readUTF8Sequence(source, valid, charlen);
if (ch > 255) {
for (size_t i = 0; i < readLen; i++) {
unsigned char ch = buf[i];
if (ch & 0x80) {
// check non-latin1 character
hasNonLatin1Content = true;
fseek(fp, 0, SEEK_SET);
break;
} else {
str += (unsigned char)ch;
}
str += ch;
}
} else {
utf8Str.append(buf, readLen);
}
}
fclose(fp);

if (StringRef::isCompressibleStringEnabled()) {
if (state) {
if (hasNonLatin1Content) {
Expand Down
71 changes: 6 additions & 65 deletions src/shell/Shell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,63 +196,6 @@ ValueRef* builtinPrint(ExecutionStateRef* state, ValueRef* thisValue, size_t arg
return ValueRef::createUndefined();
}

// Per-length correction constants: once the raw bytes of an N-byte sequence
// have been folded together with 6-bit shifts, subtracting entry [N - 1]
// strips the lead/continuation marker bits. readUTF8Sequence only ever uses
// the first four entries.
static const char32_t offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, static_cast<char32_t>(0xFA082080UL), static_cast<char32_t>(0x82082080UL) };

// Decodes a single UTF-8 sequence beginning at `sequence`.
//
// Success: `valid` = true, `charlen` = sequence length in bytes (1-4),
// `sequence` is moved past the sequence, and the code point is returned.
// Failure (invalid lead byte or continuation byte): `valid` = false,
// `charlen` = 1, `sequence` is moved forward by one byte, and
// static_cast<char32_t>(-1) is returned.
//
// Validation is limited to the lead byte and the continuation markers;
// overlong encodings and surrogates are accepted as-is.
//
// There is no end pointer, so the caller is responsible for the bytes
// announced by the lead byte being readable. Continuation bytes are examined
// strictly in order and only up to the length the lead byte asks for, which
// means a NUL terminator ends the scan and NUL-terminated input is never read
// past its terminator.
char32_t readUTF8Sequence(const char*& sequence, bool& valid, int& charlen)
{
    const unsigned char first = static_cast<unsigned char>(*sequence);

    // Length implied by the lead byte; 0 marks a byte that cannot start a sequence.
    unsigned expected = 0;
    if ((first & 0x80) == 0) {
        expected = 1;
    } else if ((first & 0xE0) == 0xC0) {
        expected = 2;
    } else if ((first & 0xF0) == 0xE0) {
        expected = 3;
    } else if ((first & 0xF8) == 0xF0) {
        expected = 4;
    }

    // Walk the continuation bytes one by one and bail out at the first bad
    // one, so nothing after it is dereferenced.
    bool ok = (expected != 0);
    for (unsigned idx = 1; ok && idx < expected; idx++) {
        const unsigned char cont = static_cast<unsigned char>(sequence[idx]);
        ok = ((cont & 0xC0) == 0x80);
    }

    if (!ok) {
        valid = false;
        charlen = 1; // only the offending byte is skipped
        sequence++;
        return static_cast<char32_t>(-1);
    }

    valid = true;
    charlen = static_cast<int>(expected);

    char32_t codePoint = 0;
    for (unsigned idx = 0; idx < expected; idx++) {
        codePoint <<= 6;
        codePoint += static_cast<unsigned char>(*sequence++);
    }

    return codePoint - offsetsFromUTF8[expected - 1];
}

static OptionalRef<StringRef> builtinHelperFileRead(OptionalRef<ExecutionStateRef> state, const char* fileName, const char* builtinName)
{
FILE* fp = fopen(fileName, "r");
Expand All @@ -265,24 +208,22 @@ static OptionalRef<StringRef> builtinHelperFileRead(OptionalRef<ExecutionStateRe
size_t readLen;
while ((readLen = fread(buf, 1, sizeof buf, fp))) {
if (!hasNonLatin1Content) {
const char* source = buf;
int charlen;
bool valid;
while (source < buf + readLen) {
char32_t ch = readUTF8Sequence(source, valid, charlen);
if (ch > 255) {
for (size_t i = 0; i < readLen; i++) {
unsigned char ch = buf[i];
if (ch & 0x80) {
// check non-latin1 character
hasNonLatin1Content = true;
fseek(fp, 0, SEEK_SET);
break;
} else {
str += (unsigned char)ch;
}
str += ch;
}
} else {
utf8Str.append(buf, readLen);
}
}
fclose(fp);

if (StringRef::isCompressibleStringEnabled()) {
if (state) {
if (hasNonLatin1Content) {
Expand Down