Skip to content

Commit

Permalink
Implement String.prototype.{ isWellFormed, toWellFormed }
Browse files Browse the repository at this point in the history
Signed-off-by: Seonghyun Kim <[email protected]>
  • Loading branch information
ksh8281 authored and clover2123 committed Jul 4, 2024
1 parent 18ec8bc commit d59154a
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 16 deletions.
38 changes: 38 additions & 0 deletions src/builtins/BuiltinString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1575,6 +1575,36 @@ static Value builtinStringIncludes(ExecutionState& state, Value thisValue, size_
return Value(true);
}

// String.prototype.isWellFormed
// https://tc39.es/ecma262/multipage/text-processing.html#sec-string.prototype.iswellformed
static Value builtinStringIsWellFormed(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
{
    // Spec steps 1-2: O = ? RequireObjectCoercible(this value), S = ? ToString(O).
    // Both are delegated to the binding macro, which introduces S.
    RESOLVE_THIS_BINDING_TO_STRING(S, String, isWellFormed);

    // Spec step 3: return IsStringWellFormedUnicode(S).
    const bool wellFormed = S->isWellFormed();
    return Value(wellFormed);
}

// String.prototype.toWellFormed
// https://tc39.es/ecma262/multipage/text-processing.html#sec-string.prototype.towellformed
static Value builtinStringToWellFormed(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
{
    // Spec steps 1-2: O = ? RequireObjectCoercible(this value), S = ? ToString(O).
    // Both are delegated to the binding macro, which introduces S.
    RESOLVE_THIS_BINDING_TO_STRING(S, String, toWellFormed);

    // The remaining spec steps are handed to String::toWellFormed():
    //   - let strLen be the length of S, k be 0 and result be the empty String
    //   - repeat, while k < strLen:
    //       cp = CodePointAt(S, k)
    //       if cp.[[IsUnpairedSurrogate]] is true,
    //           append 0xFFFD (REPLACEMENT CHARACTER) to result
    //       else
    //           append UTF16EncodeCodePoint(cp.[[CodePoint]]) to result
    //       k = k + cp.[[CodeUnitCount]]
    //   - return result
    String* wellFormed = S->toWellFormed();
    return wellFormed;
}

static Value builtinStringIteratorNext(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
{
if (!thisValue.isObject() || !thisValue.asObject()->isStringIteratorObject()) {
Expand Down Expand Up @@ -1745,6 +1775,14 @@ void GlobalObject::installString(ExecutionState& state)
ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->at, builtinStringAt, 1, NativeFunctionInfo::Strict)),
(ObjectPropertyDescriptor::PresentAttribute)(ObjectPropertyDescriptor::WritablePresent | ObjectPropertyDescriptor::ConfigurablePresent)));

m_stringPrototype->directDefineOwnProperty(state, ObjectPropertyName(strings->isWellFormed),
ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->isWellFormed, builtinStringIsWellFormed, 0, NativeFunctionInfo::Strict)),
(ObjectPropertyDescriptor::PresentAttribute)(ObjectPropertyDescriptor::WritablePresent | ObjectPropertyDescriptor::ConfigurablePresent)));

m_stringPrototype->directDefineOwnProperty(state, ObjectPropertyName(strings->toWellFormed),
ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->toWellFormed, builtinStringToWellFormed, 0, NativeFunctionInfo::Strict)),
(ObjectPropertyDescriptor::PresentAttribute)(ObjectPropertyDescriptor::WritablePresent | ObjectPropertyDescriptor::ConfigurablePresent)));

#define DEFINE_STRING_ADDITIONAL_HTML_FUNCTION(fnName, argLength) \
m_stringPrototype->directDefineOwnProperty(state, ObjectPropertyName(strings->fnName), \
ObjectPropertyDescriptor(new NativeFunctionObject(state, NativeFunctionInfo(strings->fnName, builtinString##fnName, argLength, NativeFunctionInfo::Strict)), \
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/StaticStrings.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ namespace Escargot {
F(isPrototypeOf) \
F(isSafeInteger) \
F(isSealed) \
F(isWellFormed) \
F(isView) \
F(italics) \
F(iterator) \
Expand Down Expand Up @@ -468,6 +469,7 @@ namespace Escargot {
F(toTimeString) \
F(toUTCString) \
F(toUpperCase) \
F(toWellFormed) \
F(transfer) \
F(trim) \
F(trimEnd) \
Expand Down
63 changes: 63 additions & 0 deletions src/runtime/String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,30 @@ size_t utf32ToUtf16(char32_t i, char16_t* u)
}
}

// Inspects the UTF-16 sequence that starts at `utf16` and moves `utf16` past it.
//
// Returns true when the sequence is well formed: either a single non-surrogate
// code unit (cursor advances by 1) or a high surrogate immediately followed by
// a low surrogate (cursor advances by 2).
// Returns false for an unpaired surrogate; the cursor then advances by exactly
// one code unit so the caller can resume scanning right after it.
//
// `utf16` is dereferenced unconditionally, so the caller must guarantee
// utf16 < bufferEnd on entry. `bufferEnd` is only used to decide whether a
// second code unit may be read.
bool isWellFormed(const char16_t*& utf16, const char16_t* bufferEnd)
{
    const char16_t lead = *utf16;
    const bool leadIsHighSurrogate = lead >= 0xd800 && lead <= 0xdbff;

    if (!leadIsHighSurrogate) {
        // A lone low surrogate is ill formed; anything else stands on its own.
        const bool leadIsLowSurrogate = lead >= 0xdc00 && lead <= 0xdfff;
        ++utf16;
        return !leadIsLowSurrogate;
    }

    // High surrogate: it is only valid when a low surrogate follows inside the buffer.
    const char16_t* trail = utf16 + 1;
    if (trail < bufferEnd && *trail >= 0xdc00 && *trail <= 0xdfff) {
        utf16 += 2;
        return true;
    }

    // Either the buffer ended or the next unit is not a low surrogate.
    utf16 += 1;
    return false;
}

bool StringBufferAccessData::equals16Bit(const char16_t* c1, const char* c2, size_t len)
{
while (len > 0) {
Expand Down Expand Up @@ -844,6 +868,45 @@ uint32_t String::tryToUseAsIndexProperty() const
return tryToUseAsIndex32();
}

// Reports whether this string contains no unpaired UTF-16 surrogate.
bool String::isWellFormed() const
{
    auto accessData = bufferAccessData();

    // 8-bit code units are all below 0xD800, so only 16-bit content needs scanning.
    if (!accessData.has8BitContent) {
        auto limit = accessData.bufferAs16Bit + accessData.length;
        // The helper advances `cursor` itself (by one or two code units per call).
        for (auto cursor = accessData.bufferAs16Bit; cursor < limit;) {
            if (!::Escargot::isWellFormed(cursor, limit)) {
                return false;
            }
        }
    }

    return true;
}

// Returns a string in which every unpaired surrogate of this string is replaced
// by U+FFFD. When this string is already well formed, `this` is returned as is
// and nothing is allocated.
String* String::toWellFormed()
{
    if (isWellFormed()) {
        return this;
    }

    // Reaching this point means isWellFormed() found an unpaired surrogate, which
    // it only looks for in 16-bit content, so the 16-bit buffer is the one to read.
    auto accessData = bufferAccessData();
    auto cursor = accessData.bufferAs16Bit;
    auto limit = cursor + accessData.length;

    UTF16StringData replaced;
    while (cursor < limit) {
        auto unitBegin = cursor;

        // The helper moves `cursor` past the sequence it just examined.
        if (!::Escargot::isWellFormed(cursor, limit)) {
            // Unpaired surrogate: emit REPLACEMENT CHARACTER in its place.
            replaced.pushBack(0xfffd);
            continue;
        }

        // Well-formed sequence: copy its one or two code units verbatim.
        for (; unitBegin < cursor; unitBegin++) {
            replaced.pushBack(*unitBegin);
        }
    }

    return new UTF16String(std::move(replaced));
}

size_t String::find(String* str, size_t pos) const
{
const size_t srcStrLen = str->length();
Expand Down
4 changes: 4 additions & 0 deletions src/runtime/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ ASCIIStringData utf16StringToASCIIString(const char16_t* buf, const size_t len);
ASCIIStringDataNonGCStd dtoa(double number);
size_t utf32ToUtf8(char32_t uc, char* UTF8);
size_t utf32ToUtf16(char32_t i, char16_t* u);
bool isWellFormed(const char16_t*& utf16, const char16_t* bufferEnd);

// these functions only care ascii range(0~127)
bool islower(char32_t ch);
Expand Down Expand Up @@ -534,6 +535,9 @@ class String : public PointerValue {
return has8BitContent();
}

bool isWellFormed() const;
String* toWellFormed();

template <typename Any>
const Any* characters() const
{
Expand Down
16 changes: 0 additions & 16 deletions tools/test/test262/excludelist.orig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1189,24 +1189,8 @@
<test id="built-ins/ShadowRealm/prototype/importValue/throws-typeerror-import-throws"><reason>TODO</reason></test>
<test id="built-ins/ShadowRealm/prototype/importValue/validates-realm-object"><reason>TODO</reason></test>
<test id="built-ins/ShadowRealm/prototype/proto"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/length"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/name"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/not-a-constructor"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/prop-desc"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/return-abrupt-from-this"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/returns-boolean"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/to-string"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/isWellFormed/to-string-primitive"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/match/duplicate-named-groups-properties"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/match/duplicate-named-indices-groups-properties"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/length"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/name"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/not-a-constructor"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/prop-desc"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/return-abrupt-from-this"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/returns-well-formed-string"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/to-string"><reason>TODO</reason></test>
<test id="built-ins/String/prototype/toWellFormed/to-string-primitive"><reason>TODO</reason></test>
<test id="built-ins/Temporal/Calendar/argument-wrong-type"><reason>TODO</reason></test>
<test id="built-ins/Temporal/Calendar/from/calendar-case-insensitive"><reason>TODO</reason></test>
<test id="built-ins/Temporal/Calendar/from/calendar-number"><reason>TODO</reason></test>
Expand Down

0 comments on commit d59154a

Please sign in to comment.