From e8eb8eb90d7ea243425209c84df085bf76fb053f Mon Sep 17 00:00:00 2001 From: Mikhail Tavarez Date: Sun, 7 Jul 2024 10:36:35 -0500 Subject: [PATCH] add rune width --- gojo/unicode/__init__.mojo | 2 +- gojo/unicode/utf8/__init__.mojo | 1 + gojo/unicode/utf8/string.mojo | 49 +- gojo/unicode/utf8/table.mojo | 1296 +++++++++++++++++++++++++++++++ gojo/unicode/utf8/width.mojo | 106 +++ 5 files changed, 1427 insertions(+), 27 deletions(-) create mode 100644 gojo/unicode/utf8/table.mojo create mode 100644 gojo/unicode/utf8/width.mojo diff --git a/gojo/unicode/__init__.mojo b/gojo/unicode/__init__.mojo index 877eafd..b300770 100644 --- a/gojo/unicode/__init__.mojo +++ b/gojo/unicode/__init__.mojo @@ -1 +1 @@ -from .utf8 import rune_count_in_string, UnicodeString +from .utf8 import rune_count_in_string, UnicodeString, rune_width, string_width, Condition, DEFAULT_CONDITION diff --git a/gojo/unicode/utf8/__init__.mojo b/gojo/unicode/utf8/__init__.mojo index 123e793..6f3d8a9 100644 --- a/gojo/unicode/utf8/__init__.mojo +++ b/gojo/unicode/utf8/__init__.mojo @@ -3,3 +3,4 @@ This would not be possible without his help. """ from .runes import rune_count_in_string from .string import UnicodeString +from .width import string_width, rune_width, Condition, DEFAULT_CONDITION diff --git a/gojo/unicode/utf8/string.mojo b/gojo/unicode/utf8/string.mojo index 77c4cc8..f0c852d 100644 --- a/gojo/unicode/utf8/string.mojo +++ b/gojo/unicode/utf8/string.mojo @@ -8,8 +8,7 @@ alias simd_width_u8 = simdwidthof[DType.uint8]() @value struct UnicodeString(Stringable, Sized): - """A string that supports Unicode characters of printable size 1 - (ie not east asian characters and such.). + """A string that supports Unicode characters. The algorithms to handle UTF-8 are from @maxim on the Mojo Discord. Thanks! 
""" @@ -48,30 +47,28 @@ struct UnicodeString(Stringable, Sized): fn __str__(self) -> String: return self.inner - # @always_inline - # fn __getitem__(self, slice: Slice) -> String: - # # Copy N bytes + null terminator into new pointer and construct string. - # var copy_src = self.inner - # var copy = DTypePointer[DType.uint8](copy_src.unsafe_uint8_ptr()) - # var bytes_left = len(self.inner) - - # var result = DTypePointer[DType.uint8].alloc(len(self.inner)) - # var total_char_length: Int = 0 - # for _ in range(slice.start, slice.end): - # print(total_char_length, bytes_left) - # # Number of bytes of the current character - # var char_length = int((copy.load() >> 7 == 0).cast[DType.uint8]() * 1 + countl_zero(~copy.load())) - - # memcpy(result.offset(total_char_length), copy, char_length) - - # # Move iterator forward - # bytes_left -= char_length - # copy += char_length - # total_char_length += char_length - # print(total_char_length, char_length, bytes_left) - - # result[total_char_length] = 0 - # return StringRef(result, total_char_length + 1) + @always_inline + fn __getitem__(self: Reference[Self], slice: Slice) -> StringSlice[self.is_mutable, self.lifetime]: + """TODO: Doesn't handle negative indices.""" + var bytes_left = len(self[].inner) + var total_char_length: Int = 0 + for _ in range(slice.start, slice.end): + # Number of bytes of the current character + var char_length = int( + (DTypePointer[DType.uint8](self[].inner.unsafe_uint8_ptr() + total_char_length).load() >> 7 == 0).cast[ + DType.uint8 + ]() + * 1 + + countl_zero(~DTypePointer[DType.uint8](self[].inner.unsafe_uint8_ptr() + total_char_length).load()) + ) + + # Move iterator forward + bytes_left -= char_length + total_char_length += char_length + + return StringSlice[self.is_mutable, self.lifetime]( + unsafe_from_utf8_ptr=self[].inner.unsafe_uint8_ptr(), len=total_char_length + ) @always_inline fn bytecount(self) -> Int: diff --git a/gojo/unicode/utf8/table.mojo b/gojo/unicode/utf8/table.mojo new 
file mode 100644 index 0000000..32717af --- /dev/null +++ b/gojo/unicode/utf8/table.mojo @@ -0,0 +1,1296 @@ +@register_passable("trivial") +struct Interval: + var first: UInt32 + var last: UInt32 + + fn __init__(inout self, first: UInt32, last: UInt32): + self.first = first + self.last = last + + +alias combining = List[Interval]( + Interval(0x0300, 0x036F), + Interval(0x0483, 0x0489), + Interval(0x07EB, 0x07F3), + Interval(0x0C00, 0x0C00), + Interval(0x0C04, 0x0C04), + Interval(0x0D00, 0x0D01), + Interval(0x135D, 0x135F), + Interval(0x1A7F, 0x1A7F), + Interval(0x1AB0, 0x1AC0), + Interval(0x1B6B, 0x1B73), + Interval(0x1DC0, 0x1DF9), + Interval(0x1DFB, 0x1DFF), + Interval(0x20D0, 0x20F0), + Interval(0x2CEF, 0x2CF1), + Interval(0x2DE0, 0x2DFF), + Interval(0x3099, 0x309A), + Interval(0xA66F, 0xA672), + Interval(0xA674, 0xA67D), + Interval(0xA69E, 0xA69F), + Interval(0xA6F0, 0xA6F1), + Interval(0xA8E0, 0xA8F1), + Interval(0xFE20, 0xFE2F), + Interval(0x101FD, 0x101FD), + Interval(0x10376, 0x1037A), + Interval(0x10EAB, 0x10EAC), + Interval(0x10F46, 0x10F50), + Interval(0x11300, 0x11301), + Interval(0x1133B, 0x1133C), + Interval(0x11366, 0x1136C), + Interval(0x11370, 0x11374), + Interval(0x16AF0, 0x16AF4), + Interval(0x1D165, 0x1D169), + Interval(0x1D16D, 0x1D172), + Interval(0x1D17B, 0x1D182), + Interval(0x1D185, 0x1D18B), + Interval(0x1D1AA, 0x1D1AD), + Interval(0x1D242, 0x1D244), + Interval(0x1E000, 0x1E006), + Interval(0x1E008, 0x1E018), + Interval(0x1E01B, 0x1E021), + Interval(0x1E023, 0x1E024), + Interval(0x1E026, 0x1E02A), + Interval(0x1E8D0, 0x1E8D6), +) + +alias doublewidth = List[Interval]( + Interval(0x1100, 0x115F), + Interval(0x231A, 0x231B), + Interval(0x2329, 0x232A), + Interval(0x23E9, 0x23EC), + Interval(0x23F0, 0x23F0), + Interval(0x23F3, 0x23F3), + Interval(0x25FD, 0x25FE), + Interval(0x2614, 0x2615), + Interval(0x2648, 0x2653), + Interval(0x267F, 0x267F), + Interval(0x2693, 0x2693), + Interval(0x26A1, 0x26A1), + Interval(0x26AA, 0x26AB), + 
Interval(0x26BD, 0x26BE), + Interval(0x26C4, 0x26C5), + Interval(0x26CE, 0x26CE), + Interval(0x26D4, 0x26D4), + Interval(0x26EA, 0x26EA), + Interval(0x26F2, 0x26F3), + Interval(0x26F5, 0x26F5), + Interval(0x26FA, 0x26FA), + Interval(0x26FD, 0x26FD), + Interval(0x2705, 0x2705), + Interval(0x270A, 0x270B), + Interval(0x2728, 0x2728), + Interval(0x274C, 0x274C), + Interval(0x274E, 0x274E), + Interval(0x2753, 0x2755), + Interval(0x2757, 0x2757), + Interval(0x2795, 0x2797), + Interval(0x27B0, 0x27B0), + Interval(0x27BF, 0x27BF), + Interval(0x2B1B, 0x2B1C), + Interval(0x2B50, 0x2B50), + Interval(0x2B55, 0x2B55), + Interval(0x2E80, 0x2E99), + Interval(0x2E9B, 0x2EF3), + Interval(0x2F00, 0x2FD5), + Interval(0x2FF0, 0x2FFB), + Interval(0x3000, 0x303E), + Interval(0x3041, 0x3096), + Interval(0x3099, 0x30FF), + Interval(0x3105, 0x312F), + Interval(0x3131, 0x318E), + Interval(0x3190, 0x31E3), + Interval(0x31F0, 0x321E), + Interval(0x3220, 0x3247), + Interval(0x3250, 0x4DBF), + Interval(0x4E00, 0xA48C), + Interval(0xA490, 0xA4C6), + Interval(0xA960, 0xA97C), + Interval(0xAC00, 0xD7A3), + Interval(0xF900, 0xFAFF), + Interval(0xFE10, 0xFE19), + Interval(0xFE30, 0xFE52), + Interval(0xFE54, 0xFE66), + Interval(0xFE68, 0xFE6B), + Interval(0xFF01, 0xFF60), + Interval(0xFFE0, 0xFFE6), + Interval(0x16FE0, 0x16FE4), + Interval(0x16FF0, 0x16FF1), + Interval(0x17000, 0x187F7), + Interval(0x18800, 0x18CD5), + Interval(0x18D00, 0x18D08), + Interval(0x1B000, 0x1B11E), + Interval(0x1B150, 0x1B152), + Interval(0x1B164, 0x1B167), + Interval(0x1B170, 0x1B2FB), + Interval(0x1F004, 0x1F004), + Interval(0x1F0CF, 0x1F0CF), + Interval(0x1F18E, 0x1F18E), + Interval(0x1F191, 0x1F19A), + Interval(0x1F200, 0x1F202), + Interval(0x1F210, 0x1F23B), + Interval(0x1F240, 0x1F248), + Interval(0x1F250, 0x1F251), + Interval(0x1F260, 0x1F265), + Interval(0x1F300, 0x1F320), + Interval(0x1F32D, 0x1F335), + Interval(0x1F337, 0x1F37C), + Interval(0x1F37E, 0x1F393), + Interval(0x1F3A0, 0x1F3CA), + Interval(0x1F3CF, 
0x1F3D3), + Interval(0x1F3E0, 0x1F3F0), + Interval(0x1F3F4, 0x1F3F4), + Interval(0x1F3F8, 0x1F43E), + Interval(0x1F440, 0x1F440), + Interval(0x1F442, 0x1F4FC), + Interval(0x1F4FF, 0x1F53D), + Interval(0x1F54B, 0x1F54E), + Interval(0x1F550, 0x1F567), + Interval(0x1F57A, 0x1F57A), + Interval(0x1F595, 0x1F596), + Interval(0x1F5A4, 0x1F5A4), + Interval(0x1F5FB, 0x1F64F), + Interval(0x1F680, 0x1F6C5), + Interval(0x1F6CC, 0x1F6CC), + Interval(0x1F6D0, 0x1F6D2), + Interval(0x1F6D5, 0x1F6D7), + Interval(0x1F6EB, 0x1F6EC), + Interval(0x1F6F4, 0x1F6FC), + Interval(0x1F7E0, 0x1F7EB), + Interval(0x1F90C, 0x1F93A), + Interval(0x1F93C, 0x1F945), + Interval(0x1F947, 0x1F978), + Interval(0x1F97A, 0x1F9CB), + Interval(0x1F9CD, 0x1F9FF), + Interval(0x1FA70, 0x1FA74), + Interval(0x1FA78, 0x1FA7A), + Interval(0x1FA80, 0x1FA86), + Interval(0x1FA90, 0x1FAA8), + Interval(0x1FAB0, 0x1FAB6), + Interval(0x1FAC0, 0x1FAC2), + Interval(0x1FAD0, 0x1FAD6), + Interval(0x20000, 0x2FFFD), + Interval(0x30000, 0x3FFFD), +) + +alias ambiguous = List[Interval]( + Interval(0x00A1, 0x00A1), + Interval(0x00A4, 0x00A4), + Interval(0x00A7, 0x00A8), + Interval(0x00AA, 0x00AA), + Interval(0x00AD, 0x00AE), + Interval(0x00B0, 0x00B4), + Interval(0x00B6, 0x00BA), + Interval(0x00BC, 0x00BF), + Interval(0x00C6, 0x00C6), + Interval(0x00D0, 0x00D0), + Interval(0x00D7, 0x00D8), + Interval(0x00DE, 0x00E1), + Interval(0x00E6, 0x00E6), + Interval(0x00E8, 0x00EA), + Interval(0x00EC, 0x00ED), + Interval(0x00F0, 0x00F0), + Interval(0x00F2, 0x00F3), + Interval(0x00F7, 0x00FA), + Interval(0x00FC, 0x00FC), + Interval(0x00FE, 0x00FE), + Interval(0x0101, 0x0101), + Interval(0x0111, 0x0111), + Interval(0x0113, 0x0113), + Interval(0x011B, 0x011B), + Interval(0x0126, 0x0127), + Interval(0x012B, 0x012B), + Interval(0x0131, 0x0133), + Interval(0x0138, 0x0138), + Interval(0x013F, 0x0142), + Interval(0x0144, 0x0144), + Interval(0x0148, 0x014B), + Interval(0x014D, 0x014D), + Interval(0x0152, 0x0153), + Interval(0x0166, 0x0167), + 
Interval(0x016B, 0x016B), + Interval(0x01CE, 0x01CE), + Interval(0x01D0, 0x01D0), + Interval(0x01D2, 0x01D2), + Interval(0x01D4, 0x01D4), + Interval(0x01D6, 0x01D6), + Interval(0x01D8, 0x01D8), + Interval(0x01DA, 0x01DA), + Interval(0x01DC, 0x01DC), + Interval(0x0251, 0x0251), + Interval(0x0261, 0x0261), + Interval(0x02C4, 0x02C4), + Interval(0x02C7, 0x02C7), + Interval(0x02C9, 0x02CB), + Interval(0x02CD, 0x02CD), + Interval(0x02D0, 0x02D0), + Interval(0x02D8, 0x02DB), + Interval(0x02DD, 0x02DD), + Interval(0x02DF, 0x02DF), + Interval(0x0300, 0x036F), + Interval(0x0391, 0x03A1), + Interval(0x03A3, 0x03A9), + Interval(0x03B1, 0x03C1), + Interval(0x03C3, 0x03C9), + Interval(0x0401, 0x0401), + Interval(0x0410, 0x044F), + Interval(0x0451, 0x0451), + Interval(0x2010, 0x2010), + Interval(0x2013, 0x2016), + Interval(0x2018, 0x2019), + Interval(0x201C, 0x201D), + Interval(0x2020, 0x2022), + Interval(0x2024, 0x2027), + Interval(0x2030, 0x2030), + Interval(0x2032, 0x2033), + Interval(0x2035, 0x2035), + Interval(0x203B, 0x203B), + Interval(0x203E, 0x203E), + Interval(0x2074, 0x2074), + Interval(0x207F, 0x207F), + Interval(0x2081, 0x2084), + Interval(0x20AC, 0x20AC), + Interval(0x2103, 0x2103), + Interval(0x2105, 0x2105), + Interval(0x2109, 0x2109), + Interval(0x2113, 0x2113), + Interval(0x2116, 0x2116), + Interval(0x2121, 0x2122), + Interval(0x2126, 0x2126), + Interval(0x212B, 0x212B), + Interval(0x2153, 0x2154), + Interval(0x215B, 0x215E), + Interval(0x2160, 0x216B), + Interval(0x2170, 0x2179), + Interval(0x2189, 0x2189), + Interval(0x2190, 0x2199), + Interval(0x21B8, 0x21B9), + Interval(0x21D2, 0x21D2), + Interval(0x21D4, 0x21D4), + Interval(0x21E7, 0x21E7), + Interval(0x2200, 0x2200), + Interval(0x2202, 0x2203), + Interval(0x2207, 0x2208), + Interval(0x220B, 0x220B), + Interval(0x220F, 0x220F), + Interval(0x2211, 0x2211), + Interval(0x2215, 0x2215), + Interval(0x221A, 0x221A), + Interval(0x221D, 0x2220), + Interval(0x2223, 0x2223), + Interval(0x2225, 0x2225), + 
Interval(0x2227, 0x222C), + Interval(0x222E, 0x222E), + Interval(0x2234, 0x2237), + Interval(0x223C, 0x223D), + Interval(0x2248, 0x2248), + Interval(0x224C, 0x224C), + Interval(0x2252, 0x2252), + Interval(0x2260, 0x2261), + Interval(0x2264, 0x2267), + Interval(0x226A, 0x226B), + Interval(0x226E, 0x226F), + Interval(0x2282, 0x2283), + Interval(0x2286, 0x2287), + Interval(0x2295, 0x2295), + Interval(0x2299, 0x2299), + Interval(0x22A5, 0x22A5), + Interval(0x22BF, 0x22BF), + Interval(0x2312, 0x2312), + Interval(0x2460, 0x24E9), + Interval(0x24EB, 0x254B), + Interval(0x2550, 0x2573), + Interval(0x2580, 0x258F), + Interval(0x2592, 0x2595), + Interval(0x25A0, 0x25A1), + Interval(0x25A3, 0x25A9), + Interval(0x25B2, 0x25B3), + Interval(0x25B6, 0x25B7), + Interval(0x25BC, 0x25BD), + Interval(0x25C0, 0x25C1), + Interval(0x25C6, 0x25C8), + Interval(0x25CB, 0x25CB), + Interval(0x25CE, 0x25D1), + Interval(0x25E2, 0x25E5), + Interval(0x25EF, 0x25EF), + Interval(0x2605, 0x2606), + Interval(0x2609, 0x2609), + Interval(0x260E, 0x260F), + Interval(0x261C, 0x261C), + Interval(0x261E, 0x261E), + Interval(0x2640, 0x2640), + Interval(0x2642, 0x2642), + Interval(0x2660, 0x2661), + Interval(0x2663, 0x2665), + Interval(0x2667, 0x266A), + Interval(0x266C, 0x266D), + Interval(0x266F, 0x266F), + Interval(0x269E, 0x269F), + Interval(0x26BF, 0x26BF), + Interval(0x26C6, 0x26CD), + Interval(0x26CF, 0x26D3), + Interval(0x26D5, 0x26E1), + Interval(0x26E3, 0x26E3), + Interval(0x26E8, 0x26E9), + Interval(0x26EB, 0x26F1), + Interval(0x26F4, 0x26F4), + Interval(0x26F6, 0x26F9), + Interval(0x26FB, 0x26FC), + Interval(0x26FE, 0x26FF), + Interval(0x273D, 0x273D), + Interval(0x2776, 0x277F), + Interval(0x2B56, 0x2B59), + Interval(0x3248, 0x324F), + Interval(0xE000, 0xF8FF), + Interval(0xFE00, 0xFE0F), + Interval(0xFFFD, 0xFFFD), + Interval(0x1F100, 0x1F10A), + Interval(0x1F110, 0x1F12D), + Interval(0x1F130, 0x1F169), + Interval(0x1F170, 0x1F18D), + Interval(0x1F18F, 0x1F190), + Interval(0x1F19B, 0x1F1AC), + 
Interval(0xE0100, 0xE01EF), + Interval(0xF0000, 0xFFFFD), + Interval(0x100000, 0x10FFFD), +) + +alias narrow = List[Interval]( + Interval(0x0020, 0x007E), + Interval(0x00A2, 0x00A3), + Interval(0x00A5, 0x00A6), + Interval(0x00AC, 0x00AC), + Interval(0x00AF, 0x00AF), + Interval(0x27E6, 0x27ED), + Interval(0x2985, 0x2986), +) + +alias neutral = List[Interval]( + Interval(0x0000, 0x001F), + Interval(0x007F, 0x00A0), + Interval(0x00A9, 0x00A9), + Interval(0x00AB, 0x00AB), + Interval(0x00B5, 0x00B5), + Interval(0x00BB, 0x00BB), + Interval(0x00C0, 0x00C5), + Interval(0x00C7, 0x00CF), + Interval(0x00D1, 0x00D6), + Interval(0x00D9, 0x00DD), + Interval(0x00E2, 0x00E5), + Interval(0x00E7, 0x00E7), + Interval(0x00EB, 0x00EB), + Interval(0x00EE, 0x00EF), + Interval(0x00F1, 0x00F1), + Interval(0x00F4, 0x00F6), + Interval(0x00FB, 0x00FB), + Interval(0x00FD, 0x00FD), + Interval(0x00FF, 0x0100), + Interval(0x0102, 0x0110), + Interval(0x0112, 0x0112), + Interval(0x0114, 0x011A), + Interval(0x011C, 0x0125), + Interval(0x0128, 0x012A), + Interval(0x012C, 0x0130), + Interval(0x0134, 0x0137), + Interval(0x0139, 0x013E), + Interval(0x0143, 0x0143), + Interval(0x0145, 0x0147), + Interval(0x014C, 0x014C), + Interval(0x014E, 0x0151), + Interval(0x0154, 0x0165), + Interval(0x0168, 0x016A), + Interval(0x016C, 0x01CD), + Interval(0x01CF, 0x01CF), + Interval(0x01D1, 0x01D1), + Interval(0x01D3, 0x01D3), + Interval(0x01D5, 0x01D5), + Interval(0x01D7, 0x01D7), + Interval(0x01D9, 0x01D9), + Interval(0x01DB, 0x01DB), + Interval(0x01DD, 0x0250), + Interval(0x0252, 0x0260), + Interval(0x0262, 0x02C3), + Interval(0x02C5, 0x02C6), + Interval(0x02C8, 0x02C8), + Interval(0x02CC, 0x02CC), + Interval(0x02CE, 0x02CF), + Interval(0x02D1, 0x02D7), + Interval(0x02DC, 0x02DC), + Interval(0x02DE, 0x02DE), + Interval(0x02E0, 0x02FF), + Interval(0x0370, 0x0377), + Interval(0x037A, 0x037F), + Interval(0x0384, 0x038A), + Interval(0x038C, 0x038C), + Interval(0x038E, 0x0390), + Interval(0x03AA, 0x03B0), + 
Interval(0x03C2, 0x03C2), + Interval(0x03CA, 0x0400), + Interval(0x0402, 0x040F), + Interval(0x0450, 0x0450), + Interval(0x0452, 0x052F), + Interval(0x0531, 0x0556), + Interval(0x0559, 0x058A), + Interval(0x058D, 0x058F), + Interval(0x0591, 0x05C7), + Interval(0x05D0, 0x05EA), + Interval(0x05EF, 0x05F4), + Interval(0x0600, 0x061C), + Interval(0x061E, 0x070D), + Interval(0x070F, 0x074A), + Interval(0x074D, 0x07B1), + Interval(0x07C0, 0x07FA), + Interval(0x07FD, 0x082D), + Interval(0x0830, 0x083E), + Interval(0x0840, 0x085B), + Interval(0x085E, 0x085E), + Interval(0x0860, 0x086A), + Interval(0x08A0, 0x08B4), + Interval(0x08B6, 0x08C7), + Interval(0x08D3, 0x0983), + Interval(0x0985, 0x098C), + Interval(0x098F, 0x0990), + Interval(0x0993, 0x09A8), + Interval(0x09AA, 0x09B0), + Interval(0x09B2, 0x09B2), + Interval(0x09B6, 0x09B9), + Interval(0x09BC, 0x09C4), + Interval(0x09C7, 0x09C8), + Interval(0x09CB, 0x09CE), + Interval(0x09D7, 0x09D7), + Interval(0x09DC, 0x09DD), + Interval(0x09DF, 0x09E3), + Interval(0x09E6, 0x09FE), + Interval(0x0A01, 0x0A03), + Interval(0x0A05, 0x0A0A), + Interval(0x0A0F, 0x0A10), + Interval(0x0A13, 0x0A28), + Interval(0x0A2A, 0x0A30), + Interval(0x0A32, 0x0A33), + Interval(0x0A35, 0x0A36), + Interval(0x0A38, 0x0A39), + Interval(0x0A3C, 0x0A3C), + Interval(0x0A3E, 0x0A42), + Interval(0x0A47, 0x0A48), + Interval(0x0A4B, 0x0A4D), + Interval(0x0A51, 0x0A51), + Interval(0x0A59, 0x0A5C), + Interval(0x0A5E, 0x0A5E), + Interval(0x0A66, 0x0A76), + Interval(0x0A81, 0x0A83), + Interval(0x0A85, 0x0A8D), + Interval(0x0A8F, 0x0A91), + Interval(0x0A93, 0x0AA8), + Interval(0x0AAA, 0x0AB0), + Interval(0x0AB2, 0x0AB3), + Interval(0x0AB5, 0x0AB9), + Interval(0x0ABC, 0x0AC5), + Interval(0x0AC7, 0x0AC9), + Interval(0x0ACB, 0x0ACD), + Interval(0x0AD0, 0x0AD0), + Interval(0x0AE0, 0x0AE3), + Interval(0x0AE6, 0x0AF1), + Interval(0x0AF9, 0x0AFF), + Interval(0x0B01, 0x0B03), + Interval(0x0B05, 0x0B0C), + Interval(0x0B0F, 0x0B10), + Interval(0x0B13, 0x0B28), + 
Interval(0x0B2A, 0x0B30), + Interval(0x0B32, 0x0B33), + Interval(0x0B35, 0x0B39), + Interval(0x0B3C, 0x0B44), + Interval(0x0B47, 0x0B48), + Interval(0x0B4B, 0x0B4D), + Interval(0x0B55, 0x0B57), + Interval(0x0B5C, 0x0B5D), + Interval(0x0B5F, 0x0B63), + Interval(0x0B66, 0x0B77), + Interval(0x0B82, 0x0B83), + Interval(0x0B85, 0x0B8A), + Interval(0x0B8E, 0x0B90), + Interval(0x0B92, 0x0B95), + Interval(0x0B99, 0x0B9A), + Interval(0x0B9C, 0x0B9C), + Interval(0x0B9E, 0x0B9F), + Interval(0x0BA3, 0x0BA4), + Interval(0x0BA8, 0x0BAA), + Interval(0x0BAE, 0x0BB9), + Interval(0x0BBE, 0x0BC2), + Interval(0x0BC6, 0x0BC8), + Interval(0x0BCA, 0x0BCD), + Interval(0x0BD0, 0x0BD0), + Interval(0x0BD7, 0x0BD7), + Interval(0x0BE6, 0x0BFA), + Interval(0x0C00, 0x0C0C), + Interval(0x0C0E, 0x0C10), + Interval(0x0C12, 0x0C28), + Interval(0x0C2A, 0x0C39), + Interval(0x0C3D, 0x0C44), + Interval(0x0C46, 0x0C48), + Interval(0x0C4A, 0x0C4D), + Interval(0x0C55, 0x0C56), + Interval(0x0C58, 0x0C5A), + Interval(0x0C60, 0x0C63), + Interval(0x0C66, 0x0C6F), + Interval(0x0C77, 0x0C8C), + Interval(0x0C8E, 0x0C90), + Interval(0x0C92, 0x0CA8), + Interval(0x0CAA, 0x0CB3), + Interval(0x0CB5, 0x0CB9), + Interval(0x0CBC, 0x0CC4), + Interval(0x0CC6, 0x0CC8), + Interval(0x0CCA, 0x0CCD), + Interval(0x0CD5, 0x0CD6), + Interval(0x0CDE, 0x0CDE), + Interval(0x0CE0, 0x0CE3), + Interval(0x0CE6, 0x0CEF), + Interval(0x0CF1, 0x0CF2), + Interval(0x0D00, 0x0D0C), + Interval(0x0D0E, 0x0D10), + Interval(0x0D12, 0x0D44), + Interval(0x0D46, 0x0D48), + Interval(0x0D4A, 0x0D4F), + Interval(0x0D54, 0x0D63), + Interval(0x0D66, 0x0D7F), + Interval(0x0D81, 0x0D83), + Interval(0x0D85, 0x0D96), + Interval(0x0D9A, 0x0DB1), + Interval(0x0DB3, 0x0DBB), + Interval(0x0DBD, 0x0DBD), + Interval(0x0DC0, 0x0DC6), + Interval(0x0DCA, 0x0DCA), + Interval(0x0DCF, 0x0DD4), + Interval(0x0DD6, 0x0DD6), + Interval(0x0DD8, 0x0DDF), + Interval(0x0DE6, 0x0DEF), + Interval(0x0DF2, 0x0DF4), + Interval(0x0E01, 0x0E3A), + Interval(0x0E3F, 0x0E5B), + 
Interval(0x0E81, 0x0E82), + Interval(0x0E84, 0x0E84), + Interval(0x0E86, 0x0E8A), + Interval(0x0E8C, 0x0EA3), + Interval(0x0EA5, 0x0EA5), + Interval(0x0EA7, 0x0EBD), + Interval(0x0EC0, 0x0EC4), + Interval(0x0EC6, 0x0EC6), + Interval(0x0EC8, 0x0ECD), + Interval(0x0ED0, 0x0ED9), + Interval(0x0EDC, 0x0EDF), + Interval(0x0F00, 0x0F47), + Interval(0x0F49, 0x0F6C), + Interval(0x0F71, 0x0F97), + Interval(0x0F99, 0x0FBC), + Interval(0x0FBE, 0x0FCC), + Interval(0x0FCE, 0x0FDA), + Interval(0x1000, 0x10C5), + Interval(0x10C7, 0x10C7), + Interval(0x10CD, 0x10CD), + Interval(0x10D0, 0x10FF), + Interval(0x1160, 0x1248), + Interval(0x124A, 0x124D), + Interval(0x1250, 0x1256), + Interval(0x1258, 0x1258), + Interval(0x125A, 0x125D), + Interval(0x1260, 0x1288), + Interval(0x128A, 0x128D), + Interval(0x1290, 0x12B0), + Interval(0x12B2, 0x12B5), + Interval(0x12B8, 0x12BE), + Interval(0x12C0, 0x12C0), + Interval(0x12C2, 0x12C5), + Interval(0x12C8, 0x12D6), + Interval(0x12D8, 0x1310), + Interval(0x1312, 0x1315), + Interval(0x1318, 0x135A), + Interval(0x135D, 0x137C), + Interval(0x1380, 0x1399), + Interval(0x13A0, 0x13F5), + Interval(0x13F8, 0x13FD), + Interval(0x1400, 0x169C), + Interval(0x16A0, 0x16F8), + Interval(0x1700, 0x170C), + Interval(0x170E, 0x1714), + Interval(0x1720, 0x1736), + Interval(0x1740, 0x1753), + Interval(0x1760, 0x176C), + Interval(0x176E, 0x1770), + Interval(0x1772, 0x1773), + Interval(0x1780, 0x17DD), + Interval(0x17E0, 0x17E9), + Interval(0x17F0, 0x17F9), + Interval(0x1800, 0x180E), + Interval(0x1810, 0x1819), + Interval(0x1820, 0x1878), + Interval(0x1880, 0x18AA), + Interval(0x18B0, 0x18F5), + Interval(0x1900, 0x191E), + Interval(0x1920, 0x192B), + Interval(0x1930, 0x193B), + Interval(0x1940, 0x1940), + Interval(0x1944, 0x196D), + Interval(0x1970, 0x1974), + Interval(0x1980, 0x19AB), + Interval(0x19B0, 0x19C9), + Interval(0x19D0, 0x19DA), + Interval(0x19DE, 0x1A1B), + Interval(0x1A1E, 0x1A5E), + Interval(0x1A60, 0x1A7C), + Interval(0x1A7F, 0x1A89), + 
Interval(0x1A90, 0x1A99), + Interval(0x1AA0, 0x1AAD), + Interval(0x1AB0, 0x1AC0), + Interval(0x1B00, 0x1B4B), + Interval(0x1B50, 0x1B7C), + Interval(0x1B80, 0x1BF3), + Interval(0x1BFC, 0x1C37), + Interval(0x1C3B, 0x1C49), + Interval(0x1C4D, 0x1C88), + Interval(0x1C90, 0x1CBA), + Interval(0x1CBD, 0x1CC7), + Interval(0x1CD0, 0x1CFA), + Interval(0x1D00, 0x1DF9), + Interval(0x1DFB, 0x1F15), + Interval(0x1F18, 0x1F1D), + Interval(0x1F20, 0x1F45), + Interval(0x1F48, 0x1F4D), + Interval(0x1F50, 0x1F57), + Interval(0x1F59, 0x1F59), + Interval(0x1F5B, 0x1F5B), + Interval(0x1F5D, 0x1F5D), + Interval(0x1F5F, 0x1F7D), + Interval(0x1F80, 0x1FB4), + Interval(0x1FB6, 0x1FC4), + Interval(0x1FC6, 0x1FD3), + Interval(0x1FD6, 0x1FDB), + Interval(0x1FDD, 0x1FEF), + Interval(0x1FF2, 0x1FF4), + Interval(0x1FF6, 0x1FFE), + Interval(0x2000, 0x200F), + Interval(0x2011, 0x2012), + Interval(0x2017, 0x2017), + Interval(0x201A, 0x201B), + Interval(0x201E, 0x201F), + Interval(0x2023, 0x2023), + Interval(0x2028, 0x202F), + Interval(0x2031, 0x2031), + Interval(0x2034, 0x2034), + Interval(0x2036, 0x203A), + Interval(0x203C, 0x203D), + Interval(0x203F, 0x2064), + Interval(0x2066, 0x2071), + Interval(0x2075, 0x207E), + Interval(0x2080, 0x2080), + Interval(0x2085, 0x208E), + Interval(0x2090, 0x209C), + Interval(0x20A0, 0x20A8), + Interval(0x20AA, 0x20AB), + Interval(0x20AD, 0x20BF), + Interval(0x20D0, 0x20F0), + Interval(0x2100, 0x2102), + Interval(0x2104, 0x2104), + Interval(0x2106, 0x2108), + Interval(0x210A, 0x2112), + Interval(0x2114, 0x2115), + Interval(0x2117, 0x2120), + Interval(0x2123, 0x2125), + Interval(0x2127, 0x212A), + Interval(0x212C, 0x2152), + Interval(0x2155, 0x215A), + Interval(0x215F, 0x215F), + Interval(0x216C, 0x216F), + Interval(0x217A, 0x2188), + Interval(0x218A, 0x218B), + Interval(0x219A, 0x21B7), + Interval(0x21BA, 0x21D1), + Interval(0x21D3, 0x21D3), + Interval(0x21D5, 0x21E6), + Interval(0x21E8, 0x21FF), + Interval(0x2201, 0x2201), + Interval(0x2204, 0x2206), + 
Interval(0x2209, 0x220A), + Interval(0x220C, 0x220E), + Interval(0x2210, 0x2210), + Interval(0x2212, 0x2214), + Interval(0x2216, 0x2219), + Interval(0x221B, 0x221C), + Interval(0x2221, 0x2222), + Interval(0x2224, 0x2224), + Interval(0x2226, 0x2226), + Interval(0x222D, 0x222D), + Interval(0x222F, 0x2233), + Interval(0x2238, 0x223B), + Interval(0x223E, 0x2247), + Interval(0x2249, 0x224B), + Interval(0x224D, 0x2251), + Interval(0x2253, 0x225F), + Interval(0x2262, 0x2263), + Interval(0x2268, 0x2269), + Interval(0x226C, 0x226D), + Interval(0x2270, 0x2281), + Interval(0x2284, 0x2285), + Interval(0x2288, 0x2294), + Interval(0x2296, 0x2298), + Interval(0x229A, 0x22A4), + Interval(0x22A6, 0x22BE), + Interval(0x22C0, 0x2311), + Interval(0x2313, 0x2319), + Interval(0x231C, 0x2328), + Interval(0x232B, 0x23E8), + Interval(0x23ED, 0x23EF), + Interval(0x23F1, 0x23F2), + Interval(0x23F4, 0x2426), + Interval(0x2440, 0x244A), + Interval(0x24EA, 0x24EA), + Interval(0x254C, 0x254F), + Interval(0x2574, 0x257F), + Interval(0x2590, 0x2591), + Interval(0x2596, 0x259F), + Interval(0x25A2, 0x25A2), + Interval(0x25AA, 0x25B1), + Interval(0x25B4, 0x25B5), + Interval(0x25B8, 0x25BB), + Interval(0x25BE, 0x25BF), + Interval(0x25C2, 0x25C5), + Interval(0x25C9, 0x25CA), + Interval(0x25CC, 0x25CD), + Interval(0x25D2, 0x25E1), + Interval(0x25E6, 0x25EE), + Interval(0x25F0, 0x25FC), + Interval(0x25FF, 0x2604), + Interval(0x2607, 0x2608), + Interval(0x260A, 0x260D), + Interval(0x2610, 0x2613), + Interval(0x2616, 0x261B), + Interval(0x261D, 0x261D), + Interval(0x261F, 0x263F), + Interval(0x2641, 0x2641), + Interval(0x2643, 0x2647), + Interval(0x2654, 0x265F), + Interval(0x2662, 0x2662), + Interval(0x2666, 0x2666), + Interval(0x266B, 0x266B), + Interval(0x266E, 0x266E), + Interval(0x2670, 0x267E), + Interval(0x2680, 0x2692), + Interval(0x2694, 0x269D), + Interval(0x26A0, 0x26A0), + Interval(0x26A2, 0x26A9), + Interval(0x26AC, 0x26BC), + Interval(0x26C0, 0x26C3), + Interval(0x26E2, 0x26E2), + 
Interval(0x26E4, 0x26E7), + Interval(0x2700, 0x2704), + Interval(0x2706, 0x2709), + Interval(0x270C, 0x2727), + Interval(0x2729, 0x273C), + Interval(0x273E, 0x274B), + Interval(0x274D, 0x274D), + Interval(0x274F, 0x2752), + Interval(0x2756, 0x2756), + Interval(0x2758, 0x2775), + Interval(0x2780, 0x2794), + Interval(0x2798, 0x27AF), + Interval(0x27B1, 0x27BE), + Interval(0x27C0, 0x27E5), + Interval(0x27EE, 0x2984), + Interval(0x2987, 0x2B1A), + Interval(0x2B1D, 0x2B4F), + Interval(0x2B51, 0x2B54), + Interval(0x2B5A, 0x2B73), + Interval(0x2B76, 0x2B95), + Interval(0x2B97, 0x2C2E), + Interval(0x2C30, 0x2C5E), + Interval(0x2C60, 0x2CF3), + Interval(0x2CF9, 0x2D25), + Interval(0x2D27, 0x2D27), + Interval(0x2D2D, 0x2D2D), + Interval(0x2D30, 0x2D67), + Interval(0x2D6F, 0x2D70), + Interval(0x2D7F, 0x2D96), + Interval(0x2DA0, 0x2DA6), + Interval(0x2DA8, 0x2DAE), + Interval(0x2DB0, 0x2DB6), + Interval(0x2DB8, 0x2DBE), + Interval(0x2DC0, 0x2DC6), + Interval(0x2DC8, 0x2DCE), + Interval(0x2DD0, 0x2DD6), + Interval(0x2DD8, 0x2DDE), + Interval(0x2DE0, 0x2E52), + Interval(0x303F, 0x303F), + Interval(0x4DC0, 0x4DFF), + Interval(0xA4D0, 0xA62B), + Interval(0xA640, 0xA6F7), + Interval(0xA700, 0xA7BF), + Interval(0xA7C2, 0xA7CA), + Interval(0xA7F5, 0xA82C), + Interval(0xA830, 0xA839), + Interval(0xA840, 0xA877), + Interval(0xA880, 0xA8C5), + Interval(0xA8CE, 0xA8D9), + Interval(0xA8E0, 0xA953), + Interval(0xA95F, 0xA95F), + Interval(0xA980, 0xA9CD), + Interval(0xA9CF, 0xA9D9), + Interval(0xA9DE, 0xA9FE), + Interval(0xAA00, 0xAA36), + Interval(0xAA40, 0xAA4D), + Interval(0xAA50, 0xAA59), + Interval(0xAA5C, 0xAAC2), + Interval(0xAADB, 0xAAF6), + Interval(0xAB01, 0xAB06), + Interval(0xAB09, 0xAB0E), + Interval(0xAB11, 0xAB16), + Interval(0xAB20, 0xAB26), + Interval(0xAB28, 0xAB2E), + Interval(0xAB30, 0xAB6B), + Interval(0xAB70, 0xABED), + Interval(0xABF0, 0xABF9), + Interval(0xD7B0, 0xD7C6), + Interval(0xD7CB, 0xD7FB), + Interval(0xD800, 0xDFFF), + Interval(0xFB00, 0xFB06), + 
Interval(0xFB13, 0xFB17), + Interval(0xFB1D, 0xFB36), + Interval(0xFB38, 0xFB3C), + Interval(0xFB3E, 0xFB3E), + Interval(0xFB40, 0xFB41), + Interval(0xFB43, 0xFB44), + Interval(0xFB46, 0xFBC1), + Interval(0xFBD3, 0xFD3F), + Interval(0xFD50, 0xFD8F), + Interval(0xFD92, 0xFDC7), + Interval(0xFDF0, 0xFDFD), + Interval(0xFE20, 0xFE2F), + Interval(0xFE70, 0xFE74), + Interval(0xFE76, 0xFEFC), + Interval(0xFEFF, 0xFEFF), + Interval(0xFFF9, 0xFFFC), + Interval(0x10000, 0x1000B), + Interval(0x1000D, 0x10026), + Interval(0x10028, 0x1003A), + Interval(0x1003C, 0x1003D), + Interval(0x1003F, 0x1004D), + Interval(0x10050, 0x1005D), + Interval(0x10080, 0x100FA), + Interval(0x10100, 0x10102), + Interval(0x10107, 0x10133), + Interval(0x10137, 0x1018E), + Interval(0x10190, 0x1019C), + Interval(0x101A0, 0x101A0), + Interval(0x101D0, 0x101FD), + Interval(0x10280, 0x1029C), + Interval(0x102A0, 0x102D0), + Interval(0x102E0, 0x102FB), + Interval(0x10300, 0x10323), + Interval(0x1032D, 0x1034A), + Interval(0x10350, 0x1037A), + Interval(0x10380, 0x1039D), + Interval(0x1039F, 0x103C3), + Interval(0x103C8, 0x103D5), + Interval(0x10400, 0x1049D), + Interval(0x104A0, 0x104A9), + Interval(0x104B0, 0x104D3), + Interval(0x104D8, 0x104FB), + Interval(0x10500, 0x10527), + Interval(0x10530, 0x10563), + Interval(0x1056F, 0x1056F), + Interval(0x10600, 0x10736), + Interval(0x10740, 0x10755), + Interval(0x10760, 0x10767), + Interval(0x10800, 0x10805), + Interval(0x10808, 0x10808), + Interval(0x1080A, 0x10835), + Interval(0x10837, 0x10838), + Interval(0x1083C, 0x1083C), + Interval(0x1083F, 0x10855), + Interval(0x10857, 0x1089E), + Interval(0x108A7, 0x108AF), + Interval(0x108E0, 0x108F2), + Interval(0x108F4, 0x108F5), + Interval(0x108FB, 0x1091B), + Interval(0x1091F, 0x10939), + Interval(0x1093F, 0x1093F), + Interval(0x10980, 0x109B7), + Interval(0x109BC, 0x109CF), + Interval(0x109D2, 0x10A03), + Interval(0x10A05, 0x10A06), + Interval(0x10A0C, 0x10A13), + Interval(0x10A15, 0x10A17), + Interval(0x10A19, 
0x10A35), + Interval(0x10A38, 0x10A3A), + Interval(0x10A3F, 0x10A48), + Interval(0x10A50, 0x10A58), + Interval(0x10A60, 0x10A9F), + Interval(0x10AC0, 0x10AE6), + Interval(0x10AEB, 0x10AF6), + Interval(0x10B00, 0x10B35), + Interval(0x10B39, 0x10B55), + Interval(0x10B58, 0x10B72), + Interval(0x10B78, 0x10B91), + Interval(0x10B99, 0x10B9C), + Interval(0x10BA9, 0x10BAF), + Interval(0x10C00, 0x10C48), + Interval(0x10C80, 0x10CB2), + Interval(0x10CC0, 0x10CF2), + Interval(0x10CFA, 0x10D27), + Interval(0x10D30, 0x10D39), + Interval(0x10E60, 0x10E7E), + Interval(0x10E80, 0x10EA9), + Interval(0x10EAB, 0x10EAD), + Interval(0x10EB0, 0x10EB1), + Interval(0x10F00, 0x10F27), + Interval(0x10F30, 0x10F59), + Interval(0x10FB0, 0x10FCB), + Interval(0x10FE0, 0x10FF6), + Interval(0x11000, 0x1104D), + Interval(0x11052, 0x1106F), + Interval(0x1107F, 0x110C1), + Interval(0x110CD, 0x110CD), + Interval(0x110D0, 0x110E8), + Interval(0x110F0, 0x110F9), + Interval(0x11100, 0x11134), + Interval(0x11136, 0x11147), + Interval(0x11150, 0x11176), + Interval(0x11180, 0x111DF), + Interval(0x111E1, 0x111F4), + Interval(0x11200, 0x11211), + Interval(0x11213, 0x1123E), + Interval(0x11280, 0x11286), + Interval(0x11288, 0x11288), + Interval(0x1128A, 0x1128D), + Interval(0x1128F, 0x1129D), + Interval(0x1129F, 0x112A9), + Interval(0x112B0, 0x112EA), + Interval(0x112F0, 0x112F9), + Interval(0x11300, 0x11303), + Interval(0x11305, 0x1130C), + Interval(0x1130F, 0x11310), + Interval(0x11313, 0x11328), + Interval(0x1132A, 0x11330), + Interval(0x11332, 0x11333), + Interval(0x11335, 0x11339), + Interval(0x1133B, 0x11344), + Interval(0x11347, 0x11348), + Interval(0x1134B, 0x1134D), + Interval(0x11350, 0x11350), + Interval(0x11357, 0x11357), + Interval(0x1135D, 0x11363), + Interval(0x11366, 0x1136C), + Interval(0x11370, 0x11374), + Interval(0x11400, 0x1145B), + Interval(0x1145D, 0x11461), + Interval(0x11480, 0x114C7), + Interval(0x114D0, 0x114D9), + Interval(0x11580, 0x115B5), + Interval(0x115B8, 0x115DD), + 
Interval(0x11600, 0x11644), + Interval(0x11650, 0x11659), + Interval(0x11660, 0x1166C), + Interval(0x11680, 0x116B8), + Interval(0x116C0, 0x116C9), + Interval(0x11700, 0x1171A), + Interval(0x1171D, 0x1172B), + Interval(0x11730, 0x1173F), + Interval(0x11800, 0x1183B), + Interval(0x118A0, 0x118F2), + Interval(0x118FF, 0x11906), + Interval(0x11909, 0x11909), + Interval(0x1190C, 0x11913), + Interval(0x11915, 0x11916), + Interval(0x11918, 0x11935), + Interval(0x11937, 0x11938), + Interval(0x1193B, 0x11946), + Interval(0x11950, 0x11959), + Interval(0x119A0, 0x119A7), + Interval(0x119AA, 0x119D7), + Interval(0x119DA, 0x119E4), + Interval(0x11A00, 0x11A47), + Interval(0x11A50, 0x11AA2), + Interval(0x11AC0, 0x11AF8), + Interval(0x11C00, 0x11C08), + Interval(0x11C0A, 0x11C36), + Interval(0x11C38, 0x11C45), + Interval(0x11C50, 0x11C6C), + Interval(0x11C70, 0x11C8F), + Interval(0x11C92, 0x11CA7), + Interval(0x11CA9, 0x11CB6), + Interval(0x11D00, 0x11D06), + Interval(0x11D08, 0x11D09), + Interval(0x11D0B, 0x11D36), + Interval(0x11D3A, 0x11D3A), + Interval(0x11D3C, 0x11D3D), + Interval(0x11D3F, 0x11D47), + Interval(0x11D50, 0x11D59), + Interval(0x11D60, 0x11D65), + Interval(0x11D67, 0x11D68), + Interval(0x11D6A, 0x11D8E), + Interval(0x11D90, 0x11D91), + Interval(0x11D93, 0x11D98), + Interval(0x11DA0, 0x11DA9), + Interval(0x11EE0, 0x11EF8), + Interval(0x11FB0, 0x11FB0), + Interval(0x11FC0, 0x11FF1), + Interval(0x11FFF, 0x12399), + Interval(0x12400, 0x1246E), + Interval(0x12470, 0x12474), + Interval(0x12480, 0x12543), + Interval(0x13000, 0x1342E), + Interval(0x13430, 0x13438), + Interval(0x14400, 0x14646), + Interval(0x16800, 0x16A38), + Interval(0x16A40, 0x16A5E), + Interval(0x16A60, 0x16A69), + Interval(0x16A6E, 0x16A6F), + Interval(0x16AD0, 0x16AED), + Interval(0x16AF0, 0x16AF5), + Interval(0x16B00, 0x16B45), + Interval(0x16B50, 0x16B59), + Interval(0x16B5B, 0x16B61), + Interval(0x16B63, 0x16B77), + Interval(0x16B7D, 0x16B8F), + Interval(0x16E40, 0x16E9A), + Interval(0x16F00, 
0x16F4A), + Interval(0x16F4F, 0x16F87), + Interval(0x16F8F, 0x16F9F), + Interval(0x1BC00, 0x1BC6A), + Interval(0x1BC70, 0x1BC7C), + Interval(0x1BC80, 0x1BC88), + Interval(0x1BC90, 0x1BC99), + Interval(0x1BC9C, 0x1BCA3), + Interval(0x1D000, 0x1D0F5), + Interval(0x1D100, 0x1D126), + Interval(0x1D129, 0x1D1E8), + Interval(0x1D200, 0x1D245), + Interval(0x1D2E0, 0x1D2F3), + Interval(0x1D300, 0x1D356), + Interval(0x1D360, 0x1D378), + Interval(0x1D400, 0x1D454), + Interval(0x1D456, 0x1D49C), + Interval(0x1D49E, 0x1D49F), + Interval(0x1D4A2, 0x1D4A2), + Interval(0x1D4A5, 0x1D4A6), + Interval(0x1D4A9, 0x1D4AC), + Interval(0x1D4AE, 0x1D4B9), + Interval(0x1D4BB, 0x1D4BB), + Interval(0x1D4BD, 0x1D4C3), + Interval(0x1D4C5, 0x1D505), + Interval(0x1D507, 0x1D50A), + Interval(0x1D50D, 0x1D514), + Interval(0x1D516, 0x1D51C), + Interval(0x1D51E, 0x1D539), + Interval(0x1D53B, 0x1D53E), + Interval(0x1D540, 0x1D544), + Interval(0x1D546, 0x1D546), + Interval(0x1D54A, 0x1D550), + Interval(0x1D552, 0x1D6A5), + Interval(0x1D6A8, 0x1D7CB), + Interval(0x1D7CE, 0x1DA8B), + Interval(0x1DA9B, 0x1DA9F), + Interval(0x1DAA1, 0x1DAAF), + Interval(0x1E000, 0x1E006), + Interval(0x1E008, 0x1E018), + Interval(0x1E01B, 0x1E021), + Interval(0x1E023, 0x1E024), + Interval(0x1E026, 0x1E02A), + Interval(0x1E100, 0x1E12C), + Interval(0x1E130, 0x1E13D), + Interval(0x1E140, 0x1E149), + Interval(0x1E14E, 0x1E14F), + Interval(0x1E2C0, 0x1E2F9), + Interval(0x1E2FF, 0x1E2FF), + Interval(0x1E800, 0x1E8C4), + Interval(0x1E8C7, 0x1E8D6), + Interval(0x1E900, 0x1E94B), + Interval(0x1E950, 0x1E959), + Interval(0x1E95E, 0x1E95F), + Interval(0x1EC71, 0x1ECB4), + Interval(0x1ED01, 0x1ED3D), + Interval(0x1EE00, 0x1EE03), + Interval(0x1EE05, 0x1EE1F), + Interval(0x1EE21, 0x1EE22), + Interval(0x1EE24, 0x1EE24), + Interval(0x1EE27, 0x1EE27), + Interval(0x1EE29, 0x1EE32), + Interval(0x1EE34, 0x1EE37), + Interval(0x1EE39, 0x1EE39), + Interval(0x1EE3B, 0x1EE3B), + Interval(0x1EE42, 0x1EE42), + Interval(0x1EE47, 0x1EE47), + 
Interval(0x1EE49, 0x1EE49), + Interval(0x1EE4B, 0x1EE4B), + Interval(0x1EE4D, 0x1EE4F), + Interval(0x1EE51, 0x1EE52), + Interval(0x1EE54, 0x1EE54), + Interval(0x1EE57, 0x1EE57), + Interval(0x1EE59, 0x1EE59), + Interval(0x1EE5B, 0x1EE5B), + Interval(0x1EE5D, 0x1EE5D), + Interval(0x1EE5F, 0x1EE5F), + Interval(0x1EE61, 0x1EE62), + Interval(0x1EE64, 0x1EE64), + Interval(0x1EE67, 0x1EE6A), + Interval(0x1EE6C, 0x1EE72), + Interval(0x1EE74, 0x1EE77), + Interval(0x1EE79, 0x1EE7C), + Interval(0x1EE7E, 0x1EE7E), + Interval(0x1EE80, 0x1EE89), + Interval(0x1EE8B, 0x1EE9B), + Interval(0x1EEA1, 0x1EEA3), + Interval(0x1EEA5, 0x1EEA9), + Interval(0x1EEAB, 0x1EEBB), + Interval(0x1EEF0, 0x1EEF1), + Interval(0x1F000, 0x1F003), + Interval(0x1F005, 0x1F02B), + Interval(0x1F030, 0x1F093), + Interval(0x1F0A0, 0x1F0AE), + Interval(0x1F0B1, 0x1F0BF), + Interval(0x1F0C1, 0x1F0CE), + Interval(0x1F0D1, 0x1F0F5), + Interval(0x1F10B, 0x1F10F), + Interval(0x1F12E, 0x1F12F), + Interval(0x1F16A, 0x1F16F), + Interval(0x1F1AD, 0x1F1AD), + Interval(0x1F1E6, 0x1F1FF), + Interval(0x1F321, 0x1F32C), + Interval(0x1F336, 0x1F336), + Interval(0x1F37D, 0x1F37D), + Interval(0x1F394, 0x1F39F), + Interval(0x1F3CB, 0x1F3CE), + Interval(0x1F3D4, 0x1F3DF), + Interval(0x1F3F1, 0x1F3F3), + Interval(0x1F3F5, 0x1F3F7), + Interval(0x1F43F, 0x1F43F), + Interval(0x1F441, 0x1F441), + Interval(0x1F4FD, 0x1F4FE), + Interval(0x1F53E, 0x1F54A), + Interval(0x1F54F, 0x1F54F), + Interval(0x1F568, 0x1F579), + Interval(0x1F57B, 0x1F594), + Interval(0x1F597, 0x1F5A3), + Interval(0x1F5A5, 0x1F5FA), + Interval(0x1F650, 0x1F67F), + Interval(0x1F6C6, 0x1F6CB), + Interval(0x1F6CD, 0x1F6CF), + Interval(0x1F6D3, 0x1F6D4), + Interval(0x1F6E0, 0x1F6EA), + Interval(0x1F6F0, 0x1F6F3), + Interval(0x1F700, 0x1F773), + Interval(0x1F780, 0x1F7D8), + Interval(0x1F800, 0x1F80B), + Interval(0x1F810, 0x1F847), + Interval(0x1F850, 0x1F859), + Interval(0x1F860, 0x1F887), + Interval(0x1F890, 0x1F8AD), + Interval(0x1F8B0, 0x1F8B1), + Interval(0x1F900, 
0x1F90B), + Interval(0x1F93B, 0x1F93B), + Interval(0x1F946, 0x1F946), + Interval(0x1FA00, 0x1FA53), + Interval(0x1FA60, 0x1FA6D), + Interval(0x1FB00, 0x1FB92), + Interval(0x1FB94, 0x1FBCA), + Interval(0x1FBF0, 0x1FBF9), + Interval(0xE0001, 0xE0001), + Interval(0xE0020, 0xE007F), +) + +alias emoji = List[Interval]( + Interval(0x203C, 0x203C), + Interval(0x2049, 0x2049), + Interval(0x2122, 0x2122), + Interval(0x2139, 0x2139), + Interval(0x2194, 0x2199), + Interval(0x21A9, 0x21AA), + Interval(0x231A, 0x231B), + Interval(0x2328, 0x2328), + Interval(0x2388, 0x2388), + Interval(0x23CF, 0x23CF), + Interval(0x23E9, 0x23F3), + Interval(0x23F8, 0x23FA), + Interval(0x24C2, 0x24C2), + Interval(0x25AA, 0x25AB), + Interval(0x25B6, 0x25B6), + Interval(0x25C0, 0x25C0), + Interval(0x25FB, 0x25FE), + Interval(0x2600, 0x2605), + Interval(0x2607, 0x2612), + Interval(0x2614, 0x2685), + Interval(0x2690, 0x2705), + Interval(0x2708, 0x2712), + Interval(0x2714, 0x2714), + Interval(0x2716, 0x2716), + Interval(0x271D, 0x271D), + Interval(0x2721, 0x2721), + Interval(0x2728, 0x2728), + Interval(0x2733, 0x2734), + Interval(0x2744, 0x2744), + Interval(0x2747, 0x2747), + Interval(0x274C, 0x274C), + Interval(0x274E, 0x274E), + Interval(0x2753, 0x2755), + Interval(0x2757, 0x2757), + Interval(0x2763, 0x2767), + Interval(0x2795, 0x2797), + Interval(0x27A1, 0x27A1), + Interval(0x27B0, 0x27B0), + Interval(0x27BF, 0x27BF), + Interval(0x2934, 0x2935), + Interval(0x2B05, 0x2B07), + Interval(0x2B1B, 0x2B1C), + Interval(0x2B50, 0x2B50), + Interval(0x2B55, 0x2B55), + Interval(0x3030, 0x3030), + Interval(0x303D, 0x303D), + Interval(0x3297, 0x3297), + Interval(0x3299, 0x3299), + Interval(0x1F000, 0x1F0FF), + Interval(0x1F10D, 0x1F10F), + Interval(0x1F12F, 0x1F12F), + Interval(0x1F16C, 0x1F171), + Interval(0x1F17E, 0x1F17F), + Interval(0x1F18E, 0x1F18E), + Interval(0x1F191, 0x1F19A), + Interval(0x1F1AD, 0x1F1E5), + Interval(0x1F201, 0x1F20F), + Interval(0x1F21A, 0x1F21A), + Interval(0x1F22F, 0x1F22F), + 
Interval(0x1F232, 0x1F23A), + Interval(0x1F23C, 0x1F23F), + Interval(0x1F249, 0x1F3FA), + Interval(0x1F400, 0x1F53D), + Interval(0x1F546, 0x1F64F), + Interval(0x1F680, 0x1F6FF), + Interval(0x1F774, 0x1F77F), + Interval(0x1F7D5, 0x1F7FF), + Interval(0x1F80C, 0x1F80F), + Interval(0x1F848, 0x1F84F), + Interval(0x1F85A, 0x1F85F), + Interval(0x1F888, 0x1F88F), + Interval(0x1F8AE, 0x1F8FF), + Interval(0x1F90C, 0x1F93A), + Interval(0x1F93C, 0x1F945), + Interval(0x1F947, 0x1FAFF), + Interval(0x1FC00, 0x1FFFD), +) + +alias private = List[Interval]( + Interval(0x00E000, 0x00F8FF), + Interval(0x0F0000, 0x0FFFFD), + Interval(0x100000, 0x10FFFD), +) + +alias nonprint = List[Interval]( + Interval(0x0000, 0x001F), + Interval(0x007F, 0x009F), + Interval(0x00AD, 0x00AD), + Interval(0x070F, 0x070F), + Interval(0x180B, 0x180E), + Interval(0x200B, 0x200F), + Interval(0x2028, 0x202E), + Interval(0x206A, 0x206F), + Interval(0xD800, 0xDFFF), + Interval(0xFEFF, 0xFEFF), + Interval(0xFFF9, 0xFFFB), + Interval(0xFFFE, 0xFFFF), +) diff --git a/gojo/unicode/utf8/width.mojo b/gojo/unicode/utf8/width.mojo new file mode 100644 index 0000000..8a09ed7 --- /dev/null +++ b/gojo/unicode/utf8/width.mojo @@ -0,0 +1,106 @@ +from .table import Interval, narrow, combining, doublewidth, ambiguous, emoji, nonprint +from .string import UnicodeString + + +@value +struct Condition: + """Width-calculation settings: east_asian_width selects CJK-locale rules; strict_emoji_neutral=True keeps emoji outside the doublewidth table at width 1 under those rules.""" + + var east_asian_width: Bool + var strict_emoji_neutral: Bool + + fn rune_width(self, r: UInt32) -> Int: + """Returns the number of terminal cells (0, 1 or 2) occupied by rune r. 
+ See http://www.unicode.org/reports/tr11/.""" + if r > 0x10FFFF:  # beyond the Unicode range; a UInt32 can never be negative + return 0 + + if not self.east_asian_width: + if r < 0x20: + return 0 + # nonprint + elif (r >= 0x7F and r <= 0x9F) or r == 0xAD: + return 0 + elif r < 0x300: + return 1 + elif in_table(r, narrow): + return 1 + elif in_tables(r, nonprint, combining): + return 0 + elif in_table(r, doublewidth): + return 2 + else: + return 1 + else: + if in_tables(r, nonprint, combining): + return 0 + elif in_table(r, narrow): + return 1 + elif in_tables(r, ambiguous, doublewidth): + return 2 + # Emoji outside doublewidth widen only when strict_emoji_neutral is off; + # an unconditional emoji branch here would turn that flag into a no-op. + elif not self.strict_emoji_neutral and in_tables(r, ambiguous, emoji, narrow): + return 2 + else: + return 1 + + fn string_width(self, s: String) -> Int: + """Returns the display width of s: the sum of rune_width over its runes.""" + var width = 0 + for r in UnicodeString(s): + width += self.rune_width(ord(String(r))) + return width + + +fn in_tables(r: UInt32, *ts: List[Interval]) -> Bool:  # True if r lies in any of the tables ts + for t in ts: + if in_table(r, t[]): + return True + return False + + +fn in_table(r: UInt32, t: List[Interval]) -> Bool:  # True if some interval of t contains r + if len(t) == 0 or r < t[0].first:  # empty table, or r below the lowest interval + return False + # Binary search; assumes the intervals in t are sorted in ascending order. + var bot = 0 + var top = len(t) - 1 + while top >= bot: + var mid = (bot + top) >> 1 + + if t[mid].last < r: + bot = mid + 1 + elif t[mid].first > r: + top = mid - 1 + else: + return True + + return False + + +alias DEFAULT_CONDITION = Condition(east_asian_width=False, strict_emoji_neutral=True) + + +fn string_width(s: String) -> Int: + """Returns the display width of s in terminal cells, using DEFAULT_CONDITION. + + Args: + s: The string to calculate the width of. + + Returns: + The printable width of the string. + """ + return DEFAULT_CONDITION.string_width(s) + + +fn rune_width(rune: UInt32) -> Int: + """Returns the display width of rune in terminal cells, using DEFAULT_CONDITION. + + Args: + rune: The rune to calculate the width of. + + Returns: + The printable width of the rune. + """ + return DEFAULT_CONDITION.rune_width(rune)