-
Notifications
You must be signed in to change notification settings - Fork 44
/
StringBuilder.v3
357 lines (353 loc) · 10.4 KB
/
StringBuilder.v3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
// Copyright 2019 Ben L. Titzer. All rights reserved.
// See LICENSE for details of Apache 2.0 license.
// Utility for building and formatting strings, including the ability to render
// integers, characters, strings, etc according to printf-like format
// specifiers.
//
// These format specifiers are supported:
//
// %d - render an integer as a decimal
// %c - render an integer as an 8-bit ASCII character
// %s - inline a string argument
// %x - render an integer as a hexadecimal
// %z - render a boolean as "true" or "false"
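// %q - call a closure of type StringBuilder -> StringBuilder
// %% - emit a single literal '%' (a lone '%' at the very end of the format
//      string is also emitted literally)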
//
// Additionally, a number of {put*} methods append individual values one at a
// time, without going through a format string.
class StringBuilder {
// Number of bytes of {buf} currently in use; appends write starting at this index.
def var length: int;
// Backing storage for the characters. It is null until storage is first
// allocated (see {acquire} and {grow}) and is set back to null by {extract()}.
def var buf: Array<byte>; // TODO: keep private
// Render {fmt} into this builder, substituting the single argument {p1} for
// the first format specifier. A "FormatError" is reported if {fmt} still
// contains a specifier after {p1} has been consumed.
def put1<T1>(fmt: string, p1: T1) -> this {
	var afterFirst = putArg(fmt, 0, p1);
	finishFmt(fmt, afterFirst);
}
// Render {fmt} into this builder, substituting {p1} and {p2} (in that order)
// for the first two format specifiers. A "FormatError" is reported if {fmt}
// still contains a specifier after both arguments have been consumed.
def put2<T1, T2>(fmt: string, p1: T1, p2: T2) -> this {
	var afterFirst = putArg(fmt, 0, p1);
	var afterSecond = putArg(fmt, afterFirst, p2);
	finishFmt(fmt, afterSecond);
}
// Render {fmt} into this builder, substituting {p1}, {p2} and {p3} (in that
// order) for the first three format specifiers. A "FormatError" is reported
// if {fmt} still contains a specifier after all three have been consumed.
def put3<T1, T2, T3>(fmt: string, p1: T1, p2: T2, p3: T3) -> this {
	var afterFirst = putArg(fmt, 0, p1);
	var afterSecond = putArg(fmt, afterFirst, p2);
	var afterThird = putArg(fmt, afterSecond, p3);
	finishFmt(fmt, afterThird);
}
// Append an integer (of any width) as decimal.
// The value {i} is matched against the type patterns below in the order they
// are written and handed to the corresponding fixed-type renderer. A value
// that matches none of the patterns is reported through System.error as a
// "TypeError".
def putd<T>(i: T) -> this {
match (i) {
// NOTE(review): at each width the unsigned pattern is listed before the
// signed one; presumably the order matters for values that satisfy both
// patterns -- confirm before reordering.
x: u32 => putd_u32(x);
x: i32 => putd_i32(x);
x: u64 => putd_u64(x);
x: i64 => putd_i64(x);
_ => System.error("TypeError", "not an integer");
}
}
// Append a signed 32-bit integer in decimal notation.
def putd_i32(i: i32) -> this {
	// 11 bytes covers the widest i32 rendering: a sign plus 10 digits.
	var at = acquire(11);
	// The renderer's result is the amount by which {length} advances.
	length = at + Ints.renderDecimal(i, buf, at);
}
// Append an unsigned 32-bit integer in decimal notation.
def putd_u32(i: u32) -> this {
	// 10 bytes covers the widest u32 rendering (10 digits).
	var at = acquire(10);
	// The renderer's result is the amount by which {length} advances.
	length = at + Ints.renderPosDecimal(i, buf, at);
}
// Append a signed 64-bit integer in decimal notation.
def putd_i64(i: i64) -> this {
	// 20 bytes covers the widest i64 rendering: a sign plus 19 digits.
	var at = acquire(20);
	// The renderer's result is the amount by which {length} advances.
	length = at + Longs.renderDecimal(i, buf, at);
}
// Append an unsigned 64-bit integer in decimal notation.
def putd_u64(i: u64) -> this {
	// 20 bytes covers the widest u64 rendering (20 digits).
	var at = acquire(20);
	// The renderer's result is the amount by which {length} advances.
	length = at + Longs.renderPosDecimal(i, buf, at);
}
// Append a variable-width integer as hexadecimal.
// The value {i} is matched against the type patterns below in the order they
// are written. Signed 32- and 64-bit values are reinterpreted as unsigned
// (via {u32.view} / {u64.view}) before rendering. A value that matches none
// of the patterns is reported through System.error as a "TypeError".
def putx<T>(i: T) -> this {
match (i) {
// NOTE(review): narrower patterns are listed first; presumably the order
// matters for values that satisfy several patterns -- confirm before
// reordering.
x: u8 => putx_8(x);
x: u16 => putxv_32(x);
x: u32 => putxv_32(x);
x: i32 => putxv_32(u32.view(x));
x: u64 => putxv_64(x);
x: i64 => putxv_64(u64.view(x));
_ => System.error("TypeError", "not an integer");
}
}
// Append the low 8 bits of {i} as exactly two hexadecimal digits (e.g. 0F),
// most significant nibble first. Higher bits of {i} are masked away.
def putx_8(i: u32) -> this {
	acquire(2);
	var hi = (i >> 4) & 0xF;
	buf[length++] = Chars.hexMap_u[hi];
	var lo = i & 0xF;
	buf[length++] = Chars.hexMap_u[lo];
}
// Append the low 16 bits of {i} as exactly four hexadecimal digits
// (e.g. 034F), most significant nibble first.
def putx_16(i: u32) -> this {
	acquire(4);
	for (digit < 4) {
		// Shift amounts run 12, 8, 4, 0.
		var shift = u5.view(12 - 4 * digit);
		buf[length++] = Chars.hexMap_u[u4.view(i >> shift)];
	}
}
// Append {i} as exactly eight hexadecimal digits (e.g. 0673401F), most
// significant nibble first.
def putx_32(i: u32) -> this {
	acquire(8);
	for (digit < 8) {
		// Shift amounts run 28, 24, ..., 4, 0.
		var shift = u5.view(28 - 4 * digit);
		buf[length++] = Chars.hexMap_u[u4.view(i >> shift)];
	}
}
// Append an unsigned 32-bit integer as hexadecimal using the narrowest of the
// fixed widths (2, 4 or 8 digits) that represents {i} without loss.
def putxv_32(i: u32) -> this {
	var low8 = u8.view(i), low16 = u16.view(i);
	// A truncated view equal to {i} means no higher bits are set.
	if (low8 == i) putx_8(low8);
	else if (low16 == i) putx_16(low16);
	else putx_32(i);
}
// Append an unsigned 64-bit integer as hexadecimal using the narrowest of the
// fixed widths (2, 4, 8 or 16 digits) that represents {i} without loss.
def putxv_64(i: u64) -> this {
	var low8 = u8.view(i), low16 = u16.view(i), low32 = u32.view(i);
	// A truncated view equal to {i} means no higher bits are set.
	if (low8 == i) putx_8(low8);
	else if (low16 == i) putx_16(low16);
	else if (low32 == i) putx_32(low32);
	else putx_64(i);
}
// Append {i} as exactly sixteen hexadecimal digits (e.g. 0673401FEE778811),
// most significant nibble first.
def putx_64(i: u64) -> this {
	acquire(16);
	for (digit < 16) {
		// Shift amounts run 60, 56, ..., 4, 0.
		var shift = u6.view(60 - 4 * digit);
		buf[length++] = Chars.hexMap_u[u4.view(i >> shift)];
	}
}
// Append the single byte {c} as a character.
def putc(c: byte) -> this {
	// {acquire} hands back the current {length}, i.e. the slot to write.
	var at = acquire(1);
	buf[at] = c;
	length = at + 1;
}
// Append every byte of the string {s}.
def puts(s: string) -> this {
	// Reserve room for the whole string up front, then copy byte by byte.
	acquire(s.length);
	for (ch in s) buf[length++] = ch;
}
// Append the string {s} surrounded by double quotes. The contents of {s} are
// copied as-is; no escaping is applied.
def putsq(s: string) -> this {
	putc('\"').puts(s).putc('\"');
}
// Append the substring of {s} from {start} (inclusive) to {end} (exclusive).
// An empty or inverted span ({end} <= {start}) appends nothing.
def putk(s: string, start: int, end: int) -> this {
	var n = end - start;
	// Guard: with a negative {n} the copy loop below would not run, yet {n}
	// would still be added to {length}, silently discarding bytes that were
	// appended earlier (or driving {length} negative).
	if (n <= 0) return;
	var p = acquire(n);
	for (i < n) buf[p + i] = s[i + start];
	length += n;
}
// Call the function {f} on this builder.
// The value returned by {f} is discarded; this method returns this builder.
def putq(f: StringBuilder -> StringBuilder) -> this {
f(this);
}
// Append all bytes of the range {r}.
def putr(r: Range<byte>) -> this {
	var count = r.length;
	// Reserve room for the whole range up front, then copy byte by byte.
	acquire(count);
	for (i < count) buf[length++] = r[i];
}
// Append the text "true" or "false" according to {b}.
def putz(b: bool) -> this {
	if (b) puts("true");
	else puts("false");
}
// Append the unicode character {codepoint} in UTF-8 encoding.
def pututf8(codepoint: u32) -> this {
	// Reserve 4 bytes before encoding; {Utf8.encode}'s result is the amount
	// by which {length} advances.
	var at = acquire(4);
	length = at + Utf8.encode(codepoint, buf, at);
}
// Append the elements of {r} as "[v1, v2, ...]", rendering each element with
// the single-argument format string {fmt}. An empty range renders as "[]".
def putRange<T>(fmt: string, r: Range<T>) -> this {
	putc('[');
	var count = r.length;
	// The first element has no separator in front of it ...
	if (count > 0) put1(fmt, r[0]);
	// ... every later element is preceded by ", ".
	for (i = 1; i < count; i++) {
		csp();
		put1(fmt, r[i]);
	}
	putc(']');
}
// Append a line terminator (a single '\n' byte).
def ln() -> this {
	var newline: byte = '\n';
	putc(newline);
}
// Append a single space character.
def sp() -> this {
	var space: byte = ' ';
	putc(space);
}
// Append a single tab character.
def tab() -> this {
	var tabChar: byte = '\t';
	putc(tabChar);
}
// Append the two-character separator ", " (comma followed by a space).
def csp() -> this {
	var separator = ", ";
	puts(separator);
}
// Extend the contents to a total of {nlen} bytes by appending the pad
// character {ch}. Does nothing if the builder already holds {nlen} or more.
def pad(ch: byte, nlen: int) -> this {
	var missing = nlen - length;
	if (missing <= 0) return;
	acquire(missing);
	while (length < nlen) buf[length++] = ch;
}
// Right-justify the last {nchars} bytes of this builder within a field of
// {len} bytes by inserting copies of {ch} in front of them. {nchars} is
// clamped to the current length; nothing happens if the field is not wider
// than the text.
def rjustify(ch: byte, nchars: int, len: int) -> this {
	var count = if(nchars > length, length, nchars);
	var padding = len - count;
	if (padding <= 0) return;
	var first = length - count, limit = first + len;
	grow(limit);
	// Shift the text right by {padding}, copying back-to-front so that
	// overlapping bytes are read before they are overwritten.
	var src = length - 1, dst = limit - 1;
	for (i < count) buf[dst - i] = buf[src - i];
	// Fill the vacated prefix of the field.
	for (i < padding) buf[first + i] = ch;
	length = limit;
}
// Center the last {nchars} bytes of this builder within a field of {len}
// bytes by inserting copies of {ch} before and after them. {nchars} is
// clamped to the current length; nothing happens if the field is not wider
// than the text. When the padding is odd, the extra byte goes on the right.
def center(ch: byte, nchars: int, len: int) -> this {
	var count = if(nchars > length, length, nchars);
	var padding = len - count;
	if (padding <= 0) return;
	var first = length - count, limit = first + len;
	grow(limit);
	var left = padding / 2, right = padding - left;
	// Shift the text right by {left}, copying back-to-front so that
	// overlapping bytes are read before they are overwritten.
	var src = length - 1, dst = limit - right - 1;
	for (i < count) buf[dst - i] = buf[src - i];
	// Fill the padding on both sides of the moved text.
	for (i < left) buf[first + i] = ch;
	for (i < right) buf[limit - 1 - i] = ch;
	length = limit;
}
// Reset the size of this buffer, reusing internal storage.
// Only {length} is cleared; {buf} is kept as-is, so later appends overwrite
// the old bytes in place.
def reset() -> this {
length = 0;
}
// Ensure the (total) internal storage of this buffer is at least {nlength}
// bytes. Storage is allocated if there is none yet, enlarged if it is too
// small, and left untouched otherwise. {length} is not changed.
def grow(nlength: int) -> this {
	if (buf == null) {
		// No buffer yet: allocate exactly the requested size.
		buf = Array<byte>.new(nlength);
		return;
	}
	if (buf.length < nlength) buf = Arrays.grow(buf, nlength);
}
// Extract the characters in this buffer as a complete string and leave the
// builder empty (no storage, {length} 0). Returns "" if no storage was ever
// allocated.
def extract() -> string {
	var data = buf;
	if (data == null) return "";
	var used = length;
	buf = null;
	length = 0;
	// Storage that is exactly full is handed out directly, avoiding a copy.
	if (used == data.length) return data;
	return Arrays.range(data, 0, used);
}
// Create a string from this buffer (alias for {extract()}). Because it
// delegates to {extract()}, it has the same effect on this builder's state.
def toString() -> string {
	var result = extract();
	return result;
}
// Output the bytes of this string buffer to the function {f} as
// (array, start, length), avoiding an intermediate copy. Note that it is
// implementation dependent if {f} is called multiple times. {f} is not
// called at all when no storage has been allocated.
def out(f: (Array<byte>, int, int) -> void) -> this {
	if (buf == null) return;
	// For now, this implementation uses a contiguous buffer: a single call.
	f(buf, 0, length);
}
// Send the bytes of this string buffer to the function {f} as a range,
// avoiding an intermediate copy, and return what {f} returns. Note that it is
// implementation dependent if {f} is called multiple times.
// NOTE(review): when no storage has been allocated the two-argument {if}
// supplies the result -- presumably the default value of {R}; confirm.
def send<R>(f: Range<byte> -> R) -> R {
	var hasStorage = buf != null;
	return if(hasStorage, f(buf[0 ... length]));
}
// Set the length of this string to {nlength}. When lengthening, storage is
// grown first; this method does not itself write to the newly exposed bytes.
def resize(nlength: int) -> this {
	var lengthening = nlength > length;
	if (lengthening) grow(nlength);
	length = nlength;
}
// Make sure at least {n} more bytes can be written after {length}, allocating
// or growing the internal storage if necessary. Returns the current {length},
// i.e. the index at which the caller should start writing.
private def acquire(n: int) -> int {
	if (buf == null) {
		// First allocation: exactly the requested size.
		buf = Array<byte>.new(n);
		return length;
	}
	var free = buf.length - length;
	// Not enough room: grow to twice the old capacity plus the request.
	if (free < n) buf = Arrays.grow(buf, n + buf.length * 2);
	return length;
}
// Append the portion of the format string {fmt} from {offset} up to and
// including the next format specifier, rendering {p} according to that
// specifier. Returns the position in {fmt} just after the specifier. An
// unknown specifier is reported through System.error as a "FormatError".
private def putArg<T>(fmt: string, offset: int, p: T) -> int {
	var found = putUpToArg(fmt, offset);
	var spec = found.0, next = found.1;
	match (spec) {
		-1 => ; // no specifier left in {fmt}: {p} is not rendered
		'c' => putc(byte.!(p));
		'd' => putd(p);
		'q' => putq(cast<T, StringBuilder -> StringBuilder>(p));
		's' => puts(string.!(p));
		'x' => putx(p);
		'z' => putz(bool.!(p));
		_ => System.error("FormatError", "invalid format specifier");
	}
	return next;
}
// Append the remaining portion of the format string {fmt}, starting at
// {offset}. Finding another format specifier at this point means {fmt} has
// more specifiers than there were arguments, which is reported through
// System.error as a "FormatError".
private def finishFmt(fmt: string, offset: int) {
	var rest = putUpToArg(fmt, offset);
	var spec = rest.0;
	if (spec == -1) return;
	System.error("FormatError", "too many format specifiers");
}
// Append the portion of the format string {fmt} from {offset} up to but
// *not* including the next format specifier. (Factored out in order to avoid
// code duplication due to monomorphization.)
// Returns (specifier character, position just after the specifier) when a
// specifier is found, or (-1, end position) when the end of {fmt} is reached.
// "%%" is emitted as a single '%', and a lone '%' that is the last character
// of {fmt} is emitted literally.
private def putUpToArg(fmt: string, offset: int) -> (int, int) {
	var limit = fmt.length, pos = offset;
	// The literal text emitted below never exceeds the remaining format length.
	acquire(limit - pos);
	while (pos != limit) {
		var ch = fmt[pos];
		if (ch == '%' && pos + 1 < limit) {
			var spec = fmt[pos + 1];
			// Hand the format character back to the caller for dispatch.
			if (spec != '%') return (spec, pos + 2);
			// Escaped percent "%%".
			buf[length++] = spec;
			pos += 2;
		} else {
			// Normal character, or a '%' that is the last character.
			buf[length++] = ch;
			pos++;
		}
	}
	return (-1, pos);
}
}
// Convert {p} from type {F} to type {T} with the {T.!} cast operator. Having
// both types as explicit type parameters lets generic callers name the target
// type at the call site.
def cast<F, T>(p: F) -> T {
	var converted = T.!(p);
	return converted;
}