From c0fcd44931fc84fb95a507ade839977a51bc7905 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 23 Sep 2024 18:04:18 +0200 Subject: [PATCH] GH-44194: [C++] Avoid repeated ArrayData::offset lookups (#44190) ### Rationale for this change In non-trivial situations such as sort kernels, the compiler might not be able to fully optimize these repeated lookups. ### What changes are included in this PR? Remove those lookups altogether. This produces a small but consistent speedup on sort kernels: ``` ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Non-regressions: (173) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ benchmark baseline contender change % counters TableSortIndicesInt64Wide/1048576/0/1/4 6.368M items/sec 9.094M items/sec 42.801 {'family_index': 13, 'per_family_instance_index': 23, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/4/1/4 9.314M items/sec 11.457M items/sec 23.016 {'family_index': 13, 'per_family_instance_index': 22, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 25.0} TableSortIndicesInt64Wide/1048576/4/8/1 8.655M items/sec 10.643M items/sec 22.970 {'family_index': 13, 'per_family_instance_index': 28, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 25.0} TableSortIndicesInt64Wide/1048576/0/8/1 9.176M items/sec 10.842M items/sec 18.153 {'family_index': 13, 'per_family_instance_index': 29, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/100/16/1 9.259M items/sec 10.761M items/sec 16.230 {'family_index': 13, 'per_family_instance_index': 24, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/16/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 16.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/100/2/1 9.505M items/sec 10.777M items/sec 13.382 {'family_index': 13, 'per_family_instance_index': 30, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 1.0} ChunkedArraySortIndicesInt64Narrow/1048576/100 310.582 MiB/sec 351.357 MiB/sec 13.129 {'family_index': 7, 'per_family_instance_index': 6, 'run_name': 'ChunkedArraySortIndicesInt64Narrow/1048576/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 216, 'null_percent': 1.0} RecordBatchSortIndicesInt64Wide/1048576/0/16 9.451M items/sec 10.618M items/sec 12.354 {'family_index': 11, 'per_family_instance_index': 2, 'run_name': 'RecordBatchSortIndicesInt64Wide/1048576/0/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'columns': 16.0, 'null_percent': 0.0} RecordBatchSortIndicesInt64Wide/1048576/100/16 9.266M items/sec 10.365M items/sec 11.868 {'family_index': 11, 'per_family_instance_index': 0, 'run_name': 'RecordBatchSortIndicesInt64Wide/1048576/100/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'columns': 16.0, 'null_percent': 1.0} ChunkedArraySortIndicesInt64Narrow/8388608/100 220.506 MiB/sec 245.414 MiB/sec 11.296 {'family_index': 7, 'per_family_instance_index': 7, 'run_name': 'ChunkedArraySortIndicesInt64Narrow/8388608/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/100/1/4 47.458M items/sec 52.813M items/sec 11.284 {'family_index': 12, 'per_family_instance_index': 21, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 31, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/0/1/4 50.540M items/sec 56.171M items/sec 11.142 {'family_index': 12, 'per_family_instance_index': 23, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 34, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 0.0} RecordBatchSortIndicesInt64Narrow/1048576/0/16 2.756M items/sec 3.042M items/sec 10.393 {'family_index': 10, 'per_family_instance_index': 2, 'run_name': 'RecordBatchSortIndicesInt64Narrow/1048576/0/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'columns': 16.0, 'null_percent': 0.0} TableSortIndicesInt64Narrow/1048576/100/1/32 22.492M items/sec 24.734M items/sec 9.967 {'family_index': 12, 'per_family_instance_index': 9, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/1/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 15, 'chunks': 32.0, 'columns': 1.0, 'null_percent': 1.0} RecordBatchSortIndicesInt64Wide/1048576/4/16 6.391M items/sec 6.986M items/sec 9.307 {'family_index': 11, 'per_family_instance_index': 1, 'run_name': 'RecordBatchSortIndicesInt64Wide/1048576/4/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'columns': 16.0, 'null_percent': 25.0} TableSortIndicesInt64Narrow/1048576/0/1/32 23.468M items/sec 25.650M items/sec 9.299 {'family_index': 12, 'per_family_instance_index': 11, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/1/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16, 'chunks': 32.0, 'columns': 1.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/0/16/1 9.946M items/sec 10.866M items/sec 9.258 {'family_index': 13, 'per_family_instance_index': 26, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/16/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 16.0, 'null_percent': 0.0} TableSortIndicesInt64Narrow/1048576/0/16/32 2.509M items/sec 2.730M items/sec 8.813 {'family_index': 12, 'per_family_instance_index': 2, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/16/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 16.0, 'null_percent': 0.0} ArraySortIndicesInt64Wide/32768/100 151.673 MiB/sec 164.954 MiB/sec 8.757 {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'ArraySortIndicesInt64Wide/32768/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3389, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/0/8/4 8.463M items/sec 9.167M items/sec 8.322 {'family_index': 13, 'per_family_instance_index': 17, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/100/8/4 8.221M items/sec 8.904M items/sec 8.318 {'family_index': 13, 'per_family_instance_index': 15, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/100/8/1 9.911M items/sec 10.705M items/sec 8.012 {'family_index': 13, 'per_family_instance_index': 27, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/0/2/4 8.504M items/sec 9.171M items/sec 7.836 {'family_index': 13, 'per_family_instance_index': 20, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/100/1/4 8.368M items/sec 9.021M items/sec 7.813 {'family_index': 13, 'per_family_instance_index': 21, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/100/2/4 8.277M items/sec 8.918M items/sec 7.748 {'family_index': 13, 'per_family_instance_index': 18, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/0/2/4 6.098M items/sec 6.569M items/sec 7.728 {'family_index': 12, 'per_family_instance_index': 20, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 0.0} TableSortIndicesInt64Narrow/1048576/0/8/32 2.495M items/sec 2.683M items/sec 7.545 {'family_index': 12, 'per_family_instance_index': 5, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/4/16/1 9.864M items/sec 10.609M items/sec 7.542 {'family_index': 13, 'per_family_instance_index': 25, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/16/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 16.0, 'null_percent': 25.0} ArraySortIndicesInt64Wide/32768/10000 155.014 MiB/sec 166.465 MiB/sec 7.387 {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'ArraySortIndicesInt64Wide/32768/10000', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3500, 'null_percent': 0.01} ArraySortIndicesInt64Wide/32768/0 156.113 MiB/sec 167.631 MiB/sec 7.378 {'family_index': 1, 'per_family_instance_index': 5, 'run_name': 'ArraySortIndicesInt64Wide/32768/0', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3516, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/4/2/4 8.003M items/sec 8.591M items/sec 7.351 {'family_index': 13, 'per_family_instance_index': 19, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 25.0} TableSortIndicesInt64Narrow/1048576/4/1/4 44.920M items/sec 48.218M items/sec 7.343 {'family_index': 12, 'per_family_instance_index': 22, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 30, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 25.0} ArraySortIndicesInt64Wide/32768/10 160.956 MiB/sec 172.698 MiB/sec 7.296 {'family_index': 1, 'per_family_instance_index': 2, 'run_name': 'ArraySortIndicesInt64Wide/32768/10', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3593, 'null_percent': 10.0} TableSortIndicesInt64Wide/1048576/4/8/4 6.929M items/sec 7.419M items/sec 7.078 {'family_index': 13, 'per_family_instance_index': 16, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 25.0} ArraySortIndicesStringWide/1048576/100 236.706 MiB/sec 253.442 MiB/sec 7.070 {'family_index': 5, 'per_family_instance_index': 6, 'run_name': 'ArraySortIndicesStringWide/1048576/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 167, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/100/8/1 5.254M items/sec 5.625M items/sec 7.050 {'family_index': 12, 'per_family_instance_index': 27, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/100/8/32 2.279M items/sec 2.438M items/sec 6.997 {'family_index': 12, 'per_family_instance_index': 3, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/100/16/32 2.318M items/sec 2.477M items/sec 6.887 {'family_index': 12, 'per_family_instance_index': 0, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/16/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 16.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/4/2/4 7.105M items/sec 7.582M items/sec 6.708 {'family_index': 12, 'per_family_instance_index': 19, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 25.0} TableSortIndicesInt64Narrow/1048576/4/1/32 24.292M items/sec 25.907M items/sec 6.647 {'family_index': 12, 'per_family_instance_index': 10, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/1/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16, 'chunks': 32.0, 'columns': 1.0, 'null_percent': 25.0} TableSortIndicesInt64Narrow/1048576/100/8/4 3.408M items/sec 3.628M items/sec 6.455 {'family_index': 12, 'per_family_instance_index': 15, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 1.0} ArraySortIndicesInt64Wide/1048576/100 101.067 MiB/sec 107.588 MiB/sec 6.452 {'family_index': 1, 'per_family_instance_index': 6, 'run_name': 'ArraySortIndicesInt64Wide/1048576/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 70, 'null_percent': 1.0} ArraySortIndicesStringWideDict/32768/2 1014.768 MiB/sec 1.054 GiB/sec 6.390 {'family_index': 6, 'per_family_instance_index': 3, 'run_name': 'ArraySortIndicesStringWideDict/32768/2', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 22229, 'null_percent': 50.0} TableSortIndicesInt64Narrow/1048576/100/16/4 3.417M items/sec 3.633M items/sec 6.340 {'family_index': 12, 'per_family_instance_index': 12, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/16/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 4.0, 'columns': 16.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/0/2/1 10.260M items/sec 10.900M items/sec 6.245 {'family_index': 13, 'per_family_instance_index': 32, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 0.0} ArraySortIndicesInt64Wide/32768/2 261.470 MiB/sec 277.785 MiB/sec 6.240 {'family_index': 1, 'per_family_instance_index': 3, 'run_name': 'ArraySortIndicesInt64Wide/32768/2', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5788, 'null_percent': 50.0} TableSortIndicesInt64Narrow/1048576/0/2/32 4.336M items/sec 4.599M items/sec 6.064 {'family_index': 12, 'per_family_instance_index': 8, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/2/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3, 'chunks': 32.0, 'columns': 2.0, 'null_percent': 0.0} TableSortIndicesInt64Narrow/1048576/100/2/1 8.101M items/sec 8.591M items/sec 6.059 {'family_index': 12, 'per_family_instance_index': 30, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/100/2/4 5.879M items/sec 6.228M items/sec 5.920 {'family_index': 12, 'per_family_instance_index': 18, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/100/1/1 10.529M items/sec 11.145M items/sec 5.847 {'family_index': 13, 'per_family_instance_index': 33, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/1/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 1.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/0/2/1 8.147M items/sec 8.617M items/sec 5.763 {'family_index': 12, 'per_family_instance_index': 32, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/4/8/32 4.099M items/sec 4.335M items/sec 5.755 {'family_index': 13, 'per_family_instance_index': 4, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 25.0} ArraySortIndicesInt64Wide/8388608/100 79.176 MiB/sec 83.635 MiB/sec 5.633 {'family_index': 1, 'per_family_instance_index': 7, 'run_name': 'ArraySortIndicesInt64Wide/8388608/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/0/16/4 8.668M items/sec 9.153M items/sec 5.601 {'family_index': 13, 'per_family_instance_index': 14, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/16/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 16.0, 'null_percent': 0.0} ChunkedArraySortIndicesInt64Wide/32768/0 86.231 MiB/sec 91.017 MiB/sec 5.550 {'family_index': 8, 'per_family_instance_index': 5, 'run_name': 'ChunkedArraySortIndicesInt64Wide/32768/0', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1930, 'null_percent': 0.0} TableSortIndicesInt64Narrow/1048576/4/2/1 10.456M items/sec 11.035M items/sec 5.538 {'family_index': 12, 'per_family_instance_index': 31, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 25.0} TableSortIndicesInt64Wide/1048576/100/16/4 8.427M items/sec 8.883M items/sec 5.418 {'family_index': 13, 'per_family_instance_index': 12, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/16/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 16.0, 'null_percent': 1.0} TableSortIndicesInt64Narrow/1048576/0/8/4 3.733M items/sec 3.929M items/sec 5.249 {'family_index': 12, 'per_family_instance_index': 17, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/0/8/32 5.593M items/sec 5.885M items/sec 5.220 {'family_index': 13, 'per_family_instance_index': 5, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 0.0} TableSortIndicesInt64Wide/1048576/100/2/32 5.413M items/sec 5.693M items/sec 5.171 {'family_index': 13, 'per_family_instance_index': 6, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/2/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 32.0, 'columns': 2.0, 'null_percent': 1.0} RecordBatchSortIndicesInt64Narrow/1048576/100/16 2.504M items/sec 2.631M items/sec 5.057 {'family_index': 10, 'per_family_instance_index': 0, 'run_name': 'RecordBatchSortIndicesInt64Narrow/1048576/100/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'columns': 16.0, 'null_percent': 1.0} TableSortIndicesInt64Wide/1048576/0/1/1 10.604M items/sec 11.136M items/sec 5.013 {'family_index': 13, 'per_family_instance_index': 35, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/1/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 1.0, 'null_percent': 0.0} ``` ### Are these changes tested? Yes, by existing tests. ### Are there any user-facing changes? No, these are protected fields and the user is not supposed to subclass the standard array classes. * GitHub Issue: #44194 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/array/array_binary.cc | 8 ++----- cpp/src/arrow/array/array_binary.h | 26 ++++++++--------------- cpp/src/arrow/array/array_nested.cc | 11 +++++----- cpp/src/arrow/array/array_nested.h | 29 ++++++++------------------ cpp/src/arrow/array/array_primitive.cc | 18 +++++++++------- cpp/src/arrow/array/array_primitive.h | 26 +++++++++++++++-------- 6 files changed, 52 insertions(+), 66 deletions(-) diff --git a/cpp/src/arrow/array/array_binary.cc b/cpp/src/arrow/array/array_binary.cc index d83ba0ca8936d..1266819bdb311 100644 --- a/cpp/src/arrow/array/array_binary.cc +++ b/cpp/src/arrow/array/array_binary.cc @@ -125,12 +125,8 @@ FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr& type int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, - int64_t null_count, int64_t offset) - : PrimitiveArray(type, length, data, null_bitmap, null_count, offset), - byte_width_(checked_cast(*type).byte_width()) {} - -const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const { - return raw_values_ + (i + data_->offset) * byte_width_; + int64_t null_count, int64_t offset) { + SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); } } // namespace arrow diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h index 19fdee61243d1..63903eac46d41 100644 --- a/cpp/src/arrow/array/array_binary.h +++ b/cpp/src/arrow/array/array_binary.h @@ -57,8 +57,6 @@ class BaseBinaryArray : public FlatArray { /// Return the pointer to the given elements bytes // XXX should GetValue(int64_t i) return a string_view? const uint8_t* GetValue(int64_t i, offset_type* out_length) const { - // Account for base offset - i += data_->offset; const offset_type pos = raw_value_offsets_[i]; *out_length = raw_value_offsets_[i + 1] - pos; return raw_data_ + pos; @@ -69,8 +67,6 @@ class BaseBinaryArray : public FlatArray { /// \param i the value index /// \return the view over the selected value std::string_view GetView(int64_t i) const { - // Account for base offset - i += data_->offset; const offset_type pos = raw_value_offsets_[i]; return std::string_view(reinterpret_cast(raw_data_ + pos), raw_value_offsets_[i + 1] - pos); @@ -99,9 +95,7 @@ class BaseBinaryArray : public FlatArray { /// Note that this buffer does not account for any slice offset std::shared_ptr value_data() const { return data_->buffers[2]; } - const offset_type* raw_value_offsets() const { - return raw_value_offsets_ + data_->offset; - } + const offset_type* raw_value_offsets() const { return raw_value_offsets_; } const uint8_t* raw_data() const { return raw_data_; } @@ -109,15 +103,12 @@ class BaseBinaryArray : public FlatArray { /// at the passed index. /// /// Does not perform boundschecking - offset_type value_offset(int64_t i) const { - return raw_value_offsets_[i + data_->offset]; - } + offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; } /// \brief Return the length of the data for the value at the passed index. /// /// Does not perform boundschecking offset_type value_length(int64_t i) const { - i += data_->offset; return raw_value_offsets_[i + 1] - raw_value_offsets_[i]; } @@ -126,8 +117,7 @@ class BaseBinaryArray : public FlatArray { /// less than the size of the data buffer (data_->buffers[2]). offset_type total_values_length() const { if (data_->length > 0) { - return raw_value_offsets_[data_->length + data_->offset] - - raw_value_offsets_[data_->offset]; + return raw_value_offsets_[data_->length] - raw_value_offsets_[0]; } else { return 0; } @@ -144,7 +134,7 @@ class BaseBinaryArray : public FlatArray { // Protected method for constructors void SetData(const std::shared_ptr& data) { this->Array::SetData(data); - raw_value_offsets_ = data->GetValuesSafe(1, /*offset=*/0); + raw_value_offsets_ = data->GetValuesSafe(1); raw_data_ = data->GetValuesSafe(2, /*offset=*/0); } @@ -293,11 +283,11 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { const std::shared_ptr& null_bitmap = NULLPTR, int64_t null_count = kUnknownNullCount, int64_t offset = 0); - const uint8_t* GetValue(int64_t i) const; + const uint8_t* GetValue(int64_t i) const { return values_ + i * byte_width_; } const uint8_t* Value(int64_t i) const { return GetValue(i); } std::string_view GetView(int64_t i) const { - return std::string_view(reinterpret_cast(GetValue(i)), byte_width()); + return std::string_view(reinterpret_cast(GetValue(i)), byte_width_); } std::optional operator[](int64_t i) const { @@ -308,7 +298,7 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { int32_t byte_width() const { return byte_width_; } - const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; } + const uint8_t* raw_values() const { return values_; } IteratorType begin() const { return IteratorType(*this); } @@ -319,8 +309,10 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { this->PrimitiveArray::SetData(data); byte_width_ = internal::checked_cast(*type()).byte_width(); + values_ = raw_values_ + data_->offset * byte_width_; } + const uint8_t* values_; int32_t byte_width_; }; diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index bb469df1ad6b4..db52551eadc7f 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -461,8 +461,7 @@ inline void SetListData(VarLengthListLikeArray* self, self->Array::SetData(data); self->list_type_ = checked_cast(data->type.get()); - self->raw_value_offsets_ = - data->GetValuesSafe(1, /*offset=*/0); + self->raw_value_offsets_ = data->GetValuesSafe(1); // BaseListViewArray::SetData takes care of setting raw_value_sizes_. ARROW_CHECK_EQ(self->list_type_->value_type()->id(), data->child_data[0]->type->id()); @@ -654,7 +653,7 @@ ListViewArray::ListViewArray(std::shared_ptr type, int64_t length, void ListViewArray::SetData(const std::shared_ptr& data) { internal::SetListData(this, data); - raw_value_sizes_ = data->GetValuesSafe(2, /*offset=*/0); + raw_value_sizes_ = data->GetValuesSafe(2); } Result> ListViewArray::FromArrays( @@ -729,7 +728,7 @@ LargeListViewArray::LargeListViewArray(std::shared_ptr type, int64_t l void LargeListViewArray::SetData(const std::shared_ptr& data) { internal::SetListData(this, data); - raw_value_sizes_ = data->GetValuesSafe(2, /*offset=*/0); + raw_value_sizes_ = data->GetValuesSafe(2); } Result> LargeListViewArray::FromArrays( @@ -1184,7 +1183,7 @@ void UnionArray::SetData(std::shared_ptr data) { union_type_ = checked_cast(data_->type.get()); ARROW_CHECK_GE(data_->buffers.size(), 2); - raw_type_codes_ = data->GetValuesSafe(1, /*offset=*/0); + raw_type_codes_ = data->GetValuesSafe(1); boxed_fields_.resize(data_->child_data.size()); } @@ -1206,7 +1205,7 @@ void DenseUnionArray::SetData(const std::shared_ptr& data) { // No validity bitmap ARROW_CHECK_EQ(data_->buffers[0], nullptr); - raw_value_offsets_ = data->GetValuesSafe(2, /*offset=*/0); + raw_value_offsets_ = data->GetValuesSafe(2); } SparseUnionArray::SparseUnionArray(std::shared_ptr data) { diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index a6d4977839ef1..f122f9378b525 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -94,15 +94,11 @@ class VarLengthListLikeArray : public Array { const std::shared_ptr& value_type() const { return list_type_->value_type(); } /// Return pointer to raw value offsets accounting for any slice offset - const offset_type* raw_value_offsets() const { - return raw_value_offsets_ + data_->offset; - } + const offset_type* raw_value_offsets() const { return raw_value_offsets_; } // The following functions will not perform boundschecking - offset_type value_offset(int64_t i) const { - return raw_value_offsets_[i + data_->offset]; - } + offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; } /// \brief Return the size of the value at a particular index /// @@ -154,7 +150,6 @@ class BaseListArray : public VarLengthListLikeArray { /// /// \pre IsValid(i) offset_type value_length(int64_t i) const final { - i += this->data_->offset; return this->raw_value_offsets_[i + 1] - this->raw_value_offsets_[i]; } }; @@ -302,9 +297,7 @@ class BaseListViewArray : public VarLengthListLikeArray { const std::shared_ptr& value_sizes() const { return this->data_->buffers[2]; } /// \brief Return pointer to raw value offsets accounting for any slice offset - const offset_type* raw_value_sizes() const { - return raw_value_sizes_ + this->data_->offset; - } + const offset_type* raw_value_sizes() const { return raw_value_sizes_; } /// \brief Return the size of the value at a particular index /// @@ -313,9 +306,7 @@ class BaseListViewArray : public VarLengthListLikeArray { /// length of the child values array. /// /// \pre IsValid(i) - offset_type value_length(int64_t i) const final { - return this->raw_value_sizes_[i + this->data_->offset]; - } + offset_type value_length(int64_t i) const final { return this->raw_value_sizes_[i]; } protected: const offset_type* raw_value_sizes_ = NULLPTR; @@ -744,15 +735,13 @@ class ARROW_EXPORT UnionArray : public Array { /// Note that this buffer does not account for any slice offset const std::shared_ptr& type_codes() const { return data_->buffers[1]; } - const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; } + const type_code_t* raw_type_codes() const { return raw_type_codes_; } /// The logical type code of the value at index. - type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; } + type_code_t type_code(int64_t i) const { return raw_type_codes_[i]; } /// The physical child id containing value at index. - int child_id(int64_t i) const { - return union_type_->child_ids()[raw_type_codes_[i + data_->offset]]; - } + int child_id(int64_t i) const { return union_type_->child_ids()[raw_type_codes_[i]]; } const UnionType* union_type() const { return union_type_; } @@ -883,9 +872,9 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray { /// Note that this buffer does not account for any slice offset const std::shared_ptr& value_offsets() const { return data_->buffers[2]; } - int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; } + int32_t value_offset(int64_t i) const { return raw_value_offsets_[i]; } - const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; } + const int32_t* raw_value_offsets() const { return raw_value_offsets_; } protected: const int32_t* raw_value_offsets_; diff --git a/cpp/src/arrow/array/array_primitive.cc b/cpp/src/arrow/array/array_primitive.cc index da3810aa392c9..10d4e9e6aa284 100644 --- a/cpp/src/arrow/array/array_primitive.cc +++ b/cpp/src/arrow/array/array_primitive.cc @@ -78,15 +78,16 @@ DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr& type int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, - int64_t null_count, int64_t offset) - : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {} + int64_t null_count, int64_t offset) { + SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); +} DayTimeIntervalArray::DayTimeIntervalArray(int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) - : PrimitiveArray(day_time_interval(), length, data, null_bitmap, null_count, offset) { -} + : DayTimeIntervalArray(day_time_interval(), length, data, null_bitmap, null_count, + offset) {} DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const { DCHECK(i < length()); @@ -105,14 +106,15 @@ MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( const std::shared_ptr& type, int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, - int64_t null_count, int64_t offset) - : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {} + int64_t null_count, int64_t offset) { + SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); +} MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) - : PrimitiveArray(month_day_nano_interval(), length, data, null_bitmap, null_count, - offset) {} + : MonthDayNanoIntervalArray(month_day_nano_interval(), length, data, null_bitmap, + null_count, offset) {} MonthDayNanoIntervalType::MonthDayNanos MonthDayNanoIntervalArray::GetValue( int64_t i) const { diff --git a/cpp/src/arrow/array/array_primitive.h b/cpp/src/arrow/array/array_primitive.h index e6df92e3b788c..3e2893b7dd898 100644 --- a/cpp/src/arrow/array/array_primitive.h +++ b/cpp/src/arrow/array/array_primitive.h @@ -90,7 +90,7 @@ class NumericArray : public PrimitiveArray { using value_type = typename TypeClass::c_type; using IteratorType = stl::ArrayIterator>; - explicit NumericArray(const std::shared_ptr& data) : PrimitiveArray(data) {} + explicit NumericArray(const std::shared_ptr& data) { SetData(data); } // Only enable this constructor without a type argument for types without additional // metadata @@ -98,18 +98,17 @@ class NumericArray : public PrimitiveArray { NumericArray(enable_if_parameter_free length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap = NULLPTR, - int64_t null_count = kUnknownNullCount, int64_t offset = 0) - : PrimitiveArray(TypeTraits::type_singleton(), length, data, null_bitmap, - null_count, offset) {} - - const value_type* raw_values() const { - return reinterpret_cast(raw_values_) + data_->offset; + int64_t null_count = kUnknownNullCount, int64_t offset = 0) { + SetData(ArrayData::Make(TypeTraits::type_singleton(), length, {null_bitmap, data}, + null_count, offset)); } - value_type Value(int64_t i) const { return raw_values()[i]; } + const value_type* raw_values() const { return values_; } + + value_type Value(int64_t i) const { return values_[i]; } // For API compatibility with BinaryArray etc. - value_type GetView(int64_t i) const { return Value(i); } + value_type GetView(int64_t i) const { return values_[i]; } std::optional operator[](int64_t i) const { return *IteratorType(*this, i); @@ -121,6 +120,15 @@ class NumericArray : public PrimitiveArray { protected: using PrimitiveArray::PrimitiveArray; + + void SetData(const std::shared_ptr& data) { + this->PrimitiveArray::SetData(data); + values_ = raw_values_ + ? (reinterpret_cast(raw_values_) + data_->offset) + : NULLPTR; + } + + const value_type* values_; }; /// DayTimeArray