Skip to content

Commit

Permalink
GH-44194: [C++] Avoid repeated ArrayData::offset lookups (#44190)
Browse files Browse the repository at this point in the history
### Rationale for this change

In non-trivial situations such as sort kernels, the compiler might not be able to hoist or eliminate the repeated `data_->offset` lookups that array accessors perform on every element access.
    
### What changes are included in this PR?

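Remove those lookups altogether by computing the offset-adjusted raw pointers once in `SetData`, so that accessors can index them directly. This produces a small but consistent speedup on sort kernels: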

```
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Non-regressions: (173)
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
                                       benchmark           baseline          contender  change %                                                                                                                                                                                                                                        counters
         TableSortIndicesInt64Wide/1048576/0/1/4   6.368M items/sec   9.094M items/sec    42.801        {'family_index': 13, 'per_family_instance_index': 23, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 0.0}
         TableSortIndicesInt64Wide/1048576/4/1/4   9.314M items/sec  11.457M items/sec    23.016       {'family_index': 13, 'per_family_instance_index': 22, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 25.0}
         TableSortIndicesInt64Wide/1048576/4/8/1   8.655M items/sec  10.643M items/sec    22.970       {'family_index': 13, 'per_family_instance_index': 28, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 25.0}
         TableSortIndicesInt64Wide/1048576/0/8/1   9.176M items/sec  10.842M items/sec    18.153        {'family_index': 13, 'per_family_instance_index': 29, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 0.0}
      TableSortIndicesInt64Wide/1048576/100/16/1   9.259M items/sec  10.761M items/sec    16.230    {'family_index': 13, 'per_family_instance_index': 24, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/16/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 16.0, 'null_percent': 1.0}
       TableSortIndicesInt64Wide/1048576/100/2/1   9.505M items/sec  10.777M items/sec    13.382      {'family_index': 13, 'per_family_instance_index': 30, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 1.0}
  ChunkedArraySortIndicesInt64Narrow/1048576/100    310.582 MiB/sec    351.357 MiB/sec    13.129                                {'family_index': 7, 'per_family_instance_index': 6, 'run_name': 'ChunkedArraySortIndicesInt64Narrow/1048576/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 216, 'null_percent': 1.0}
    RecordBatchSortIndicesInt64Wide/1048576/0/16   9.451M items/sec  10.618M items/sec    12.354                  {'family_index': 11, 'per_family_instance_index': 2, 'run_name': 'RecordBatchSortIndicesInt64Wide/1048576/0/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'columns': 16.0, 'null_percent': 0.0}
  RecordBatchSortIndicesInt64Wide/1048576/100/16   9.266M items/sec  10.365M items/sec    11.868                {'family_index': 11, 'per_family_instance_index': 0, 'run_name': 'RecordBatchSortIndicesInt64Wide/1048576/100/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'columns': 16.0, 'null_percent': 1.0}
  ChunkedArraySortIndicesInt64Narrow/8388608/100    220.506 MiB/sec    245.414 MiB/sec    11.296                                 {'family_index': 7, 'per_family_instance_index': 7, 'run_name': 'ChunkedArraySortIndicesInt64Narrow/8388608/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 19, 'null_percent': 1.0}
     TableSortIndicesInt64Narrow/1048576/100/1/4  47.458M items/sec  52.813M items/sec    11.284   {'family_index': 12, 'per_family_instance_index': 21, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 31, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 1.0}
       TableSortIndicesInt64Narrow/1048576/0/1/4  50.540M items/sec  56.171M items/sec    11.142     {'family_index': 12, 'per_family_instance_index': 23, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 34, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 0.0}
  RecordBatchSortIndicesInt64Narrow/1048576/0/16   2.756M items/sec   3.042M items/sec    10.393                {'family_index': 10, 'per_family_instance_index': 2, 'run_name': 'RecordBatchSortIndicesInt64Narrow/1048576/0/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'columns': 16.0, 'null_percent': 0.0}
    TableSortIndicesInt64Narrow/1048576/100/1/32  22.492M items/sec  24.734M items/sec     9.967  {'family_index': 12, 'per_family_instance_index': 9, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/1/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 15, 'chunks': 32.0, 'columns': 1.0, 'null_percent': 1.0}
    RecordBatchSortIndicesInt64Wide/1048576/4/16   6.391M items/sec   6.986M items/sec     9.307                 {'family_index': 11, 'per_family_instance_index': 1, 'run_name': 'RecordBatchSortIndicesInt64Wide/1048576/4/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'columns': 16.0, 'null_percent': 25.0}
      TableSortIndicesInt64Narrow/1048576/0/1/32  23.468M items/sec  25.650M items/sec     9.299   {'family_index': 12, 'per_family_instance_index': 11, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/1/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16, 'chunks': 32.0, 'columns': 1.0, 'null_percent': 0.0}
        TableSortIndicesInt64Wide/1048576/0/16/1   9.946M items/sec  10.866M items/sec     9.258      {'family_index': 13, 'per_family_instance_index': 26, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/16/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 16.0, 'null_percent': 0.0}
     TableSortIndicesInt64Narrow/1048576/0/16/32   2.509M items/sec   2.730M items/sec     8.813   {'family_index': 12, 'per_family_instance_index': 2, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/16/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 16.0, 'null_percent': 0.0}
             ArraySortIndicesInt64Wide/32768/100    151.673 MiB/sec    164.954 MiB/sec     8.757                                          {'family_index': 1, 'per_family_instance_index': 1, 'run_name': 'ArraySortIndicesInt64Wide/32768/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3389, 'null_percent': 1.0}
         TableSortIndicesInt64Wide/1048576/0/8/4   8.463M items/sec   9.167M items/sec     8.322        {'family_index': 13, 'per_family_instance_index': 17, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 0.0}
       TableSortIndicesInt64Wide/1048576/100/8/4   8.221M items/sec   8.904M items/sec     8.318      {'family_index': 13, 'per_family_instance_index': 15, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 1.0}
       TableSortIndicesInt64Wide/1048576/100/8/1   9.911M items/sec  10.705M items/sec     8.012      {'family_index': 13, 'per_family_instance_index': 27, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 1.0}
         TableSortIndicesInt64Wide/1048576/0/2/4   8.504M items/sec   9.171M items/sec     7.836        {'family_index': 13, 'per_family_instance_index': 20, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 0.0}
       TableSortIndicesInt64Wide/1048576/100/1/4   8.368M items/sec   9.021M items/sec     7.813      {'family_index': 13, 'per_family_instance_index': 21, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 1.0}
       TableSortIndicesInt64Wide/1048576/100/2/4   8.277M items/sec   8.918M items/sec     7.748      {'family_index': 13, 'per_family_instance_index': 18, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 1.0}
       TableSortIndicesInt64Narrow/1048576/0/2/4   6.098M items/sec   6.569M items/sec     7.728      {'family_index': 12, 'per_family_instance_index': 20, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 0.0}
      TableSortIndicesInt64Narrow/1048576/0/8/32   2.495M items/sec   2.683M items/sec     7.545     {'family_index': 12, 'per_family_instance_index': 5, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 0.0}
        TableSortIndicesInt64Wide/1048576/4/16/1   9.864M items/sec  10.609M items/sec     7.542     {'family_index': 13, 'per_family_instance_index': 25, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/16/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 1.0, 'columns': 16.0, 'null_percent': 25.0}
           ArraySortIndicesInt64Wide/32768/10000    155.014 MiB/sec    166.465 MiB/sec     7.387                                       {'family_index': 1, 'per_family_instance_index': 0, 'run_name': 'ArraySortIndicesInt64Wide/32768/10000', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3500, 'null_percent': 0.01}
               ArraySortIndicesInt64Wide/32768/0    156.113 MiB/sec    167.631 MiB/sec     7.378                                            {'family_index': 1, 'per_family_instance_index': 5, 'run_name': 'ArraySortIndicesInt64Wide/32768/0', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3516, 'null_percent': 0.0}
         TableSortIndicesInt64Wide/1048576/4/2/4   8.003M items/sec   8.591M items/sec     7.351       {'family_index': 13, 'per_family_instance_index': 19, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 25.0}
       TableSortIndicesInt64Narrow/1048576/4/1/4  44.920M items/sec  48.218M items/sec     7.343    {'family_index': 12, 'per_family_instance_index': 22, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/1/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 30, 'chunks': 4.0, 'columns': 1.0, 'null_percent': 25.0}
              ArraySortIndicesInt64Wide/32768/10    160.956 MiB/sec    172.698 MiB/sec     7.296                                          {'family_index': 1, 'per_family_instance_index': 2, 'run_name': 'ArraySortIndicesInt64Wide/32768/10', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3593, 'null_percent': 10.0}
         TableSortIndicesInt64Wide/1048576/4/8/4   6.929M items/sec   7.419M items/sec     7.078       {'family_index': 13, 'per_family_instance_index': 16, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 25.0}
          ArraySortIndicesStringWide/1048576/100    236.706 MiB/sec    253.442 MiB/sec     7.070                                        {'family_index': 5, 'per_family_instance_index': 6, 'run_name': 'ArraySortIndicesStringWide/1048576/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 167, 'null_percent': 1.0}
     TableSortIndicesInt64Narrow/1048576/100/8/1   5.254M items/sec   5.625M items/sec     7.050    {'family_index': 12, 'per_family_instance_index': 27, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/8/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 1.0, 'columns': 8.0, 'null_percent': 1.0}
    TableSortIndicesInt64Narrow/1048576/100/8/32   2.279M items/sec   2.438M items/sec     6.997   {'family_index': 12, 'per_family_instance_index': 3, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 1.0}
   TableSortIndicesInt64Narrow/1048576/100/16/32   2.318M items/sec   2.477M items/sec     6.887 {'family_index': 12, 'per_family_instance_index': 0, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/16/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 32.0, 'columns': 16.0, 'null_percent': 1.0}
       TableSortIndicesInt64Narrow/1048576/4/2/4   7.105M items/sec   7.582M items/sec     6.708     {'family_index': 12, 'per_family_instance_index': 19, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 25.0}
      TableSortIndicesInt64Narrow/1048576/4/1/32  24.292M items/sec  25.907M items/sec     6.647  {'family_index': 12, 'per_family_instance_index': 10, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/1/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 16, 'chunks': 32.0, 'columns': 1.0, 'null_percent': 25.0}
     TableSortIndicesInt64Narrow/1048576/100/8/4   3.408M items/sec   3.628M items/sec     6.455    {'family_index': 12, 'per_family_instance_index': 15, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 1.0}
           ArraySortIndicesInt64Wide/1048576/100    101.067 MiB/sec    107.588 MiB/sec     6.452                                          {'family_index': 1, 'per_family_instance_index': 6, 'run_name': 'ArraySortIndicesInt64Wide/1048576/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 70, 'null_percent': 1.0}
          ArraySortIndicesStringWideDict/32768/2   1014.768 MiB/sec      1.054 GiB/sec     6.390                                     {'family_index': 6, 'per_family_instance_index': 3, 'run_name': 'ArraySortIndicesStringWideDict/32768/2', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 22229, 'null_percent': 50.0}
    TableSortIndicesInt64Narrow/1048576/100/16/4   3.417M items/sec   3.633M items/sec     6.340  {'family_index': 12, 'per_family_instance_index': 12, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/16/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 4.0, 'columns': 16.0, 'null_percent': 1.0}
         TableSortIndicesInt64Wide/1048576/0/2/1  10.260M items/sec  10.900M items/sec     6.245        {'family_index': 13, 'per_family_instance_index': 32, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 0.0}
               ArraySortIndicesInt64Wide/32768/2    261.470 MiB/sec    277.785 MiB/sec     6.240                                           {'family_index': 1, 'per_family_instance_index': 3, 'run_name': 'ArraySortIndicesInt64Wide/32768/2', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5788, 'null_percent': 50.0}
      TableSortIndicesInt64Narrow/1048576/0/2/32   4.336M items/sec   4.599M items/sec     6.064     {'family_index': 12, 'per_family_instance_index': 8, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/2/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3, 'chunks': 32.0, 'columns': 2.0, 'null_percent': 0.0}
     TableSortIndicesInt64Narrow/1048576/100/2/1   8.101M items/sec   8.591M items/sec     6.059    {'family_index': 12, 'per_family_instance_index': 30, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 1.0}
     TableSortIndicesInt64Narrow/1048576/100/2/4   5.879M items/sec   6.228M items/sec     5.920    {'family_index': 12, 'per_family_instance_index': 18, 'run_name': 'TableSortIndicesInt64Narrow/1048576/100/2/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 4.0, 'columns': 2.0, 'null_percent': 1.0}
       TableSortIndicesInt64Wide/1048576/100/1/1  10.529M items/sec  11.145M items/sec     5.847      {'family_index': 13, 'per_family_instance_index': 33, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/1/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 1.0, 'null_percent': 1.0}
       TableSortIndicesInt64Narrow/1048576/0/2/1   8.147M items/sec   8.617M items/sec     5.763      {'family_index': 12, 'per_family_instance_index': 32, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 0.0}
        TableSortIndicesInt64Wide/1048576/4/8/32   4.099M items/sec   4.335M items/sec     5.755      {'family_index': 13, 'per_family_instance_index': 4, 'run_name': 'TableSortIndicesInt64Wide/1048576/4/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 25.0}
           ArraySortIndicesInt64Wide/8388608/100     79.176 MiB/sec     83.635 MiB/sec     5.633                                           {'family_index': 1, 'per_family_instance_index': 7, 'run_name': 'ArraySortIndicesInt64Wide/8388608/100', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'null_percent': 1.0}
        TableSortIndicesInt64Wide/1048576/0/16/4   8.668M items/sec   9.153M items/sec     5.601      {'family_index': 13, 'per_family_instance_index': 14, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/16/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 16.0, 'null_percent': 0.0}
        ChunkedArraySortIndicesInt64Wide/32768/0     86.231 MiB/sec     91.017 MiB/sec     5.550                                     {'family_index': 8, 'per_family_instance_index': 5, 'run_name': 'ChunkedArraySortIndicesInt64Wide/32768/0', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 1930, 'null_percent': 0.0}
       TableSortIndicesInt64Narrow/1048576/4/2/1  10.456M items/sec  11.035M items/sec     5.538     {'family_index': 12, 'per_family_instance_index': 31, 'run_name': 'TableSortIndicesInt64Narrow/1048576/4/2/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 2.0, 'null_percent': 25.0}
      TableSortIndicesInt64Wide/1048576/100/16/4   8.427M items/sec   8.883M items/sec     5.418    {'family_index': 13, 'per_family_instance_index': 12, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/16/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6, 'chunks': 4.0, 'columns': 16.0, 'null_percent': 1.0}
       TableSortIndicesInt64Narrow/1048576/0/8/4   3.733M items/sec   3.929M items/sec     5.249      {'family_index': 12, 'per_family_instance_index': 17, 'run_name': 'TableSortIndicesInt64Narrow/1048576/0/8/4', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'chunks': 4.0, 'columns': 8.0, 'null_percent': 0.0}
        TableSortIndicesInt64Wide/1048576/0/8/32   5.593M items/sec   5.885M items/sec     5.220       {'family_index': 13, 'per_family_instance_index': 5, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/8/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 32.0, 'columns': 8.0, 'null_percent': 0.0}
      TableSortIndicesInt64Wide/1048576/100/2/32   5.413M items/sec   5.693M items/sec     5.171     {'family_index': 13, 'per_family_instance_index': 6, 'run_name': 'TableSortIndicesInt64Wide/1048576/100/2/32', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4, 'chunks': 32.0, 'columns': 2.0, 'null_percent': 1.0}
RecordBatchSortIndicesInt64Narrow/1048576/100/16   2.504M items/sec   2.631M items/sec     5.057              {'family_index': 10, 'per_family_instance_index': 0, 'run_name': 'RecordBatchSortIndicesInt64Narrow/1048576/100/16', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 2, 'columns': 16.0, 'null_percent': 1.0}
         TableSortIndicesInt64Wide/1048576/0/1/1  10.604M items/sec  11.136M items/sec     5.013        {'family_index': 13, 'per_family_instance_index': 35, 'run_name': 'TableSortIndicesInt64Wide/1048576/0/1/1', 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7, 'chunks': 1.0, 'columns': 1.0, 'null_percent': 0.0}
```

### Are these changes tested?

Yes, by existing tests.

### Are there any user-facing changes?

No. The affected members (such as `raw_value_offsets_` and `raw_value_sizes_`) are protected fields, and users are not supposed to subclass the standard array classes.

* GitHub Issue: #44194

Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
pitrou authored Sep 23, 2024
1 parent 0f7b5e5 commit c0fcd44
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 66 deletions.
8 changes: 2 additions & 6 deletions cpp/src/arrow/array/array_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,8 @@ FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<DataType>& type
int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset),
byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}

const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const {
return raw_values_ + (i + data_->offset) * byte_width_;
int64_t null_count, int64_t offset) {
SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset));
}

} // namespace arrow
26 changes: 9 additions & 17 deletions cpp/src/arrow/array/array_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ class BaseBinaryArray : public FlatArray {
/// Return the pointer to the given element's bytes
// XXX should GetValue(int64_t i) return a string_view?
const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
// Account for base offset
i += data_->offset;
const offset_type pos = raw_value_offsets_[i];
*out_length = raw_value_offsets_[i + 1] - pos;
return raw_data_ + pos;
Expand All @@ -69,8 +67,6 @@ class BaseBinaryArray : public FlatArray {
/// \param i the value index
/// \return the view over the selected value
std::string_view GetView(int64_t i) const {
// Account for base offset
i += data_->offset;
const offset_type pos = raw_value_offsets_[i];
return std::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
raw_value_offsets_[i + 1] - pos);
Expand Down Expand Up @@ -99,25 +95,20 @@ class BaseBinaryArray : public FlatArray {
/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }

const offset_type* raw_value_offsets() const {
return raw_value_offsets_ + data_->offset;
}
const offset_type* raw_value_offsets() const { return raw_value_offsets_; }

const uint8_t* raw_data() const { return raw_data_; }

/// \brief Return the data buffer absolute offset of the data for the value
/// at the passed index.
///
/// Does not perform boundschecking
offset_type value_offset(int64_t i) const {
return raw_value_offsets_[i + data_->offset];
}
offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; }

/// \brief Return the length of the data for the value at the passed index.
///
/// Does not perform boundschecking
offset_type value_length(int64_t i) const {
i += data_->offset;
return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
}

Expand All @@ -126,8 +117,7 @@ class BaseBinaryArray : public FlatArray {
/// less than the size of the data buffer (data_->buffers[2]).
offset_type total_values_length() const {
if (data_->length > 0) {
return raw_value_offsets_[data_->length + data_->offset] -
raw_value_offsets_[data_->offset];
return raw_value_offsets_[data_->length] - raw_value_offsets_[0];
} else {
return 0;
}
Expand All @@ -144,7 +134,7 @@ class BaseBinaryArray : public FlatArray {
// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1);
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
}

Expand Down Expand Up @@ -293,11 +283,11 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

const uint8_t* GetValue(int64_t i) const;
const uint8_t* GetValue(int64_t i) const { return values_ + i * byte_width_; }
const uint8_t* Value(int64_t i) const { return GetValue(i); }

std::string_view GetView(int64_t i) const {
return std::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width());
return std::string_view(reinterpret_cast<const char*>(GetValue(i)), byte_width_);
}

std::optional<std::string_view> operator[](int64_t i) const {
Expand All @@ -308,7 +298,7 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {

int32_t byte_width() const { return byte_width_; }

const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; }
const uint8_t* raw_values() const { return values_; }

IteratorType begin() const { return IteratorType(*this); }

Expand All @@ -319,8 +309,10 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
this->PrimitiveArray::SetData(data);
byte_width_ =
internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
values_ = raw_values_ + data_->offset * byte_width_;
}

const uint8_t* values_;
int32_t byte_width_;
};

Expand Down
11 changes: 5 additions & 6 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,7 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->Array::SetData(data);

self->list_type_ = checked_cast<const TYPE*>(data->type.get());
self->raw_value_offsets_ =
data->GetValuesSafe<typename TYPE::offset_type>(1, /*offset=*/0);
self->raw_value_offsets_ = data->GetValuesSafe<typename TYPE::offset_type>(1);
// BaseListViewArray::SetData takes care of setting raw_value_sizes_.

ARROW_CHECK_EQ(self->list_type_->value_type()->id(), data->child_data[0]->type->id());
Expand Down Expand Up @@ -654,7 +653,7 @@ ListViewArray::ListViewArray(std::shared_ptr<DataType> type, int64_t length,

void ListViewArray::SetData(const std::shared_ptr<ArrayData>& data) {
internal::SetListData(this, data);
raw_value_sizes_ = data->GetValuesSafe<ListViewType::offset_type>(2, /*offset=*/0);
raw_value_sizes_ = data->GetValuesSafe<ListViewType::offset_type>(2);
}

Result<std::shared_ptr<ListViewArray>> ListViewArray::FromArrays(
Expand Down Expand Up @@ -729,7 +728,7 @@ LargeListViewArray::LargeListViewArray(std::shared_ptr<DataType> type, int64_t l

void LargeListViewArray::SetData(const std::shared_ptr<ArrayData>& data) {
internal::SetListData(this, data);
raw_value_sizes_ = data->GetValuesSafe<LargeListViewType::offset_type>(2, /*offset=*/0);
raw_value_sizes_ = data->GetValuesSafe<LargeListViewType::offset_type>(2);
}

Result<std::shared_ptr<LargeListViewArray>> LargeListViewArray::FromArrays(
Expand Down Expand Up @@ -1184,7 +1183,7 @@ void UnionArray::SetData(std::shared_ptr<ArrayData> data) {
union_type_ = checked_cast<const UnionType*>(data_->type.get());

ARROW_CHECK_GE(data_->buffers.size(), 2);
raw_type_codes_ = data->GetValuesSafe<int8_t>(1, /*offset=*/0);
raw_type_codes_ = data->GetValuesSafe<int8_t>(1);
boxed_fields_.resize(data_->child_data.size());
}

Expand All @@ -1206,7 +1205,7 @@ void DenseUnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
// No validity bitmap
ARROW_CHECK_EQ(data_->buffers[0], nullptr);

raw_value_offsets_ = data->GetValuesSafe<int32_t>(2, /*offset=*/0);
raw_value_offsets_ = data->GetValuesSafe<int32_t>(2);
}

SparseUnionArray::SparseUnionArray(std::shared_ptr<ArrayData> data) {
Expand Down
29 changes: 9 additions & 20 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,11 @@ class VarLengthListLikeArray : public Array {
const std::shared_ptr<DataType>& value_type() const { return list_type_->value_type(); }

/// Return pointer to raw value offsets accounting for any slice offset
const offset_type* raw_value_offsets() const {
return raw_value_offsets_ + data_->offset;
}
const offset_type* raw_value_offsets() const { return raw_value_offsets_; }

// The following functions will not perform boundschecking

offset_type value_offset(int64_t i) const {
return raw_value_offsets_[i + data_->offset];
}
offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; }

/// \brief Return the size of the value at a particular index
///
Expand Down Expand Up @@ -154,7 +150,6 @@ class BaseListArray : public VarLengthListLikeArray<TYPE> {
///
/// \pre IsValid(i)
offset_type value_length(int64_t i) const final {
i += this->data_->offset;
return this->raw_value_offsets_[i + 1] - this->raw_value_offsets_[i];
}
};
Expand Down Expand Up @@ -302,9 +297,7 @@ class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
const std::shared_ptr<Buffer>& value_sizes() const { return this->data_->buffers[2]; }

/// \brief Return pointer to raw value sizes accounting for any slice offset
const offset_type* raw_value_sizes() const {
return raw_value_sizes_ + this->data_->offset;
}
const offset_type* raw_value_sizes() const { return raw_value_sizes_; }

/// \brief Return the size of the value at a particular index
///
Expand All @@ -313,9 +306,7 @@ class BaseListViewArray : public VarLengthListLikeArray<TYPE> {
/// length of the child values array.
///
/// \pre IsValid(i)
offset_type value_length(int64_t i) const final {
return this->raw_value_sizes_[i + this->data_->offset];
}
offset_type value_length(int64_t i) const final { return this->raw_value_sizes_[i]; }

protected:
const offset_type* raw_value_sizes_ = NULLPTR;
Expand Down Expand Up @@ -744,15 +735,13 @@ class ARROW_EXPORT UnionArray : public Array {
/// Note that this buffer does not account for any slice offset
const std::shared_ptr<Buffer>& type_codes() const { return data_->buffers[1]; }

const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }
const type_code_t* raw_type_codes() const { return raw_type_codes_; }

/// The logical type code of the value at index.
type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; }
type_code_t type_code(int64_t i) const { return raw_type_codes_[i]; }

/// The physical child id containing value at index.
int child_id(int64_t i) const {
return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
}
int child_id(int64_t i) const { return union_type_->child_ids()[raw_type_codes_[i]]; }

const UnionType* union_type() const { return union_type_; }

Expand Down Expand Up @@ -883,9 +872,9 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray {
/// Note that this buffer does not account for any slice offset
const std::shared_ptr<Buffer>& value_offsets() const { return data_->buffers[2]; }

int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
int32_t value_offset(int64_t i) const { return raw_value_offsets_[i]; }

const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
const int32_t* raw_value_offsets() const { return raw_value_offsets_; }

protected:
const int32_t* raw_value_offsets_;
Expand Down
18 changes: 10 additions & 8 deletions cpp/src/arrow/array/array_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,16 @@ DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr<DataType>& type
int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}
int64_t null_count, int64_t offset) {
SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset));
}

DayTimeIntervalArray::DayTimeIntervalArray(int64_t length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(day_time_interval(), length, data, null_bitmap, null_count, offset) {
}
: DayTimeIntervalArray(day_time_interval(), length, data, null_bitmap, null_count,
offset) {}

DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const {
DCHECK(i < length());
Expand All @@ -105,14 +106,15 @@ MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data, const std::shared_ptr<Buffer>& null_bitmap,
int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {}
int64_t null_count, int64_t offset) {
SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset));
}

MonthDayNanoIntervalArray::MonthDayNanoIntervalArray(
int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count, int64_t offset)
: PrimitiveArray(month_day_nano_interval(), length, data, null_bitmap, null_count,
offset) {}
: MonthDayNanoIntervalArray(month_day_nano_interval(), length, data, null_bitmap,
null_count, offset) {}

MonthDayNanoIntervalType::MonthDayNanos MonthDayNanoIntervalArray::GetValue(
int64_t i) const {
Expand Down
26 changes: 17 additions & 9 deletions cpp/src/arrow/array/array_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,26 +90,25 @@ class NumericArray : public PrimitiveArray {
using value_type = typename TypeClass::c_type;
using IteratorType = stl::ArrayIterator<NumericArray<TYPE>>;

explicit NumericArray(const std::shared_ptr<ArrayData>& data) : PrimitiveArray(data) {}
explicit NumericArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }

// Only enable this constructor without a type argument for types without additional
// metadata
template <typename T1 = TYPE>
NumericArray(enable_if_parameter_free<T1, int64_t> length,
const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0)
: PrimitiveArray(TypeTraits<T1>::type_singleton(), length, data, null_bitmap,
null_count, offset) {}

const value_type* raw_values() const {
return reinterpret_cast<const value_type*>(raw_values_) + data_->offset;
int64_t null_count = kUnknownNullCount, int64_t offset = 0) {
SetData(ArrayData::Make(TypeTraits<T1>::type_singleton(), length, {null_bitmap, data},
null_count, offset));
}

value_type Value(int64_t i) const { return raw_values()[i]; }
const value_type* raw_values() const { return values_; }

value_type Value(int64_t i) const { return values_[i]; }

// For API compatibility with BinaryArray etc.
value_type GetView(int64_t i) const { return Value(i); }
value_type GetView(int64_t i) const { return values_[i]; }

std::optional<value_type> operator[](int64_t i) const {
return *IteratorType(*this, i);
Expand All @@ -121,6 +120,15 @@ class NumericArray : public PrimitiveArray {

protected:
using PrimitiveArray::PrimitiveArray;

void SetData(const std::shared_ptr<ArrayData>& data) {
this->PrimitiveArray::SetData(data);
values_ = raw_values_
? (reinterpret_cast<const value_type*>(raw_values_) + data_->offset)
: NULLPTR;
}

const value_type* values_;
};

/// DayTimeArray
Expand Down

0 comments on commit c0fcd44

Please sign in to comment.