Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-40431: [C++] Move key_hash/key_map/light_array related files to internal for prevent using by users #40484

Merged
merged 4 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -689,9 +689,9 @@ set(ARROW_COMPUTE_SRCS
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
compute/key_hash.cc
compute/key_map.cc
compute/light_array.cc
compute/key_hash_internal.cc
compute/key_map_internal.cc
compute/light_array_internal.cc
compute/ordering.cc
compute/registry.cc
compute/kernels/codegen_internal.cc
Expand All @@ -717,8 +717,8 @@ set(ARROW_COMPUTE_SRCS
compute/row/row_internal.cc
compute/util.cc)

append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/acero/asof_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
#include "arrow/compute/function_internal.h"
#endif
#include "arrow/acero/time_series_util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/status.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/bloom_filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "arrow/acero/task_util.h"
#include "arrow/acero/test_util_internal.h"
#include "arrow/acero/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/config.h"
#include "arrow/util/cpu_info.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/hash_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/schema_util.h"
#include "arrow/acero/util.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/future.h"
#include "arrow/util/thread_pool.h"
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/acero/hash_join_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <cassert>
#include <vector>

#include "arrow/acero/options.h"
Expand Down Expand Up @@ -88,7 +89,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
const Expression& filter);

bool PayloadIsEmpty(int side) {
ARROW_DCHECK(side == 0 || side == 1);
assert(side == 0 || side == 1);
return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
}

Expand Down
14 changes: 7 additions & 7 deletions cpp/src/arrow/acero/schema_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@

#pragma once

#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "arrow/compute/light_array.h" // for KeyColumnMetadata
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema

namespace arrow {

Expand All @@ -47,8 +47,8 @@ struct SchemaProjectionMap {
const int* source_to_base;
const int* base_to_target;
inline int get(int i) const {
ARROW_DCHECK(i >= 0 && i < num_cols);
ARROW_DCHECK(source_to_base[i] != kMissingField);
assert(i >= 0 && i < num_cols);
assert(source_to_base[i] != kMissingField);
return base_to_target[source_to_base[i]];
}
};
Expand All @@ -66,7 +66,7 @@ class SchemaProjectionMaps {
Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
const std::vector<ProjectionIdEnum>& projection_handles,
const std::vector<const std::vector<FieldRef>*>& projections) {
ARROW_DCHECK(projection_handles.size() == projections.size());
assert(projection_handles.size() == projections.size());
ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
for (size_t i = 0; i < projections.size(); ++i) {
ARROW_RETURN_NOT_OK(
Expand Down Expand Up @@ -174,7 +174,7 @@ class SchemaProjectionMaps {
}
}
// We should never get here
ARROW_DCHECK(false);
assert(false);
return -1;
}

Expand Down Expand Up @@ -207,7 +207,7 @@ class SchemaProjectionMaps {
break;
}
}
ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
assert(field_id != SchemaProjectionMap::kMissingField);
mapping[i] = field_id;
inverse_mapping[field_id] = i;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/swiss_join.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "arrow/acero/util.h"
#include "arrow/array/util.h" // MakeArrayFromScalar
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/util/bit_util.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/acero/swiss_join_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
#include "arrow/acero/schema_util.h"
#include "arrow/acero/task_util.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_map.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_map_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/encode_internal.h"

namespace arrow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"

#include <memory.h>

#include <algorithm>
#include <cstdint>

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/ubsan.h"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

#include <cstdint>

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/util.h"

namespace arrow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <immintrin.h>

#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/util/bit_util.h"

namespace arrow {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/key_hash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <unordered_set>

#include "arrow/array/builder_binary.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/util/cpu_info.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/key_map.h"
#include "arrow/compute/key_map_internal.h"

#include <algorithm>
#include <cstdint>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <immintrin.h>

#include "arrow/compute/key_map.h"
#include "arrow/compute/key_map_internal.h"
#include "arrow/util/logging.h"

namespace arrow {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"

#include <type_traits>

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/light_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"

#include <gtest/gtest.h>
#include <numeric>
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/row/compare_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <cstdint>

#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/compute/util.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/row/encode_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#include <vector>

#include "arrow/array/data.h"
#include "arrow/compute/key_map.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_map_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/compute/util.h"
#include "arrow/memory_pool.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/row/grouper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#include "arrow/compute/api_vector.h"
#include "arrow/compute/function.h"
#include "arrow/compute/kernels/row_encoder_internal.h"
#include "arrow/compute/key_hash.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/key_hash_internal.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/grouper_internal.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/row/row_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <vector>

#include "arrow/buffer.h"
#include "arrow/compute/light_array.h"
#include "arrow/compute/light_array_internal.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ using internal::CpuInfo;
namespace util {

void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
int64_t new_top = top_ + EstimatedAllocationSize(num_bytes);
// Stack overflow check (see GH-39582).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you move this comment to CheckAllocSizeValid()?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// XXX cannot return a regular Status because most consumers do not either.
ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow";
Expand All @@ -48,7 +48,7 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {

void TempVectorStack::release(int id, uint32_t num_bytes) {
ARROW_DCHECK(num_vectors_ == id + 1);
int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
int64_t size = EstimatedAllocationSize(num_bytes);
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
kGuard2);
ARROW_DCHECK(top_ >= size);
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/compute/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack {
Status Init(MemoryPool* pool, int64_t size) {
num_vectors_ = 0;
top_ = 0;
buffer_size_ = PaddedAllocationSize(size) + kPadding + 2 * sizeof(uint64_t);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have already added the kPadding in PaddedAllocationSize, it's unnecessary to add it again.

buffer_size_ = EstimatedAllocationSize(size);
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
// Ensure later operations don't accidentally read uninitialized memory.
std::memset(buffer->mutable_data(), 0xFF, size);
Expand All @@ -98,7 +98,11 @@ class ARROW_EXPORT TempVectorStack {
}

private:
int64_t PaddedAllocationSize(int64_t num_bytes) {
static int64_t EstimatedAllocationSize(int64_t size) {
return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
}

static int64_t PaddedAllocationSize(int64_t num_bytes) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can align all the function names to either XxxAllocSize or XxxAllocationSize?

// Round up allocation size to multiple of 8 bytes
// to avoid returning temp vectors with unaligned address.
//
Expand Down
Loading