Skip to content

Commit

Permalink
Merge pull request #1319 from NvTimLiu/release-tmp
Browse files Browse the repository at this point in the history
Merge branch 'branch-23.08' into main [skip ci]
  • Loading branch information
NvTimLiu authored Aug 10, 2023
2 parents 1ad41b4 + 2144d41 commit 73fcd5c
Show file tree
Hide file tree
Showing 59 changed files with 7,186 additions and 2,210 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/auto-merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ name: auto-merge HEAD to BASE
on:
pull_request_target:
branches:
- branch-23.06
- branch-23.08
types: [closed]

env:
HEAD: branch-23.06
BASE: branch-23.08
HEAD: branch-23.08
BASE: branch-23.10

jobs:
auto-merge:
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "thirdparty/cudf"]
path = thirdparty/cudf
url = https://github.com/rapidsai/cudf.git
branch = branch-23.06
branch = branch-23.08
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

repos:
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v14.0.6
rev: v16.0.1
hooks:
- id: clang-format
files: \.(cu|cuh|h|hpp|cpp|inl)$
Expand Down
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ Maven `package` goal can be used to build the RAPIDS Accelerator JNI jar. After
build the RAPIDS Accelerator JNI jar will be in the `spark-rapids-jni/target/` directory.
Be sure to select the jar with the CUDA classifier.

When building spark-rapids-jni, the pom.xml in the submodule thirdparty/cudf is completely
bypassed. For a detailed explanation please read
When building spark-rapids-jni, the pom.xml in the submodule thirdparty/cudf is completely
bypassed. For a detailed explanation please read
[this](https://github.com/NVIDIA/spark-rapids-jni/issues/1084#issuecomment-1513471739).

### Building in the Docker Container
Expand Down Expand Up @@ -148,7 +148,7 @@ $ ./build/build-in-docker install ...
```

Now cd to ~/repos/NVIDIA/spark-rapids and build with one of the options from
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-23.06/CONTRIBUTING.md#building-from-source).
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-23.08/CONTRIBUTING.md#building-from-source).

```bash
$ ./build/buildall
Expand Down
2 changes: 1 addition & 1 deletion ci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ RUN scl enable rh-python38 "pip install requests 'urllib3<2.0'"
RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids

# 3.22.3: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
ARG CMAKE_VERSION=3.23.3
ARG CMAKE_VERSION=3.26.4

RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
Expand Down
2 changes: 1 addition & 1 deletion ci/Jenkinsfile.premerge
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pipeline {

stash(name: "source_tree", includes: "**")

container('docker-build') {
container('cpu') {
// check if pre-merge dockerfile modified
def dockerfileModified = sh(returnStdout: true,
script: 'BASE=$(git --no-pager log --oneline -1 | awk \'{ print $NF }\'); ' +
Expand Down
15 changes: 13 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

<groupId>com.nvidia</groupId>
<artifactId>spark-rapids-jni</artifactId>
<version>23.06.0</version>
<version>23.08.0</version>
<packaging>jar</packaging>
<name>RAPIDS Accelerator JNI for Apache Spark</name>
<description>
Expand Down Expand Up @@ -195,9 +195,20 @@
<excludes>
<exclude>**/CuFileTest.java</exclude>
<exclude>**/CudaFatalTest.java</exclude>
<exclude>**/ColumnViewNonEmptyNullsTest.java</exclude>
</excludes>
</configuration>
</execution>
<execution>
<id>non-empty-null-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<argLine>-da:ai.rapids.cudf.AssertEmptyNulls</argLine>
<test>ColumnViewNonEmptyNullsTest</test>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
Expand Down Expand Up @@ -259,7 +270,7 @@
<phase>test</phase>
<configuration>
<target>
<exec dir="${project.basedir}"
<exec dir="${project.basedir}"
failonerror="true"
executable="cmake">
<arg value="--build"/>
Expand Down
8 changes: 7 additions & 1 deletion src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI)

project(
SPARK_RAPIDS_JNI
VERSION 23.06.00
VERSION 23.08.00
LANGUAGES C CXX CUDA
)

Expand Down Expand Up @@ -146,19 +146,25 @@ set(CUDFJNI_INCLUDE_DIRS

add_library(
spark_rapids_jni SHARED
src/BloomFilterJni.cpp
src/CastStringJni.cpp
src/DecimalUtilsJni.cpp
src/HashJni.cpp
src/MapUtilsJni.cpp
src/NativeParquetJni.cpp
src/RowConversionJni.cpp
src/SparkResourceAdaptorJni.cpp
src/ZOrderJni.cpp
src/bloom_filter.cu
src/cast_decimal_to_string.cu
src/cast_string.cu
src/cast_string_to_float.cu
src/decimal_utils.cu
src/map_utils.cu
src/murmur_hash.cu
src/row_conversion.cu
src/utilities.cu
src/xxhash64.cu
src/zorder.cu
)

Expand Down
3 changes: 3 additions & 0 deletions src/main/cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@ ConfigureBench(ROW_CONVERSION_BENCH

ConfigureBench(STRING_TO_FLOAT_BENCH
cast_string_to_float.cpp)

ConfigureBench(BLOOM_FILTER_BENCH
bloom_filter.cu)
64 changes: 64 additions & 0 deletions src/main/cpp/benchmarks/bloom_filter.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <bloom_filter.hpp>
#include <hash.cuh>

#include <benchmarks/common/generate_input.hpp>

#include <nvbench/nvbench.cuh>

#include <cudf_test/column_utilities.hpp>

// Benchmark for spark_rapids_jni::bloom_filter_put: measures the throughput of
// inserting a large column of pre-hashed int64 values into a bloom filter of
// varying size (the "bloom_filter_bytes" axis below).
static void bloom_filter_put(nvbench::state& state)
{
// Fixed workload: 150M rows hashed with 3 hash functions per insert.
constexpr int num_rows = 150'000'000;
constexpr int num_hashes = 3;

// create the bloom filter sized from the benchmark axis
// (bytes are converted to a count of 64-bit words, the filter's storage unit)
cudf::size_type const bloom_filter_bytes = state.get_int64("bloom_filter_bytes");
cudf::size_type const bloom_filter_longs = bloom_filter_bytes / sizeof(int64_t);
auto bloom_filter = spark_rapids_jni::bloom_filter_create(num_hashes, bloom_filter_longs);

// create a column of hashed values: random non-null INT64s run through
// xxhash64 so the benchmark times only the filter insertion, not hashing
data_profile_builder builder;
builder.no_validity();
auto const src = create_random_table({{cudf::type_id::INT64}}, row_count{num_rows}, builder);
auto const input = spark_rapids_jni::xxhash64(*src);

// Run the put on the default stream; manual timer + explicit synchronize so
// only the device-side insertion work is measured.
auto const stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
[&](nvbench::launch& launch, auto& timer) {
timer.start();
spark_rapids_jni::bloom_filter_put(*bloom_filter, *input);
stream.synchronize();
timer.stop();
});

// Report derived throughput metrics. bytes_read is the input column of hashes;
// bytes_written assumes one bitmask-word write per hash per row — an upper
// bound model, not a measured value (TODO confirm against the kernel).
size_t const bytes_read = num_rows * sizeof(int64_t);
size_t const bytes_written = num_rows * sizeof(cudf::bitmask_type) * num_hashes;
auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
state.add_element_count(std::size_t{num_rows}, "Rows Inserted");
state.add_global_memory_reads(bytes_read, "Bytes read");
state.add_global_memory_writes(bytes_written, "Bytes written");
state.add_element_count(static_cast<double>(bytes_written) / time, "Write bytes/sec");
}

// Register the benchmark with a sweep of filter sizes from 512KiB to 8MiB.
NVBENCH_BENCH(bloom_filter_put)
.set_name("Bloom Filter Put")
.add_int64_axis("bloom_filter_bytes",
{512 * 1024, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024});
16 changes: 9 additions & 7 deletions src/main/cpp/benchmarks/cast_string_to_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,18 @@
void string_to_float(nvbench::state& state)
{
cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
auto const float_tbl = create_random_table({cudf::type_id::FLOAT32}, row_count{n_rows});
auto const float_col = float_tbl->get_column(0);
auto const float_tbl = create_random_table({cudf::type_id::FLOAT32}, row_count{n_rows});
auto const float_col = float_tbl->get_column(0);
auto const string_col = cudf::strings::from_floats(float_col.view());

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) {
auto rows = spark_rapids_jni::string_to_float(cudf::data_type{cudf::type_id::FLOAT32}, string_col->view(), false, cudf::get_default_stream());
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto rows = spark_rapids_jni::string_to_float(cudf::data_type{cudf::type_id::FLOAT32},
string_col->view(),
false,
cudf::get_default_stream());
});
}

NVBENCH_BENCH(string_to_float)
.set_name("Strings to Float Cast")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 100 * 1024 * 1024});
.set_name("Strings to Float Cast")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 100 * 1024 * 1024});
54 changes: 38 additions & 16 deletions src/main/cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,19 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
null_mask.begin());
}

auto [result_bitmask, null_count] = cudf::detail::valid_if(
null_mask.begin(), null_mask.end(), thrust::identity<bool>{}, cudf::get_default_stream());
auto [result_bitmask, null_count] =
cudf::detail::valid_if(null_mask.begin(),
null_mask.end(),
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());

return std::make_unique<cudf::column>(
cudf::data_type{cudf::type_to_id<T>()},
num_rows,
data.release(),
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{},
null_count);
}

struct valid_or_zero {
Expand Down Expand Up @@ -505,13 +510,19 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
num_rows,
string_generator{chars.data(), engine});
auto [result_bitmask, null_count] = cudf::detail::valid_if(
null_mask.begin(), null_mask.end() - 1, thrust::identity<bool>{}, cudf::get_default_stream());
auto [result_bitmask, null_count] =
cudf::detail::valid_if(null_mask.begin(),
null_mask.end() - 1,
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());

return cudf::make_strings_column(
num_rows,
std::move(offsets),
std::move(chars),
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{},
null_count);
}

/**
Expand Down Expand Up @@ -539,7 +550,8 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
sample_indices,
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
cudf::get_default_stream());
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
return std::move(str_table->release()[0]);
}

Expand Down Expand Up @@ -623,8 +635,11 @@ std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profi
auto [null_mask, null_count] = [&]() {
if (profile.get_null_frequency().has_value()) {
auto valids = valid_dist(engine, num_rows);
return cudf::detail::valid_if(
valids.begin(), valids.end(), thrust::identity<bool>{}, cudf::get_default_stream());
return cudf::detail::valid_if(valids.begin(),
valids.end(),
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
}
return std::pair<rmm::device_buffer, cudf::size_type>{};
}();
Expand Down Expand Up @@ -704,12 +719,18 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
thrust::device_pointer_cast(offsets.end())[-1] =
current_child_column->size(); // Always include all elements

auto offsets_column = std::make_unique<cudf::column>(
cudf::data_type{cudf::type_id::INT32}, num_rows + 1, offsets.release());

auto [null_mask, null_count] = cudf::detail::valid_if(
valids.begin(), valids.end(), thrust::identity<bool>{}, cudf::get_default_stream());
list_column = cudf::make_lists_column(
auto offsets_column = std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::INT32},
num_rows + 1,
offsets.release(),
rmm::device_buffer{},
0);

auto [null_mask, null_count] = cudf::detail::valid_if(valids.begin(),
valids.end(),
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
list_column = cudf::make_lists_column(
num_rows,
std::move(offsets_column),
std::move(current_child_column),
Expand Down Expand Up @@ -827,7 +848,8 @@ std::pair<rmm::device_buffer, cudf::size_type> create_random_null_mask(
return cudf::detail::valid_if(thrust::make_counting_iterator<cudf::size_type>(0),
thrust::make_counting_iterator<cudf::size_type>(size),
bool_generator{seed, 1.0 - *null_probability},
cudf::get_default_stream());
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
}
}

Expand Down
3 changes: 1 addition & 2 deletions src/main/cpp/benchmarks/common/generate_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,7 @@ struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::struct_vi

// Present for compilation only. To be implemented once reader/writers support the fixed width type.
template <typename T>
struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
};
struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {};

/**
* @brief Returns a vector of types, corresponding to the input type or a type group.
Expand Down
Loading

0 comments on commit 73fcd5c

Please sign in to comment.