Skip to content

Commit

Permalink
Remove dependency on forked libcudacxx (#4938)
Browse files Browse the repository at this point in the history
Move the memory_resource and stream_view headers to DALI codebase
and adjust the naming of files/namespaces so they no longer clash
with proper libcudacxx. 
The `_LIBCUDACXX_...` macros were replaced with `_DALI_...` macros and the files were autoformatted to pass DALI linter.
This should allow both DALI headers and libcudacxx to be safely
included in one code base.

Adjust the custom plugin to C++17 and regenerate the custom plugin example.
Add a workaround (WAR) for old CMake versions in CI that do not support this standard version for CUDA code.

Signed-off-by: Krzysztof Lecki <[email protected]>
  • Loading branch information
klecki authored Jul 7, 2023
1 parent afe1475 commit 9823dfd
Show file tree
Hide file tree
Showing 15 changed files with 1,191 additions and 94 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,3 @@
[submodule "third_party/cocoapi"]
path = third_party/cocoapi
url = https://github.com/cocodataset/cocoapi
[submodule "third_party/libcudacxx"]
path = third_party/libcudacxx
url = https://github.com/mzient/libcudacxx.git
7 changes: 1 addition & 6 deletions cmake/Dependencies.common.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -242,11 +242,6 @@ set_target_properties(cocoapi PROPERTIES POSITION_INDEPENDENT_CODE ON)
list(APPEND DALI_LIBS cocoapi)
list(APPEND DALI_EXCLUDES libcocoapi.a)

##################################################################
# libcu++
##################################################################
include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/libcudacxx/include)

##################################################################
# cfitsio
##################################################################
Expand Down
8 changes: 1 addition & 7 deletions dali/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -181,12 +181,6 @@ if (BUILD_PYTHON)
COMMAND cp -r "${PROJECT_SOURCE_DIR}/include/." "${PROJECT_BINARY_DIR}/${DALI_INCLUDE_DIR}"
)

# Copy libcu++ include files
add_custom_command(
TARGET install_headers
COMMAND cp -rL "${PROJECT_SOURCE_DIR}/third_party/libcudacxx/include/." "${PROJECT_BINARY_DIR}/${DALI_INCLUDE_DIR}/"
)

# Copy boost/preprocessor include files
add_custom_command(
TARGET install_headers
Expand Down
4 changes: 2 additions & 2 deletions dali/kernels/imgproc/resample/resampling_filters.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,7 +73,7 @@ void InitFilters(ResamplingFilters &filters) {
const int total_size = triangular_size + gaussian_size + cubic_size + lanczos_size;

constexpr bool need_staging =
!cuda::kind_has_property<MemoryKind, cuda::memory_access::host>::value;
!cuda_for_dali::kind_has_property<MemoryKind, cuda_for_dali::memory_access::host>::value;

using tmp_kind = std::conditional_t<need_staging, mm::memory_kind::host, MemoryKind>;
filters.filter_data = mm::alloc_raw_unique<float, tmp_kind>(total_size);
Expand Down
6 changes: 3 additions & 3 deletions dali/kernels/test/scatter_gather_test.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,7 +73,7 @@ class ScatterGatherTest : public testing::Test {

template <typename MemoryKind>
void Memcpy(void *dst, const void *src, size_t size, cudaMemcpyKind kind) {
if (cuda::kind_has_property<MemoryKind, cuda::memory_access::host>::value) {
if (cuda_for_dali::kind_has_property<MemoryKind, cuda_for_dali::memory_access::host>::value) {
memcpy(dst, src, size);
} else {
CUDA_CALL(cudaMemcpy(dst, src, size, kind));
Expand All @@ -82,7 +82,7 @@ class ScatterGatherTest : public testing::Test {

template <typename MemoryKind>
void Memset(void *dst, int c, size_t size) {
if (cuda::kind_has_property<MemoryKind, cuda::memory_access::host>::value) {
if (cuda_for_dali::kind_has_property<MemoryKind, cuda_for_dali::memory_access::host>::value) {
memset(dst, c, size);
} else {
CUDA_CALL(cudaMemset(dst, c, size));
Expand Down
21 changes: 11 additions & 10 deletions dali/pipeline/data/copy_to_external.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -135,22 +135,22 @@ inline void CopyToExternalImpl(void** dsts,
}

template <typename DstKind, typename SrcBackend>
inline void CopyToExternal(void* dst, const Tensor<SrcBackend> &src,
AccessOrder order, bool use_copy_kernel) {
inline void CopyToExternal(void *dst, const Tensor<SrcBackend> &src, AccessOrder order,
bool use_copy_kernel) {
const bool src_device_access = (std::is_same<SrcBackend, GPUBackend>::value || src.is_pinned());
const bool dst_device_access = cuda::kind_has_property<DstKind,
cuda::memory_access::device>::value;
const bool dst_device_access =
cuda_for_dali::kind_has_property<DstKind, cuda_for_dali::memory_access::device>::value;
use_copy_kernel &= dst_device_access && src_device_access;
using DstBackend = typename detail::kind2backend<DstKind>::type;
CopyToExternalImpl<DstBackend, SrcBackend>(dst, src, order, use_copy_kernel);
}

template <typename DstKind, typename SrcBackend>
inline void CopyToExternal(void* dst, const TensorList<SrcBackend> &src,
AccessOrder order, bool use_copy_kernel) {
inline void CopyToExternal(void *dst, const TensorList<SrcBackend> &src, AccessOrder order,
bool use_copy_kernel) {
const bool src_device_access = (std::is_same<SrcBackend, GPUBackend>::value || src.is_pinned());
const bool dst_device_access = cuda::kind_has_property<DstKind,
cuda::memory_access::device>::value;
const bool dst_device_access =
cuda_for_dali::kind_has_property<DstKind, cuda_for_dali::memory_access::device>::value;
use_copy_kernel &= dst_device_access && src_device_access;
using DstBackend = typename detail::kind2backend<DstKind>::type;
CopyToExternalImpl<DstBackend, SrcBackend>(dst, src, order, use_copy_kernel);
Expand Down Expand Up @@ -185,7 +185,8 @@ template <typename DstKind, typename SrcBackend>
inline void CopyToExternal(void** dsts, const TensorList<SrcBackend> &src,
AccessOrder order, bool use_copy_kernel) {
bool src_device_access = (std::is_same<SrcBackend, GPUBackend>::value || src.is_pinned());
bool dst_device_access = cuda::kind_has_property<DstKind, cuda::memory_access::device>::value;
bool dst_device_access =
cuda_for_dali::kind_has_property<DstKind, cuda_for_dali::memory_access::device>::value;
use_copy_kernel &= dst_device_access && src_device_access;
using DstBackend = typename detail::kind2backend<DstKind>::type;
CopyToExternalImpl<DstBackend, SrcBackend>(dsts, src, order, use_copy_kernel);
Expand Down
7 changes: 4 additions & 3 deletions dali/test/mat2tensor.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -71,8 +71,9 @@ TensorView<StorageCPU, T, ndim> view_as_tensor(cv::Mat &mat) {
template <typename MemoryKind = mm::memory_kind::device, typename T = uint8_t, int ndims = 3>
std::pair<TensorView<kind2storage_t<MemoryKind>, T, ndims>, mm::uptr<T>>
copy_as_tensor(const cv::Mat &mat) {
static_assert(cuda::kind_has_property<MemoryKind, cuda::memory_access::device>::value,
"A GPU-accessible memory kind is required.");
static_assert(
cuda_for_dali::kind_has_property<MemoryKind, cuda_for_dali::memory_access::device>::value,
"A GPU-accessible memory kind is required.");
auto tvin = kernels::view_as_tensor<const T, ndims>(mat);
return copy<MemoryKind>(tvin);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/nvidia/dali/include\n"
"/usr/local/lib/python3.10/dist-packages/nvidia/dali/include\n"
]
}
],
Expand All @@ -253,7 +253,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/nvidia/dali\n"
"/usr/local/lib/python3.10/dist-packages/nvidia/dali\n"
]
}
],
Expand All @@ -270,7 +270,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"['-I/usr/local/lib/python3.6/dist-packages/nvidia/dali/include', '-D_GLIBCXX_USE_CXX11_ABI=1']\n"
"['-I/usr/local/lib/python3.10/dist-packages/nvidia/dali/include', '-D_GLIBCXX_USE_CXX11_ABI=1']\n"
]
}
],
Expand All @@ -287,7 +287,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"['-L/usr/local/lib/python3.6/dist-packages/nvidia/dali', '-ldali']\n"
"['-L/usr/local/lib/python3.10/dist-packages/nvidia/dali', '-ldali']\n"
]
}
],
Expand Down Expand Up @@ -315,7 +315,7 @@
"output_type": "stream",
"text": [
"cmake_minimum_required(VERSION 3.10)\r\n",
"set(CMAKE_CUDA_ARCHITECTURES \"35;50;52;60;61;70;75;80;86\")\r\n",
"set(CMAKE_CUDA_ARCHITECTURES \"50;60;70;80;90\")\r\n",
"\r\n",
"project(custom_dummy_plugin LANGUAGES CUDA CXX C)\r\n",
"\r\n",
Expand All @@ -324,7 +324,7 @@
"set(CMAKE_CXX_EXTENSIONS OFF)\r\n",
"set(CMAKE_C_STANDARD 11)\r\n",
"\r\n",
"set(CMAKE_CUDA_STANDARD 14)\r\n",
"set(CMAKE_CUDA_STANDARD 17)\r\n",
"set(CMAKE_CUDA_STANDARD_REQUIRED ON)\r\n",
"\r\n",
"include_directories(SYSTEM \"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}\")\r\n",
Expand Down Expand Up @@ -368,31 +368,29 @@
"name": "stdout",
"output_type": "stream",
"text": [
"-- The CUDA compiler identification is NVIDIA 11.4.48\n",
"-- The CXX compiler identification is GNU 7.5.0\n",
"-- The C compiler identification is GNU 7.5.0\n",
"-- The CUDA compiler identification is NVIDIA 12.0.76\n",
"-- The CXX compiler identification is GNU 11.3.0\n",
"-- The C compiler identification is GNU 11.3.0\n",
"-- Detecting CUDA compiler ABI info\n",
"-- Detecting CUDA compiler ABI info - done\n",
"-- Check for working CUDA compiler: /opt/ccache/bin/nvcc - skipped\n",
"-- Check for working CUDA compiler: /usr/local/cuda/bin/nvcc - skipped\n",
"-- Detecting CUDA compile features\n",
"-- Detecting CUDA compile features - done\n",
"-- Detecting CXX compiler ABI info\n",
"-- Detecting CXX compiler ABI info - done\n",
"-- Check for working CXX compiler: /opt/ccache/bin/g++ - skipped\n",
"-- Check for working CXX compiler: /usr/bin/c++ - skipped\n",
"-- Detecting CXX compile features\n",
"-- Detecting CXX compile features - done\n",
"-- Detecting C compiler ABI info\n",
"-- Detecting C compiler ABI info - done\n",
"-- Check for working C compiler: /opt/ccache/bin/gcc - skipped\n",
"-- Check for working C compiler: /usr/bin/cc - skipped\n",
"-- Detecting C compile features\n",
"-- Detecting C compile features - done\n",
"-- Configuring done\n",
"-- Generating done\n",
"-- Configuring done (4.4s)\n",
"-- Generating done (0.0s)\n",
"-- Build files have been written to: /dali/docs/examples/custom_operations/custom_operator/customdummy/build\n",
"\u001b[35m\u001b[1mScanning dependencies of target customdummy\u001b[0m\n",
"[ 33%] \u001b[32mBuilding CXX object CMakeFiles/customdummy.dir/dummy.cc.o\u001b[0m\n",
"[ 66%] \u001b[32mBuilding CUDA object CMakeFiles/customdummy.dir/dummy.cu.o\u001b[0m\n",
"nvcc warning : The 'compute_35', 'compute_37', 'compute_50', 'sm_35', 'sm_37' and 'sm_50' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).\n",
"[100%] \u001b[32m\u001b[1mLinking CXX shared library libcustomdummy.so\u001b[0m\n",
"[100%] Built target customdummy\n"
]
Expand Down Expand Up @@ -513,15 +511,15 @@
" \n",
" Keyword args\n",
" ------------\n",
" `bytes_per_sample_hint` : int or list of int, optional, default = [0]\n",
" `bytes_per_sample_hint` : int or list of int, optional, default = `[0]`\n",
" Output size hint, in bytes per sample.\n",
" \n",
" If specified, the operator's outputs residing in GPU or page-locked host memory will be preallocated\n",
" to accommodate a batch of samples of this size.\n",
" `preserve` : bool, optional, default = False\n",
" `preserve` : bool, optional, default = `False`\n",
" Prevents the operator from being removed from the\n",
" graph even if its outputs are not used.\n",
" `seed` : int, optional, default = -1\n",
" `seed` : int, optional, default = `-1`\n",
" Random seed.\n",
" \n",
" If not provided, it will be populated based on the global seed of the pipeline.\n",
Expand Down Expand Up @@ -552,6 +550,8 @@
"Help on class CustomDummy in module nvidia.dali.ops:\n",
"\n",
"class CustomDummy(builtins.object)\n",
" | CustomDummy(*, device='cpu', **kwargs)\n",
" | \n",
" | Make a copy of the input tensor\n",
" | \n",
" | Supported backends\n",
Expand All @@ -561,15 +561,15 @@
" | \n",
" | Keyword args\n",
" | ------------\n",
" | `bytes_per_sample_hint` : int or list of int, optional, default = [0]\n",
" | `bytes_per_sample_hint` : int or list of int, optional, default = `[0]`\n",
" | Output size hint, in bytes per sample.\n",
" | \n",
" | If specified, the operator's outputs residing in GPU or page-locked host memory will be preallocated\n",
" | to accommodate a batch of samples of this size.\n",
" | `preserve` : bool, optional, default = False\n",
" | `preserve` : bool, optional, default = `False`\n",
" | Prevents the operator from being removed from the\n",
" | graph even if its outputs are not used.\n",
" | `seed` : int, optional, default = -1\n",
" | `seed` : int, optional, default = `-1`\n",
" | Random seed.\n",
" | \n",
" | If not provided, it will be populated based on the global seed of the pipeline.\n",
Expand All @@ -586,16 +586,10 @@
" | `data` : TensorList\n",
" | Input to the operator.\n",
" | \n",
" | __init__(self, **kwargs)\n",
" | __init__(self, *, device='cpu', **kwargs)\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors defined here:\n",
" | \n",
" | __dict__\n",
" | dictionary for instance variables (if defined)\n",
" | \n",
" | __weakref__\n",
" | list of weak references to the object (if defined)\n",
" | Readonly properties defined here:\n",
" | \n",
" | device\n",
" | \n",
Expand All @@ -606,6 +600,15 @@
" | spec\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors defined here:\n",
" | \n",
" | __dict__\n",
" | dictionary for instance variables (if defined)\n",
" | \n",
" | __weakref__\n",
" | list of weak references to the object (if defined)\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data and other attributes defined here:\n",
" | \n",
" | schema_name = 'CustomDummy'\n",
Expand All @@ -621,7 +624,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -635,7 +638,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.10.6"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.10)
set(CMAKE_CUDA_ARCHITECTURES "35;50;52;60;61;70;75;80;86")
set(CMAKE_CUDA_ARCHITECTURES "50;60;70;80;90")

project(custom_dummy_plugin LANGUAGES CUDA CXX C)

Expand All @@ -8,8 +8,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_C_STANDARD 11)

set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
# TODO(klecki): When the test container gets a CMake that supports C++17 as a proper option,
# swap those lines
# set(CMAKE_CUDA_STANDARD 17)
# set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17")

include_directories(SYSTEM "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")

Expand Down
Loading

0 comments on commit 9823dfd

Please sign in to comment.