diff --git a/.gitmodules b/.gitmodules index 2b670675f7..ca263373bb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,6 +26,3 @@ [submodule "third_party/cocoapi"] path = third_party/cocoapi url = https://github.com/cocodataset/cocoapi -[submodule "third_party/libcudacxx"] - path = third_party/libcudacxx - url = https://github.com/mzient/libcudacxx.git diff --git a/cmake/Dependencies.common.cmake b/cmake/Dependencies.common.cmake index 2f7ec98634..1fb4287b7c 100644 --- a/cmake/Dependencies.common.cmake +++ b/cmake/Dependencies.common.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -242,11 +242,6 @@ set_target_properties(cocoapi PROPERTIES POSITION_INDEPENDENT_CODE ON) list(APPEND DALI_LIBS cocoapi) list(APPEND DALI_EXCLUDES libcocoapi.a) -################################################################## -# libcu++ -################################################################## -include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/libcudacxx/include) - ################################################################## # cfitsio ################################################################## diff --git a/dali/CMakeLists.txt b/dali/CMakeLists.txt index 7d7cba8c80..8307f8ff3e 100644 --- a/dali/CMakeLists.txt +++ b/dali/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2017-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -181,12 +181,6 @@ if (BUILD_PYTHON) COMMAND cp -r "${PROJECT_SOURCE_DIR}/include/." 
"${PROJECT_BINARY_DIR}/${DALI_INCLUDE_DIR}" ) - # Copy libcu++ include files - add_custom_command( - TARGET install_headers - COMMAND cp -rL "${PROJECT_SOURCE_DIR}/third_party/libcudacxx/include/." "${PROJECT_BINARY_DIR}/${DALI_INCLUDE_DIR}/" - ) - # Copy boost/preprocessor include files add_custom_command( TARGET install_headers diff --git a/dali/kernels/imgproc/resample/resampling_filters.cu b/dali/kernels/imgproc/resample/resampling_filters.cu index 8d1a9d8483..9a69ddc548 100644 --- a/dali/kernels/imgproc/resample/resampling_filters.cu +++ b/dali/kernels/imgproc/resample/resampling_filters.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -73,7 +73,7 @@ void InitFilters(ResamplingFilters &filters) { const int total_size = triangular_size + gaussian_size + cubic_size + lanczos_size; constexpr bool need_staging = - !cuda::kind_has_property::value; + !cuda_for_dali::kind_has_property::value; using tmp_kind = std::conditional_t; filters.filter_data = mm::alloc_raw_unique(total_size); diff --git a/dali/kernels/test/scatter_gather_test.cc b/dali/kernels/test/scatter_gather_test.cc index fbf8dcb47e..1f3206ed78 100644 --- a/dali/kernels/test/scatter_gather_test.cc +++ b/dali/kernels/test/scatter_gather_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -73,7 +73,7 @@ class ScatterGatherTest : public testing::Test { template void Memcpy(void *dst, const void *src, size_t size, cudaMemcpyKind kind) { - if (cuda::kind_has_property::value) { + if (cuda_for_dali::kind_has_property::value) { memcpy(dst, src, size); } else { CUDA_CALL(cudaMemcpy(dst, src, size, kind)); @@ -82,7 +82,7 @@ class ScatterGatherTest : public testing::Test { template void Memset(void *dst, int c, size_t size) { - if (cuda::kind_has_property::value) { + if (cuda_for_dali::kind_has_property::value) { memset(dst, c, size); } else { CUDA_CALL(cudaMemset(dst, c, size)); diff --git a/dali/pipeline/data/copy_to_external.h b/dali/pipeline/data/copy_to_external.h index b09a5390c3..a4208b6f30 100644 --- a/dali/pipeline/data/copy_to_external.h +++ b/dali/pipeline/data/copy_to_external.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -135,22 +135,22 @@ inline void CopyToExternalImpl(void** dsts, } template -inline void CopyToExternal(void* dst, const Tensor &src, - AccessOrder order, bool use_copy_kernel) { +inline void CopyToExternal(void *dst, const Tensor &src, AccessOrder order, + bool use_copy_kernel) { const bool src_device_access = (std::is_same::value || src.is_pinned()); - const bool dst_device_access = cuda::kind_has_property::value; + const bool dst_device_access = + cuda_for_dali::kind_has_property::value; use_copy_kernel &= dst_device_access && src_device_access; using DstBackend = typename detail::kind2backend::type; CopyToExternalImpl(dst, src, order, use_copy_kernel); } template -inline void CopyToExternal(void* dst, const TensorList &src, - AccessOrder order, bool use_copy_kernel) { +inline void CopyToExternal(void *dst, const TensorList &src, AccessOrder order, + bool use_copy_kernel) { const bool src_device_access = (std::is_same::value || src.is_pinned()); - const bool dst_device_access = cuda::kind_has_property::value; + const bool dst_device_access = + cuda_for_dali::kind_has_property::value; use_copy_kernel &= dst_device_access && src_device_access; using DstBackend = typename detail::kind2backend::type; CopyToExternalImpl(dst, src, order, use_copy_kernel); @@ -185,7 +185,8 @@ template inline void CopyToExternal(void** dsts, const TensorList &src, AccessOrder order, bool use_copy_kernel) { bool src_device_access = (std::is_same::value || src.is_pinned()); - bool dst_device_access = cuda::kind_has_property::value; + bool dst_device_access = + cuda_for_dali::kind_has_property::value; use_copy_kernel &= dst_device_access && src_device_access; using DstBackend = typename detail::kind2backend::type; CopyToExternalImpl(dsts, src, order, use_copy_kernel); diff --git a/dali/test/mat2tensor.h b/dali/test/mat2tensor.h index d5d686d60c..f8b9afa881 100644 --- a/dali/test/mat2tensor.h +++ b/dali/test/mat2tensor.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. +// Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -71,8 +71,9 @@ TensorView view_as_tensor(cv::Mat &mat) { template std::pair, T, ndims>, mm::uptr> copy_as_tensor(const cv::Mat &mat) { - static_assert(cuda::kind_has_property::value, - "A GPU-accessible memory kind is required."); + static_assert( + cuda_for_dali::kind_has_property::value, + "A GPU-accessible memory kind is required."); auto tvin = kernels::view_as_tensor(mat); return copy(tvin); } diff --git a/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb b/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb index 2e314c91fd..82d4721ea2 100644 --- a/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb +++ b/docs/examples/custom_operations/custom_operator/create_a_custom_operator.ipynb @@ -236,7 +236,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/usr/local/lib/python3.6/dist-packages/nvidia/dali/include\n" + "/usr/local/lib/python3.10/dist-packages/nvidia/dali/include\n" ] } ], @@ -253,7 +253,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/usr/local/lib/python3.6/dist-packages/nvidia/dali\n" + "/usr/local/lib/python3.10/dist-packages/nvidia/dali\n" ] } ], @@ -270,7 +270,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['-I/usr/local/lib/python3.6/dist-packages/nvidia/dali/include', '-D_GLIBCXX_USE_CXX11_ABI=1']\n" + "['-I/usr/local/lib/python3.10/dist-packages/nvidia/dali/include', '-D_GLIBCXX_USE_CXX11_ABI=1']\n" ] } ], @@ -287,7 +287,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['-L/usr/local/lib/python3.6/dist-packages/nvidia/dali', '-ldali']\n" + "['-L/usr/local/lib/python3.10/dist-packages/nvidia/dali', '-ldali']\n" ] } ], @@ -315,7 +315,7 @@ "output_type": "stream", 
"text": [ "cmake_minimum_required(VERSION 3.10)\r\n", - "set(CMAKE_CUDA_ARCHITECTURES \"35;50;52;60;61;70;75;80;86\")\r\n", + "set(CMAKE_CUDA_ARCHITECTURES \"50;60;70;80;90\")\r\n", "\r\n", "project(custom_dummy_plugin LANGUAGES CUDA CXX C)\r\n", "\r\n", @@ -324,7 +324,7 @@ "set(CMAKE_CXX_EXTENSIONS OFF)\r\n", "set(CMAKE_C_STANDARD 11)\r\n", "\r\n", - "set(CMAKE_CUDA_STANDARD 14)\r\n", + "set(CMAKE_CUDA_STANDARD 17)\r\n", "set(CMAKE_CUDA_STANDARD_REQUIRED ON)\r\n", "\r\n", "include_directories(SYSTEM \"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}\")\r\n", @@ -368,31 +368,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "-- The CUDA compiler identification is NVIDIA 11.4.48\n", - "-- The CXX compiler identification is GNU 7.5.0\n", - "-- The C compiler identification is GNU 7.5.0\n", + "-- The CUDA compiler identification is NVIDIA 12.0.76\n", + "-- The CXX compiler identification is GNU 11.3.0\n", + "-- The C compiler identification is GNU 11.3.0\n", "-- Detecting CUDA compiler ABI info\n", "-- Detecting CUDA compiler ABI info - done\n", - "-- Check for working CUDA compiler: /opt/ccache/bin/nvcc - skipped\n", + "-- Check for working CUDA compiler: /usr/local/cuda/bin/nvcc - skipped\n", "-- Detecting CUDA compile features\n", "-- Detecting CUDA compile features - done\n", "-- Detecting CXX compiler ABI info\n", "-- Detecting CXX compiler ABI info - done\n", - "-- Check for working CXX compiler: /opt/ccache/bin/g++ - skipped\n", + "-- Check for working CXX compiler: /usr/bin/c++ - skipped\n", "-- Detecting CXX compile features\n", "-- Detecting CXX compile features - done\n", "-- Detecting C compiler ABI info\n", "-- Detecting C compiler ABI info - done\n", - "-- Check for working C compiler: /opt/ccache/bin/gcc - skipped\n", + "-- Check for working C compiler: /usr/bin/cc - skipped\n", "-- Detecting C compile features\n", "-- Detecting C compile features - done\n", - "-- Configuring done\n", - "-- Generating done\n", + "-- Configuring done (4.4s)\n", + "-- 
Generating done (0.0s)\n", "-- Build files have been written to: /dali/docs/examples/custom_operations/custom_operator/customdummy/build\n", - "\u001b[35m\u001b[1mScanning dependencies of target customdummy\u001b[0m\n", "[ 33%] \u001b[32mBuilding CXX object CMakeFiles/customdummy.dir/dummy.cc.o\u001b[0m\n", "[ 66%] \u001b[32mBuilding CUDA object CMakeFiles/customdummy.dir/dummy.cu.o\u001b[0m\n", - "nvcc warning : The 'compute_35', 'compute_37', 'compute_50', 'sm_35', 'sm_37' and 'sm_50' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).\n", "[100%] \u001b[32m\u001b[1mLinking CXX shared library libcustomdummy.so\u001b[0m\n", "[100%] Built target customdummy\n" ] @@ -513,15 +511,15 @@ " \n", " Keyword args\n", " ------------\n", - " `bytes_per_sample_hint` : int or list of int, optional, default = [0]\n", + " `bytes_per_sample_hint` : int or list of int, optional, default = `[0]`\n", " Output size hint, in bytes per sample.\n", " \n", " If specified, the operator's outputs residing in GPU or page-locked host memory will be preallocated\n", " to accommodate a batch of samples of this size.\n", - " `preserve` : bool, optional, default = False\n", + " `preserve` : bool, optional, default = `False`\n", " Prevents the operator from being removed from the\n", " graph even if its outputs are not used.\n", - " `seed` : int, optional, default = -1\n", + " `seed` : int, optional, default = `-1`\n", " Random seed.\n", " \n", " If not provided, it will be populated based on the global seed of the pipeline.\n", @@ -552,6 +550,8 @@ "Help on class CustomDummy in module nvidia.dali.ops:\n", "\n", "class CustomDummy(builtins.object)\n", + " | CustomDummy(*, device='cpu', **kwargs)\n", + " | \n", " | Make a copy of the input tensor\n", " | \n", " | Supported backends\n", @@ -561,15 +561,15 @@ " | \n", " | Keyword args\n", " | ------------\n", - " | `bytes_per_sample_hint` : int or list of int, optional, default = 
[0]\n", + " | `bytes_per_sample_hint` : int or list of int, optional, default = `[0]`\n", " | Output size hint, in bytes per sample.\n", " | \n", " | If specified, the operator's outputs residing in GPU or page-locked host memory will be preallocated\n", " | to accommodate a batch of samples of this size.\n", - " | `preserve` : bool, optional, default = False\n", + " | `preserve` : bool, optional, default = `False`\n", " | Prevents the operator from being removed from the\n", " | graph even if its outputs are not used.\n", - " | `seed` : int, optional, default = -1\n", + " | `seed` : int, optional, default = `-1`\n", " | Random seed.\n", " | \n", " | If not provided, it will be populated based on the global seed of the pipeline.\n", @@ -586,16 +586,10 @@ " | `data` : TensorList\n", " | Input to the operator.\n", " | \n", - " | __init__(self, **kwargs)\n", + " | __init__(self, *, device='cpu', **kwargs)\n", " | \n", " | ----------------------------------------------------------------------\n", - " | Data descriptors defined here:\n", - " | \n", - " | __dict__\n", - " | dictionary for instance variables (if defined)\n", - " | \n", - " | __weakref__\n", - " | list of weak references to the object (if defined)\n", + " | Readonly properties defined here:\n", " | \n", " | device\n", " | \n", @@ -606,6 +600,15 @@ " | spec\n", " | \n", " | ----------------------------------------------------------------------\n", + " | Data descriptors defined here:\n", + " | \n", + " | __dict__\n", + " | dictionary for instance variables (if defined)\n", + " | \n", + " | __weakref__\n", + " | list of weak references to the object (if defined)\n", + " | \n", + " | ----------------------------------------------------------------------\n", " | Data and other attributes defined here:\n", " | \n", " | schema_name = 'CustomDummy'\n", @@ -621,7 +624,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": 
"python3" }, @@ -635,7 +638,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.10.6" } }, "nbformat": 4, diff --git a/docs/examples/custom_operations/custom_operator/customdummy/CMakeLists.txt b/docs/examples/custom_operations/custom_operator/customdummy/CMakeLists.txt index 68cfeee4b1..6d97d84a7f 100644 --- a/docs/examples/custom_operations/custom_operator/customdummy/CMakeLists.txt +++ b/docs/examples/custom_operations/custom_operator/customdummy/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.10) -set(CMAKE_CUDA_ARCHITECTURES "35;50;52;60;61;70;75;80;86") +set(CMAKE_CUDA_ARCHITECTURES "50;60;70;80;90") project(custom_dummy_plugin LANGUAGES CUDA CXX C) @@ -8,8 +8,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_C_STANDARD 11) -set(CMAKE_CUDA_STANDARD 14) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) +# TODO(klecki): When the test container gets a CMake that supports C++17 as a proper option, +# swap those lines +# set(CMAKE_CUDA_STANDARD 17) +# set(CMAKE_CUDA_STANDARD_REQUIRED ON) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17") include_directories(SYSTEM "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") diff --git a/include/dali/core/backend_tags.h b/include/dali/core/backend_tags.h index 22259acffe..179d123981 100644 --- a/include/dali/core/backend_tags.h +++ b/include/dali/core/backend_tags.h @@ -1,4 +1,4 @@ -// Copyright (c) 2018-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ #define DALI_CORE_BACKEND_TAGS_H_ #include -#include +#include "dali/core/mm/cuda_memory_resource.h" namespace dali { @@ -59,22 +59,22 @@ template struct kind2storage; template <> -struct kind2storage { +struct kind2storage { using type = StorageCPU; }; template <> -struct kind2storage { +struct kind2storage { using type = StorageCPU; }; template <> -struct kind2storage { +struct kind2storage { using type = StorageGPU; }; template <> -struct kind2storage { +struct kind2storage { using type = StorageUnified; }; diff --git a/include/dali/core/mm/cuda_memory_resource.h b/include/dali/core/mm/cuda_memory_resource.h new file mode 100644 index 0000000000..0bdbd84c21 --- /dev/null +++ b/include/dali/core/mm/cuda_memory_resource.h @@ -0,0 +1,982 @@ +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef DALI_CORE_MM_CUDA_MEMORY_RESOURCE_H_ +#define DALI_CORE_MM_CUDA_MEMORY_RESOURCE_H_ + + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-local-typedefs" +#endif + +#include +#include +#include +#include +// #include // C++20 +#include "dali/core/mm/cuda_stream_view.h" + +#define _DALI_STD_VER 17 + +// DALI is built with exceptions +#define _DALI_EXT_RTTI_ENABLED + +#ifdef _DALI_EXT_RTTI_ENABLED +#include +#endif + +// #if __has_include() +#include +#define _DALI_STD_PMR_NS ::std::pmr +// #elif __has_include() +// #include +// #define _DALI_STD_PMR_NS ::std::experimental::pmr +// #endif // __has_include() + + +// no-op-define, +#define _DALI_TEMPLATE_VIS +#define _DALI_INLINE_VISIBILITY __host__ __device__ + +namespace dali { +namespace cuda_for_dali { + +/*! + * \brief Groups the tag types denoting the kind of memory of an allocation. + * + * Memory allocation kind determines where memory can be accessed and the + * performance characteristics of accesses. + * + * This is not a closed set, the user code can define custom memory kinds. + */ +namespace memory_kind { +/*! + * \brief Ordinary host memory + */ +struct host; + +/*! + * \brief Device memory, as allocated by cudaMalloc. + */ +struct device; + +/*! + * \brief Device-accessible host memory. + */ +struct pinned; + +/*! + * \brief Virtual memory that is automatically migrated between the host and devices. 
+ */ +struct managed; +}; // namespace memory_kind + +namespace detail { + +template +struct __type_pack {}; + +namespace __fallback_typeid { + +template +struct _DALI_TEMPLATE_VIS __unique_typeinfo { + static constexpr int __id = 0; +}; +template +constexpr int __unique_typeinfo<_Tp>::__id; + +template +inline _DALI_INLINE_VISIBILITY constexpr const void *__get_fallback_typeid() { + return &__unique_typeinfo>::__id; +} + +template +const ::std::type_info *__get_typeid() { +#ifdef _DALI_EXT_RTTI_ENABLED + return &typeid(_Tp); +#else + return nullptr; +#endif +} + +inline bool __compare_type(const ::std::type_info *__ti1, const void *__fallback_ti1, + const ::std::type_info *__ti2, const void *__fallback_ti2) { +#ifdef _DALI_EXT_RTTI_ENABLED + if (__ti1 && __ti2 && *__ti1 == *__ti2) + return true; +#endif + return __fallback_ti1 == __fallback_ti2; +} + +template +bool __is_type(const ::std::type_info *__ti1, const void *__fallback_ti1) { + return __compare_type(__ti1, __fallback_ti1, __get_typeid<_Tp>(), __get_fallback_typeid<_Tp>()); +} + +} // namespace __fallback_typeid + +} // namespace detail + +template +class basic_resource_view; + +/*! + * \brief Groups the tag types denoting the execution environment in which the memory can be + * accessed + * + * This is not a closed set, the user code can define custom accessibility. + */ +namespace memory_access { +struct host; +struct device; +} // namespace memory_access + +/*! + * \brief A memory property tag type indicating that the memory can be oversubscribed. + * + * Oversubscribable memory doesn't need to have backing physical storage at all times. + */ +struct oversubscribable; + +/*! + * \brief A memory property tag type indicating that the memory has a backing physical + * storage in the target location at all times. + */ +struct resident; + +/*! 
+ * \brief Groups the tag types that denote the actual location of the physical storage + * + * Memory kinds which can be migrated between locations can define multiple locations. + */ +namespace memory_location { +/*! + * \brief A memory property tag type indicating that the memory is located on a device + */ +struct device; + +/*! + * \brief A memory property tag type indicating that the memory is located in the host memory + */ +struct host; +} // namespace memory_location + +template +class memory_resource; + +namespace detail { + +class memory_resource_base { + public: + static constexpr std::size_t default_alignment = alignof(std::max_align_t); + + /*! + * \brief Allocates storage of size at least `__bytes` bytes. + * + * The returned storage is aligned to the specified `__alignment` if such + * alignment is supported. Otherwise throws. + * + * Storage may be accessed immediately within the execution contexts that + * can access the memory. + * + * \throws If storage of the requested size and alignment cannot be obtained. + * + * \param __bytes The size in bytes of the allocation + * \param __alignment The alignment of the allocation + * \return Pointer to the requested storage + */ + void *allocate(size_t __bytes, size_t __alignment = default_alignment) { + return do_allocate(__bytes, __alignment); + } + + /*! + * \brief Deallocates the storage pointed to by `__p`. + * + * `__p` must have been returned by a prior call to `allocate(__bytes, + * __alignment)` on a `memory_resource` that compares equal to `*this`, and + * the storage it points to must not yet have been deallocated, otherwise + * behavior is undefined. + * + * \throws Nothing. + * + * \param __p Pointer to storage to be deallocated + * \param __bytes The size in bytes of the allocation. This must be equal to + * the value of `__bytes` that was specified to the `allocate` call that + * returned `__p`. + * \param __alignment The alignment of the allocation. 
This must be equal to + * the value of `__alignment` that was specified to the `allocate` call that + * returned `__p`. + */ + void deallocate(void *__mem, size_t __bytes, size_t __alignment = default_alignment) { + do_deallocate(__mem, __bytes, __alignment); + } + + /*! + * \brief Tries to cast the resource to a resource of given kind + */ + template + memory_resource<_Kind> *as_kind() noexcept { + using __tag = detail::__type_pack<_Kind>; + return static_cast *>( + __do_as_kind(detail::__fallback_typeid::__get_typeid<__tag>(), + detail::__fallback_typeid::__get_fallback_typeid<__tag>())); + } + + /*! + * \brief Tries to cast the resource to a resource of given kind + */ + template + const memory_resource<_Kind> *as_kind() const noexcept { + using __tag = detail::__type_pack<_Kind>; + return static_cast *>( + __do_as_kind(detail::__fallback_typeid::__get_typeid<__tag>(), + detail::__fallback_typeid::__get_fallback_typeid<__tag>())); + } + + protected: + virtual void *do_allocate(size_t __bytes, size_t __alignment) = 0; + virtual void do_deallocate(void *__mem, size_t __bytes, size_t __alignment) = 0; + + virtual bool is_equal_base(const memory_resource_base &other) const noexcept = 0; + + bool is_equal(const memory_resource_base &other) const noexcept { + return is_equal_base(other); + } + + template + friend class cuda_for_dali::basic_resource_view; + + virtual void *__do_as_kind(const ::std::type_info *__tag_type_id, + const void *__tag_type_fallback_id) const noexcept = 0; +}; + +class stream_ordered_memory_resource_base : public virtual memory_resource_base { + public: + /*! + * \brief Allocates storage of size at least `__bytes` bytes in stream order + * on `__stream`. + * + * The returned storage is aligned to `default_alignment`. + * + * The returned storage may be used immediately only on `__stream`. Accessing + * it on any other stream (or the host) requires first synchronizing with + * `__stream`. 
+ * + * \throws If the storage of the requested size and `default_alignment` cannot + * be obtained. + * + * \param __bytes The size in bytes of the allocation. + * \param __stream The stream on which to perform the allocation. + * \return Pointer to the requested storage. + */ + void *allocate_async(size_t bytes, stream_view stream) { + return allocate_async(bytes, default_alignment, stream); + } + /*! + * \brief Allocates storage of size at least `__bytes` bytes in stream order + * on `__stream`. + * + * The returned storage is aligned to the specified `__alignment` if such + * alignment is supported. + * + * The returned storage may be used immediately only on `__stream`. Using it + * on any other stream (or the host) requires first synchronizing with + * `__stream`. + * + * \throws If the storage of the requested size and alignment cannot be + * obtained. + * + * \param __bytes The size in bytes of the allocation. + * \param __alignment The alignment of the allocation + * \param __stream The stream on which to perform the allocation. + * \return Pointer to the requested storage. + */ + void *allocate_async(size_t bytes, size_t alignment, stream_view stream) { + return do_allocate_async(bytes, alignment, stream); + } + + /*! + * \brief Deallocates the storage pointed to by `__p` in stream order on + * `__stream`. + * + * `__p` must have been returned by a prior call to + * `allocate_async(__bytes, default_alignment)` or `allocate(__bytes, + * default_alignment)` on a `stream_ordered_memory_resource` that compares + * equal to `*this`, and the storage it points to must not yet have been + * deallocated, otherwise behavior is undefined. + * + * Asynchronous, stream-ordered operations on `__stream` initiated before + * `deallocate_async(__p, __bytes, __stream)` may still access the storage + * pointed to by `__p` after `deallocate_async` returns. 
+ * + * Storage deallocated on `__stream` may be reused by a future + * call to `allocate_async` on the same stream without synchronizing + * `__stream`. Therefore, `__stream` is typically the last stream on which + * `__p` was last used. It is the caller's responsibility to ensure the + * storage pointed to by `__p` is not in use on any other stream (or the + * host), or behavior is undefined. + * + * \param __p Pointer to storage to be deallocated. + * \param __bytes The size in bytes of the allocation. This must be equal to + * the value of `__bytes` that was specified to the `allocate` or + * `allocate_async` call that returned `__p`. + * \param __stream The stream on which to perform the deallocation. + */ + void deallocate_async(void *__mem, size_t __bytes, stream_view __stream) { + deallocate_async(__mem, __bytes, default_alignment, __stream); + } + + /*! + * \brief Deallocates the storage pointed to by `__p` in stream order on + * `__stream`. + * + * `__p` must have been returned by a prior call to + * `allocate_async(__bytes, __alignment)` or `allocate(__bytes, + * __alignment)` on a `stream_ordered_memory_resource` that compares + * equal to `*this`, and the storage it points to must not yet have been + * deallocated, otherwise behavior is undefined. + * + * Asynchronous, stream-ordered operations on `__stream` initiated before + * `deallocate_async(__p, __bytes, __stream)` may still access the storage + * pointed to by `__p` after `deallocate_async` returns. + * + * Storage deallocated on `__stream` may be reused by a future + * call to `allocate_async` on the same stream without synchronizing + * `__stream`. Therefore, `__stream` is typically the last stream on which + * `__p` was last used. It is the caller's responsibility to ensure the + * storage pointed to by `__p` is not in use on any other stream (or the + * host), or behavior is undefined. + * + * \param __p Pointer to storage to be deallocated. 
+ * \param __bytes The size in bytes of the allocation. This must be equal to + * the value of `__bytes` that was specified to the `allocate` or + * `allocate_async` call that returned `__p`. + * \param __alignment The alignment of the allocation. This must be equal to + * the value of `__alignment` that was specified to the `allocate` or + * `allocate_async` call that returned `__p`. + * \param __stream The stream on which to perform the deallocation. + */ + void deallocate_async(void *__mem, size_t __bytes, size_t __alignment, stream_view __stream) { + do_deallocate_async(__mem, __bytes, __alignment, __stream); + } + + protected: + virtual void *do_allocate_async(size_t __bytes, size_t __alignment, stream_view __stream) = 0; + virtual void do_deallocate_async(void *__mem, size_t __bytes, size_t __alignment, + stream_view __stream) = 0; + + template + friend class cuda_for_dali::basic_resource_view; +}; + +} // namespace detail + +/*! + * \brief Abstract interface for memory allocation. + * + * \tparam _MemoryKind The kind of the allocated memory. + */ +template +class memory_resource : private virtual detail::memory_resource_base { + public: + using memory_kind = _MemoryKind; + static constexpr std::size_t default_alignment = memory_resource_base::default_alignment; + + virtual ~memory_resource() = default; + + /*! + * \brief Allocates storage of size at least `__bytes` bytes. + * + * The returned storage is aligned to the specified `__alignment` if such + * alignment is supported. Otherwise throws. + * + * Storage may be accessed immediately within the execution contexts that + * can access the memory. + * + * \throws If storage of the requested size and alignment cannot be obtained. 
+ * + * \param __bytes The size in bytes of the allocation + * \param __alignment The alignment of the allocation + * \return Pointer to the requested storage + */ + void *allocate(std::size_t __bytes, std::size_t __alignment = default_alignment) { + return do_allocate(__bytes, __alignment); + } + + /*! + * \brief Deallocates the storage pointed to by `__p`. + * + * `__p` must have been returned by a prior call to `allocate(__bytes, + * __alignment)` on a `memory_resource` that compares equal to `*this`, and + * the storage it points to must not yet have been deallocated, otherwise + * behavior is undefined. + * + * \throws Nothing. + * + * \param __p Pointer to storage to be deallocated + * \param __bytes The size in bytes of the allocation. This must be equal to + * the value of `__bytes` that was specified to the `allocate` call that + * returned `__p`. + * \param __alignment The alignment of the allocation. This must be equal to + * the value of `__alignment` that was specified to the `allocate` call that + * returned `__p`. + */ + void deallocate(void *__p, std::size_t __bytes, std::size_t __alignment = default_alignment) { + do_deallocate(__p, __bytes, __alignment); + } + + /*! + * \brief Compare this resource to another. + * + * Two resources compare equal if and only if memory allocated from one + * resource can be deallocated from the other and vice versa. 
+ * + * \param __other The other resource to compare against + */ + bool is_equal(memory_resource const &__other) const noexcept { + return do_is_equal(__other); + } + + private: + template + friend class basic_resource_view; + + void *do_allocate(std::size_t __bytes, std::size_t __alignment) override = 0; + + void do_deallocate(void *__p, std::size_t __bytes, std::size_t __alignment) override = 0; + + // Default to identity comparison + virtual bool do_is_equal(memory_resource const &__other) const noexcept { + return this == &__other; + } + + void *__do_as_kind(const ::std::type_info *__tag_type_id, + const void *__tag_type_fallback_id) const noexcept final { + using __tag = detail::__type_pack; + return detail::__fallback_typeid::__is_type<__tag>(__tag_type_id, __tag_type_fallback_id) ? + const_cast(this) : + nullptr; + } + + bool is_equal_base(const detail::memory_resource_base &__other) const noexcept final { + if (auto *__other_res = __other.as_kind()) { + return do_is_equal(*__other_res); + } else { + return false; + } + } +}; + +template +inline _DALI_INLINE_VISIBILITY bool operator==(const memory_resource<_Kind> &__a, + const memory_resource<_Kind> &__b) { + return __a.is_equal(__b); +} + +/*! + * \brief Abstract interface for CUDA stream-ordered memory allocation. + * + * "Stream-ordered memory allocation" extends the CUDA programming model to + * include memory allocation as stream-ordered operations. + * + * All asynchronous accesses of the allocation must happen between the stream + * execution of the allocation and the free. If storage is accessed outside of + * the promised stream order, a use before allocation / use after free error + * will cause undefined behavior. + * + * Allocating on stream `s0` returns memory that is valid to access immediately + * only on `s0`. Accessing it on any other stream (or the host) first requires + * synchronization with `s0`, otherwise behavior is undefined. 
+ * + * Deallocating memory on stream `s1` indicates that it is valid to reuse the + * deallocated memory immediately for another allocation on `s1`. + * + * Asynchronous, stream-ordered operations ordered before deallocation on `s1` + * may still access the storage after deallocation completes. + * + * Memory may be allocated and deallocated on different streams, `s0` and `s1` + * respectively, but requires synchronization between `s0` and `s1` before the + * deallocation occurs. + * + * \tparam _MemoryKind The kind of the allocated memory. + */ +template +class stream_ordered_memory_resource : public virtual memory_resource<_MemoryKind>, + private virtual detail::stream_ordered_memory_resource_base { + public: + using memory_kind = _MemoryKind; + static constexpr std::size_t default_alignment = memory_resource<_MemoryKind>::default_alignment; + + /*! + * \brief Allocates storage of size at least `__bytes` bytes in stream order + * on `__stream`. + * + * The returned storage is aligned to `default_alignment`. + * + * The returned storage may be used immediately only on `__stream`. Accessing + * it on any other stream (or the host) requires first synchronizing with + * `__stream`. + * + * \throws If the storage of the requested size and `default_alignment` cannot + * be obtained. + * + * \param __bytes The size in bytes of the allocation. + * \param __stream The stream on which to perform the allocation. + * \return Pointer to the requested storage. + */ + void *allocate_async(std::size_t __bytes, stream_view __stream) { + return do_allocate_async(__bytes, default_alignment, __stream); + } + + /*! + * \brief Allocates storage of size at least `__bytes` bytes in stream order + * on `__stream`. + * + * The returned storage is aligned to the specified `__alignment` if such + * alignment is supported. + * + * The returned storage may be used immediately only on `__stream`. Using it + * on any other stream (or the host) requires first synchronizing with + * `__stream`. 
+ * + * \throws If the storage of the requested size and alignment cannot be + * obtained. + * + * \param __bytes The size in bytes of the allocation. + * \param __alignment The alignment of the allocation + * \param __stream The stream on which to perform the allocation. + * \return Pointer to the requested storage. + */ + void *allocate_async(std::size_t __bytes, std::size_t __alignment, stream_view __stream) { + return do_allocate_async(__bytes, __alignment, __stream); + } + + /*! + * \brief Deallocates the storage pointed to by `__p` in stream order on + * `__stream`. + * + * `__p` must have been returned by a prior call to + * `allocate_async(__bytes, default_alignment)` or `allocate(__bytes, + * default_alignment)` on a `stream_ordered_memory_resource` that compares + * equal to `*this`, and the storage it points to must not yet have been + * deallocated, otherwise behavior is undefined. + * + * Asynchronous, stream-ordered operations on `__stream` initiated before + * `deallocate_async(__p, __bytes, __stream)` may still access the storage + * pointed to by `__p` after `deallocate_async` returns. + * + * Storage deallocated on `__stream` may be reused by a future + * call to `allocate_async` on the same stream without synchronizing + * `__stream`. Therefore, `__stream` is typically the last stream on which + * `__p` was last used. It is the caller's responsibility to ensure the + * storage pointed to by `__p` is not in use on any other stream (or the + * host), or behavior is undefined. + * + * \param __p Pointer to storage to be deallocated. + * \param __bytes The size in bytes of the allocation. This must be equal to + * the value of `__bytes` that was specified to the `allocate` or + * `allocate_async` call that returned `__p`. + * \param __stream The stream on which to perform the deallocation. + */ + void deallocate_async(void *__p, std::size_t __bytes, stream_view __stream) { + do_deallocate_async(__p, __bytes, default_alignment, __stream); + } + + /*! 
+ * \brief Deallocates the storage pointed to by `__p` in stream order on + * `__stream`. + * + * `__p` must have been returned by a prior call to + * `allocate_async(__bytes, __alignment)` or `allocate(__bytes, + * __alignment)` on a `stream_ordered_memory_resource` that compares + * equal to `*this`, and the storage it points to must not yet have been + * deallocated, otherwise behavior is undefined. + * + * Asynchronous, stream-ordered operations on `__stream` initiated before + * `deallocate_async(__p, __bytes, __stream)` may still access the storage + * pointed to by `__p` after `deallocate_async` returns. + * + * Storage deallocated on `__stream` may be reused by a future + * call to `allocate_async` on the same stream without synchronizing + * `__stream`. Therefore, `__stream` is typically the last stream on which + * `__p` was last used. It is the caller's responsibility to ensure the + * storage pointed to by `__p` is not in use on any other stream (or the + * host), or behavior is undefined. + * + * \param __p Pointer to storage to be deallocated. + * \param __bytes The size in bytes of the allocation. This must be equal to + * the value of `__bytes` that was specified to the `allocate` or + * `allocate_async` call that returned `__p`. + * \param __alignment The alignment of the allocation. This must be equal to + * the value of `__alignment` that was specified to the `allocate` or + * `allocate_async` call that returned `__p`. + * \param __stream The stream on which to perform the deallocation. 
+ */ + void deallocate_async(void *__p, std::size_t __bytes, std::size_t __alignment, + stream_view __stream) { + do_deallocate_async(__p, __bytes, __alignment, __stream); + } + + private: + template + friend class basic_resource_view; + + /// Default synchronous implementation of `memory_resource::do_allocate` + void *do_allocate(std::size_t __bytes, std::size_t __alignment) override { + auto const __default_stream = stream_view{}; + auto __p = do_allocate_async(__bytes, __alignment, __default_stream); + __default_stream.wait(); + return __p; + } + + /// Default synchronous implementation of `memory_resource::do_deallocate` + void do_deallocate(void *__p, std::size_t __bytes, std::size_t __alignment) override { + auto const __default_stream = stream_view{}; + __default_stream.wait(); + do_deallocate_async(__p, __bytes, __alignment, __default_stream); + } + + void *do_allocate_async(std::size_t __bytes, std::size_t __alignment, + stream_view __stream) override = 0; + + void do_deallocate_async(void *__p, std::size_t __bytes, std::size_t __alignment, + stream_view __stream) override = 0; +}; + + +/*! + * \brief Indicates whether a memory kind `_MemoryKind` has a property `__property`. + */ +template +struct kind_has_property : std::false_type {}; + +/*! + * \brief A special property telling that given resource/resource view allocates + * memory of specific kind. + * + * When a view defines this property, it implicitly has all properties of this + * memory kind. + * This property is also a property in itself and views defining properties of + * the underlying memory kind cannot be converted to a view defining this property. + * This allows for future extension of the set of properties. 
+ */ +template +struct is_kind; + +template +struct kind_has_property<_MemoryKind, is_kind<_MemoryKind>> : std::true_type {}; + +#define _DALI_MEMORY_KIND_PROPERTY(__kind, __property) \ + template <> \ + struct kind_has_property<__kind, __property> : std::true_type {}; + +_DALI_MEMORY_KIND_PROPERTY(memory_kind::host, memory_access::host); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::host, oversubscribable); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::host, memory_location::host); + +_DALI_MEMORY_KIND_PROPERTY(memory_kind::pinned, memory_access::host); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::pinned, memory_access::device); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::pinned, resident); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::pinned, memory_location::host); + +_DALI_MEMORY_KIND_PROPERTY(memory_kind::device, memory_access::device); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::device, resident); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::device, memory_location::device); + +_DALI_MEMORY_KIND_PROPERTY(memory_kind::managed, memory_access::host); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::managed, memory_access::device); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::managed, oversubscribable); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::managed, memory_location::host); +_DALI_MEMORY_KIND_PROPERTY(memory_kind::managed, memory_location::device); + +namespace detail { +template +std::false_type Has_Property(...); +template +kind_has_property<_MemoryKind, _Property> Has_Property(const memory_resource<_MemoryKind> *); +template +kind_has_property<_MemoryKind, _Property> Has_Property( + const stream_ordered_memory_resource<_MemoryKind> *); +} // namespace detail + +template +struct has_property : decltype(detail::Has_Property<_Property>( + std::declval *>())) {}; + +namespace detail { +template +struct is_property_in : std::false_type {}; + +template +struct is_property_in<_Property, _Mismatch, _Tail...> : is_property_in<_Property, _Tail...> {}; + +template +struct is_property_in<_Property, _Property, 
_Tail...> : std::true_type {}; + +template +struct is_property_in<_Property, is_kind<_MemoryKind>, Tail...> + : kind_has_property<_MemoryKind, _Property> {}; + +template +struct is_property_in, is_kind<_MemoryKind>, Tail...> : std::true_type {}; + +template +struct is_resource_pointer_convertible : std::is_convertible<_FromPointer, _ToPointer> {}; +// Private inheritance from (stream_ordered_)memory_resource_base* requires explicit partial +// specializations as `is_convertible` will return false + +template +struct is_resource_pointer_convertible<_FromPointer, detail::memory_resource_base *> + : std::conjunction< + std::is_pointer<_FromPointer>, + std::is_base_of::element_type>> {}; + +template +struct is_resource_pointer_convertible<_FromPointer, detail::stream_ordered_memory_resource_base *> + : std::conjunction< + std::is_pointer<_FromPointer>, + std::is_base_of::element_type>> {}; + +} // namespace detail + +template +struct has_property, _Property> + : detail::is_property_in<_Property, _Properties...> {}; + +template +struct is_view_convertible; + +template +struct is_view_convertible, + basic_resource_view<_ToPointer, _ToProperties...>> + : std::conjunction< + detail::is_resource_pointer_convertible<_FromPointer, _ToPointer>, + has_property, _ToProperties>...> {}; + +/*! + * \brief A pointer-like object to a memory resource based on resource. + * + * Resource view is an object that acts as a memory resource pointer, but provides + * enhanced implicit conversions. The idea behind this type is that a user of + * a memory resource may be interested in many kinds of resources as long as they + * have certain properties. For example, a function may work with any resource + * that can provide host-accessible memory, regardless of whether it is plain host + * memory, pinned memory, managed memory, or some,yet-to-be-defined future kind + * of memory. 
+ *
+ * A resource view can be created from a memory resource pointer or from another
+ * resource view that defines a superset of the target properties.
+ *
+ * The resource view exposes the underlying resource's interface via `operator->`.
+ *
+ * The `basic_resource_view` class can be parameterized with the resource pointer type,
+ * which can be either one of the base resource classes or a concrete resource type.
+ *
+ * \tparam _ResourcePointer a pointer-like object to the underlying memory resource
+ * \tparam _Properties properties of a memory resource required by resource view
+ */
+template
+class basic_resource_view {
+ public:
+  static_assert(
+      std::is_base_of::element_type>::value ||
+          std::is_base_of::element_type>::value,
+      "ResourcePointer must be a pointer to a memory_resource_base, "
+      "stream_ordered_memory_resource_base or a derived class");
+
+  basic_resource_view() = default;
+
+  basic_resource_view(int) = delete;  // NOLINT(runtime/explicit)
+
+  basic_resource_view(std::nullptr_t) {}  // NOLINT(runtime/explicit)
+
+  /*!
+   * \brief Constructs a resource view from a compatible memory resource pointer.
+   *
+   * The memory resource is considered compatible if a pointer to it can be converted to
+   * `_ResourcePointer` and the resource type has the required properties listed
+   * in `_Properties`.
+   *
+   * \tparam _Resource Type of a memory resource object.
+   * \param __p pointer to a memory resource object.
+   */
+  template ::value &&
+                             std::conjunction...>::value>>
+  basic_resource_view(_Resource *__p) : __pointer(__p) {}  // NOLINT(runtime/explicit)
+
+  /*!
+   * \brief Constructs a resource view by copying the resource pointer from a compatible resource
+   * view.
+   *
+   * A resource view is considered compatible if it defines all properties required by this
+   * view in `_Properties`.
+ *
+ * \tparam _OtherPointer The resource pointer type of the source resource view
+ * \tparam _OtherProperties The properties defined by the source resource view
+ */
+  template <
+      typename _OtherPointer, typename... _OtherProperties,
+      typename = std::enable_if_t, basic_resource_view>::value>>
+  basic_resource_view(basic_resource_view<_OtherPointer, _OtherProperties...> v)  // NOLINT
+      : __pointer(v.__pointer) {}
+
+  /*!
+   * \brief Exposes the interface of the underlying memory resource.
+   *
+   * \note This method should not be used to obtain the pointer to the memory resource.
+   */
+  _ResourcePointer operator->() const noexcept {
+    return __pointer;
+  }
+
+  template
+  bool operator==(
+      const cuda_for_dali::basic_resource_view<_Ptr2, _Props2...> &__v2) const noexcept {
+    using __view1_t = basic_resource_view;
+    using __view2_t = basic_resource_view<_Ptr2, _Props2...>;
+    if (__pointer == nullptr || __v2.__pointer == nullptr)
+      return __pointer == nullptr && __v2.__pointer == nullptr;
+    return static_cast(__pointer)->is_equal(*__v2.__pointer);
+  }
+
+  template
+  bool operator!=(
+      const cuda_for_dali::basic_resource_view<_Ptr2, _Props2...> &__v2) const noexcept {
+    return !(*this == __v2);
+  }
+
+  /*!
+   * \brief Returns true if the underlying pointer is not null.
+ */ + constexpr explicit operator bool() const noexcept { + return !!__pointer; + } + + private: + template + friend class basic_resource_view; + + _ResourcePointer __pointer{}; +}; + +template +basic_resource_view<_ResourcePointer, _FirstProperty, _Properties...> view_resource( + _ResourcePointer __rsrc_ptr) { + return __rsrc_ptr; +} + + +template +basic_resource_view<_ResourcePointer, + is_kind::memory_kind>> +view_resource(_ResourcePointer __rsrc_ptr) { + return __rsrc_ptr; +} + + +template +bool operator==(const basic_resource_view<_ResourcePointer, _Properties...> &__view, + const memory_resource<_Kind> *__mr) { + return __view == view_resource(__mr); +} + +template +bool operator!=(const basic_resource_view<_ResourcePointer, _Properties...> &__view, + const memory_resource<_Kind> *__mr) { + return __view != view_resource(__mr); +} + +template +bool operator==(const memory_resource<_Kind> *__mr, + const basic_resource_view<_ResourcePointer, _Properties...> &__view) { + return view_resource(__mr) == __view; +} + +template +bool operator!=(const memory_resource<_Kind> *__mr, + const basic_resource_view<_ResourcePointer, _Properties...> &__view) { + return view_resource(__mr) != __view; +} + +template +using resource_view = basic_resource_view; + +template +using stream_ordered_resource_view = + basic_resource_view; + +#if _DALI_STD_VER > 14 + +#if defined(_DALI_STD_PMR_NS) + +namespace detail { +class __pmr_adaptor_base : public _DALI_STD_PMR_NS::memory_resource { + public: + virtual cuda_for_dali::memory_resource *resource() + const noexcept = 0; +}; +} // namespace detail + +template +class pmr_adaptor final : public detail::__pmr_adaptor_base { + using resource_type = std::remove_reference_t())>; + + static constexpr bool __is_host_accessible_resource = + has_property::value; + static_assert( + __is_host_accessible_resource, + "Pointer must be a pointer-like type to a type that allocates host-accessible memory."); + + public: + pmr_adaptor(_Pointer __mr) : 
__mr_{std::move(__mr)} {} // NOLINT(runtime/explicit) + + using raw_pointer = std::remove_reference_t())>; + + raw_pointer resource() const noexcept override { + return &*__mr_; + } + + private: + void *do_allocate(std::size_t __bytes, std::size_t __alignment) override { + return __mr_->allocate(__bytes, __alignment); + } + + void do_deallocate(void *__p, std::size_t __bytes, std::size_t __alignment) override { + return __mr_->deallocate(__p, __bytes, __alignment); + } + + bool do_is_equal(_DALI_STD_PMR_NS::memory_resource const &__other) const noexcept override { + auto __other_p = dynamic_cast(&__other); + return __other_p && + (__other_p->resource() == resource() || __other_p->resource()->is_equal(*resource())); + } + + _Pointer __mr_; +}; +#endif // defined(_DALI_STD_PMR_NS) +#endif // _DALI_STD_VER > 14 + +} // namespace cuda_for_dali +} // namespace dali + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +#endif // DALI_CORE_MM_CUDA_MEMORY_RESOURCE_H_ diff --git a/include/dali/core/mm/cuda_stream_view.h b/include/dali/core/mm/cuda_stream_view.h new file mode 100644 index 0000000000..4489a221ba --- /dev/null +++ b/include/dali/core/mm/cuda_stream_view.h @@ -0,0 +1,130 @@ +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DALI_CORE_MM_CUDA_STREAM_VIEW_H_
+#define DALI_CORE_MM_CUDA_STREAM_VIEW_H_
+
+#include
+// #include
+
+#include   // NOLINT(build/include_order)
+#include
+#include "dali/core/cuda_error.h"
+
+namespace dali {
+namespace cuda_for_dali {
+
+/**
+ * \brief A non-owning wrapper for a `cudaStream_t`.
+ *
+ * `stream_view` is a non-owning "view" type similar to `std::span` or `std::string_view`.
+ * \see https://en.cppreference.com/w/cpp/container/span and
+ * \see https://en.cppreference.com/w/cpp/string/basic_string_view
+ *
+ */
+class stream_view {
+ public:
+  using value_type = ::cudaStream_t;
+
+  /**
+   * \brief Constructs a `stream_view` of the "default" CUDA stream.
+   *
+   * For behavior of the default stream,
+   * \see https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html
+   *
+   */
+  constexpr stream_view() noexcept = default;
+
+  /**
+   * \brief Constructs a `stream_view` from a `cudaStream_t` handle.
+   *
+   * This constructor provides implicit conversion from `cudaStream_t`.
+   *
+   * \note: It is the caller's responsibility to ensure the `stream_view` does not
+   * outlive the stream identified by the `cudaStream_t` handle.
+   *
+   */
+  constexpr stream_view(value_type stream) : __stream{stream} {}  // NOLINT(runtime/explicit)
+
+  /// Disallow construction from an `int`, e.g., `0`.
+  stream_view(int) = delete;  // NOLINT(runtime/explicit)
+
+  /// Disallow construction from `nullptr`.
+  stream_view(std::nullptr_t) = delete;  // NOLINT(runtime/explicit)
+
+  /// Returns the wrapped `cudaStream_t` handle.
+  constexpr value_type get() const noexcept {
+    return __stream;
+  }
+
+  /**
+   * \brief Synchronizes the wrapped stream.
+   *
+   * \throws cuda_for_dali::cuda_error if synchronization fails.
+ * + */ + void wait() const { + CUDA_CALL(::cudaStreamSynchronize(get())); // "Failed to synchronize stream." + } + + /** + * \brief Queries if all operations on the wrapped stream have completed. + * + * \throws cuda_for_dali::cuda_error if the query fails. + * + * \return `true` if all operations have completed, or `false` if not. + */ + bool ready() const { + auto const __result = ::cudaStreamQuery(get()); + if (__result == ::cudaSuccess) { + return true; + } else if (__result == ::cudaErrorNotReady) { + return false; + } + CUDA_CALL(__result); + return false; + } + + private: + value_type __stream{0}; ///< Handle of the viewed stream +}; + +/** + * \brief Compares two `stream_view`s for equality + * + * \note Allows comparison with `cudaStream_t` due to implicit conversion to + * `stream_view`. + * + * \param lhs The first `stream_view` to compare + * \param rhs The second `stream_view` to compare + * \return true if equal, false if unequal + */ +inline constexpr bool operator==(stream_view __lhs, stream_view __rhs) { + return __lhs.get() == __rhs.get(); +} + +/** + * \brief Compares two `stream_view`s for inequality + * + * \note Allows comparison with `cudaStream_t` due to implicit conversion to + * `stream_view`. + * + * \param lhs The first `stream_view` to compare + * \param rhs The second `stream_view` to compare + * \return true if unequal, false if equal + */ +inline constexpr bool operator!=(stream_view __lhs, stream_view __rhs) { + return !(__lhs == __rhs); +} + +} // namespace cuda_for_dali +} // namespace dali + +#endif // DALI_CORE_MM_CUDA_STREAM_VIEW_H_ diff --git a/include/dali/core/mm/memory_resource.h b/include/dali/core/mm/memory_resource.h index 5735fb7ede..60c66a8c4d 100644 --- a/include/dali/core/mm/memory_resource.h +++ b/include/dali/core/mm/memory_resource.h @@ -1,4 +1,4 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -#include +#include "dali/core/mm/cuda_memory_resource.h" namespace dali { @@ -36,18 +36,18 @@ namespace dali { */ namespace mm { -namespace memory_kind = cuda::memory_kind; +namespace memory_kind = cuda_for_dali::memory_kind; -using cuda::memory_resource; -using cuda::resource_view; -using cuda::stream_ordered_resource_view; +using cuda_for_dali::memory_resource; +using cuda_for_dali::resource_view; +using cuda_for_dali::stream_ordered_resource_view; using host_memory_resource = memory_resource; using pinned_memory_resource = memory_resource; -using cuda::stream_view; +using cuda_for_dali::stream_view; template -using async_memory_resource = cuda::stream_ordered_memory_resource; +using async_memory_resource = cuda_for_dali::stream_ordered_memory_resource; using device_async_resource = async_memory_resource; using pinned_async_resource = async_memory_resource; @@ -60,7 +60,8 @@ struct stream_context { namespace detail { template -constexpr bool is_host_accessible = cuda::kind_has_property::value; +constexpr bool is_host_accessible = + cuda_for_dali::kind_has_property::value; } // namespace detail diff --git a/third_party/README.rst b/third_party/README.rst index 5b890b836a..a95851ed2e 100644 --- a/third_party/README.rst +++ b/third_party/README.rst @@ -19,8 +19,6 @@ This part of the repository contains extra dependencies required to build DALI, +-----------------+---------------------+---------------------+ | |googletest|_ | |googletestver|_ | |googletestlic|_ | +-----------------+---------------------+---------------------+ -| |libcudacxx|_ | |libcudacxxver|_ | |libcudacxxlic|_ | -+-----------------+---------------------+---------------------+ | |pybind11|_ | |pybind11ver|_ | |pybind11lic|_ | +-----------------+---------------------+---------------------+ | |rapidjson|_ | |rapidjsonver|_ | 
|rapidjsonlic|_ | @@ -75,13 +73,6 @@ This part of the repository contains extra dependencies required to build DALI, .. |googletestlic| replace:: BSD 3-Clause License .. _googletestlic: https://github.com/google/googletest/blob/master/LICENSE -.. |libcudacxx| replace:: libcu++ -.. _libcudacxx: https://github.com/mzient/libcudacxx.git -.. |libcudacxxver| replace:: Custom fork (Aug 30, 2021) -.. _libcudacxxver: https://github.com/mzient/libcudacxx/tree/863f11a16cced8b7aacfc639dacb419843a300e8 -.. |libcudacxxlic| replace:: Apache License v2.0 with LLVM Exceptions -.. _libcudacxxlic: https://github.com/mzient/libcudacxx/blob/main/LICENSE.TXT - .. |pybind11| replace:: pybind11 .. _pybind11: https://github.com/pybind/pybind11 .. |pybind11ver| replace:: 2.10.4 diff --git a/third_party/libcudacxx b/third_party/libcudacxx deleted file mode 160000 index 00f1bf1fcc..0000000000 --- a/third_party/libcudacxx +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 00f1bf1fcc10bb452bb5e1e94d41b74ed666e235