From a1c9366725cf3fd41f6b86c532c55169b530b0da Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Wed, 28 Jun 2023 12:29:05 -0500 Subject: [PATCH] CPU variant. --- src/cunumeric/stat/histogram.cc | 141 ++++++++++++++++++++++++----- src/cunumeric/stat/histogram_cpu.h | 1 - 2 files changed, 116 insertions(+), 26 deletions(-) diff --git a/src/cunumeric/stat/histogram.cc b/src/cunumeric/stat/histogram.cc index f1f59ed0e..d28df7f67 100644 --- a/src/cunumeric/stat/histogram.cc +++ b/src/cunumeric/stat/histogram.cc @@ -17,49 +17,140 @@ #include "cunumeric/stat/histogram.h" #include "cunumeric/stat/histogram_template.inl" -namespace cunumeric { +#include "cunumeric/stat/histogram_cpu.h" +#include "cunumeric/stat/histogram_impl.h" + +#include +#include +#include +namespace cunumeric { using namespace legate; -// TODO: +namespace detail { + +// RO accessor (size, pointer) extractor: +// +template +std::tuple get_accessor_ptr(const AccessorRO& src_acc, + const Rect<1>& src_rect) +{ + size_t src_strides[1]; + const VAL* src_ptr = src_acc.ptr(src_rect, src_strides); + assert(src_strides[0] == 1); + // + // const VAL* src_ptr: returned exactly as obtained from the accessor; + // no copy is made here (see make_accessor_copy when a copy is needed); + // + size_t src_size = src_rect.hi - src_rect.lo + 1; + return std::make_tuple(src_size, src_ptr); +} +// RD accessor (size, pointer) extractor: +// +template +std::tuple get_accessor_ptr(const AccessorRD, true, 1>& src_acc, + const Rect<1>& src_rect) +{ + size_t src_strides[1]; + VAL* src_ptr = src_acc.ptr(src_rect, src_strides); + assert(src_strides[0] == 1); + // + // VAL* src_ptr: returned exactly as obtained from the accessor; + // no copy is made here; + // + size_t src_size = src_rect.hi - src_rect.lo + 1; + return std::make_tuple(src_size, src_ptr); +} +// accessor copy utility: +// +template +std::tuple, const VAL*> make_accessor_copy(const AccessorRO& src_acc, + const Rect<1>& src_rect) +{ + size_t src_strides[1]; + const VAL* src_ptr 
= src_acc.ptr(src_rect, src_strides); + assert(src_strides[0] == 1); + // + // const VAL* src_ptr: need to create a copy with create_buffer(...); + // since src will get sorted (in-place); + // + size_t src_size = src_rect.hi - src_rect.lo + 1; + Buffer src_copy = create_buffer(src_size); + return std::make_tuple(src_size, src_copy, src_ptr); +} +} // namespace detail + template struct HistogramImplBody { using VAL = legate_type_of; - void operator()(AccessorRD, true, 1> lhs, - const AccessorRO& rhs, - const Rect<1>& rect, - const Rect<1>& lhs_rect) const - { - for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { - auto value = rhs[idx]; - assert(lhs_rect.contains(value)); - lhs.reduce(value, 1); - } - } + // for now, it has been decided to hardcode these types: + // + using BinType = double; + using WeightType = double; - void operator()(AccessorRD, true, 1> lhs, - const AccessorRO& rhs, - const AccessorRO& weights, - const Rect<1>& rect, - const Rect<1>& lhs_rect) const + // in the future we might relax that requirement, + // but that would complicate dispatching: + // + // template + void operator()(const AccessorRO& src, + const Rect<1>& src_rect, + const AccessorRO& bins, + const Rect<1>& bins_rect, + const AccessorRO& weights, + const Rect<1>& weights_rect, + const AccessorRD, true, 1>& result, + const Rect<1>& result_rect) const { - for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { - auto value = rhs[idx]; - assert(lhs_rect.contains(value)); - lhs.reduce(value, weights[idx]); - } + auto&& [src_size, src_copy, src_ptr] = detail::make_accessor_copy(src, src_rect); + std::copy_n(src_ptr, src_size, src_copy.ptr(0)); + + auto&& [weights_size, weights_copy, weights_ptr] = + detail::make_accessor_copy(weights, weights_rect); + std::copy_n(weights_ptr, weights_size, weights_copy.ptr(0)); + + auto&& [bins_size, bins_ptr] = detail::get_accessor_ptr(bins, bins_rect); + + auto num_intervals = bins_size - 1; + Buffer local_result = create_buffer(num_intervals); + 
+ WeightType* local_result_ptr = local_result.ptr(0); + + auto&& [global_result_size, global_result_ptr] = detail::get_accessor_ptr(result, result_rect); + + detail::histogram_weights(thrust::host, + src_copy.ptr(0), + src_size, + bins_ptr, + num_intervals, + local_result_ptr, + weights_copy.ptr(0)); + + // fold the local result into the RD result (element-wise sum): + // + assert(num_intervals == global_result_size); + + thrust::transform( + thrust::host, + local_result_ptr, + local_result_ptr + num_intervals, + global_result_ptr, + global_result_ptr, + [](auto local_value, auto global_value) { return local_value + global_value; }); } }; /*static*/ void HistogramTask::cpu_variant(TaskContext& context) { - bincount_template(context); + histogram_template(context); } namespace // unnamed { -static void __attribute__((constructor)) register_tasks(void) { HistogramTask::register_variants(); } +static void __attribute__((constructor)) register_tasks(void) +{ + HistogramTask::register_variants(); +} } // namespace } // namespace cunumeric diff --git a/src/cunumeric/stat/histogram_cpu.h b/src/cunumeric/stat/histogram_cpu.h index 8db1f353d..c4012fe78 100644 --- a/src/cunumeric/stat/histogram_cpu.h +++ b/src/cunumeric/stat/histogram_cpu.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include