Skip to content

Commit

Permalink
CPU variant.
Browse files Browse the repository at this point in the history
  • Loading branch information
aschaffer committed Jun 28, 2023
1 parent d51eaed commit a1c9366
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 26 deletions.
141 changes: 116 additions & 25 deletions src/cunumeric/stat/histogram.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,49 +17,140 @@
#include "cunumeric/stat/histogram.h"
#include "cunumeric/stat/histogram_template.inl"

namespace cunumeric {
#include "cunumeric/stat/histogram_cpu.h"
#include "cunumeric/stat/histogram_impl.h"

#include <algorithm>
#include <numeric>
#include <tuple>

namespace cunumeric {
using namespace legate;

// TODO:
namespace detail {

// RO accessor (size, pointer) extractor.
//
// Returns the number of elements covered by `src_rect` together with the
// pointer handed back by `src_acc.ptr(src_rect, ...)`. No copy of the data
// is made here.
//
// The stride reported by the accessor is asserted to be 1; that check is
// compiled out when NDEBUG is defined.
//
// @param src_acc   1-D read-only accessor to query.
// @param src_rect  1-D rectangle passed to the accessor.
// @return          (element count, pointer) tuple.
//
template <typename VAL>
std::tuple<size_t, const VAL*> get_accessor_ptr(const AccessorRO<VAL, 1>& src_acc,
                                                const Rect<1>& src_rect)
{
  size_t src_strides[1];
  const VAL* src_ptr = src_acc.ptr(src_rect, src_strides);
  assert(src_strides[0] == 1);

  // volume() is 0 for an empty rectangle, whereas `hi - lo + 1` can become
  // negative and wrap around once converted to size_t.
  const size_t src_size = src_rect.volume();
  return std::make_tuple(src_size, src_ptr);
}
// RD accessor (size, pointer) extractor.
//
// Overload for exclusive sum-reduction accessors. Returns the number of
// elements covered by `src_rect` together with the (mutable) pointer handed
// back by `src_acc.ptr(src_rect, ...)`. No copy of the data is made here.
//
// The stride reported by the accessor is asserted to be 1; that check is
// compiled out when NDEBUG is defined.
//
// @param src_acc   1-D sum-reduction accessor to query.
// @param src_rect  1-D rectangle passed to the accessor.
// @return          (element count, pointer) tuple.
//
template <typename VAL>
std::tuple<size_t, VAL*> get_accessor_ptr(const AccessorRD<SumReduction<VAL>, true, 1>& src_acc,
                                          const Rect<1>& src_rect)
{
  size_t src_strides[1];
  VAL* src_ptr = src_acc.ptr(src_rect, src_strides);
  assert(src_strides[0] == 1);

  // volume() is 0 for an empty rectangle, whereas `hi - lo + 1` can become
  // negative and wrap around once converted to size_t.
  const size_t src_size = src_rect.volume();
  return std::make_tuple(src_size, src_ptr);
}
// Accessor copy utility.
//
// Allocates a scratch buffer with one slot per element of `src_rect` and
// returns it together with the element count and the source pointer handed
// back by `src_acc.ptr(src_rect, ...)`.
//
// NOTE: despite the name, this function does NOT copy any data. The buffer
// returned from create_buffer() is only allocated here; the caller is
// expected to fill it from the returned source pointer (e.g. std::copy_n).
// A separate buffer is needed because the source is read-only and the data
// gets sorted in-place downstream.
//
// The stride reported by the accessor is asserted to be 1; that check is
// compiled out when NDEBUG is defined.
//
// @param src_acc   1-D read-only accessor to query.
// @param src_rect  1-D rectangle passed to the accessor.
// @return          (element count, scratch buffer, source pointer) tuple.
//
template <typename VAL>
std::tuple<size_t, Buffer<VAL>, const VAL*> make_accessor_copy(const AccessorRO<VAL, 1>& src_acc,
                                                               const Rect<1>& src_rect)
{
  size_t src_strides[1];
  const VAL* src_ptr = src_acc.ptr(src_rect, src_strides);
  assert(src_strides[0] == 1);

  // volume() is 0 for an empty rectangle, whereas `hi - lo + 1` can become
  // negative and wrap around once converted to size_t (which would then be
  // used as the allocation size below).
  const size_t src_size = src_rect.volume();
  Buffer<VAL> src_copy  = create_buffer<VAL>(src_size);
  return std::make_tuple(src_size, src_copy, src_ptr);
}
} // namespace detail

// CPU specialization of HistogramImplBody, parameterized on the legate type
// code of the input values.
//
// NOTE(review): this span comes from a rendered commit diff; lines of the
// previous (bincount-style) implementation and of the new histogram
// implementation appear interleaved without +/- markers. The comments below
// describe each statement as written -- confirm against the actual file
// which lines survive.
template <Type::Code CODE>
struct HistogramImplBody<VariantKind::CPU, CODE> {
// element type corresponding to CODE:
using VAL = legate_type_of<CODE>;

// Unweighted counting overload: for every index in `rect`, reads
// rhs[idx] and reduces 1 into `lhs` at that value; the value is asserted
// to lie inside `lhs_rect`.
void operator()(AccessorRD<SumReduction<int64_t>, true, 1> lhs,
const AccessorRO<VAL, 1>& rhs,
const Rect<1>& rect,
const Rect<1>& lhs_rect) const
{
for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) {
auto value = rhs[idx];
assert(lhs_rect.contains(value));
lhs.reduce(value, 1);
}
}
// for now, it has been decided to hardcode these types:
//
using BinType = double;
using WeightType = double;

// Weighted counting overload signature (same loop shape as above, but
// reducing weights[idx] instead of 1 -- see the loop further below).
void operator()(AccessorRD<SumReduction<double>, true, 1> lhs,
const AccessorRO<VAL, 1>& rhs,
const AccessorRO<double, 1>& weights,
const Rect<1>& rect,
const Rect<1>& lhs_rect) const
// in the future we might relax that requirement,
// but complicate dispatching:
//
// template <typename BinType = VAL, typename WeightType = VAL>
//
// Histogram overload: takes the source values, the bin edges, the weights
// and the sum-reduction result, each paired with its 1-D rectangle.
void operator()(const AccessorRO<VAL, 1>& src,
const Rect<1>& src_rect,
const AccessorRO<BinType, 1>& bins,
const Rect<1>& bins_rect,
const AccessorRO<WeightType, 1>& weights,
const Rect<1>& weights_rect,
const AccessorRD<SumReduction<WeightType>, true, 1>& result,
const Rect<1>& result_rect) const
{
for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) {
auto value = rhs[idx];
assert(lhs_rect.contains(value));
lhs.reduce(value, weights[idx]);
}
// make_accessor_copy only allocates the scratch buffer; the actual copy
// of the source values happens in the copy_n that follows.
auto&& [src_size, src_copy, src_ptr] = detail::make_accessor_copy(src, src_rect);
std::copy_n(src_ptr, src_size, src_copy.ptr(0));

// same allocate-then-copy sequence for the weights:
auto&& [weights_size, weights_copy, weights_ptr] =
detail::make_accessor_copy(weights, weights_rect);
std::copy_n(weights_ptr, weights_size, weights_copy.ptr(0));

// bins are accessed through the accessor's pointer directly (no copy):
auto&& [bins_size, bins_ptr] = detail::get_accessor_ptr(bins, bins_rect);

// one interval per pair of consecutive bin entries;
// NOTE(review): bins_size is unsigned, so this wraps if bins is empty --
// presumably callers guarantee at least one bin entry; confirm.
auto num_intervals = bins_size - 1;
Buffer<WeightType> local_result = create_buffer<WeightType>(num_intervals);

WeightType* local_result_ptr = local_result.ptr(0);

auto&& [global_result_size, global_result_ptr] = detail::get_accessor_ptr(result, result_rect);

// NOTE(review): histogram_weights is defined elsewhere; presumably it fills
// local_result_ptr with the per-interval weighted counts and may reorder
// the src / weights copies in the process -- confirm against its definition.
detail::histogram_weights(thrust::host,
src_copy.ptr(0),
src_size,
bins_ptr,
num_intervals,
local_result_ptr,
weights_copy.ptr(0));

// fold into RD result:
//
assert(num_intervals == global_result_size);

// element-wise: global_result[i] = local_result[i] + global_result[i]
thrust::transform(
thrust::host,
local_result_ptr,
local_result_ptr + num_intervals,
global_result_ptr,
global_result_ptr,
[](auto local_value, auto global_value) { return local_value + global_value; });
}
};

// CPU variant entry point of HistogramTask: forwards the task context to the
// VariantKind::CPU instantiation of the template(s) called below.
//
// NOTE(review): this span comes from a rendered commit diff; the
// bincount_template call looks like the removed line and the
// histogram_template call like its replacement -- confirm against the
// actual file.
/*static*/ void HistogramTask::cpu_variant(TaskContext& context)
{
bincount_template<VariantKind::CPU>(context);
histogram_template<VariantKind::CPU>(context);
}

namespace // unnamed
{
// Calls HistogramTask::register_variants(). The function is tagged with
// __attribute__((constructor)), which GCC/Clang document as making it run
// automatically before main() is entered.
static void __attribute__((constructor)) register_tasks(void) { HistogramTask::register_variants(); }
// NOTE(review): same function as above, reformatted across several lines;
// this span comes from a rendered commit diff, so presumably only one of the
// two definitions exists in the actual file -- confirm.
static void __attribute__((constructor)) register_tasks(void)
{
HistogramTask::register_variants();
}
} // namespace

} // namespace cunumeric
1 change: 0 additions & 1 deletion src/cunumeric/stat/histogram_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <cassert>
#include <iostream>
#include <numeric>
#include <optional>
#include <tuple>
#include <vector>

Expand Down

0 comments on commit a1c9366

Please sign in to comment.