Skip to content

Commit

Permalink
GH-15281: [C++] Replace bytes_view alias with span (#36334)
Browse files Browse the repository at this point in the history
### Rationale for this change

basic_string_view was never really intended to support types other than chars; the template which the STL intends for use in this situation is std::span 

### What changes are included in this PR?

Since std::span is added in c++20, a minimal polyfill is added named util::span.
- doesn't include support for static extents
- some accessors like `front()` are not present in util::span

### User facing changes:

- The broken alias bytes_view and its header are removed. I don't expect this to trouble anybody much since it isn't used in much of our API, and if it's really necessary it will be trivial to add it back with `using bytes_view = span<uint8_t const>;` or so

* Closes: #15281

Lead-authored-by: Benjamin Kietzman <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
bkietz and pitrou authored Jul 11, 2023
1 parent 2bd031a commit 366dbe1
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 51 deletions.
7 changes: 1 addition & 6 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
#include "arrow/device.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/bytes_view.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

Expand Down Expand Up @@ -167,13 +166,9 @@ class ARROW_EXPORT Buffer {
/// \brief View buffer contents as a std::string_view
/// \return std::string_view
explicit operator std::string_view() const {
return std::string_view(reinterpret_cast<const char*>(data_), size_);
return {reinterpret_cast<const char*>(data_), static_cast<size_t>(size_)};
}

/// \brief View buffer contents as a util::bytes_view
/// \return util::bytes_view
explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); }

/// \brief Return a pointer to the buffer's data
///
/// The buffer has to be a CPU buffer (`is_cpu()` is true).
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ add_arrow_test(utility-test
reflection_test.cc
rows_to_batches_test.cc
small_vector_test.cc
span_test.cc
stl_util_test.cc
string_test.cc
tdigest_test.cc
Expand Down
29 changes: 13 additions & 16 deletions cpp/src/arrow/util/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/bitmap_writer.h"
#include "arrow/util/bytes_view.h"
#include "arrow/util/compare.h"
#include "arrow/util/endian.h"
#include "arrow/util/functional.h"
#include "arrow/util/span.h"
#include "arrow/util/string_builder.h"
#include "arrow/util/visibility.h"

Expand All @@ -49,9 +49,6 @@ namespace internal {
class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
public util::EqualityComparable<Bitmap> {
public:
template <typename Word>
using View = std::basic_string_view<Word>;

Bitmap() = default;

Bitmap(const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length)
Expand All @@ -72,17 +69,17 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,

Bitmap Slice(int64_t offset) const {
if (mutable_data_ != NULLPTR) {
return Bitmap(mutable_data_, offset_ + offset, length_ - offset);
return {mutable_data_, offset_ + offset, length_ - offset};
} else {
return Bitmap(data_, offset_ + offset, length_ - offset);
return {data_, offset_ + offset, length_ - offset};
}
}

Bitmap Slice(int64_t offset, int64_t length) const {
if (mutable_data_ != NULLPTR) {
return Bitmap(mutable_data_, offset_ + offset, length);
return {mutable_data_, offset_ + offset, length};
} else {
return Bitmap(data_, offset_ + offset, length);
return {data_, offset_ + offset, length};
}
}

Expand Down Expand Up @@ -158,7 +155,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
Bitmap bitmaps[N];
int64_t offsets[N];
int64_t bit_length = BitLength(bitmaps_arg, N);
View<Word> words[N];
util::span<const Word> words[N];
for (size_t i = 0; i < N; ++i) {
bitmaps[i] = bitmaps_arg[i];
offsets[i] = bitmaps[i].template word_offset<Word>();
Expand Down Expand Up @@ -386,15 +383,15 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
/// number of bits in this Bitmap
int64_t length() const { return length_; }

/// string_view of all bytes which contain any bit in this Bitmap
util::bytes_view bytes() const {
/// span of all bytes which contain any bit in this Bitmap
util::span<const uint8_t> bytes() const {
auto byte_offset = offset_ / 8;
auto byte_count = bit_util::CeilDiv(offset_ + length_, 8) - byte_offset;
return util::bytes_view(data_ + byte_offset, byte_count);
return {data_ + byte_offset, static_cast<size_t>(byte_count)};
}

private:
/// string_view of all Words which contain any bit in this Bitmap
/// span of all Words which contain any bit in this Bitmap
///
/// For example, given Word=uint16_t and a bitmap spanning bits [20, 36)
/// words() would span bits [16, 48).
Expand All @@ -407,15 +404,15 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
/// \warning The words may contain bytes which lie outside the buffer or are
/// uninitialized.
template <typename Word>
View<Word> words() const {
util::span<const Word> words() const {
auto bytes_addr = reinterpret_cast<intptr_t>(bytes().data());
auto words_addr = bytes_addr - bytes_addr % sizeof(Word);
auto word_byte_count =
bit_util::RoundUpToPowerOf2(static_cast<int64_t>(bytes_addr + bytes().size()),
static_cast<int64_t>(sizeof(Word))) -
words_addr;
return View<Word>(reinterpret_cast<const Word*>(words_addr),
word_byte_count / sizeof(Word));
return {reinterpret_cast<const Word*>(words_addr),
static_cast<size_t>(word_byte_count / sizeof(Word))};
}

/// offset of first bit relative to words<Word>().data()
Expand Down
29 changes: 0 additions & 29 deletions cpp/src/arrow/util/bytes_view.h

This file was deleted.

132 changes: 132 additions & 0 deletions cpp/src/arrow/util/span.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <type_traits>

namespace arrow::util {

/// std::span polyfill.
///
/// Does not support static extents.
template <typename T>
class span {
static_assert(sizeof(T),
R"(
std::span allows contiguous_iterators instead of just pointers, the enforcement
of which requires T to be a complete type. arrow::util::span does not support
contiguous_iterators, but T is still required to be a complete type to prevent
writing code which would break when it is replaced by std::span.)");

public:
using element_type = T;
using value_type = std::remove_cv_t<T>;
using iterator = T*;
using const_iterator = T const*;

span() = default;
span(const span&) = default;
span& operator=(const span&) = default;

template <typename M, typename = std::enable_if_t<std::is_same_v<T, M const>>>
// NOLINTNEXTLINE runtime/explicit
constexpr span(span<M> mut) : span{mut.data(), mut.size()} {}

constexpr span(T* data, size_t count) : data_{data}, size_{count} {}

constexpr span(T* begin, T* end)
: data_{begin}, size_{static_cast<size_t>(end - begin)} {}

template <
typename R,
typename DisableUnlessConstructibleFromDataAndSize =
decltype(span<T>(std::data(std::declval<R>()), std::size(std::declval<R>()))),
typename DisableUnlessSimilarTypes = std::enable_if_t<std::is_same_v<
std::decay_t<std::remove_pointer_t<decltype(std::data(std::declval<R>()))>>,
std::decay_t<T>>>>
// NOLINTNEXTLINE runtime/explicit, non-const reference
constexpr span(R&& range) : span{std::data(range), std::size(range)} {}

constexpr T* begin() const { return data_; }
constexpr T* end() const { return data_ + size_; }
constexpr T* data() const { return data_; }

constexpr size_t size() const { return size_; }
constexpr size_t size_bytes() const { return size_ * sizeof(T); }
constexpr bool empty() const { return size_ == 0; }

constexpr T& operator[](size_t i) { return data_[i]; }
constexpr const T& operator[](size_t i) const { return data_[i]; }

constexpr span subspan(size_t offset) const {
if (offset > size_) return {data_, data_};
return {data_ + offset, size_ - offset};
}

constexpr span subspan(size_t offset, size_t count) const {
auto out = subspan(offset);
if (count < out.size_) {
out.size_ = count;
}
return out;
}

constexpr bool operator==(span const& other) const {
if (size_ != other.size_) return false;

if constexpr (std::is_integral_v<T>) {
if (size_ == 0) {
return true; // memcmp does not handle null pointers, even if size_ == 0
}
return std::memcmp(data_, other.data_, size_bytes()) == 0;
} else {
T* ptr = data_;
for (T const& e : other) {
if (*ptr++ != e) return false;
}
return true;
}
}
constexpr bool operator!=(span const& other) const { return !(*this == other); }

private:
T* data_{};
size_t size_{};
};

template <typename R>
span(R& range) -> span<std::remove_pointer_t<decltype(std::data(range))>>;

template <typename T>
span(T*, size_t) -> span<T>;

template <typename T>
constexpr span<std::byte const> as_bytes(span<T> s) {
return {reinterpret_cast<std::byte const*>(s.data()), s.size_bytes()};
}

template <typename T>
constexpr span<std::byte> as_writable_bytes(span<T> s) {
return {reinterpret_cast<std::byte*>(s.data()), s.size_bytes()};
}

} // namespace arrow::util
Loading

0 comments on commit 366dbe1

Please sign in to comment.