Skip to content

Commit

Permalink
Address review comments in apache#43196
Browse files Browse the repository at this point in the history
  • Loading branch information
Kontinuation committed Sep 6, 2024
1 parent e89962f commit fb134d3
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cpp/src/parquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ add_parquet_test(internals-test
public_api_test.cc
types_test.cc)

add_parquet_test(geometry-test SOURCES geometry_util_test.cc)
add_parquet_test(geometry-test SOURCES geometry_util_internal_test.cc)

set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
SKIP_UNITY_BUILD_INCLUSION ON)
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/column_writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
ColumnChunkMetaData::Make(metadata_->contents(), this->descr_);
return metadata_accessor->key_value_metadata();
}

EncodedStatistics metadata_encoded_stats() {
ApplicationVersion app_version(this->writer_properties_->created_by());
auto metadata_accessor = ColumnChunkMetaData::Make(
Expand Down Expand Up @@ -1780,7 +1780,7 @@ TEST_F(TestInt32Writer, WriteKeyValueMetadataEndToEnd) {
ASSERT_OK_AND_ASSIGN(auto value, key_value_metadata->Get("foo"));
ASSERT_EQ("bar", value);
}

// Test writing and reading geometry columns
class TestGeometryValuesWriter : public TestPrimitiveWriter<ByteArrayType> {
public:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

#include "arrow/testing/gtest_compat.h"

#include "parquet/geometry_util.h"
#include "parquet/geometry_util_internal.h"

namespace parquet::geometry {

Expand Down
34 changes: 33 additions & 1 deletion cpp/src/parquet/statistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#include "arrow/visit_data_inline.h"
#include "parquet/encoding.h"
#include "parquet/exception.h"
#include "parquet/geometry_util.h"
#include "parquet/geometry_util_internal.h"
#include "parquet/platform.h"
#include "parquet/schema.h"
#include "parquet/types.h"
Expand Down Expand Up @@ -109,6 +109,28 @@ class GeometryStatisticsImpl {
}
}

void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits,
int64_t valid_bits_offset, int64_t num_spaced_values,
int64_t num_values, int64_t null_count) {
DCHECK_GT(num_spaced_values, 0);

geometry::WKBBuffer buf;
try {
::arrow::internal::VisitSetBitRunsVoid(
valid_bits, valid_bits_offset, num_spaced_values,
[&](int64_t position, int64_t length) {
for (int64_t i = 0; i < num_spaced_values; i++) {
ByteArray item = SafeLoad(values + i + position);
buf.Init(item.ptr, item.len);
bounder_.ReadGeometry(&buf);
}
});
bounder_.Flush();
} catch (ParquetException& e) {
is_valid_ = false;
}
}

EncodedGeometryStatistics Encode() const {
const double* mins = bounder_.Bounds().min;
const double* maxes = bounder_.Bounds().max;
Expand Down Expand Up @@ -1093,6 +1115,16 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* va
if (num_values == 0) return;
SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits,
valid_bits_offset));

if constexpr (std::is_same<T, ByteArray>::value) {
if (logical_type_ == LogicalType::Type::GEOMETRY) {
if (geometry_statistics_ == nullptr) {
geometry_statistics_ = std::make_unique<GeometryStatistics>();
}
geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset,
num_spaced_values, num_values, null_count);
}
}
}

template <typename DType>
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/parquet/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ class PARQUET_EXPORT GeometryStatistics {

void Update(const ByteArray* values, int64_t num_values, int64_t null_count);

void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits,
int64_t valid_bits_offset, int64_t num_spaced_values,
int64_t num_values, int64_t null_count);

EncodedGeometryStatistics Encode();

bool is_valid() const;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#include "parquet/column_reader.h"
#include "parquet/column_writer.h"
#include "parquet/encoding.h"
#include "parquet/geometry_util.h"
#include "parquet/geometry_util_internal.h"
#include "parquet/platform.h"

// https://github.com/google/googletest/pull/2904 might not be available
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/parquet/types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -474,12 +474,20 @@ std::shared_ptr<const LogicalType> LogicalType::FromThrift(
edges = LogicalType::GeometryEdges::PLANAR;
} else if (type.GEOMETRY.edges == format::Edges::SPHERICAL) {
edges = LogicalType::GeometryEdges::SPHERICAL;
} else {
std::stringstream ss;
ss << "Unknown value for geometry edges: " << type.GEOMETRY.edges;
throw ParquetException(ss.str());
}

LogicalType::GeometryEncoding::geometry_encoding encoding =
LogicalType::GeometryEncoding::UNKNOWN;
if (type.GEOMETRY.encoding == format::GeometryEncoding::WKB) {
encoding = LogicalType::GeometryEncoding::WKB;
} else {
std::stringstream ss;
ss << "Unknown value for geometry encoding: " << type.GEOMETRY.edges;
throw ParquetException(ss.str());
}

std::string metadata;
Expand Down

0 comments on commit fb134d3

Please sign in to comment.