Skip to content

Commit

Permalink
apacheGH-42100: [C++][Parquet] ParquetFilePrinter::JSONPrint print le…
Browse files Browse the repository at this point in the history
…ngth of FLBA (apache#41981)

### Rationale for this change

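`ParquetFilePrinter::JSONPrint` did not include the type length when printing the physical type of a FLBA (`FIXED_LEN_BYTE_ARRAY`) column. Print the FLBA length in `ParquetFilePrinter::JSONPrint`.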

### What changes are included in this PR?

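Print FLBA length in `ParquetFilePrinter::JSONPrint`: the column's `type_length()` is now passed to `TypeToString` together with the physical type, so a FLBA column is printed as, for example, `FIXED_LEN_BYTE_ARRAY(4)`.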

### Are these changes tested?

no need

### Are there any user-facing changes?

no

* GitHub Issue: apache#42100

Authored-by: mwish <[email protected]>
Signed-off-by: mwish <[email protected]>
  • Loading branch information
mapleFU authored Jun 11, 2024
1 parent 64b1109 commit 6597467
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
3 changes: 2 additions & 1 deletion cpp/src/parquet/printer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
const ColumnDescriptor* descr = file_metadata->schema()->Column(i);
stream << " { \"Id\": \"" << i << "\","
<< " \"Name\": \"" << descr->path()->ToDotString() << "\","
<< " \"PhysicalType\": \"" << TypeToString(descr->physical_type()) << "\","
<< " \"PhysicalType\": \""
<< TypeToString(descr->physical_type(), descr->type_length()) << "\","
<< " \"ConvertedType\": \"" << ConvertedTypeToString(descr->converted_type())
<< "\","
<< " \"LogicalType\": " << (descr->logical_type())->ToJSON() << " }";
Expand Down
47 changes: 38 additions & 9 deletions cpp/src/parquet/reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1077,7 +1077,24 @@ Column 1
ASSERT_EQ(header_output + dump_output, ss_dump.str());
}

TEST(TestJSONWithLocalFile, JSONOutput) {
/// Test fixture for JSON-printing tests that read a local Parquet file.
class TestJSONWithLocalFile : public ::testing::Test {
 public:
  /// Opens the file named `local_file_name` (the name is passed through
  /// `data_file`) without memory mapping and returns the output of
  /// `ParquetFilePrinter::JSONPrint` for all columns.
  ///
  /// \param local_file_name file name handed to `data_file`; also used as the
  ///        file name argument of `JSONPrint`.
  /// \return everything `JSONPrint` wrote to the stream, as a string.
  static std::string ReadFromLocalFile(std::string_view local_file_name) {
    // std::string_view::data() is not guaranteed to be null-terminated, so
    // materialize an owning std::string before handing out a C string.
    const std::string file_name(local_file_name);

    std::stringstream ss;
    // empty list means print all
    std::list<int> columns;

    auto reader = ParquetFileReader::OpenFile(data_file(file_name.c_str()),
                                              /*memory_map=*/false,
                                              default_reader_properties());
    ParquetFilePrinter printer(reader.get());
    printer.JSONPrint(ss, columns, file_name.c_str());

    return ss.str();
  }
};

TEST_F(TestJSONWithLocalFile, JSONOutput) {
std::string json_output = R"###({
"FileName": "alltypes_plain.parquet",
"Version": "1.0",
Expand Down Expand Up @@ -1131,16 +1148,28 @@ TEST(TestJSONWithLocalFile, JSONOutput) {
}
)###";

std::stringstream ss;
// empty list means print all
std::list<int> columns;
std::string json_content = ReadFromLocalFile("alltypes_plain.parquet");
ASSERT_EQ(json_output, json_content);
}

auto reader =
ParquetFileReader::OpenFile(alltypes_plain(), false, default_reader_properties());
ParquetFilePrinter printer(reader.get());
printer.JSONPrint(ss, columns, "alltypes_plain.parquet");
// Checks that the JSON printed for fixed_length_byte_array.parquet contains the
// expected file-level fields and column description, including the FLBA type
// length in "PhysicalType".
TEST_F(TestJSONWithLocalFile, JSONOutputFLBA) {
// The min-max stats for FLBA contain non-UTF-8 output, so we don't check
// the whole JSON output.
std::string json_content = ReadFromLocalFile("fixed_length_byte_array.parquet");

// Expected fragment of the output; only a substring match is performed below.
std::string json_contains = R"###({
"FileName": "fixed_length_byte_array.parquet",
"Version": "1.0",
"CreatedBy": "parquet-mr version 1.13.0-SNAPSHOT (build d057b39d93014fe40f5067ee4a33621e65c91552)",
"TotalRows": "1000",
"NumberOfRowGroups": "1",
"NumberOfRealColumns": "1",
"NumberOfColumns": "1",
"Columns": [
{ "Id": "0", "Name": "flba_field", "PhysicalType": "FIXED_LEN_BYTE_ARRAY(4)", "ConvertedType": "NONE", "LogicalType": {"Type": "None"} }
])###";

// NOTE(review): `json_output` and `ss` are not declared anywhere in this test;
// this line looks like a leftover from the pre-change side of the diff — confirm
// before relying on it.
ASSERT_EQ(json_output, ss.str());
EXPECT_THAT(json_content, testing::HasSubstr(json_contains));
}

TEST(TestFileReader, BufferedReadsWithDictionary) {
Expand Down

0 comments on commit 6597467

Please sign in to comment.