diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index e752b4ceea..e5eaa22f02 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -556,8 +556,7 @@ private void addRowGroup(
         columnMetaData.getTotalUncompressedSize(),
         columnMetaData.getTotalSize(),
         columnMetaData.getFirstDataPageOffset());
-    if (columnMetaData.getEncodingStats() != null
-        && columnMetaData.getEncodingStats().hasDictionaryPages()) {
+    if (columnMetaData.hasDictionaryPage()) {
       metaData.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset());
     }
     long bloomFilterOffset = columnMetaData.getBloomFilterOffset();
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 4dcede624f..344785593f 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -225,6 +225,32 @@ public void testParquetMetadataConverterWithDictionary() throws IOException {
     Assert.assertEquals(dicOffsetOriginal, dicOffsetConverted);
   }
 
+  @Test
+  public void testParquetMetadataConverterWithDictionaryWithoutStats() throws IOException {
+    ParquetMetadata parquetMetaData = createParquetMetaData(Encoding.PLAIN_DICTIONARY, Encoding.PLAIN, false);
+
+    ParquetMetadataConverter converter = new ParquetMetadataConverter();
+    FileMetaData fmd1 = converter.toParquetMetadata(1, parquetMetaData);
+
+    // Flag should be true
+    fmd1.row_groups.forEach(rowGroup -> rowGroup.columns.forEach(column -> {
+      assertTrue(column.meta_data.isSetDictionary_page_offset());
+    }));
+
+    ByteArrayOutputStream metaDataOutputStream = new ByteArrayOutputStream();
+    Util.writeFileMetaData(fmd1, metaDataOutputStream);
+    ByteArrayInputStream metaDataInputStream = new ByteArrayInputStream(metaDataOutputStream.toByteArray());
+    FileMetaData fmd2 = Util.readFileMetaData(metaDataInputStream);
+    ParquetMetadata parquetMetaDataConverted = converter.fromParquetMetadata(fmd2);
+
+    long dicOffsetOriginal =
+        parquetMetaData.getBlocks().get(0).getColumns().get(0).getDictionaryPageOffset();
+    long dicOffsetConverted =
+        parquetMetaDataConverted.getBlocks().get(0).getColumns().get(0).getDictionaryPageOffset();
+
+    Assert.assertEquals(dicOffsetOriginal, dicOffsetConverted);
+  }
+
   @Test
   public void testParquetMetadataConverterWithoutDictionary() throws IOException {
     ParquetMetadata parquetMetaData = createParquetMetaData(null, Encoding.PLAIN);
@@ -1248,17 +1274,25 @@ private static Statistics<?> createStatsTyped(PrimitiveType type, BigInteger min
   }
 
   private static ParquetMetadata createParquetMetaData(Encoding dicEncoding, Encoding dataEncoding) {
+    return createParquetMetaData(dicEncoding, dataEncoding, true);
+  }
+
+  private static ParquetMetadata createParquetMetaData(
+      Encoding dicEncoding, Encoding dataEncoding, boolean includeDicStats) {
     MessageType schema = parseMessageType("message schema { optional int32 col (INT_32); }");
     org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
         new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new HashMap<String, String>(), null);
     List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
     BlockMetaData blockMetaData = new BlockMetaData();
-    EncodingStats.Builder builder = new EncodingStats.Builder();
-    if (dicEncoding != null) {
-      builder.addDictEncoding(dicEncoding).build();
+    EncodingStats es = null;
+    if (includeDicStats) {
+      EncodingStats.Builder builder = new EncodingStats.Builder();
+      if (dicEncoding != null) {
+        builder.addDictEncoding(dicEncoding).build();
+      }
+      builder.addDataEncoding(dataEncoding);
+      es = builder.build();
     }
-    builder.addDataEncoding(dataEncoding);
-    EncodingStats es = builder.build();
     Set<Encoding> e = new HashSet<Encoding>();
     PrimitiveTypeName t = PrimitiveTypeName.INT32;
     ColumnPath p = ColumnPath.get("col");
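
For context, the one-line fix above works because ColumnChunkMetaData.hasDictionaryPage() does not require EncodingStats to be present, unlike the removed check. A rough sketch of that fallback logic, paraphrased rather than quoted verbatim from the library:

    // In org.apache.parquet.hadoop.metadata.ColumnChunkMetaData (paraphrased sketch):
    public boolean hasDictionaryPage() {
      EncodingStats stats = getEncodingStats();
      if (stats != null) {
        // stats are authoritative: a dictionary page must exist and actually encode data pages
        return stats.hasDictionaryPages() && stats.hasDictionaryEncodedPages();
      }
      // no stats (e.g. metadata written without them): infer from the column chunk's encoding set
      Set<Encoding> encodings = getEncodings();
      return encodings.contains(Encoding.PLAIN_DICTIONARY) || encodings.contains(Encoding.RLE_DICTIONARY);
    }

This is why the new testParquetMetadataConverterWithDictionaryWithoutStats case, which builds metadata with a dictionary encoding but no EncodingStats, still expects dictionary_page_offset to be set after the round trip.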