diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 194670f2df..bb9e58b7ad 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -557,8 +557,9 @@ private void addRowGroup(
           columnMetaData.getTotalUncompressedSize(),
           columnMetaData.getTotalSize(),
           columnMetaData.getFirstDataPageOffset());
-      if (columnMetaData.getEncodingStats() != null
-          && columnMetaData.getEncodingStats().hasDictionaryPages()) {
+      if ((columnMetaData.getEncodingStats() != null
+          && columnMetaData.getEncodingStats().hasDictionaryPages())
+          || columnMetaData.hasDictionaryPage()) {
         metaData.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset());
       }
       long bloomFilterOffset = columnMetaData.getBloomFilterOffset();
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 2cffb51860..6b3259070e 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -206,7 +206,16 @@ public void testSchemaConverterDecimal() {
   @Test
   public void testParquetMetadataConverterWithDictionary() throws IOException {
     ParquetMetadata parquetMetaData = createParquetMetaData(Encoding.PLAIN_DICTIONARY, Encoding.PLAIN);
+    testParquetMetadataConverterWithDictionary(parquetMetaData);
+  }
 
+  @Test
+  public void testParquetMetadataConverterWithDictionaryAndWithoutEncodingStats() throws IOException {
+    ParquetMetadata parquetMetaData = createParquetMetaData(Encoding.PLAIN_DICTIONARY, Encoding.PLAIN, false);
+    testParquetMetadataConverterWithDictionary(parquetMetaData);
+  }
+
+  private void testParquetMetadataConverterWithDictionary(ParquetMetadata parquetMetaData) throws IOException {
     ParquetMetadataConverter converter = new ParquetMetadataConverter();
 
     FileMetaData fmd1 = converter.toParquetMetadata(1, parquetMetaData);
@@ -1283,18 +1292,32 @@ private static Statistics createStatsTyped(PrimitiveType type, BigInteger min
   }
 
   private static ParquetMetadata createParquetMetaData(Encoding dicEncoding, Encoding dataEncoding) {
+    return createParquetMetaData(dicEncoding, dataEncoding, true);
+  }
+
+  private static ParquetMetadata createParquetMetaData(
+      Encoding dicEncoding, Encoding dataEncoding, boolean includeEncodingStats) {
     MessageType schema = parseMessageType("message schema { optional int32 col (INT_32); }");
     org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
         new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new HashMap<String, String>(), null);
     List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
     BlockMetaData blockMetaData = new BlockMetaData();
-    EncodingStats.Builder builder = new EncodingStats.Builder();
-    if (dicEncoding != null) {
-      builder.addDictEncoding(dicEncoding).build();
+    EncodingStats es = null;
+    if (includeEncodingStats) {
+      EncodingStats.Builder builder = new EncodingStats.Builder();
+      if (dicEncoding != null) {
+        builder.addDictEncoding(dicEncoding).build();
+      }
+      builder.addDataEncoding(dataEncoding);
+      es = builder.build();
     }
-    builder.addDataEncoding(dataEncoding);
-    EncodingStats es = builder.build();
     Set<Encoding> e = new HashSet<Encoding>();
+    if (!includeEncodingStats) {
+      if (dicEncoding != null) {
+        e.add(dicEncoding);
+      }
+      e.add(dataEncoding);
+    }
     PrimitiveTypeName t = PrimitiveTypeName.INT32;
     ColumnPath p = ColumnPath.get("col");
     CompressionCodecName c = CompressionCodecName.UNCOMPRESSED;