Skip to content

Commit

Permalink
GH-3013: Fix potential ClassCastException at reading DELTA_BYTE_ARRAY…
Browse files Browse the repository at this point in the history
… encoding (#3019)
  • Loading branch information
gszadovszky authored and wgtmac committed Sep 30, 2024
1 parent aec24e7 commit b1475a7
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,6 @@ private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int
}

if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding)
&& previousReader != null
&& dataColumn instanceof RequiresPreviousReader) {
// previous reader can only be set if reading sequentially
((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public Binary readBytes() {
*/
@Override
public void setPreviousReader(ValuesReader reader) {
if (reader != null) {
if (reader instanceof DeltaByteArrayReader) {
this.previous = ((DeltaByteArrayReader) reader).previous;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

Expand All @@ -42,15 +43,17 @@
import org.apache.parquet.column.values.ValuesWriter;
import org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader;
import org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter;
import org.apache.parquet.column.values.dictionary.DictionaryValuesReader;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.schema.PrimitiveType;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;

public class TestCorruptDeltaByteArrays {
@Test
public void testCorruptDeltaByteArrayVerisons() {
public void testCorruptDeltaByteArrayVersions() {
assertTrue(CorruptDeltaByteArrays.requiresSequentialReads(
"parquet-mr version 1.6.0 (build abcd)", Encoding.DELTA_BYTE_ARRAY));
assertTrue(CorruptDeltaByteArrays.requiresSequentialReads((String) null, Encoding.DELTA_BYTE_ARRAY));
Expand All @@ -74,7 +77,7 @@ public void testCorruptDeltaByteArrayVerisons() {
}

@Test
public void testEncodingRequiresSequentailRead() {
public void testEncodingRequiresSequentialRead() {
ParsedVersion impala = new ParsedVersion("impala", "1.2.0", "abcd");
assertFalse(CorruptDeltaByteArrays.requiresSequentialReads(impala, Encoding.DELTA_BYTE_ARRAY));
ParsedVersion broken = new ParsedVersion("parquet-mr", "1.8.0-SNAPSHOT", "abcd");
Expand Down Expand Up @@ -262,6 +265,43 @@ public void addBinary(Binary value) {
Assert.assertEquals(values, actualValues);
}

@Test
public void testPreviousReaderSetting() {
Binary previous = Binary.fromString("<<<PREVIOUS>>>");
DeltaByteArrayReader previousReader = new DeltaByteArrayReader();
setPrevious(previousReader, previous);

DeltaByteArrayReader reader = new DeltaByteArrayReader();
reader.setPreviousReader(previousReader);
assertSame(previous, getPrevious(reader));

reader.setPreviousReader(null);
assertSame("The previous field should have not changed", previous, getPrevious(reader));

reader.setPreviousReader(Mockito.mock(DictionaryValuesReader.class));
assertSame("The previous field should have not changed", previous, getPrevious(reader));
}

private Binary getPrevious(DeltaByteArrayReader reader) {
try {
Field previousField = reader.getClass().getDeclaredField("previous");
previousField.setAccessible(true);
return (Binary) previousField.get(reader);
} catch (NoSuchFieldException | IllegalAccessException e) {
throw new AssertionError("Unable to get the private field \"previous\" of the reader" + reader, e);
}
}

private void setPrevious(DeltaByteArrayReader reader, Binary previous) {
try {
Field previousField = reader.getClass().getDeclaredField("previous");
previousField.setAccessible(true);
previousField.set(reader, previous);
} catch (NoSuchFieldException | IllegalAccessException e) {
throw new AssertionError("Unable to set the private field \"previous\" of the reader" + reader, e);
}
}

public void corruptWriter(DeltaByteArrayWriter writer, String data) throws Exception {
Field previous = writer.getClass().getDeclaredField("previous");
previous.setAccessible(true);
Expand Down

0 comments on commit b1475a7

Please sign in to comment.