From 912ed5684b9a0c4488635f1db1065176bc2d1b26 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Fri, 20 Oct 2023 16:08:35 -0500 Subject: [PATCH 1/7] cover basic types --- .../iceberg/IcebergSchemaExtractor.java | 116 ++++---- .../iceberg/TestIcebergSchemaExtractor.java | 268 +++--------------- 2 files changed, 104 insertions(+), 280 deletions(-) diff --git a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java index 2c8dc4b9..e6af9207 100644 --- a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java +++ b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java @@ -19,8 +19,10 @@ package io.onetable.iceberg; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -93,36 +95,23 @@ public Schema toIceberg(OneSchema oneSchema) { * @return Internal representation of deserialized iceberg schema */ public OneSchema fromIceberg(Schema iceSchema) { - OneSchemaBuilder irSchemaBuilder = OneSchema.builder(); - - List irFields = new ArrayList<>(); - for (Types.NestedField iceField : iceSchema.columns()) { - OneType irFieldType = fromIcebergType(iceField.type()); - OneSchema irFieldSchema = - OneSchema.builder() - .dataType(irFieldType) - .isNullable(iceField.isOptional()) - .metadata(new HashMap<>()) - .build(); - if (irFieldType == OneType.DECIMAL) { - Types.DecimalType decimalType = (Types.DecimalType) iceField.type(); - irFieldSchema.getMetadata().put(OneSchema.MetadataKey.DECIMAL_SCALE, decimalType.scale()); - irFieldSchema - .getMetadata() - .put(OneSchema.MetadataKey.DECIMAL_PRECISION, decimalType.precision()); - } + return OneSchema.builder() + .dataType(OneType.RECORD) + .fields(fromIceberg(iceSchema.columns())) + .build(); + } - OneField irField = + private List fromIceberg(List iceFields) { + return iceFields.stream().map(iceField -> { + OneSchema irFieldSchema = fromIcebergType(iceField); + return OneField.builder() .name(iceField.name()) .fieldId(iceField.fieldId()) .schema(irFieldSchema) + .defaultValue(iceField.isOptional() ? OneField.Constants.NULL_DEFAULT_VALUE : null) .build(); - irFields.add(irField); - } - irSchemaBuilder.fields(irFields); - - return irSchemaBuilder.build(); + }).collect(Collectors.toList()); } static String convertFromOneTablePath(String path) { @@ -237,47 +226,66 @@ private Type toIcebergType(OneField field, AtomicInteger fieldIdTracker) { } } - /** - * Maps Iceberg type to internal type representation - * - * @param type Iceberg type - * @return Internal representation of Iceberg type - */ - OneType fromIcebergType(Type type) { - switch (type.typeId()) { - // TODO ENUM type is not supported in Iceberg + OneSchema fromIcebergType(Types.NestedField iceField) { + OneType type; + List fields = null; + Map metadata = null; + switch (iceField.type().typeId()) { case STRING: - return OneType.STRING; + type = OneType.STRING; + break; case INTEGER: - return OneType.INT; + type = OneType.INT; + break; case LONG: - return OneType.LONG; + type = OneType.LONG; + break; case BINARY: - return OneType.BYTES; - // TODO - // case FIXED: - // return OneType.FIXED; + type = OneType.BYTES; + break; case BOOLEAN: - return OneType.BOOLEAN; + type = OneType.BOOLEAN; + break; case FLOAT: - return OneType.FLOAT; + type = OneType.FLOAT; + break; case DATE: - return OneType.DATE; + type = OneType.DATE; + break; case TIMESTAMP: - return OneType.TIMESTAMP; + Types.TimestampType timestampType = (Types.TimestampType) iceField.type(); + type = timestampType.shouldAdjustToUTC() ? OneType.TIMESTAMP : OneType.TIMESTAMP_NTZ; + metadata = Collections.singletonMap(OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MICROS); + break; case DOUBLE: - return OneType.DOUBLE; + type = OneType.DOUBLE; + break; case DECIMAL: - return OneType.DECIMAL; - // TODO - // case STRUCT: - // return OneType.RECORD; - // case MAP: - // return OneType.MAP; - // case LIST: - // return OneType.ARRAY; + Types.DecimalType decimalType = (Types.DecimalType) iceField.type(); + metadata = new HashMap<>(2, 1.0f); + metadata.put(OneSchema.MetadataKey.DECIMAL_PRECISION, decimalType.precision()); + metadata.put(OneSchema.MetadataKey.DECIMAL_SCALE, decimalType.scale()); + type = OneType.DECIMAL; + break; + case FIXED: + type = OneType.FIXED; + Types.FixedType fixedType = (Types.FixedType) iceField.type(); + metadata = Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedType.length()); + break; + case UUID: + type = OneType.FIXED; + metadata = Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, 16); + break; default: - throw new NotSupportedException("Unsupported type: " + type.typeId()); + throw new NotSupportedException("Unsupported type: " + iceField.type().typeId()); } + return OneSchema.builder() + .name(iceField.type().typeId().name().toLowerCase()) + .dataType(type) + .comment(iceField.doc()) + .isNullable(iceField.isOptional()) + .metadata(metadata) + .fields(fields) + .build(); } } diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index 195500cc..95cec7ce 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -32,12 +32,14 @@ import io.onetable.model.schema.OneSchema; import io.onetable.model.schema.OneType; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class TestIcebergSchemaExtractor { + private static final IcebergSchemaExtractor SCHEMA_EXTRACTOR = IcebergSchemaExtractor.getInstance(); + @Test public void testPrimitiveTypes() { - String schemaName = "testRecord"; - String doc = "What's up doc"; Map requiredEnumMetadata = new HashMap<>(); requiredEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("ONE", "TWO")); Map optionalEnumMetadata = new HashMap<>(); @@ -61,8 +63,6 @@ public void testPrimitiveTypes() { OneSchema oneSchemaRepresentation = OneSchema.builder() - .name(schemaName) - .comment(doc) .dataType(OneType.RECORD) .isNullable(false) .fields( @@ -75,7 +75,7 @@ public void testPrimitiveTypes() { .dataType(OneType.BOOLEAN) .isNullable(false) .build()) - .defaultValue(false) + .fieldId(1) .build(), OneField.builder() .name("optionalBoolean") @@ -86,6 +86,7 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(2) .build(), OneField.builder() .name("requiredInt") @@ -95,7 +96,7 @@ public void testPrimitiveTypes() { .dataType(OneType.INT) .isNullable(false) .build()) - .defaultValue(123) + .fieldId(3) .build(), OneField.builder() .name("optionalInt") @@ -106,6 +107,7 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(4) .build(), OneField.builder() .name("requiredLong") @@ -115,6 +117,7 @@ public void testPrimitiveTypes() { .dataType(OneType.LONG) .isNullable(false) .build()) + .fieldId(5) .build(), OneField.builder() .name("optionalLong") @@ -125,6 +128,7 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(6) .build(), OneField.builder() .name("requiredDouble") @@ -134,6 +138,7 @@ public void testPrimitiveTypes() { .dataType(OneType.DOUBLE) .isNullable(false) .build()) + .fieldId(7) .build(), OneField.builder() .name("optionalDouble") @@ -144,6 +149,7 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(8) .build(), OneField.builder() .name("requiredFloat") @@ -153,6 +159,7 @@ public void testPrimitiveTypes() { .dataType(OneType.FLOAT) .isNullable(false) .build()) + .fieldId(9) .build(), OneField.builder() .name("optionalFloat") @@ -163,6 +170,7 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(10) .build(), OneField.builder() .name("requiredString") @@ -172,6 +180,7 @@ public void testPrimitiveTypes() { .dataType(OneType.STRING) .isNullable(false) .build()) + .fieldId(11) .build(), OneField.builder() .name("optionalString") @@ -182,47 +191,28 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(12) .build(), OneField.builder() .name("requiredBytes") .schema( OneSchema.builder() - .name("bytes") + .name("binary") .dataType(OneType.BYTES) .isNullable(false) .build()) + .fieldId(13) .build(), OneField.builder() .name("optionalBytes") .schema( OneSchema.builder() - .name("bytes") + .name("binary") .dataType(OneType.BYTES) .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build(), - OneField.builder() - .name("requiredEnum") - .schema( - OneSchema.builder() - .name("REQUIRED_ENUM") - .dataType(OneType.ENUM) - .isNullable(false) - .metadata(requiredEnumMetadata) - .build()) - .defaultValue("ONE") - .build(), - OneField.builder() - .name("optionalEnum") - .schema( - OneSchema.builder() - .name("OPTIONAL_ENUM") - .dataType(OneType.ENUM) - .isNullable(true) - .metadata(optionalEnumMetadata) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(14) .build(), OneField.builder() .name("requiredDate") @@ -232,6 +222,7 @@ public void testPrimitiveTypes() { .dataType(OneType.DATE) .isNullable(false) .build()) + .fieldId(17) .build(), OneField.builder() .name("optionalDate") @@ -242,90 +233,7 @@ public void testPrimitiveTypes() { .isNullable(true) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build(), - OneField.builder() - .name("requiredTimestampMillis") - .schema( - OneSchema.builder() - .name("timestamp") - .dataType(OneType.TIMESTAMP) - .metadata(millisTimestamp) - .isNullable(false) - .build()) - .build(), - OneField.builder() - .name("optionalTimestampMillis") - .schema( - OneSchema.builder() - .name("timestamp") - .dataType(OneType.TIMESTAMP) - .metadata(millisTimestamp) - .isNullable(true) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build(), - OneField.builder() - .name("requiredTimestampNtzMillis") - .schema( - OneSchema.builder() - .name("timestampNtz") - .dataType(OneType.TIMESTAMP_NTZ) - .isNullable(false) - .metadata(millisTimestamp) - .build()) - .build(), - OneField.builder() - .name("optionalTimestampNtzMillis") - .schema( - OneSchema.builder() - .name("timestampNtz") - .dataType(OneType.TIMESTAMP_NTZ) - .metadata(millisTimestamp) - .isNullable(true) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build(), - OneField.builder() - .name("requiredTimestampMicros") - .schema( - OneSchema.builder() - .name("timestamp") - .dataType(OneType.TIMESTAMP) - .metadata(microsTimestamp) - .isNullable(false) - .build()) - .build(), - OneField.builder() - .name("optionalTimestampMicros") - .schema( - OneSchema.builder() - .name("timestamp") - .dataType(OneType.TIMESTAMP) - .metadata(microsTimestamp) - .isNullable(true) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build(), - OneField.builder() - .name("requiredTimestampNtzMicros") - .schema( - OneSchema.builder() - .name("timestampNtz") - .dataType(OneType.TIMESTAMP_NTZ) - .isNullable(false) - .metadata(microsTimestamp) - .build()) - .build(), - OneField.builder() - .name("optionalTimestampNtzMicros") - .schema( - OneSchema.builder() - .name("timestampNtz") - .dataType(OneType.TIMESTAMP_NTZ) - .metadata(microsTimestamp) - .isNullable(true) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(18) .build(), OneField.builder() .name("requiredFixed") @@ -336,6 +244,7 @@ public void testPrimitiveTypes() { .isNullable(false) .metadata(fixedMetadata) .build()) + .fieldId(27) .build(), OneField.builder() .name("optionalFixed") @@ -347,6 +256,7 @@ public void testPrimitiveTypes() { .metadata(fixedMetadata) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(28) .build(), OneField.builder() .name("requiredDecimal") @@ -357,6 +267,7 @@ public void testPrimitiveTypes() { .isNullable(false) .metadata(doubleMetadata) .build()) + .fieldId(29) .build(), OneField.builder() .name("optionalDecimal") @@ -368,10 +279,11 @@ public void testPrimitiveTypes() { .metadata(doubleMetadata) .build()) .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(30) .build())) .build(); - Schema expectedSchema = + Schema icebergRepresentation = new Schema( Types.NestedField.required(1, "requiredBoolean", Types.BooleanType.get()), Types.NestedField.optional(2, "optionalBoolean", Types.BooleanType.get()), @@ -387,22 +299,8 @@ public void testPrimitiveTypes() { Types.NestedField.optional(12, "optionalString", Types.StringType.get()), Types.NestedField.required(13, "requiredBytes", Types.BinaryType.get()), Types.NestedField.optional(14, "optionalBytes", Types.BinaryType.get()), - Types.NestedField.required(15, "requiredEnum", Types.StringType.get()), - Types.NestedField.optional(16, "optionalEnum", Types.StringType.get()), Types.NestedField.required(17, "requiredDate", Types.DateType.get()), Types.NestedField.optional(18, "optionalDate", Types.DateType.get()), - Types.NestedField.required( - 19, "requiredTimestampMillis", Types.TimestampType.withZone()), - Types.NestedField.optional( - 20, "optionalTimestampMillis", Types.TimestampType.withZone()), - Types.NestedField.required(21, "requiredTimestampNtzMillis", Types.LongType.get()), - Types.NestedField.optional(22, "optionalTimestampNtzMillis", Types.LongType.get()), - Types.NestedField.required( - 23, "requiredTimestampMicros", Types.TimestampType.withZone()), - Types.NestedField.optional( - 24, "optionalTimestampMicros", Types.TimestampType.withZone()), - Types.NestedField.required(25, "requiredTimestampNtzMicros", Types.LongType.get()), - Types.NestedField.optional(26, "optionalTimestampNtzMicros", Types.LongType.get()), Types.NestedField.required(27, "requiredFixed", Types.FixedType.ofLength(fixedSize)), Types.NestedField.optional(28, "optionalFixed", Types.FixedType.ofLength(fixedSize)), Types.NestedField.required( @@ -410,10 +308,13 @@ public void testPrimitiveTypes() { Types.NestedField.optional( 30, "optionalDecimal", Types.DecimalType.of(precision, scale))); - Schema actual = IcebergSchemaExtractor.getInstance().toIceberg(oneSchemaRepresentation); - Assertions.assertTrue(expectedSchema.sameSchema(actual)); + Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } + // TODO test enum separately + // TODO test timestamp separately + @Test public void testMaps() { OneSchema recordMapElementSchema = @@ -531,7 +432,7 @@ public void testMaps() { Types.NestedField.required(7, "requiredDouble", Types.DoubleType.get()), Types.NestedField.optional(8, "optionalString", Types.StringType.get()))))); - Schema actual = IcebergSchemaExtractor.getInstance().toIceberg(oneSchemaRepresentation); + Schema actual = SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation); Assertions.assertTrue(expectedSchema.sameSchema(actual)); } @@ -613,7 +514,7 @@ public void testLists() { .build())) .build(); - Schema expectedSchema = + Schema icebergRepresentation = new Schema( Types.NestedField.required( 1, "intList", Types.ListType.ofRequired(3, Types.IntegerType.get())), @@ -626,8 +527,8 @@ public void testLists() { Types.NestedField.required(5, "requiredDouble", Types.DoubleType.get()), Types.NestedField.optional(6, "optionalString", Types.StringType.get()))))); - Schema actual = IcebergSchemaExtractor.getInstance().toIceberg(oneSchemaRepresentation); - Assertions.assertTrue(expectedSchema.sameSchema(actual)); + Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } @Test @@ -699,7 +600,7 @@ public void testNestedRecords() { .build())) .build(); - Schema expectedSchema = + Schema icebergRepresentation = new Schema( Types.NestedField.optional( 1, @@ -713,8 +614,8 @@ public void testNestedRecords() { Types.StructType.of( Types.NestedField.optional( 5, "doublyNestedString", Types.StringType.get())))))); - Schema actual = IcebergSchemaExtractor.getInstance().toIceberg(oneSchemaRepresentation); - Assertions.assertTrue(expectedSchema.sameSchema(actual)); + Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } @Test @@ -840,7 +741,7 @@ public void testIdHandling() { .build())) .build(); - Schema expectedSchema = + Schema icebergRepresentation = new Schema( Types.NestedField.optional( 1, @@ -862,92 +763,7 @@ public void testIdHandling() { Types.NestedField.optional( 10, "optionalString", Types.StringType.get()))))); - Schema actual = IcebergSchemaExtractor.getInstance().toIceberg(oneSchemaRepresentation); - Assertions.assertTrue(expectedSchema.sameSchema(actual)); - } - - /** Test that the schema extractor correctly maps Iceberg types to OneTable types. */ - @Test - public void fromIcebergType() { - IcebergSchemaExtractor extractor = IcebergSchemaExtractor.getInstance(); - Assertions.assertEquals(OneType.STRING, extractor.fromIcebergType(Types.StringType.get())); - Assertions.assertEquals(OneType.INT, extractor.fromIcebergType(Types.IntegerType.get())); - Assertions.assertEquals(OneType.LONG, extractor.fromIcebergType(Types.LongType.get())); - Assertions.assertEquals(OneType.BYTES, extractor.fromIcebergType(Types.BinaryType.get())); - Assertions.assertEquals(OneType.BOOLEAN, extractor.fromIcebergType(Types.BooleanType.get())); - Assertions.assertEquals(OneType.FLOAT, extractor.fromIcebergType(Types.FloatType.get())); - Assertions.assertEquals(OneType.DATE, extractor.fromIcebergType(Types.DateType.get())); - Assertions.assertEquals( - OneType.TIMESTAMP, extractor.fromIcebergType(Types.TimestampType.withZone())); - Assertions.assertEquals(OneType.DOUBLE, extractor.fromIcebergType(Types.DoubleType.get())); - Assertions.assertEquals(OneType.DECIMAL, extractor.fromIcebergType(Types.DecimalType.of(1, 1))); - - // the iceberg types below have not been implemented yet - Assertions.assertThrows( - NotSupportedException.class, () -> extractor.fromIcebergType(Types.FixedType.ofLength(1))); - Assertions.assertThrows( - NotSupportedException.class, - () -> extractor.fromIcebergType(Types.ListType.ofRequired(1, Types.StringType.get()))); - Assertions.assertThrows( - NotSupportedException.class, - () -> - extractor.fromIcebergType( - Types.MapType.ofRequired(1, 2, Types.StringType.get(), Types.StringType.get()))); - Assertions.assertThrows( - NotSupportedException.class, - () -> - extractor.fromIcebergType( - Types.StructType.of( - Types.NestedField.required(1, "test", Types.StringType.get())))); - } - - /** - * Test that the schema extractor correctly builds {@link OneSchema} with valid fields from - * Iceberg schema. - */ - @Test - public void fromIceberg() { - List iceTypes = - Arrays.asList( - Types.StringType.get(), - Types.IntegerType.get(), - Types.LongType.get(), - Types.BinaryType.get(), - Types.BooleanType.get(), - Types.FloatType.get(), - Types.DateType.get(), - Types.TimestampType.withZone(), - Types.DoubleType.get(), - Types.DecimalType.of(1, 1)); - - List nestedFields = new ArrayList<>(); - for (int i = 0; i < iceTypes.size(); i++) { - String fieldName = "test" + i; - String doc = "doc" + i; - nestedFields.add(Types.NestedField.of(i, true, fieldName, iceTypes.get(i), doc)); - } - - Schema iceSchema = new Schema(333, nestedFields); - - IcebergSchemaExtractor extractor = IcebergSchemaExtractor.getInstance(); - OneSchema irSchema = extractor.fromIceberg(iceSchema); - - // TODO schema id is missing in oneschema - // Assertions.assertEquals(irSchema.getId(), iceSchema.schemaId()); - - Assertions.assertEquals(irSchema.getFields().size(), iceTypes.size()); - for (int i = 0; i < iceTypes.size(); i++) { - OneField irField = irSchema.getFields().get(i); - Types.NestedField iceField = nestedFields.get(i); - - Assertions.assertEquals(irField.getName(), iceField.name()); - Assertions.assertEquals(irField.getFieldId(), iceField.fieldId()); - Assertions.assertEquals(irField.getSchema().isNullable(), iceField.isOptional()); - Assertions.assertEquals( - extractor.fromIcebergType(iceField.type()), irField.getSchema().getDataType()); - - // TODO doc is missing in oneschema - // Assertions.assertEquals(irField.getSchema().getDoc(), iceField.doc()); - } + Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } } From af7aece116eaf36a2bd32dc650261480e3fb88eb Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Sat, 21 Oct 2023 10:06:03 -0500 Subject: [PATCH 2/7] record support --- .../iceberg/IcebergSchemaExtractor.java | 46 ++++++++++++------- .../iceberg/TestIcebergSchemaExtractor.java | 35 ++++++++------ 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java index e6af9207..4f10eb99 100644 --- a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java +++ b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java @@ -40,7 +40,6 @@ import io.onetable.exception.SchemaExtractorException; import io.onetable.model.schema.OneField; import io.onetable.model.schema.OneSchema; -import io.onetable.model.schema.OneSchema.OneSchemaBuilder; import io.onetable.model.schema.OneType; /** @@ -97,21 +96,25 @@ public Schema toIceberg(OneSchema oneSchema) { public OneSchema fromIceberg(Schema iceSchema) { return OneSchema.builder() .dataType(OneType.RECORD) - .fields(fromIceberg(iceSchema.columns())) + .fields(fromIceberg(iceSchema.columns(), null)) .build(); } - private List fromIceberg(List iceFields) { - return iceFields.stream().map(iceField -> { - OneSchema irFieldSchema = fromIcebergType(iceField); - return - OneField.builder() - .name(iceField.name()) - .fieldId(iceField.fieldId()) - .schema(irFieldSchema) - .defaultValue(iceField.isOptional() ? OneField.Constants.NULL_DEFAULT_VALUE : null) - .build(); - }).collect(Collectors.toList()); + private List fromIceberg(List iceFields, String parentPath) { + return iceFields.stream() + .map( + iceField -> { + OneSchema irFieldSchema = fromIcebergType(iceField, parentPath); + return OneField.builder() + .name(iceField.name()) + .fieldId(iceField.fieldId()) + .schema(irFieldSchema) + .parentPath(parentPath) + .defaultValue( + iceField.isOptional() ? OneField.Constants.NULL_DEFAULT_VALUE : null) + .build(); + }) + .collect(Collectors.toList()); } static String convertFromOneTablePath(String path) { @@ -226,7 +229,7 @@ private Type toIcebergType(OneField field, AtomicInteger fieldIdTracker) { } } - OneSchema fromIcebergType(Types.NestedField iceField) { + private OneSchema fromIcebergType(Types.NestedField iceField, String parentPath) { OneType type; List fields = null; Map metadata = null; @@ -255,7 +258,9 @@ OneSchema fromIcebergType(Types.NestedField iceField) { case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) iceField.type(); type = timestampType.shouldAdjustToUTC() ? OneType.TIMESTAMP : OneType.TIMESTAMP_NTZ; - metadata = Collections.singletonMap(OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MICROS); + metadata = + Collections.singletonMap( + OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MICROS); break; case DOUBLE: type = OneType.DOUBLE; @@ -270,12 +275,21 @@ OneSchema fromIcebergType(Types.NestedField iceField) { case FIXED: type = OneType.FIXED; Types.FixedType fixedType = (Types.FixedType) iceField.type(); - metadata = Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedType.length()); + metadata = + Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedType.length()); break; case UUID: type = OneType.FIXED; metadata = Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, 16); break; + case STRUCT: + Types.StructType structType = (Types.StructType) iceField.type(); + fields = + fromIceberg( + structType.fields(), + parentPath == null ? iceField.name() : parentPath + "." + iceField.name()); + type = OneType.RECORD; + break; default: throw new NotSupportedException("Unsupported type: " + iceField.type().typeId()); } diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index 95cec7ce..688a0016 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -18,25 +18,24 @@ package io.onetable.iceberg; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.util.*; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.apache.iceberg.Schema; -import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; -import io.onetable.exception.NotSupportedException; import io.onetable.model.schema.OneField; import io.onetable.model.schema.OneSchema; import io.onetable.model.schema.OneType; -import static org.junit.jupiter.api.Assertions.assertEquals; - public class TestIcebergSchemaExtractor { - private static final IcebergSchemaExtractor SCHEMA_EXTRACTOR = IcebergSchemaExtractor.getInstance(); + private static final IcebergSchemaExtractor SCHEMA_EXTRACTOR = + IcebergSchemaExtractor.getInstance(); @Test public void testPrimitiveTypes() { @@ -308,7 +307,8 @@ public void testPrimitiveTypes() { Types.NestedField.optional( 30, "optionalDecimal", Types.DecimalType.of(precision, scale))); - Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + Assertions.assertTrue( + icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } @@ -527,7 +527,8 @@ public void testLists() { Types.NestedField.required(5, "requiredDouble", Types.DoubleType.get()), Types.NestedField.optional(6, "optionalString", Types.StringType.get()))))); - Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + Assertions.assertTrue( + icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } @@ -535,7 +536,6 @@ public void testLists() { public void testNestedRecords() { OneSchema oneSchemaRepresentation = OneSchema.builder() - .name("testRecord") .dataType(OneType.RECORD) .isNullable(false) .fields( @@ -543,9 +543,10 @@ public void testNestedRecords() { OneField.builder() .name("nestedOne") .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .fieldId(1) .schema( OneSchema.builder() - .name("nestedOneType") + .name("struct") .dataType(OneType.RECORD) .isNullable(true) .fields( @@ -553,9 +554,10 @@ public void testNestedRecords() { OneField.builder() .name("nestedOptionalInt") .parentPath("nestedOne") + .fieldId(2) .schema( OneSchema.builder() - .name("int") + .name("integer") .dataType(OneType.INT) .isNullable(true) .build()) @@ -564,6 +566,7 @@ public void testNestedRecords() { OneField.builder() .name("nestedRequiredDouble") .parentPath("nestedOne") + .fieldId(3) .schema( OneSchema.builder() .name("double") @@ -574,16 +577,18 @@ public void testNestedRecords() { OneField.builder() .name("nestedTwo") .parentPath("nestedOne") + .fieldId(4) .schema( OneSchema.builder() - .name("nestedTwoType") + .name("struct") .dataType(OneType.RECORD) .isNullable(false) .fields( - Arrays.asList( + Collections.singletonList( OneField.builder() .name("doublyNestedString") .parentPath("nestedOne.nestedTwo") + .fieldId(5) .schema( OneSchema.builder() .name("string") @@ -614,7 +619,8 @@ public void testNestedRecords() { Types.StructType.of( Types.NestedField.optional( 5, "doublyNestedString", Types.StringType.get())))))); - Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + Assertions.assertTrue( + icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } @@ -763,7 +769,8 @@ public void testIdHandling() { Types.NestedField.optional( 10, "optionalString", Types.StringType.get()))))); - Assertions.assertTrue(icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + Assertions.assertTrue( + icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } } From 4a1b2113439c8004c98faa2b0104c8813b572764 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Sat, 21 Oct 2023 10:58:12 -0500 Subject: [PATCH 3/7] list and map support --- .../io/onetable/avro/AvroSchemaConverter.java | 9 +- .../onetable/delta/DeltaSchemaExtractor.java | 9 +- .../iceberg/IcebergSchemaExtractor.java | 84 +++++++++++++++---- .../java/io/onetable/schema/SchemaUtils.java | 32 +++++++ .../iceberg/TestIcebergSchemaExtractor.java | 46 ++++++---- 5 files changed, 134 insertions(+), 46 deletions(-) create mode 100644 core/src/main/java/io/onetable/schema/SchemaUtils.java diff --git a/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java b/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java index 6e444c2c..4cb4a29e 100644 --- a/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java +++ b/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java @@ -18,6 +18,8 @@ package io.onetable.avro; +import static io.onetable.schema.SchemaUtils.getFullyQualifiedPath; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -288,13 +290,6 @@ private static Object getDefaultValue(Schema.Field avroField) { : avroField.defaultVal(); } - private static String getFullyQualifiedPath(String path, String fieldName) { - if (path == null || path.isEmpty()) { - return fieldName; - } - return path + "." + fieldName; - } - public Schema fromOneSchema(OneSchema oneSchema) { switch (oneSchema.getDataType()) { case RECORD: diff --git a/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java b/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java index 8a5c43f3..feabaa89 100644 --- a/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java +++ b/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java @@ -18,6 +18,8 @@ package io.onetable.delta; +import static io.onetable.schema.SchemaUtils.getFullyQualifiedPath; + import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -273,11 +275,4 @@ private OneSchema toOneSchema( .fields(fields) .build(); } - - private static String getFullyQualifiedPath(String path, String fieldName) { - if (path == null || path.isEmpty()) { - return fieldName; - } - return path + "." + fieldName; - } } diff --git a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java index 4f10eb99..ddf05d38 100644 --- a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java +++ b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java @@ -18,7 +18,10 @@ package io.onetable.iceberg; +import static io.onetable.schema.SchemaUtils.getFullyQualifiedPath; + import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -104,7 +107,11 @@ private List fromIceberg(List iceFields, String par return iceFields.stream() .map( iceField -> { - OneSchema irFieldSchema = fromIcebergType(iceField, parentPath); + Type type = iceField.type(); + String doc = iceField.doc(); + boolean isOptional = iceField.isOptional(); + String fieldPath = getFullyQualifiedPath(parentPath, iceField.name()); + OneSchema irFieldSchema = fromIcebergType(type, fieldPath, doc, isOptional); return OneField.builder() .name(iceField.name()) .fieldId(iceField.fieldId()) @@ -229,11 +236,12 @@ private Type toIcebergType(OneField field, AtomicInteger fieldIdTracker) { } } - private OneSchema fromIcebergType(Types.NestedField iceField, String parentPath) { + private OneSchema fromIcebergType( + Type iceType, String fieldPath, String doc, boolean isOptional) { OneType type; List fields = null; Map metadata = null; - switch (iceField.type().typeId()) { + switch (iceType.typeId()) { case STRING: type = OneType.STRING; break; @@ -256,7 +264,7 @@ private OneSchema fromIcebergType(Types.NestedField iceField, String parentPath) type = OneType.DATE; break; case TIMESTAMP: - Types.TimestampType timestampType = (Types.TimestampType) iceField.type(); + Types.TimestampType timestampType = (Types.TimestampType) iceType; type = timestampType.shouldAdjustToUTC() ? OneType.TIMESTAMP : OneType.TIMESTAMP_NTZ; metadata = Collections.singletonMap( @@ -266,7 +274,7 @@ private OneSchema fromIcebergType(Types.NestedField iceField, String parentPath) type = OneType.DOUBLE; break; case DECIMAL: - Types.DecimalType decimalType = (Types.DecimalType) iceField.type(); + Types.DecimalType decimalType = (Types.DecimalType) iceType; metadata = new HashMap<>(2, 1.0f); metadata.put(OneSchema.MetadataKey.DECIMAL_PRECISION, decimalType.precision()); metadata.put(OneSchema.MetadataKey.DECIMAL_SCALE, decimalType.scale()); @@ -274,7 +282,7 @@ private OneSchema fromIcebergType(Types.NestedField iceField, String parentPath) break; case FIXED: type = OneType.FIXED; - Types.FixedType fixedType = (Types.FixedType) iceField.type(); + Types.FixedType fixedType = (Types.FixedType) iceType; metadata = Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedType.length()); break; @@ -283,21 +291,67 @@ private OneSchema fromIcebergType(Types.NestedField iceField, String parentPath) metadata = Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, 16); break; case STRUCT: - Types.StructType structType = (Types.StructType) iceField.type(); - fields = - fromIceberg( - structType.fields(), - parentPath == null ? iceField.name() : parentPath + "." + iceField.name()); + Types.StructType structType = (Types.StructType) iceType; + fields = fromIceberg(structType.fields(), fieldPath); type = OneType.RECORD; break; + case MAP: + Types.MapType mapType = (Types.MapType) iceType; + OneSchema keySchema = + fromIcebergType( + mapType.keyType(), + getFullyQualifiedPath(fieldPath, OneField.Constants.MAP_VALUE_FIELD_NAME), + null, + false); + OneField keyField = + OneField.builder() + .name(OneField.Constants.MAP_KEY_FIELD_NAME) + .parentPath(fieldPath) + .schema(keySchema) + .fieldId(mapType.keyId()) + .build(); + OneSchema valueSchema = + fromIcebergType( + mapType.valueType(), + getFullyQualifiedPath(fieldPath, OneField.Constants.MAP_VALUE_FIELD_NAME), + null, + mapType.isValueOptional()); + OneField valueField = + OneField.builder() + .name(OneField.Constants.MAP_VALUE_FIELD_NAME) + .parentPath(fieldPath) + .schema(valueSchema) + .fieldId(mapType.valueId()) + .build(); + type = OneType.MAP; + fields = Arrays.asList(keyField, valueField); + break; + case LIST: + Types.ListType listType = (Types.ListType) iceType; + OneSchema elementSchema = + fromIcebergType( + listType.elementType(), + getFullyQualifiedPath(fieldPath, OneField.Constants.ARRAY_ELEMENT_FIELD_NAME), + null, + listType.isElementOptional()); + OneField elementField = + OneField.builder() + .name(OneField.Constants.ARRAY_ELEMENT_FIELD_NAME) + .parentPath(fieldPath) + .schema(elementSchema) + .fieldId(listType.elementId()) + .build(); + type = OneType.ARRAY; + fields = Collections.singletonList(elementField); + break; default: - throw new NotSupportedException("Unsupported type: " + iceField.type().typeId()); + throw new NotSupportedException("Unsupported type: " + iceType.typeId()); } return OneSchema.builder() - .name(iceField.type().typeId().name().toLowerCase()) + .name(iceType.typeId().name().toLowerCase()) .dataType(type) - .comment(iceField.doc()) - .isNullable(iceField.isOptional()) + .comment(doc) + .isNullable(isOptional) .metadata(metadata) .fields(fields) .build(); diff --git a/core/src/main/java/io/onetable/schema/SchemaUtils.java b/core/src/main/java/io/onetable/schema/SchemaUtils.java new file mode 100644 index 00000000..adb569ca --- /dev/null +++ b/core/src/main/java/io/onetable/schema/SchemaUtils.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.onetable.schema; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; + +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class SchemaUtils { + public static String getFullyQualifiedPath(String path, String fieldName) { + if (path == null || path.isEmpty()) { + return fieldName; + } + return path + "." + fieldName; + } +} diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index 688a0016..cfb95630 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -314,18 +314,20 @@ public void testPrimitiveTypes() { // TODO test enum separately // TODO test timestamp separately + // TODO test UUID handling @Test public void testMaps() { OneSchema recordMapElementSchema = OneSchema.builder() - .name("element") + .name("struct") .isNullable(true) .fields( Arrays.asList( OneField.builder() .name("requiredDouble") .parentPath("recordMap._one_field_value") + .fieldId(7) .schema( OneSchema.builder() .name("double") @@ -336,6 +338,7 @@ public void testMaps() { OneField.builder() .name("optionalString") .parentPath("recordMap._one_field_value") + .fieldId(8) .schema( OneSchema.builder() .name("string") @@ -348,13 +351,13 @@ public void testMaps() { .build(); OneSchema oneSchemaRepresentation = OneSchema.builder() - .name("testRecord") .dataType(OneType.RECORD) .isNullable(false) .fields( Arrays.asList( OneField.builder() .name("intMap") + .fieldId(1) .schema( OneSchema.builder() .name("map") @@ -365,20 +368,21 @@ public void testMaps() { OneField.builder() .name(OneField.Constants.MAP_KEY_FIELD_NAME) .parentPath("intMap") + .fieldId(3) .schema( OneSchema.builder() - .name("map_key") + .name("string") .dataType(OneType.STRING) .isNullable(false) .build()) - .defaultValue("") .build(), OneField.builder() .name(OneField.Constants.MAP_VALUE_FIELD_NAME) .parentPath("intMap") + .fieldId(4) .schema( OneSchema.builder() - .name("int") + .name("integer") .dataType(OneType.INT) .isNullable(false) .build()) @@ -387,6 +391,7 @@ public void testMaps() { .build(), OneField.builder() .name("recordMap") + .fieldId(2) .schema( OneSchema.builder() .name("map") @@ -396,17 +401,18 @@ public void testMaps() { Arrays.asList( OneField.builder() .name(OneField.Constants.MAP_KEY_FIELD_NAME) + .fieldId(5) .parentPath("recordMap") .schema( OneSchema.builder() - .name("map_key") + .name("integer") .dataType(OneType.INT) .isNullable(false) .build()) - .defaultValue("") .build(), OneField.builder() .name(OneField.Constants.MAP_VALUE_FIELD_NAME) + .fieldId(6) .parentPath("recordMap") .schema(recordMapElementSchema) .build())) @@ -415,7 +421,7 @@ public void testMaps() { .build())) .build(); - Schema expectedSchema = + Schema icebergRepresentation = new Schema( Types.NestedField.required( 1, @@ -432,20 +438,22 @@ public void testMaps() { Types.NestedField.required(7, "requiredDouble", Types.DoubleType.get()), Types.NestedField.optional(8, "optionalString", Types.StringType.get()))))); - Schema actual = SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation); - Assertions.assertTrue(expectedSchema.sameSchema(actual)); + Assertions.assertTrue( + icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); + assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } @Test public void testLists() { OneSchema recordListElementSchema = OneSchema.builder() - .name("element") + .name("struct") .isNullable(true) .fields( Arrays.asList( OneField.builder() .name("requiredDouble") + .fieldId(5) .parentPath("recordList._one_field_element") .schema( OneSchema.builder() @@ -456,6 +464,7 @@ public void testLists() { .build(), OneField.builder() .name("optionalString") + .fieldId(6) .parentPath("recordList._one_field_element") .schema( OneSchema.builder() @@ -469,26 +478,27 @@ public void testLists() { .build(); OneSchema oneSchemaRepresentation = OneSchema.builder() - .name("testRecord") .dataType(OneType.RECORD) .isNullable(false) .fields( Arrays.asList( OneField.builder() .name("intList") + .fieldId(1) .schema( OneSchema.builder() - .name("array") + .name("list") .isNullable(false) .dataType(OneType.ARRAY) .fields( - Arrays.asList( + Collections.singletonList( OneField.builder() .name(OneField.Constants.ARRAY_ELEMENT_FIELD_NAME) + .fieldId(3) .parentPath("intList") .schema( OneSchema.builder() - .name("int") + .name("integer") .dataType(OneType.INT) .isNullable(false) .build()) @@ -497,15 +507,17 @@ public void testLists() { .build(), OneField.builder() .name("recordList") + .fieldId(2) .schema( OneSchema.builder() - .name("array") + .name("list") .isNullable(true) .dataType(OneType.ARRAY) .fields( - Arrays.asList( + Collections.singletonList( OneField.builder() .name(OneField.Constants.ARRAY_ELEMENT_FIELD_NAME) + .fieldId(4) .parentPath("recordList") .schema(recordListElementSchema) .build())) From 5fea95c2d4bb6851756df6adad5f29d5d8da979c Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Sat, 21 Oct 2023 11:14:47 -0500 Subject: [PATCH 4/7] add uuid and enum test cases --- .../iceberg/TestIcebergSchemaExtractor.java | 92 +++++++++++++++++-- 1 file changed, 85 insertions(+), 7 deletions(-) diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index cfb95630..f19210c5 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -19,6 +19,7 @@ package io.onetable.iceberg; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.*; @@ -39,11 +40,6 @@ public class TestIcebergSchemaExtractor { @Test public void testPrimitiveTypes() { - Map requiredEnumMetadata = new HashMap<>(); - requiredEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("ONE", "TWO")); - Map optionalEnumMetadata = new HashMap<>(); - optionalEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("THREE", "FOUR")); - int precision = 10; int scale = 5; Map doubleMetadata = new HashMap<>(); @@ -312,9 +308,91 @@ public void testPrimitiveTypes() { assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } - // TODO test enum separately + @Test + public void testEnums() { + // there are no enums in iceberg so we convert them to string + Map requiredEnumMetadata = new HashMap<>(); + requiredEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("ONE", "TWO")); + Map optionalEnumMetadata = new HashMap<>(); + optionalEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("THREE", "FOUR")); + + OneSchema schemaWithEnums = OneSchema + .builder() + .dataType(OneType.RECORD) + .fields(Arrays.asList( + OneField.builder() + .name("requiredEnum") + .schema( + OneSchema.builder() + .name("REQUIRED_ENUM") + .dataType(OneType.ENUM) + .isNullable(false) + .metadata(requiredEnumMetadata) + .build()) + .defaultValue("ONE") + .build(), + OneField.builder() + .name("optionalEnum") + .schema( + OneSchema.builder() + .name("OPTIONAL_ENUM") + .dataType(OneType.ENUM) + .isNullable(true) + .metadata(optionalEnumMetadata) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build())) + .build(); + + Schema expectedSchema = + new Schema( + Types.NestedField.required(1, "requiredEnum", Types.StringType.get()), + Types.NestedField.optional(2, "optionalEnum", Types.StringType.get())); + assertTrue(expectedSchema.sameSchema(SCHEMA_EXTRACTOR.toIceberg(schemaWithEnums))); + } + + @Test + public void testUuids() { + // UUIDs are represented as fixed length byte arrays + Schema inputSchema = + new Schema( + Types.NestedField.required(1, "requiredUuid", Types.UUIDType.get()), + Types.NestedField.optional(2, "optionalUuid", Types.UUIDType.get())); + + int fixedSize = 16; + Map fixedMetadata = new HashMap<>(); + fixedMetadata.put(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedSize); + OneSchema expectedSchema = OneSchema + .builder() + .dataType(OneType.RECORD) + .fields(Arrays.asList( + OneField.builder() + .name("requiredUuid") + .fieldId(1) + .schema( + OneSchema.builder() + .name("uuid") + .dataType(OneType.FIXED) + .isNullable(false) + .metadata(fixedMetadata) + .build()) + .build(), + OneField.builder() + .name("optionalUuid") + .fieldId(2) + .schema( + OneSchema.builder() + .name("uuid") + .dataType(OneType.FIXED) + .isNullable(true) + .metadata(fixedMetadata) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build())) + .build(); + assertEquals(expectedSchema, (SCHEMA_EXTRACTOR.fromIceberg(inputSchema))); + } // TODO test timestamp separately - // TODO test UUID handling @Test public void testMaps() { From b11517cb0945a1896d0bb04be2264b1d38a26125 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Sat, 21 Oct 2023 11:32:37 -0500 Subject: [PATCH 5/7] timestamp tests --- .../iceberg/IcebergSchemaExtractor.java | 2 +- .../iceberg/TestIcebergSchemaExtractor.java | 314 ++++++++++++++---- 2 files changed, 246 insertions(+), 70 deletions(-) diff --git a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java index ddf05d38..cbf02d4d 100644 --- a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java +++ b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java @@ -162,7 +162,7 @@ private Type toIcebergType(OneField field, AtomicInteger fieldIdTracker) { case INT: return Types.IntegerType.get(); case LONG: - case TIMESTAMP_NTZ: + case TIMESTAMP_NTZ: // TODO - revisit this return Types.LongType.get(); case BYTES: return Types.BinaryType.get(); diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index f19210c5..1ed8be32 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -47,14 +47,8 @@ public void testPrimitiveTypes() { doubleMetadata.put(OneSchema.MetadataKey.DECIMAL_SCALE, scale); int fixedSize = 8; - Map fixedMetadata = new HashMap<>(); - fixedMetadata.put(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedSize); - - Map millisTimestamp = new HashMap<>(); - millisTimestamp.put(OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MILLIS); - - Map microsTimestamp = new HashMap<>(); - microsTimestamp.put(OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MICROS); + Map fixedMetadata = + Collections.singletonMap(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedSize); OneSchema oneSchemaRepresentation = OneSchema.builder() @@ -311,38 +305,39 @@ public void testPrimitiveTypes() { @Test public void testEnums() { // there are no enums in iceberg so we convert them to string - Map requiredEnumMetadata = new HashMap<>(); - requiredEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("ONE", "TWO")); - Map optionalEnumMetadata = new HashMap<>(); - optionalEnumMetadata.put(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("THREE", "FOUR")); - - OneSchema schemaWithEnums = OneSchema - .builder() - .dataType(OneType.RECORD) - .fields(Arrays.asList( - OneField.builder() - .name("requiredEnum") - .schema( - OneSchema.builder() - .name("REQUIRED_ENUM") - .dataType(OneType.ENUM) - .isNullable(false) - .metadata(requiredEnumMetadata) - .build()) - .defaultValue("ONE") - .build(), - OneField.builder() - .name("optionalEnum") - .schema( - OneSchema.builder() - .name("OPTIONAL_ENUM") - .dataType(OneType.ENUM) - .isNullable(true) - .metadata(optionalEnumMetadata) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build())) - .build(); + Map requiredEnumMetadata = + Collections.singletonMap(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("ONE", "TWO")); + Map optionalEnumMetadata = + Collections.singletonMap(OneSchema.MetadataKey.ENUM_VALUES, Arrays.asList("THREE", "FOUR")); + + OneSchema schemaWithEnums = + OneSchema.builder() + .dataType(OneType.RECORD) + .fields( + Arrays.asList( + OneField.builder() + .name("requiredEnum") + .schema( + OneSchema.builder() + .name("REQUIRED_ENUM") + .dataType(OneType.ENUM) + .isNullable(false) + .metadata(requiredEnumMetadata) + .build()) + .defaultValue("ONE") + .build(), + OneField.builder() + .name("optionalEnum") + .schema( + OneSchema.builder() + .name("OPTIONAL_ENUM") + .dataType(OneType.ENUM) + .isNullable(true) + .metadata(optionalEnumMetadata) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build())) + .build(); Schema expectedSchema = new Schema( @@ -362,37 +357,218 @@ public void testUuids() { int fixedSize = 16; Map fixedMetadata = new HashMap<>(); fixedMetadata.put(OneSchema.MetadataKey.FIXED_BYTES_SIZE, fixedSize); - OneSchema expectedSchema = OneSchema - .builder() - .dataType(OneType.RECORD) - .fields(Arrays.asList( - OneField.builder() - .name("requiredUuid") - .fieldId(1) - .schema( - OneSchema.builder() - .name("uuid") - .dataType(OneType.FIXED) - .isNullable(false) - .metadata(fixedMetadata) - .build()) - .build(), - OneField.builder() - .name("optionalUuid") - .fieldId(2) - .schema( - OneSchema.builder() - .name("uuid") - .dataType(OneType.FIXED) - .isNullable(true) - .metadata(fixedMetadata) - .build()) - .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) - .build())) - .build(); + OneSchema expectedSchema = + OneSchema.builder() + .dataType(OneType.RECORD) + .fields( + Arrays.asList( + OneField.builder() + .name("requiredUuid") + .fieldId(1) + .schema( + OneSchema.builder() + .name("uuid") + .dataType(OneType.FIXED) + .isNullable(false) + .metadata(fixedMetadata) + .build()) + .build(), + OneField.builder() + .name("optionalUuid") + .fieldId(2) + .schema( + OneSchema.builder() + .name("uuid") + .dataType(OneType.FIXED) + .isNullable(true) + .metadata(fixedMetadata) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build())) + .build(); assertEquals(expectedSchema, (SCHEMA_EXTRACTOR.fromIceberg(inputSchema))); } - // TODO test timestamp separately + + @Test + public void testTimestamps() { + Map millisTimestamp = + Collections.singletonMap( + OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MILLIS); + + Map microsTimestamp = + Collections.singletonMap( + OneSchema.MetadataKey.TIMESTAMP_PRECISION, OneSchema.MetadataValue.MICROS); + + OneSchema irSchema = + OneSchema.builder() + .dataType(OneType.RECORD) + .fields( + Arrays.asList( + OneField.builder() + .name("requiredTimestampMillis") + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP) + .metadata(millisTimestamp) + .isNullable(false) + .build()) + .build(), + OneField.builder() + .name("optionalTimestampMillis") + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP) + .metadata(millisTimestamp) + .isNullable(true) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build(), + OneField.builder() + .name("requiredTimestampNtzMillis") + .schema( + OneSchema.builder() + .name("timestampNtz") + .dataType(OneType.TIMESTAMP_NTZ) + .isNullable(false) + .metadata(millisTimestamp) + .build()) + .build(), + OneField.builder() + .name("optionalTimestampNtzMillis") + .schema( + OneSchema.builder() + .name("timestampNtz") + .dataType(OneType.TIMESTAMP_NTZ) + .metadata(millisTimestamp) + .isNullable(true) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build(), + OneField.builder() + .name("requiredTimestampMicros") + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP) + .metadata(microsTimestamp) + .isNullable(false) + .build()) + .build(), + OneField.builder() + .name("optionalTimestampMicros") + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP) + .metadata(microsTimestamp) + .isNullable(true) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build(), + OneField.builder() + .name("requiredTimestampNtzMicros") + .schema( + OneSchema.builder() + .name("timestampNtz") + .dataType(OneType.TIMESTAMP_NTZ) + .isNullable(false) + .metadata(microsTimestamp) + .build()) + .build(), + OneField.builder() + .name("optionalTimestampNtzMicros") + .schema( + OneSchema.builder() + .name("timestampNtz") + .dataType(OneType.TIMESTAMP_NTZ) + .metadata(microsTimestamp) + .isNullable(true) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build())) + .build(); + Schema expectedTargetSchema = + new Schema( + Types.NestedField.required( + 1, "requiredTimestampMillis", Types.TimestampType.withZone()), + Types.NestedField.optional( + 2, "optionalTimestampMillis", Types.TimestampType.withZone()), + Types.NestedField.required(3, "requiredTimestampNtzMillis", Types.LongType.get()), + Types.NestedField.optional(4, "optionalTimestampNtzMillis", Types.LongType.get()), + Types.NestedField.required( + 5, "requiredTimestampMicros", Types.TimestampType.withZone()), + Types.NestedField.optional( + 6, "optionalTimestampMicros", Types.TimestampType.withZone()), + Types.NestedField.required(7, "requiredTimestampNtzMicros", Types.LongType.get()), + Types.NestedField.optional(8, "optionalTimestampNtzMicros", Types.LongType.get())); + assertTrue(expectedTargetSchema.sameSchema(SCHEMA_EXTRACTOR.toIceberg(irSchema))); + + Schema sourceSchema = + new Schema( + Types.NestedField.required( + 4, "requiredTimestampWithZone", Types.TimestampType.withZone()), + Types.NestedField.optional( + 5, "optionalTimestampWithZone", Types.TimestampType.withZone()), + Types.NestedField.required( + 6, "requiredTimestampWithoutZone", Types.TimestampType.withoutZone()), + Types.NestedField.optional( + 7, "optionalTimestampWithoutZone", Types.TimestampType.withoutZone())); + OneSchema expectedIrSchema = + OneSchema.builder() + .dataType(OneType.RECORD) + .fields( + Arrays.asList( + OneField.builder() + .name("requiredTimestampWithZone") + .fieldId(4) + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP) + .metadata(microsTimestamp) + .isNullable(false) + .build()) + .build(), + OneField.builder() + .name("optionalTimestampWithZone") + .fieldId(5) + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP) + .metadata(microsTimestamp) + .isNullable(true) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build(), + OneField.builder() + .name("requiredTimestampWithoutZone") + .fieldId(6) + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP_NTZ) + .metadata(microsTimestamp) + .isNullable(false) + .build()) + .build(), + OneField.builder() + .name("optionalTimestampWithoutZone") + .fieldId(7) + .schema( + OneSchema.builder() + .name("timestamp") + .dataType(OneType.TIMESTAMP_NTZ) + .metadata(microsTimestamp) + .isNullable(true) + .build()) + .defaultValue(OneField.Constants.NULL_DEFAULT_VALUE) + .build())) + .build(); + assertEquals(expectedIrSchema, SCHEMA_EXTRACTOR.fromIceberg(sourceSchema)); + } @Test public void testMaps() { From e931393bd0925acedd358947a0eb27518f41f8e9 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Sun, 22 Oct 2023 22:24:36 -0500 Subject: [PATCH 6/7] add case where no IDs are in internal model --- .../iceberg/TestIcebergSchemaExtractor.java | 31 ++++++------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index 1ed8be32..642334da 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -81,7 +81,7 @@ public void testPrimitiveTypes() { .name("requiredInt") .schema( OneSchema.builder() - .name("int") + .name("integer") .dataType(OneType.INT) .isNullable(false) .build()) @@ -91,7 +91,7 @@ public void testPrimitiveTypes() { .name("optionalInt") .schema( OneSchema.builder() - .name("int") + .name("integer") .dataType(OneType.INT) .isNullable(true) .build()) @@ -891,7 +891,7 @@ public void testNestedRecords() { } @Test - public void testIdHandling() { + public void testToIcebergWithNoFieldIdsSet() { OneSchema recordListElementSchema = OneSchema.builder() .name("element") @@ -900,7 +900,6 @@ public void testIdHandling() { Arrays.asList( OneField.builder() .name("requiredDouble") - .fieldId(3) .parentPath("recordList._one_field_element") .schema( OneSchema.builder() @@ -912,7 +911,6 @@ public void testIdHandling() { OneField.builder() .name("optionalString") .parentPath("recordList._one_field_element") - .fieldId(9) .schema( OneSchema.builder() .name("string") @@ -932,7 +930,6 @@ public void testIdHandling() { OneField.builder() .name("requiredDouble") .parentPath("recordMap._one_field_value") - .fieldId(7) .schema( OneSchema.builder() .name("double") @@ -943,7 +940,6 @@ public void testIdHandling() { OneField.builder() .name("optionalString") .parentPath("recordMap._one_field_value") - .fieldId(10) .schema( OneSchema.builder() .name("string") @@ -963,7 +959,6 @@ public void testIdHandling() { Arrays.asList( OneField.builder() .name("recordList") - .fieldId(1) .schema( OneSchema.builder() .name("array") @@ -974,7 +969,6 @@ public void testIdHandling() { OneField.builder() .name(OneField.Constants.ARRAY_ELEMENT_FIELD_NAME) .parentPath("recordList") - .fieldId(2) .schema(recordListElementSchema) .build())) .build()) @@ -982,7 +976,6 @@ public void testIdHandling() { .build(), OneField.builder() .name("recordMap") - .fieldId(4) .schema( OneSchema.builder() .name("map") @@ -993,7 +986,6 @@ public void testIdHandling() { OneField.builder() .name(OneField.Constants.MAP_KEY_FIELD_NAME) .parentPath("recordMap") - .fieldId(5) .schema( OneSchema.builder() .name("map_key") @@ -1003,7 +995,6 @@ public void testIdHandling() { .defaultValue("") .build(), OneField.builder() - .fieldId(6) .name(OneField.Constants.MAP_VALUE_FIELD_NAME) .parentPath("recordMap") .schema(recordMapElementSchema) @@ -1019,24 +1010,22 @@ public void testIdHandling() { 1, "recordList", Types.ListType.ofOptional( - 2, + 3, Types.StructType.of( - Types.NestedField.required(3, "requiredDouble", Types.DoubleType.get()), - Types.NestedField.optional(9, "optionalString", Types.StringType.get())))), + Types.NestedField.required(4, "requiredDouble", Types.DoubleType.get()), + Types.NestedField.optional(5, "optionalString", Types.StringType.get())))), Types.NestedField.optional( - 4, + 2, "recordMap", Types.MapType.ofOptional( - 5, 6, + 7, Types.IntegerType.get(), Types.StructType.of( - Types.NestedField.required(7, "requiredDouble", Types.DoubleType.get()), - Types.NestedField.optional( - 10, "optionalString", Types.StringType.get()))))); + Types.NestedField.required(8, "requiredDouble", Types.DoubleType.get()), + Types.NestedField.optional(9, "optionalString", Types.StringType.get()))))); Assertions.assertTrue( icebergRepresentation.sameSchema(SCHEMA_EXTRACTOR.toIceberg(oneSchemaRepresentation))); - assertEquals(oneSchemaRepresentation, SCHEMA_EXTRACTOR.fromIceberg(icebergRepresentation)); } } From 610a45f72c6a89383cf87a4d0c37f26ea2b7b5c7 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Tue, 24 Oct 2023 09:45:57 -0500 Subject: [PATCH 7/7] change array to list, fix typo --- api/src/main/java/io/onetable/model/schema/OneType.java | 4 ++-- .../main/java/io/onetable/avro/AvroSchemaConverter.java | 4 ++-- .../main/java/io/onetable/delta/DeltaSchemaExtractor.java | 4 ++-- .../java/io/onetable/iceberg/IcebergSchemaExtractor.java | 6 +++--- .../java/io/onetable/avro/TestAvroSchemaConverter.java | 8 ++++---- .../java/io/onetable/delta/TestDeltaSchemaExtractor.java | 4 ++-- .../io/onetable/iceberg/TestIcebergSchemaExtractor.java | 6 +++--- .../test/java/io/onetable/testutil/ColumnStatMapUtil.java | 4 ++-- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/api/src/main/java/io/onetable/model/schema/OneType.java b/api/src/main/java/io/onetable/model/schema/OneType.java index b34d0d62..dcc9988d 100644 --- a/api/src/main/java/io/onetable/model/schema/OneType.java +++ b/api/src/main/java/io/onetable/model/schema/OneType.java @@ -35,7 +35,7 @@ public enum OneType { RECORD, ENUM, - ARRAY, + LIST, MAP, UNION, FIXED, @@ -62,7 +62,7 @@ public enum OneType { new HashSet() { { add(RECORD); - add(ARRAY); + add(LIST); add(MAP); add(UNION); } diff --git a/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java b/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java index 4cb4a29e..6731a7eb 100644 --- a/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java +++ b/core/src/main/java/io/onetable/avro/AvroSchemaConverter.java @@ -209,7 +209,7 @@ private OneSchema toOneSchema( .build(); return OneSchema.builder() .name(schema.getName()) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .comment(schema.getDoc()) .isNullable(schema.isNullable()) .fields(Collections.singletonList(elementField)) @@ -356,7 +356,7 @@ public Schema fromOneSchema(OneSchema oneSchema) { LogicalTypes.localTimestampMillis().addToSchema(Schema.create(Schema.Type.LONG)), oneSchema); } - case ARRAY: + case LIST: OneSchema elementSchema = oneSchema.getFields().stream() .filter( diff --git a/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java b/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java index feabaa89..13b3853b 100644 --- a/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java +++ b/core/src/main/java/io/onetable/delta/DeltaSchemaExtractor.java @@ -124,7 +124,7 @@ private DataType convertFieldType(OneField field) { .orElseThrow(() -> new SchemaExtractorException("Invalid map schema")); return DataTypes.createMapType( convertFieldType(key), convertFieldType(value), value.getSchema().isNullable()); - case ARRAY: + case LIST: OneField element = field.getSchema().getFields().stream() .filter( @@ -231,7 +231,7 @@ private OneSchema toOneSchema( .parentPath(parentPath) .schema(elementSchema) .build(); - type = OneType.ARRAY; + type = OneType.LIST; fields = Collections.singletonList(elementField); break; case "map": diff --git a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java index cbf02d4d..6b195d77 100644 --- a/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java +++ b/core/src/main/java/io/onetable/iceberg/IcebergSchemaExtractor.java @@ -216,7 +216,7 @@ private Type toIcebergType(OneField field, AtomicInteger fieldIdTracker) { toIcebergType(key, fieldIdTracker), toIcebergType(value, fieldIdTracker)); } - case ARRAY: + case LIST: OneField element = field.getSchema().getFields().stream() .filter( @@ -300,7 +300,7 @@ private OneSchema fromIcebergType( OneSchema keySchema = fromIcebergType( mapType.keyType(), - getFullyQualifiedPath(fieldPath, OneField.Constants.MAP_VALUE_FIELD_NAME), + getFullyQualifiedPath(fieldPath, OneField.Constants.MAP_KEY_FIELD_NAME), null, false); OneField keyField = @@ -341,7 +341,7 @@ private OneSchema fromIcebergType( .schema(elementSchema) .fieldId(listType.elementId()) .build(); - type = OneType.ARRAY; + type = OneType.LIST; fields = Collections.singletonList(elementField); break; default: diff --git a/core/src/test/java/io/onetable/avro/TestAvroSchemaConverter.java b/core/src/test/java/io/onetable/avro/TestAvroSchemaConverter.java index d49fe4d0..1cdd4823 100644 --- a/core/src/test/java/io/onetable/avro/TestAvroSchemaConverter.java +++ b/core/src/test/java/io/onetable/avro/TestAvroSchemaConverter.java @@ -354,7 +354,7 @@ public void testAvroLists() { OneSchema.builder() .name("array") .isNullable(false) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Arrays.asList( OneField.builder() @@ -375,7 +375,7 @@ public void testAvroLists() { OneSchema.builder() .name("array") .isNullable(true) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Arrays.asList( OneField.builder() @@ -785,7 +785,7 @@ public void testIdSupport() { OneSchema.builder() .name("array") .isNullable(false) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Arrays.asList( OneField.builder() @@ -824,7 +824,7 @@ public void testIdSupport() { OneSchema.builder() .name("array") .isNullable(false) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Arrays.asList( OneField.builder() diff --git a/core/src/test/java/io/onetable/delta/TestDeltaSchemaExtractor.java b/core/src/test/java/io/onetable/delta/TestDeltaSchemaExtractor.java index 7eab8046..c12037bc 100644 --- a/core/src/test/java/io/onetable/delta/TestDeltaSchemaExtractor.java +++ b/core/src/test/java/io/onetable/delta/TestDeltaSchemaExtractor.java @@ -610,7 +610,7 @@ public void testLists() { OneSchema.builder() .name("array") .isNullable(false) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Collections.singletonList( OneField.builder() @@ -631,7 +631,7 @@ public void testLists() { OneSchema.builder() .name("array") .isNullable(true) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Collections.singletonList( OneField.builder() diff --git a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java index 642334da..229ce577 100644 --- a/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java +++ b/core/src/test/java/io/onetable/iceberg/TestIcebergSchemaExtractor.java @@ -743,7 +743,7 @@ public void testLists() { OneSchema.builder() .name("list") .isNullable(false) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Collections.singletonList( OneField.builder() @@ -766,7 +766,7 @@ public void testLists() { OneSchema.builder() .name("list") .isNullable(true) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Collections.singletonList( OneField.builder() @@ -963,7 +963,7 @@ public void testToIcebergWithNoFieldIdsSet() { OneSchema.builder() .name("array") .isNullable(true) - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields( Arrays.asList( OneField.builder() diff --git a/core/src/test/java/io/onetable/testutil/ColumnStatMapUtil.java b/core/src/test/java/io/onetable/testutil/ColumnStatMapUtil.java index b74fc2f4..f418c797 100644 --- a/core/src/test/java/io/onetable/testutil/ColumnStatMapUtil.java +++ b/core/src/test/java/io/onetable/testutil/ColumnStatMapUtil.java @@ -107,7 +107,7 @@ public static Map getColumnStatMap() { .schema( OneSchema.builder() .name("array") - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields(Collections.singletonList(arrayLongFieldElement)) .build()) .build(); @@ -148,7 +148,7 @@ public static Map getColumnStatMap() { .schema( OneSchema.builder() .name("array") - .dataType(OneType.ARRAY) + .dataType(OneType.LIST) .fields(Collections.singletonList(nestedArrayStringFieldElement)) .build()) .build();