From 0e677d27ae8ed489f2fa6f5e95b0e498319a887e Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 16 Aug 2023 11:26:00 -0700 Subject: [PATCH] GH-35698: [C#] Update FlatBuffers (#35699) ### Rationale for this change The FlatBuffers definitions in the C# code don't seem to have been updated for a while. Adding the latest FlatBuffers code ensures that we're up to date with both the Arrow definitions and the FlatBuffers compiler. ### What changes are included in this PR? The latest versions of the C# FlatBuffers runtime files were copied into the project (with visibility changed from public to internal). The latest version of the FlatBuffers compiler was used against the latest version of the Arrow .fbs source. The output of the compiler was edited to more closely match the existing code by moving it into the directory structure and namespaces of the existing classes and by changing class visibility from public to internal. A few small changes were made to the existing source to accommodate changes in the FlatBuffers runtime, most specifically that the files have moved from the namespace "FlatBuffers" to the namespace "Google.FlatBuffers". ### Are these changes tested? No substantive product changes were made. All tests still pass. ### Are there any user-facing changes? No. 
Resolves #35698 * Closes: #35698 Authored-by: Curt Hagenlocher Signed-off-by: Weston Pace --- .../Internal/FlightDataStream.cs | 2 +- .../Internal/FlightMessageSerializer.cs | 2 +- csharp/src/Apache.Arrow/Flatbuf/Block.cs | 9 +- .../Apache.Arrow/Flatbuf/BodyCompression.cs | 28 +- csharp/src/Apache.Arrow/Flatbuf/Buffer.cs | 12 +- .../Apache.Arrow/Flatbuf/DictionaryBatch.cs | 29 +- .../Flatbuf/DictionaryEncoding.cs | 42 +- .../Flatbuf/Enums/BodyCompressionMethod.cs | 2 +- .../Flatbuf/Enums/DictionaryKind.cs | 19 + .../Flatbuf/Enums/IntervalUnit.cs | 5 +- .../Flatbuf/Enums/MessageHeader.cs | 42 +- csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs | 127 ++- csharp/src/Apache.Arrow/Flatbuf/Field.cs | 75 +- csharp/src/Apache.Arrow/Flatbuf/FieldNode.cs | 9 +- .../Apache.Arrow/Flatbuf/FixedSizeBinary.cs | 24 +- .../src/Apache.Arrow/Flatbuf/FixedSizeList.cs | 24 +- .../Flatbuf/FlatBuffers/ByteBuffer.cs | 270 ++++-- .../Flatbuf/FlatBuffers/ByteBufferUtil.cs | 2 +- .../Flatbuf/FlatBuffers/FlatBufferBuilder.cs | 243 +++++- .../FlatBuffers/FlatBufferConstants.cs | 10 +- .../Flatbuf/FlatBuffers/FlatBufferVerify.cs | 822 ++++++++++++++++++ .../Flatbuf/FlatBuffers/IFlatbufferObject.cs | 2 +- .../Flatbuf/FlatBuffers/Offset.cs | 2 +- .../Flatbuf/FlatBuffers/Struct.cs | 13 +- .../Apache.Arrow/Flatbuf/FlatBuffers/Table.cs | 37 +- csharp/src/Apache.Arrow/Flatbuf/Footer.cs | 31 +- csharp/src/Apache.Arrow/Flatbuf/KeyValue.cs | 29 +- csharp/src/Apache.Arrow/Flatbuf/Map.cs | 41 +- csharp/src/Apache.Arrow/Flatbuf/Message.cs | 38 +- .../src/Apache.Arrow/Flatbuf/RecordBatch.cs | 27 +- .../src/Apache.Arrow/Flatbuf/RunEndEncoded.cs | 45 + csharp/src/Apache.Arrow/Flatbuf/Schema.cs | 38 +- .../Flatbuf/SparseMatrixCompressedAxis.cs | 15 + .../Flatbuf/SparseMatrixIndexCSX.cs | 95 ++ .../src/Apache.Arrow/Flatbuf/SparseTensor.cs | 106 +++ .../Apache.Arrow/Flatbuf/SparseTensorIndex.cs | 42 + .../Flatbuf/SparseTensorIndexCOO.cs | 107 +++ .../Flatbuf/SparseTensorIndexCSF.cs | 155 ++++ 
csharp/src/Apache.Arrow/Flatbuf/Tensor.cs | 63 +- csharp/src/Apache.Arrow/Flatbuf/TensorDim.cs | 27 +- .../src/Apache.Arrow/Flatbuf/Types/Binary.cs | 22 +- csharp/src/Apache.Arrow/Flatbuf/Types/Bool.cs | 21 +- csharp/src/Apache.Arrow/Flatbuf/Types/Date.cs | 28 +- .../src/Apache.Arrow/Flatbuf/Types/Decimal.cs | 28 +- .../Apache.Arrow/Flatbuf/Types/Duration.cs | 24 +- .../Flatbuf/Types/FloatingPoint.cs | 24 +- csharp/src/Apache.Arrow/Flatbuf/Types/Int.cs | 25 +- .../Apache.Arrow/Flatbuf/Types/Interval.cs | 24 +- .../Apache.Arrow/Flatbuf/Types/LargeBinary.cs | 21 +- .../Apache.Arrow/Flatbuf/Types/LargeList.cs | 21 +- .../Apache.Arrow/Flatbuf/Types/LargeUtf8.cs | 21 +- csharp/src/Apache.Arrow/Flatbuf/Types/List.cs | 21 +- csharp/src/Apache.Arrow/Flatbuf/Types/Null.cs | 21 +- .../src/Apache.Arrow/Flatbuf/Types/Struct_.cs | 21 +- csharp/src/Apache.Arrow/Flatbuf/Types/Time.cs | 42 +- .../Apache.Arrow/Flatbuf/Types/Timestamp.cs | 160 +++- .../src/Apache.Arrow/Flatbuf/Types/Union.cs | 31 +- csharp/src/Apache.Arrow/Flatbuf/Types/Utf8.cs | 21 +- .../src/Apache.Arrow/Ipc/ArrowFileWriter.cs | 16 +- .../Ipc/ArrowMemoryReaderImplementation.cs | 2 +- .../Ipc/ArrowReaderImplementation.cs | 2 +- .../Ipc/ArrowStreamReaderImplementation.cs | 8 +- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 2 +- .../Ipc/ArrowTypeFlatbufferBuilder.cs | 2 +- .../Ipc/ReadOnlyMemoryBufferAllocator.cs | 2 +- 65 files changed, 2922 insertions(+), 399 deletions(-) create mode 100644 csharp/src/Apache.Arrow/Flatbuf/Enums/DictionaryKind.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/FlatBuffers/FlatBufferVerify.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/RunEndEncoded.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/SparseMatrixCompressedAxis.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/SparseMatrixIndexCSX.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/SparseTensorIndex.cs create mode 100644 
csharp/src/Apache.Arrow/Flatbuf/SparseTensorIndexCOO.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/SparseTensorIndexCSF.cs diff --git a/csharp/src/Apache.Arrow.Flight/Internal/FlightDataStream.cs b/csharp/src/Apache.Arrow.Flight/Internal/FlightDataStream.cs index 8658845723222..3211212c99cb9 100644 --- a/csharp/src/Apache.Arrow.Flight/Internal/FlightDataStream.cs +++ b/csharp/src/Apache.Arrow.Flight/Internal/FlightDataStream.cs @@ -22,7 +22,7 @@ using Apache.Arrow.Flatbuf; using Apache.Arrow.Flight.Protocol; using Apache.Arrow.Ipc; -using FlatBuffers; +using Google.FlatBuffers; using Google.Protobuf; using Grpc.Core; diff --git a/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs b/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs index 36b13a63d3c0f..9df28b5033c06 100644 --- a/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs +++ b/csharp/src/Apache.Arrow.Flight/Internal/FlightMessageSerializer.cs @@ -19,7 +19,7 @@ using System.IO; using System.Text; using Apache.Arrow.Ipc; -using FlatBuffers; +using Google.FlatBuffers; namespace Apache.Arrow.Flight { diff --git a/csharp/src/Apache.Arrow/Flatbuf/Block.cs b/csharp/src/Apache.Arrow/Flatbuf/Block.cs index 89c065b202c3a..8df0159cd6921 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Block.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Block.cs @@ -6,13 +6,14 @@ namespace Apache.Arrow.Flatbuf { using global::System; -using global::FlatBuffers; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; internal struct Block : IFlatbufferObject { private Struct __p; public ByteBuffer ByteBuffer { get { return __p.bb; } } - public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public void __init(int _i, ByteBuffer _bb) { __p = new Struct(_i, _bb); } public Block __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } /// Index to the start of the RecordBlock (note this is past the Message header) @@ -20,7 +21,7 @@ 
internal struct Block : IFlatbufferObject /// Length of the metadata public int MetaDataLength { get { return __p.bb.GetInt(__p.bb_pos + 8); } } /// Length of the data (this is aligned so there can be a gap between this and - /// the metatdata). + /// the metadata). public long BodyLength { get { return __p.bb.GetLong(__p.bb_pos + 16); } } public static Offset CreateBlock(FlatBufferBuilder builder, long Offset, int MetaDataLength, long BodyLength) { @@ -31,7 +32,7 @@ public static Offset CreateBlock(FlatBufferBuilder builder, long Offset, builder.PutLong(Offset); return new Offset(builder.Offset); } -}; +} } diff --git a/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs b/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs index dda0dd403518f..bbc0e1e46fd7c 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/BodyCompression.cs @@ -6,7 +6,8 @@ namespace Apache.Arrow.Flatbuf { using global::System; -using global::FlatBuffers; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; /// Optional compression for the memory buffers constituting IPC message /// bodies. 
Intended for use with RecordBatch but could be used for other @@ -15,12 +16,14 @@ internal struct BodyCompression : IFlatbufferObject { private Table __p; public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } public static BodyCompression GetRootAsBodyCompression(ByteBuffer _bb) { return GetRootAsBodyCompression(_bb, new BodyCompression()); } public static BodyCompression GetRootAsBodyCompression(ByteBuffer _bb, BodyCompression obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } - public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - /// Compressor library + /// Compressor library. + /// For LZ4_FRAME, each compressed buffer must consist of a single frame. public CompressionType Codec { get { int o = __p.__offset(4); return o != 0 ? (CompressionType)__p.bb.GetSbyte(o + __p.bb_pos) : CompressionType.LZ4_FRAME; } } /// Indicates the way the record batch body was compressed public BodyCompressionMethod Method { get { int o = __p.__offset(6); return o != 0 ? 
(BodyCompressionMethod)__p.bb.GetSbyte(o + __p.bb_pos) : BodyCompressionMethod.BUFFER; } } @@ -28,20 +31,31 @@ internal struct BodyCompression : IFlatbufferObject public static Offset CreateBodyCompression(FlatBufferBuilder builder, CompressionType codec = CompressionType.LZ4_FRAME, BodyCompressionMethod method = BodyCompressionMethod.BUFFER) { - builder.StartObject(2); + builder.StartTable(2); BodyCompression.AddMethod(builder, method); BodyCompression.AddCodec(builder, codec); return BodyCompression.EndBodyCompression(builder); } - public static void StartBodyCompression(FlatBufferBuilder builder) { builder.StartObject(2); } + public static void StartBodyCompression(FlatBufferBuilder builder) { builder.StartTable(2); } public static void AddCodec(FlatBufferBuilder builder, CompressionType codec) { builder.AddSbyte(0, (sbyte)codec, 0); } public static void AddMethod(FlatBufferBuilder builder, BodyCompressionMethod method) { builder.AddSbyte(1, (sbyte)method, 0); } public static Offset EndBodyCompression(FlatBufferBuilder builder) { - int o = builder.EndObject(); + int o = builder.EndTable(); return new Offset(o); } -}; +} + +static internal class BodyCompressionVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyField(tablePos, 4 /*Codec*/, 1 /*CompressionType*/, 1, false) + && verifier.VerifyField(tablePos, 6 /*Method*/, 1 /*BodyCompressionMethod*/, 1, false) + && verifier.VerifyTableEnd(tablePos); + } +} } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs b/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs index 7b2315cab4b29..419800d1338e4 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Buffer.cs @@ -6,7 +6,8 @@ namespace Apache.Arrow.Flatbuf { using global::System; -using global::FlatBuffers; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; /// 
---------------------------------------------------------------------- /// A Buffer represents a single contiguous memory segment @@ -14,14 +15,17 @@ internal struct Buffer : IFlatbufferObject { private Struct __p; public ByteBuffer ByteBuffer { get { return __p.bb; } } - public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public void __init(int _i, ByteBuffer _bb) { __p = new Struct(_i, _bb); } public Buffer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } /// The relative offset into the shared memory page where the bytes for this /// buffer starts public long Offset { get { return __p.bb.GetLong(__p.bb_pos + 0); } } /// The absolute length (in bytes) of the memory buffer. The memory is found - /// from offset (inclusive) to offset + length (non-inclusive). + /// from offset (inclusive) to offset + length (non-inclusive). When building + /// messages using the encapsulated IPC message, padding bytes may be written + /// after a buffer, but such padding bytes do not need to be accounted for in + /// the size here. public long Length { get { return __p.bb.GetLong(__p.bb_pos + 8); } } public static Offset CreateBuffer(FlatBufferBuilder builder, long Offset, long Length) { @@ -30,7 +34,7 @@ public static Offset CreateBuffer(FlatBufferBuilder builder, long Offset builder.PutLong(Offset); return new Offset(builder.Offset); } -}; +} } diff --git a/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs b/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs index e3afafdd5cb16..180e94108d1a7 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/DictionaryBatch.cs @@ -6,7 +6,8 @@ namespace Apache.Arrow.Flatbuf { using global::System; -using global::FlatBuffers; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; /// For sending dictionary encoding information. 
Any Field can be /// dictionary-encoded, but in this case none of its children may be @@ -18,37 +19,51 @@ internal struct DictionaryBatch : IFlatbufferObject { private Table __p; public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } public static DictionaryBatch GetRootAsDictionaryBatch(ByteBuffer _bb) { return GetRootAsDictionaryBatch(_bb, new DictionaryBatch()); } public static DictionaryBatch GetRootAsDictionaryBatch(ByteBuffer _bb, DictionaryBatch obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } - public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } public DictionaryBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } public long Id { get { int o = __p.__offset(4); return o != 0 ? __p.bb.GetLong(o + __p.bb_pos) : (long)0; } } public RecordBatch? Data { get { int o = __p.__offset(6); return o != 0 ? (RecordBatch?)(new RecordBatch()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } /// If isDelta is true the values in the dictionary are to be appended to a - /// dictionary with the indicated id + /// dictionary with the indicated id. If isDelta is false this dictionary + /// should replace the existing dictionary. public bool IsDelta { get { int o = __p.__offset(8); return o != 0 ? 
0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } public static Offset CreateDictionaryBatch(FlatBufferBuilder builder, long id = 0, Offset dataOffset = default(Offset), bool isDelta = false) { - builder.StartObject(3); + builder.StartTable(3); DictionaryBatch.AddId(builder, id); DictionaryBatch.AddData(builder, dataOffset); DictionaryBatch.AddIsDelta(builder, isDelta); return DictionaryBatch.EndDictionaryBatch(builder); } - public static void StartDictionaryBatch(FlatBufferBuilder builder) { builder.StartObject(3); } + public static void StartDictionaryBatch(FlatBufferBuilder builder) { builder.StartTable(3); } public static void AddId(FlatBufferBuilder builder, long id) { builder.AddLong(0, id, 0); } public static void AddData(FlatBufferBuilder builder, Offset dataOffset) { builder.AddOffset(1, dataOffset.Value, 0); } public static void AddIsDelta(FlatBufferBuilder builder, bool isDelta) { builder.AddBool(2, isDelta, false); } public static Offset EndDictionaryBatch(FlatBufferBuilder builder) { - int o = builder.EndObject(); + int o = builder.EndTable(); return new Offset(o); } -}; +} + +static internal class DictionaryBatchVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyField(tablePos, 4 /*Id*/, 8 /*long*/, 8, false) + && verifier.VerifyTable(tablePos, 6 /*Data*/, RecordBatchVerify.Verify, false) + && verifier.VerifyField(tablePos, 8 /*IsDelta*/, 1 /*bool*/, 1, false) + && verifier.VerifyTableEnd(tablePos); + } +} } diff --git a/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs b/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs index 02a35fdd41b1e..ecf1dd788f468 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/DictionaryEncoding.cs @@ -6,52 +6,72 @@ namespace Apache.Arrow.Flatbuf { using global::System; -using global::FlatBuffers; +using global::System.Collections.Generic; +using 
global::Google.FlatBuffers; -/// ---------------------------------------------------------------------- -/// Dictionary encoding metadata internal struct DictionaryEncoding : IFlatbufferObject { private Table __p; public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } public static DictionaryEncoding GetRootAsDictionaryEncoding(ByteBuffer _bb) { return GetRootAsDictionaryEncoding(_bb, new DictionaryEncoding()); } public static DictionaryEncoding GetRootAsDictionaryEncoding(ByteBuffer _bb, DictionaryEncoding obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } - public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } public DictionaryEncoding __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } /// The known dictionary id in the application where this data is used. In /// the file or streaming formats, the dictionary ids are found in the /// DictionaryBatch messages public long Id { get { int o = __p.__offset(4); return o != 0 ? __p.bb.GetLong(o + __p.bb_pos) : (long)0; } } - /// The dictionary indices are constrained to be positive integers. If this - /// field is null, the indices must be signed int32 + /// The dictionary indices are constrained to be non-negative integers. If + /// this field is null, the indices must be signed int32. To maximize + /// cross-language compatibility and performance, implementations are + /// recommended to prefer signed integer types over unsigned integer types + /// and to avoid uint64 indices unless they are required by an application. public Int? IndexType { get { int o = __p.__offset(6); return o != 0 ? (Int?)(new Int()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } /// By default, dictionaries are not ordered, or the order does not have /// semantic meaning. 
In some statistical, applications, dictionary-encoding /// is used to represent ordered categorical data, and we provide a way to /// preserve that metadata here public bool IsOrdered { get { int o = __p.__offset(8); return o != 0 ? 0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } + public DictionaryKind DictionaryKind { get { int o = __p.__offset(10); return o != 0 ? (DictionaryKind)__p.bb.GetShort(o + __p.bb_pos) : DictionaryKind.DenseArray; } } public static Offset CreateDictionaryEncoding(FlatBufferBuilder builder, long id = 0, Offset indexTypeOffset = default(Offset), - bool isOrdered = false) { - builder.StartObject(3); + bool isOrdered = false, + DictionaryKind dictionaryKind = DictionaryKind.DenseArray) { + builder.StartTable(4); DictionaryEncoding.AddId(builder, id); DictionaryEncoding.AddIndexType(builder, indexTypeOffset); + DictionaryEncoding.AddDictionaryKind(builder, dictionaryKind); DictionaryEncoding.AddIsOrdered(builder, isOrdered); return DictionaryEncoding.EndDictionaryEncoding(builder); } - public static void StartDictionaryEncoding(FlatBufferBuilder builder) { builder.StartObject(3); } + public static void StartDictionaryEncoding(FlatBufferBuilder builder) { builder.StartTable(4); } public static void AddId(FlatBufferBuilder builder, long id) { builder.AddLong(0, id, 0); } public static void AddIndexType(FlatBufferBuilder builder, Offset indexTypeOffset) { builder.AddOffset(1, indexTypeOffset.Value, 0); } public static void AddIsOrdered(FlatBufferBuilder builder, bool isOrdered) { builder.AddBool(2, isOrdered, false); } + public static void AddDictionaryKind(FlatBufferBuilder builder, DictionaryKind dictionaryKind) { builder.AddShort(3, (short)dictionaryKind, 0); } public static Offset EndDictionaryEncoding(FlatBufferBuilder builder) { - int o = builder.EndObject(); + int o = builder.EndTable(); return new Offset(o); } -}; +} + +static internal class DictionaryEncodingVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, 
uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyField(tablePos, 4 /*Id*/, 8 /*long*/, 8, false) + && verifier.VerifyTable(tablePos, 6 /*IndexType*/, IntVerify.Verify, false) + && verifier.VerifyField(tablePos, 8 /*IsOrdered*/, 1 /*bool*/, 1, false) + && verifier.VerifyField(tablePos, 10 /*DictionaryKind*/, 2 /*DictionaryKind*/, 2, false) + && verifier.VerifyTableEnd(tablePos); + } +} } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs index e9f6b6e831e54..e1f0c8f79ddf4 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/BodyCompressionMethod.cs @@ -17,7 +17,7 @@ internal enum BodyCompressionMethod : sbyte /// uncompressed length may be set to -1 to indicate that the data that /// follows is not compressed, which can be useful for cases where /// compression does not yield appreciable savings. - BUFFER = 0, + BUFFER = 0, }; diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/DictionaryKind.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/DictionaryKind.cs new file mode 100644 index 0000000000000..9a67bc7c5c86f --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/DictionaryKind.cs @@ -0,0 +1,19 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +/// Maintained for forwards compatibility, in the future +/// Dictionaries might be explicit maps between integers and values +/// allowing for non-contiguous index values +internal enum DictionaryKind : short +{ + DenseArray = 0, +}; + + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs index d1363968df064..ac82c72e24e8b 100644 --- 
a/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/IntervalUnit.cs @@ -7,8 +7,9 @@ namespace Apache.Arrow.Flatbuf internal enum IntervalUnit : short { - YEAR_MONTH = 0, - DAY_TIME = 1, + YEAR_MONTH = 0, + DAY_TIME = 1, + MONTH_DAY_NANO = 2, }; diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs index 94d239bfa9345..1204b1e276e6e 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/MessageHeader.cs @@ -15,12 +15,44 @@ namespace Apache.Arrow.Flatbuf /// it is best to send data using RecordBatch internal enum MessageHeader : byte { - NONE = 0, - Schema = 1, - DictionaryBatch = 2, - RecordBatch = 3, - Tensor = 4, + NONE = 0, + Schema = 1, + DictionaryBatch = 2, + RecordBatch = 3, + Tensor = 4, + SparseTensor = 5, }; + +static internal class MessageHeaderVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, byte typeId, uint tablePos) + { + bool result = true; + switch((MessageHeader)typeId) + { + case MessageHeader.Schema: + result = SchemaVerify.Verify(verifier, tablePos); + break; + case MessageHeader.DictionaryBatch: + result = DictionaryBatchVerify.Verify(verifier, tablePos); + break; + case MessageHeader.RecordBatch: + result = RecordBatchVerify.Verify(verifier, tablePos); + break; + case MessageHeader.Tensor: + result = TensorVerify.Verify(verifier, tablePos); + break; + case MessageHeader.SparseTensor: + result = SparseTensorVerify.Verify(verifier, tablePos); + break; + default: result = true; + break; + } + return result; + } +} + + } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs index e8a7932a70369..10f852efb9b96 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs @@ -10,29 +10,112 @@ namespace Apache.Arrow.Flatbuf /// add new logical types to 
Type without breaking backwards compatibility internal enum Type : byte { - NONE = 0, - Null = 1, - Int = 2, - FloatingPoint = 3, - Binary = 4, - Utf8 = 5, - Bool = 6, - Decimal = 7, - Date = 8, - Time = 9, - Timestamp = 10, - Interval = 11, - List = 12, - Struct_ = 13, - Union = 14, - FixedSizeBinary = 15, - FixedSizeList = 16, - Map = 17, - Duration = 18, - LargeBinary = 19, - LargeUtf8 = 20, - LargeList = 21, + NONE = 0, + Null = 1, + Int = 2, + FloatingPoint = 3, + Binary = 4, + Utf8 = 5, + Bool = 6, + Decimal = 7, + Date = 8, + Time = 9, + Timestamp = 10, + Interval = 11, + List = 12, + Struct_ = 13, + Union = 14, + FixedSizeBinary = 15, + FixedSizeList = 16, + Map = 17, + Duration = 18, + LargeBinary = 19, + LargeUtf8 = 20, + LargeList = 21, + RunEndEncoded = 22, }; + +static internal class TypeVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, byte typeId, uint tablePos) + { + bool result = true; + switch((Type)typeId) + { + case Type.Null: + result = NullVerify.Verify(verifier, tablePos); + break; + case Type.Int: + result = IntVerify.Verify(verifier, tablePos); + break; + case Type.FloatingPoint: + result = FloatingPointVerify.Verify(verifier, tablePos); + break; + case Type.Binary: + result = BinaryVerify.Verify(verifier, tablePos); + break; + case Type.Utf8: + result = Utf8Verify.Verify(verifier, tablePos); + break; + case Type.Bool: + result = BoolVerify.Verify(verifier, tablePos); + break; + case Type.Decimal: + result = DecimalVerify.Verify(verifier, tablePos); + break; + case Type.Date: + result = DateVerify.Verify(verifier, tablePos); + break; + case Type.Time: + result = TimeVerify.Verify(verifier, tablePos); + break; + case Type.Timestamp: + result = TimestampVerify.Verify(verifier, tablePos); + break; + case Type.Interval: + result = IntervalVerify.Verify(verifier, tablePos); + break; + case Type.List: + result = ListVerify.Verify(verifier, tablePos); + break; + case Type.Struct_: + result = Struct_Verify.Verify(verifier, 
tablePos); + break; + case Type.Union: + result = UnionVerify.Verify(verifier, tablePos); + break; + case Type.FixedSizeBinary: + result = FixedSizeBinaryVerify.Verify(verifier, tablePos); + break; + case Type.FixedSizeList: + result = FixedSizeListVerify.Verify(verifier, tablePos); + break; + case Type.Map: + result = MapVerify.Verify(verifier, tablePos); + break; + case Type.Duration: + result = DurationVerify.Verify(verifier, tablePos); + break; + case Type.LargeBinary: + result = LargeBinaryVerify.Verify(verifier, tablePos); + break; + case Type.LargeUtf8: + result = LargeUtf8Verify.Verify(verifier, tablePos); + break; + case Type.LargeList: + result = LargeListVerify.Verify(verifier, tablePos); + break; + case Type.RunEndEncoded: + result = RunEndEncodedVerify.Verify(verifier, tablePos); + break; + default: result = true; + break; + } + return result; + } +} + + } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Field.cs b/csharp/src/Apache.Arrow/Flatbuf/Field.cs index a4f9e3057343e..c5c6c0a165598 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Field.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Field.cs @@ -6,49 +6,76 @@ namespace Apache.Arrow.Flatbuf { using global::System; -using global::FlatBuffers; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; /// ---------------------------------------------------------------------- /// A field represents a named column in a record / row batch or child of a /// nested type. 
-/// -/// - children is only for nested Arrow arrays -/// - For primitive types, children will have length 0 -/// - nullable should default to true in general internal struct Field : IFlatbufferObject { private Table __p; public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } public static Field GetRootAsField(ByteBuffer _bb) { return GetRootAsField(_bb, new Field()); } public static Field GetRootAsField(ByteBuffer _bb, Field obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } - public void __init(int _i, ByteBuffer _bb) { __p.bb_pos = _i; __p.bb = _bb; } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } public Field __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + /// Name is not required, in i.e. a List public string Name { get { int o = __p.__offset(4); return o != 0 ? __p.__string(o + __p.bb_pos) : null; } } #if ENABLE_SPAN_T - public Span GetNameBytes() { return __p.__vector_as_span(4); } + public Span GetNameBytes() { return __p.__vector_as_span(4, 1); } #else public ArraySegment? GetNameBytes() { return __p.__vector_as_arraysegment(4); } #endif public byte[] GetNameArray() { return __p.__vector_as_array(4); } + /// Whether or not this field can contain nulls. Should be true in general. public bool Nullable { get { int o = __p.__offset(6); return o != 0 ? 0!=__p.bb.Get(o + __p.bb_pos) : (bool)false; } } - public Type TypeType { get { int o = __p.__offset(8); return o != 0 ? (Type)__p.bb.Get(o + __p.bb_pos) : Flatbuf.Type.NONE; } } - public TTable? Type() where TTable : struct, IFlatbufferObject { int o = __p.__offset(10); return o != 0 ? (TTable?)__p.__union(o) : null; } + public Type TypeType { get { int o = __p.__offset(8); return o != 0 ? (Type)__p.bb.Get(o + __p.bb_pos) : Apache.Arrow.Flatbuf.Type.NONE; } } + /// This is the type of the decoded value if the field is dictionary encoded. 
+ public TTable? Type() where TTable : struct, IFlatbufferObject { int o = __p.__offset(10); return o != 0 ? (TTable?)__p.__union(o + __p.bb_pos) : null; } + public Null TypeAsNull() { return Type().Value; } + public Int TypeAsInt() { return Type().Value; } + public FloatingPoint TypeAsFloatingPoint() { return Type().Value; } + public Binary TypeAsBinary() { return Type().Value; } + public Utf8 TypeAsUtf8() { return Type().Value; } + public Bool TypeAsBool() { return Type().Value; } + public Decimal TypeAsDecimal() { return Type().Value; } + public Date TypeAsDate() { return Type().Value; } + public Time TypeAsTime() { return Type