From ae0946c88306e24540595a51c78ed91f756367e9 Mon Sep 17 00:00:00 2001
From: Lijuan Zhang
Date: Mon, 1 May 2023 03:43:08 +0000
Subject: [PATCH 1/6] add more tests, fix linter

---
 .../test_atds_autotuning_benchmark.py | 54 +
 .../test_atds_parallelism_benchmark.py | 107 +
 .../benchmark/test_codec_atds_benchmark.py | 37 +
 .../benchmark/test_mixed_benchmark.py | 108 +
 .../test_multiple_files_atds_benchmark.py | 40 +
 .../benchmark/test_shuffle_atds_benchmark.py | 86 +
 .../test_memory_growth_benchmark.py | 48 +
 .../test_memory_leak_benchmark.py | 48 +
 .../test_atds_avro/test_atds_dataset_eager.py | 2995 +++++++++++++++++
 .../test_atds_avro/test_atds_feature_eager.py | 107 +
 tests/test_atds_avro/test_atds_writer.py | 396 +++
 .../utils/generator/mock_generator.py | 69 +
 tests/test_parse_avro_eager.py | 2417 +++++++++++++
 13 files changed, 6512 insertions(+)
 create mode 100644 tests/test_atds_avro/benchmark/test_atds_autotuning_benchmark.py
 create mode 100644 tests/test_atds_avro/benchmark/test_atds_parallelism_benchmark.py
 create mode 100644 tests/test_atds_avro/benchmark/test_codec_atds_benchmark.py
 create mode 100644 tests/test_atds_avro/benchmark/test_mixed_benchmark.py
 create mode 100644 tests/test_atds_avro/benchmark/test_multiple_files_atds_benchmark.py
 create mode 100644 tests/test_atds_avro/benchmark/test_shuffle_atds_benchmark.py
 create mode 100644 tests/test_atds_avro/memory_benchmark/test_memory_growth_benchmark.py
 create mode 100644 tests/test_atds_avro/memory_benchmark/test_memory_leak_benchmark.py
 create mode 100644 tests/test_atds_avro/test_atds_dataset_eager.py
 create mode 100644 tests/test_atds_avro/test_atds_feature_eager.py
 create mode 100644 tests/test_atds_avro/test_atds_writer.py
 create mode 100644 tests/test_atds_avro/utils/generator/mock_generator.py
 create mode 100644 tests/test_parse_avro_eager.py

diff --git a/tests/test_atds_avro/benchmark/test_atds_autotuning_benchmark.py b/tests/test_atds_avro/benchmark/test_atds_autotuning_benchmark.py
new file mode 100644
index 000000000..18e392c09
--- /dev/null
+++ b/tests/test_atds_avro/benchmark/test_atds_autotuning_benchmark.py
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+# ============================================================================== +"""ATDS benchmark with autotuning.""" + +import pytest +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import LARGE_NUM_RECORDS +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + run_atds_benchmark_from_data_source, +) +from tests.test_atds_avro.utils.benchmark_utils import MIXED_TYPES_SCENARIO + +BATCH_SIZES = [8, 16, 32, 64, 128, 256, 512, 1024] +PARALLELISM = [1, 2, 3, 4, 5, 6, tf.data.AUTOTUNE] +PARAMS = [ + (batch_size, 1024, "deflate", parallelism) + for batch_size in BATCH_SIZES + for parallelism in PARALLELISM +] + + +@pytest.mark.benchmark( + group="autotuning", +) +@pytest.mark.parametrize( + ["batch_size", "shuffle_buffer_size", "codec", "parallelism"], PARAMS +) +def test_autotuning(batch_size, shuffle_buffer_size, codec, parallelism, benchmark): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, num_records=LARGE_NUM_RECORDS + ) + run_atds_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + parallelism=parallelism, + codec=codec, + shuffle_buffer_size=shuffle_buffer_size, + rounds=10, + ) diff --git a/tests/test_atds_avro/benchmark/test_atds_parallelism_benchmark.py b/tests/test_atds_avro/benchmark/test_atds_parallelism_benchmark.py new file mode 100644 index 000000000..63ec7b62b --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_atds_parallelism_benchmark.py @@ -0,0 +1,107 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDS benchmark with parallelism.""" + +import pytest +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import LARGE_NUM_RECORDS +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + run_atds_benchmark_from_data_source, +) +from tests.test_atds_avro.utils.benchmark_utils import MIXED_TYPES_SCENARIO + + +@pytest.mark.benchmark( + group="parallelism", +) +@pytest.mark.parametrize( + ["batch_size", "shuffle_buffer_size", "codec", "parallelism"], + [ + (128, 1024, "null", 1), + (128, 1024, "null", 2), + (128, 1024, "null", 3), + (128, 1024, "null", 4), + (128, 1024, "null", 5), + (128, 1024, "null", 6), + (128, 1024, "deflate", 1), + (128, 1024, "deflate", 2), + (128, 1024, "deflate", 3), + (128, 1024, "deflate", 4), + (128, 1024, "deflate", 5), + (128, 1024, "deflate", 6), + (128, 1024, "snappy", 1), + (128, 1024, "snappy", 2), + (128, 1024, "snappy", 3), + (128, 1024, "snappy", 4), + (128, 1024, "snappy", 5), + (128, 1024, "snappy", 6), + ], +) +def test_parallelism(batch_size, shuffle_buffer_size, codec, parallelism, benchmark): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, num_records=LARGE_NUM_RECORDS + ) + run_atds_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + parallelism=parallelism, + codec=codec, + shuffle_buffer_size=shuffle_buffer_size, + rounds=10, + ) + + +@pytest.mark.benchmark( + group="parallelism", +) +@pytest.mark.parametrize( + ["batch_size", "shuffle_buffer_size", "parallelism", "interleave"], + [ + (32, 1024, 1, 6), + (32, 1024, 2, 3), + (32, 1024, 3, 2), + (32, 1024, 6, 1), + (32, 1024, tf.data.AUTOTUNE, 1), + (32, 1024, tf.data.AUTOTUNE, 2), + (32, 1024, tf.data.AUTOTUNE, 3), + (32, 1024, tf.data.AUTOTUNE, 6), + (128, 1024, 1, 6), + (128, 1024, 2, 3), + (128, 1024, 3, 2), + (128, 1024, 6, 1), + (128, 1024, tf.data.AUTOTUNE, 1), + (128, 1024, tf.data.AUTOTUNE, 2), + (128, 1024, tf.data.AUTOTUNE, 3), + (128, 1024, tf.data.AUTOTUNE, 6), + ], +) +def test_parallelism_with_interleave( + batch_size, shuffle_buffer_size, parallelism, interleave, benchmark +): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, num_records=LARGE_NUM_RECORDS, partitions=6 + ) + run_atds_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + parallelism=parallelism, + interleave_parallelism=interleave, + codec="deflate", + shuffle_buffer_size=shuffle_buffer_size, + ) diff --git a/tests/test_atds_avro/benchmark/test_codec_atds_benchmark.py b/tests/test_atds_avro/benchmark/test_codec_atds_benchmark.py new file mode 100644 index 000000000..c409bb29b --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_codec_atds_benchmark.py @@ -0,0 +1,37 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDS benchmark with different codecs.""" + +import pytest + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import SMALL_NUM_RECORDS +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + run_atds_benchmark_from_data_source, +) +from tests.test_atds_avro.utils.benchmark_utils import MIXED_TYPES_SCENARIO + + +@pytest.mark.benchmark( + group="codec", +) +@pytest.mark.parametrize( + ["batch_size", "codec"], [(128, "null"), (128, "deflate"), (128, "snappy")] +) +def test_codec(batch_size, codec, benchmark): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, num_records=SMALL_NUM_RECORDS + ) + run_atds_benchmark_from_data_source(data_source, batch_size, benchmark, codec=codec) diff --git a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py new file mode 100644 index 000000000..fd2dc26ed --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py @@ -0,0 +1,108 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""ATDS benchmark for jrps schema.""" + +import glob +import os +import pytest +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import SMALL_NUM_RECORDS +from tests.test_atds_avro.utils.generator.tensor_generator import ( + IntTensorGenerator, + FloatTensorGenerator, + WordTensorGenerator, +) +from tests.test_atds_avro.utils.generator.sparse_tensor_generator import ( + FloatSparseTensorGenerator, + ValueDistribution, +) +from tests.test_atds_avro.utils.atds_writer import ATDSWriter +from tests.test_atds_avro.utils.benchmark_utils import benchmark_func +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + get_dataset, + get_features_from_data_source, +) + + +@pytest.mark.benchmark( + group="jrps", +) +def test_jrps_benchmark_data(): + scenario = { + "sparse_1d_float_small_1": FloatSparseTensorGenerator( + tf.SparseTensorSpec([3], tf.dtypes.float32), ValueDistribution.SINGLE_VALUE + ), + "sparse_1d_float_large": FloatSparseTensorGenerator( + tf.SparseTensorSpec([50001], tf.dtypes.float32), + ValueDistribution.SINGLE_VALUE, + ), + "dense_0d_float": FloatTensorGenerator(tf.TensorSpec([], tf.dtypes.float32)), + "dense_1d_float_large_1": FloatTensorGenerator( + tf.TensorSpec([200], tf.dtypes.float32) + ), + "dense_0d_int_1": IntTensorGenerator(tf.TensorSpec([], tf.dtypes.int32)), + "sparse_1d_float_medium_1": FloatSparseTensorGenerator( + tf.SparseTensorSpec([10], tf.dtypes.float32), ValueDistribution.SINGLE_VALUE + ), + "dense_1d_float_large_2": FloatTensorGenerator( + tf.TensorSpec([200], tf.dtypes.float32) + ), + "dense_1d_float_small_1": FloatTensorGenerator( + tf.TensorSpec([2], tf.dtypes.float32) + 
), + "dense_1d_float_large_3": FloatTensorGenerator( + tf.TensorSpec([200], tf.dtypes.float32) + ), + "dense_1d_float_small_2": FloatTensorGenerator( + tf.TensorSpec([2], tf.dtypes.float32) + ), + "dense_1d_float_small_3": FloatTensorGenerator( + tf.TensorSpec([2], tf.dtypes.float32) + ), + "sparse_1d_float_medium_2": FloatSparseTensorGenerator( + tf.SparseTensorSpec([51], tf.dtypes.float32), ValueDistribution.SINGLE_VALUE + ), + "sparse_1d_float_small_2": FloatSparseTensorGenerator( + tf.SparseTensorSpec([3], tf.dtypes.float32), ValueDistribution.SINGLE_VALUE + ), + "dense_1d_float_large_4": FloatTensorGenerator( + tf.TensorSpec([200], tf.dtypes.float32) + ), + "dense_1d_float_small_4": FloatTensorGenerator( + tf.TensorSpec([1], tf.dtypes.float32) + ), + "dense_0d_string_1": WordTensorGenerator( + tf.TensorSpec([], tf.dtypes.string), avg_length=24 + ), + "dense_0d_int_2": IntTensorGenerator(tf.TensorSpec([], tf.dtypes.int32)), + "dense_0d_string_2": WordTensorGenerator( + tf.TensorSpec([], tf.dtypes.string), avg_length=24 + ), + "dense_0d_long": IntTensorGenerator(tf.TensorSpec([], tf.dtypes.int64)), + } + num_partitions = 10 + data_source = DataSource( + scenario=scenario, num_records=SMALL_NUM_RECORDS, partitions=num_partitions + ) + with ATDSWriter() as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + dataset = get_dataset( + glob.glob(pattern), get_features_from_data_source(writer, data_source) + ) + dataset = dataset.unbatch() + benchmark_func(dataset) diff --git a/tests/test_atds_avro/benchmark/test_multiple_files_atds_benchmark.py b/tests/test_atds_avro/benchmark/test_multiple_files_atds_benchmark.py new file mode 100644 index 000000000..eda4553b3 --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_multiple_files_atds_benchmark.py @@ -0,0 +1,40 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDS benchmark with multiple files.""" + +import pytest + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import ( + LARGE_NUM_RECORDS, + MULTIPLE_PARTITION, +) +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + run_atds_benchmark_from_data_source, +) +from tests.test_atds_avro.utils.benchmark_utils import MIXED_TYPES_SCENARIO + + +@pytest.mark.benchmark( + group="multi_partition", +) +@pytest.mark.parametrize(["batch_size", "partitions"], [(128, MULTIPLE_PARTITION)]) +def test_multiple_partitions(batch_size, partitions, benchmark): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, + num_records=LARGE_NUM_RECORDS, + partitions=partitions, + ) + run_atds_benchmark_from_data_source(data_source, batch_size, benchmark) diff --git a/tests/test_atds_avro/benchmark/test_shuffle_atds_benchmark.py b/tests/test_atds_avro/benchmark/test_shuffle_atds_benchmark.py new file mode 100644 index 000000000..bea079d51 --- /dev/null +++ b/tests/test_atds_avro/benchmark/test_shuffle_atds_benchmark.py @@ -0,0 +1,86 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDSDataset shuffle benchmark.""" + +import glob +import os +import pytest + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.data_source_registry import SMALL_NUM_RECORDS +from tests.test_atds_avro.utils.atds_writer import ATDSWriter +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + get_dataset, + get_features_from_data_source, + run_atds_benchmark_from_data_source, +) +from tests.test_atds_avro.utils.benchmark_utils import ( + MIXED_TYPES_SCENARIO, + benchmark_func, +) + + +@pytest.mark.benchmark( + group="shuffle", +) +@pytest.mark.parametrize( + ["batch_size", "shuffle_buffer_size"], + [ + (128, 0), + (128, 64), # shuffle_buffer_size < batch_size (imperfect shuffle) + (128, 512), # shuffle_buffer_size > batch_size (imperfect shuffle) + (128, 1024), # shuffle_buffer_size + batch_size > num_records (perfect shuffle) + ], +) +def test_in_ops_shuffle(batch_size, shuffle_buffer_size, benchmark): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, num_records=SMALL_NUM_RECORDS + ) + run_atds_benchmark_from_data_source( + data_source, batch_size, benchmark, shuffle_buffer_size=shuffle_buffer_size + ) + + +@pytest.mark.benchmark( + group="shuffle", +) +@pytest.mark.parametrize( + ["batch_size", "shuffle_buffer_size"], + [ + (128, 64), # shuffle_buffer_size < batch_size (imperfect shuffle) + (128, 512), # shuffle_buffer_size > batch_size (imperfect shuffle) + (128, 1024), # shuffle_buffer_size + batch_size > num_records (perfect shuffle) + ], +) +def test_unbatch_shuffle_batch(batch_size, shuffle_buffer_size, benchmark): + data_source = DataSource( + scenario=MIXED_TYPES_SCENARIO, num_records=SMALL_NUM_RECORDS + ) + with ATDSWriter(codec="null") as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + + dataset = get_dataset( + glob.glob(pattern), + get_features_from_data_source(writer, data_source), + batch_size=batch_size, + ) + dataset = dataset.unbatch() + dataset = dataset.shuffle(shuffle_buffer_size) + dataset = dataset.batch(batch_size) + count = benchmark.pedantic( + target=benchmark_func, args=[dataset], iterations=2, rounds=100, kwargs={} + ) + assert count > 0, f"ATDS record count: {count} must be greater than 0" diff --git a/tests/test_atds_avro/memory_benchmark/test_memory_growth_benchmark.py b/tests/test_atds_avro/memory_benchmark/test_memory_growth_benchmark.py new file mode 100644 index 000000000..5484e1829 --- /dev/null +++ b/tests/test_atds_avro/memory_benchmark/test_memory_growth_benchmark.py @@ -0,0 +1,48 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""ATDSDataset benchmark for memory growth test.""" + +import pytest +import os +import tensorflow as tf + +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.generator.tensor_generator import IntTensorGenerator + +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + run_atds_benchmark_from_data_source, +) + + +@pytest.mark.benchmark( + group="memory_growth", +) +@pytest.mark.parametrize("n", [(1), (2), (4), (8), (32), (128), (512), (1024)]) +def test_memory_growth(n, benchmark): + batch_size = 128 + # n is the shuffle buffer size to batch size ratio. + shuffle_buffer_size = batch_size * n + scenario = {"tensor": IntTensorGenerator(tf.TensorSpec(shape=[16], dtype=tf.int32))} + # A fixed number of records that covers all ratio n. + num_records = 720 * 9 * 1024 + data_source = DataSource(scenario=scenario, num_records=num_records) + run_atds_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + codec="null", + shuffle_buffer_size=shuffle_buffer_size, + rounds=1, + ) diff --git a/tests/test_atds_avro/memory_benchmark/test_memory_leak_benchmark.py b/tests/test_atds_avro/memory_benchmark/test_memory_leak_benchmark.py new file mode 100644 index 000000000..62544502c --- /dev/null +++ b/tests/test_atds_avro/memory_benchmark/test_memory_leak_benchmark.py @@ -0,0 +1,48 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""ATDSDataset benchmark with all types of tensors for memory leak check.""" + +import pytest +import os + +from tests.test_atds_avro.utils.data_source_registry import ( + ALL_TYPES_DATA_SOURCE_NAME, + get_data_source_from_registry, +) + +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + run_atds_benchmark_from_data_source, +) + + +@pytest.mark.skipif( + os.getenv("ATDS_MEM_LEAK_CHECK") != "1", + reason="This benchmark test is only used in memory leak check.", +) +@pytest.mark.benchmark( + group="all_types_of_tensors", +) +@pytest.mark.parametrize("batch_size", [(16)]) +def test_all_types_of_tensors_for_memory_leak_check(batch_size, benchmark): + data_source = get_data_source_from_registry(ALL_TYPES_DATA_SOURCE_NAME) + shuffle_buffer_size = batch_size * 8 + run_atds_benchmark_from_data_source( + data_source, + batch_size, + benchmark, + codec="deflate", + shuffle_buffer_size=shuffle_buffer_size, + rounds=1, + ) diff --git a/tests/test_atds_avro/test_atds_dataset_eager.py b/tests/test_atds_avro/test_atds_dataset_eager.py new file mode 100644 index 000000000..85aa03d5c --- /dev/null +++ b/tests/test_atds_avro/test_atds_dataset_eager.py @@ -0,0 +1,2995 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import pytest +import tempfile +import re +import os +import gzip +import json +import numpy as np +import tensorflow as tf +import itertools +import snappy +import random +import avro.schema +from avro.datafile import DataFileWriter +from avro.io import DatumWriter +from parameterized import parameterized +from tensorflow.python.framework import errors +from tests.test_parse_avro_eager import AvroDatasetTestBase, AvroRecordsToFile +from tensorflow_io.python.ops import core_ops +from tensorflow_io.python.experimental.atds.dataset import ATDSDataset +from tensorflow_io.python.experimental.atds.features import ( + DenseFeature, + SparseFeature, + VarlenFeature, +) + +"This file holds the test cases for ATDSDataset." + + +def create_atds_dataset( + writer_schema, + record_data, + features, + batch_size, + drop_remainder=None, + codec="deflate", + num_parallel_calls=None, +): + """ + Creates ATDSDataset by + 1. Generate Avro files with the writer_schema and record_data. Note: This uses DEFLATE codec. + 2. Create ATDSDataset with the generated files, batch size, + and features config. + """ + filename = os.path.join(tempfile.mkdtemp(), "test.avro") + writer = AvroRecordsToFile( + filename=filename, writer_schema=writer_schema, codec=codec + ) + writer.write_records(record_data) + return ATDSDataset( + filenames=[filename], + batch_size=batch_size, + features=features, + drop_remainder=drop_remainder, + num_parallel_calls=num_parallel_calls, + ) + + +@pytest.mark.parametrize( + ["record_data", "error_message"], + [ + ( + [{"int_list_list": [[1, 2, 3], [3, 4]]}], + "Failed to decode feature int_list_list. " + "Reason: Number of decoded value 2 does not match the expected dimension size 3" + " at the 2th dimension in user defined shape [2,3]", + ), + ( + [{"int_list_list": [[1, 2, 3], [3, 4, 5], [6, 7, 8], [9, 10, 11]]}], + "Failed to decode feature int_list_list. " + "Reason: Number of decoded value 4 does not match the expected dimension size 2" + " at the 1th dimension in user defined shape [2,3]", + ), + ], +) +def test_dense_feature_decode_error(record_data, error_message): + """test_dense_feature_decode_error""" + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""" + features = {"int_list_list": DenseFeature([2, 3], tf.dtypes.int32)} + with pytest.raises(errors.InvalidArgumentError, match=re.escape(error_message)): + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=1, + ) + iterator = iter(dataset) + next(iterator) + + +@pytest.mark.parametrize( + ["record_data", "error_message"], + [ + ( + [{"int_list_list": [[1, 2, 3], [3, 4]]}], + "Failed to decode feature int_list_list. " + "Reason: Number of decoded value 2 does not match the expected dimension size 3" + " at the 2th dimension in user defined shape [?,3]", + ), + ( + [{"int_list_list": [[]]}], + "Failed to decode feature int_list_list. 
" + "Reason: Number of decoded value 0 does not match the expected dimension size 3" + " at the 2th dimension in user defined shape [?,3]", + ), + ], +) +def test_varlen_feature_decode_error(record_data, error_message): + """test_varlen_feature_decode_error""" + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""" + features = {"int_list_list": VarlenFeature([-1, 3], tf.dtypes.int32)} + with pytest.raises(errors.InvalidArgumentError, match=re.escape(error_message)): + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=1, + ) + iterator = iter(dataset) + next(iterator) + + +@pytest.mark.parametrize( + ["record_data", "error_message"], + [ + ( + [{"sparse_key": {"indices0": [0, 1], "values": []}}], + "Failed to decode feature sparse_key. " + "Reason: Numbers of decoded value in indice and values array are different. " + "Numbers of decoded value in [indices0, values] arrays are [2, 0]", + ), + ( + [{"sparse_key": {"indices0": [0, 1, 2], "values": [0.5, -0.5]}}], + "Failed to decode feature sparse_key. " + "Reason: Numbers of decoded value in indice and values array are different. " + "Numbers of decoded value in [indices0, values] arrays are [3, 2]", + ), + ], +) +def test_sparse_feature_decode_error(record_data, error_message): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { + "type" : "array", + "items" : "long" + } + }, { + "name" : "values", + "type" : { + "type" : "array", + "items" : "float" + } + } ] + } + } + ]}""" + features = {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.float32)} + with pytest.raises(errors.InvalidArgumentError, match=re.escape(error_message)): + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=1, + ) + iterator = iter(dataset) + next(iterator) + + +@pytest.mark.parametrize( + ["schema", "features", "record_data", "error_message"], + [ + # test_dense_feature_non_nested_arrays + ( + """{ + "type": "record", + "name": "outer_record", + "fields": [ + { + "name": "non_nested_arrays", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "inner_record", + "fields": [ + { + "name": "inner_list", + "type": { + "type": "array", + "items": "int" + } + } + ] + } + } + } + ]}""", + {"non_nested_arrays": DenseFeature([2, 2], tf.dtypes.int32)}, + [{"non_nested_arrays": [{"inner_list": [1, 2]}]}], + "Dense feature 'non_nested_arrays' must be non-nullable nested arrays only. " + "Invalid schema found:", + ), + # test_dense_feature_nullable_array + ( + """{ + "type": "record", + "name": "outer_record", + "fields": [ + { + "name": "nullable_array", + "type": { + "type": "array", + "items": ["null", { + "type": "array", + "items": "int" + }] + } + } + ]}""", + {"nullable_array": DenseFeature([2, 2], tf.dtypes.int32)}, + [{"nullable_array": [[1, 2], [3, 4]]}], + "Dense feature 'nullable_array' must be non-nullable nested arrays only. 
" + "Invalid schema found:", + ), + # test_dense_feature_type_mismatch + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""", + {"int_list_list": DenseFeature([2, 2], tf.dtypes.int64)}, + [{"int_list_list": [[1, 2]]}], + "Schema value type and metadata type mismatch in feature 'int_list_list'. " + "Avro schema data type: int, metadata type: int64. " + "Invalid schema found:", + ), + # test_dense_feature_rank_mismatch + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""", + {"int_list_list": DenseFeature([1, 1, 2], tf.dtypes.int32)}, + [{"int_list_list": [[1, 2], [3, 4]]}], + "Mismatch between avro schema rank and metadata rank in feature 'int_list_list'. " + "Avro schema rank: 2, metadata rank: 3. " + "Invalid schema found:", + ), + # test_varlen_feature_non_nested_arrays + ( + """{ + "type": "record", + "name": "outer_record", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "inner_record", + "fields": [ + { + "name": "inner_list", + "type": { + "type": "array", + "items": "int" + } + } + ] + } + } + } + ]}""", + {"int_list_list": VarlenFeature([-1, 3], tf.dtypes.int32)}, + [{"int_list_list": [{"inner_list": [1, 2, 3]}, {"inner_list": [3, 4, 5]}]}], + "Varlen feature 'int_list_list' must be non-nullable nested arrays only. " + "Invalid schema found:", + ), + # test_varlen_feature_nullable_array + ( + """{ + "type": "record", + "name": "outer_record", + "fields": [ + { + "name": "nullable_array", + "type": { + "type": "array", + "items": ["null", { + "type": "array", + "items": "int" + }] + } + } + ]}""", + {"nullable_array": VarlenFeature([2, -1], tf.dtypes.int32)}, + [{"nullable_array": [[1, 2], [3]]}], + "Varlen feature 'nullable_array' must be non-nullable nested arrays only. " + "Invalid schema found:", + ), + # test_varlen_feature_type_mismatch + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""", + {"int_list_list": VarlenFeature([2, -1], tf.dtypes.int64)}, + [{"int_list_list": [[1, 2], [1]]}], + "Schema value type and metadata type mismatch in feature 'int_list_list'. " + "Avro schema data type: int, metadata type: int64. " + "Invalid schema found:", + ), + # test_varlen_feature_rank_mismatch + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""", + {"int_list_list": VarlenFeature([1, -1, 2], tf.dtypes.int32)}, + [{"int_list_list": [[1, 2], [3]]}], + "Mismatch between avro schema rank and metadata rank in feature 'int_list_list'. " + "Avro schema rank: 2, metadata rank: 3. 
" + "Invalid schema found:", + ), + # test_sparse_missing_indices_column + ( + """{ + "type": "record", + "name": "sparse_test", + "fields": [ + { + "name": "sparse_feature", + "type": { + "type": "record", + "name": "ignore_name", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "indices2", + "type": { + "type": "array", + "items": "int" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "long" + } + } ] + } + } + ]}""", + {"sparse_feature": SparseFeature(shape=[10, 10], dtype=tf.dtypes.int64)}, + [ + { + "sparse_feature": { + "indices0": [1, 2], + "indices2": [3, 4], + "values": [10, 11], + } + } + ], + "Sparse schema indices should be contiguous (indices0, indices1, ...). " + "Input data schema:", + ), + # test_sparse_missing_values_column + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "indices1", + "type": { + "type": "array", + "items": "int" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10, 10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [1, 2], "indices1": [3, 4]}}], + "Sparse schema is missing values column. Input data schema:", + ), + # test_sparse_extra_column + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "indices1", + "type": { + "type": "array", + "items": "int" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "extraColumn", + "type": { + "type": "array", + "items": "int" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10, 10], dtype=tf.dtypes.int64)}, + [ + { + "sparse_key": { + "indices0": [1, 2], + "indices1": [3, 4], + "values": [10, 11], + "extraColumn": [100, 101], + } + } + ], + "Sparse schema can only contain 'indices' columns and a 'values' column. " + "Input data schema:", + ), + # test_sparse_invalid_indices_array + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": "int" + }, { + "name": "values", + "type": { + "type": "array", + "items": "long" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": 1, "values": [10, 11]}}], + "Unsupported indices type found in feature 'sparse_key'. " + "Sparse tensor indices must be a non-nullable array of non-nullable int or long. " + "Invalid schema found:", + ), + # test_sparse_invalid_indices_type + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "float" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "long" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0.1, 1.1], "values": [10, 11]}}], + "Unsupported indices type found in feature 'sparse_key'. " + "Sparse tensor indices must be a non-nullable array of non-nullable int or long. 
" + "Invalid schema found:", + ), + # test_sparse_invalid_nested_indices + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "long" + } + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "long" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [[1, 2]], "values": [10, 11]}}], + "Unsupported indices type found in feature 'sparse_key'. " + "Sparse tensor indices must be a non-nullable array of non-nullable int or long. " + "Invalid schema found:", + ), + # test_sparse_nullable_indices + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": ["null", "int"], + "default": null + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "long" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [1, 2], "values": [10, 11]}}], + "Unsupported indices type found in feature 'sparse_key'. " + "Sparse tensor indices must be a non-nullable array of non-nullable int or long. " + "Invalid schema found:", + ), + # test_sparse_invalid_value_array + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "values", + "type": "long" + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0, 1], "values": 1}}], + "Unsupported value type found in feature 'sparse_key'. " + "Tensor value must be a non-nullable array of non-nullable int, long, float, double, boolean, bytes, or string. " + "Invalid schema found:", + ), + # test_sparse_invalid_value_type + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "null" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0, 1], "values": [None, None]}}], + "Unsupported value type found in feature 'sparse_key'. " + "Tensor value must be a non-nullable array of non-nullable int, long, float, double, boolean, bytes, or string. " + "Invalid schema found:", + ), + # test_sparse_nullable_value + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": ["null", "int"], + "default": null + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0, 1], "values": [1, 2]}}], + "Unsupported value type found in feature 'sparse_key'. 
" + "Tensor value must be a non-nullable array of non-nullable int, long, float, double, boolean, bytes, or string. " + "Invalid schema found:", + ), + # test_sparse_nullable_indices_array + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": ["null", { + "type": "array", + "items": "long" + }] + }, { + "name": "values", + "type": { + "type": "array", + "items": "int" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int32)}, + [{"sparse_key": {"indices0": [0, 1], "values": [1, 2]}}], + "Unsupported indices type found in feature 'sparse_key'. " + "Sparse tensor indices must be a non-nullable array of non-nullable int or long. " + "Invalid schema found:", + ), + # test_sparse_nullable_values_array + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "values", + "type": ["null", { + "type": "array", + "items": "int" + }] + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0, 1], "values": [1, 2]}}], + "Unsupported value type found in feature 'sparse_key'. " + "Tensor value must be a non-nullable array of non-nullable int, long, float, double, boolean, bytes, or string. " + "Invalid schema found:", + ), + # test_sparse_invalid_nested_values + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "long" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0, 1], "values": [[1, 2]]}}], + "Unsupported value type found in feature 'sparse_key'. " + "Tensor value must be a non-nullable array of non-nullable int, long, float, double, boolean, bytes, or string. " + "Invalid schema found:", + ), + # test_sparse_value_type_mismatch + ( + """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type": { + "type": "record", + "name": "SparseTensor", + "fields": [ { + "name": "indices0", + "type": { + "type": "array", + "items": "int" + } + }, { + "name": "values", + "type": { + "type": "array", + "items": "int" + } + }] + } + } + ]}""", + {"sparse_key": SparseFeature(shape=[10], dtype=tf.dtypes.int64)}, + [{"sparse_key": {"indices0": [0, 1], "values": [1, 2]}}], + "Schema value type and metadata type mismatch in feature 'sparse_key'. " + "Avro schema data type: int, metadata type: int64. 
" + "Invalid schema found:", + ), + ], +) +def test_feature_schema_check(schema, features, record_data, error_message): + """test_feature_schema_check""" + with pytest.raises(errors.InvalidArgumentError, match=re.escape(error_message)): + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=1, + ) + iterator = iter(dataset) + next(iterator) + + +@pytest.mark.parametrize( + ["record_data", "feature_name"], + [([{"dense": None}, {"dense": None}, {"dense": None}, {"dense": None}], "dense")], +) +def test_ATDSReader_skip_block_with_null_value(record_data, feature_name): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "dense", + "type": ["null", "int"] + } + ]}""" + schema = avro.schema.Parse(writer_schema) + filename = os.path.join(tempfile.gettempdir(), "test.avro") + with open(filename, "wb") as f: + writer = DataFileWriter(f, DatumWriter(), schema) + for record in record_data: + writer.append(record) + writer.close() + features = { + "dense": DenseFeature([], tf.int32), + } + error_message = ( + f"Failed to decode feature {feature_name}. " f"Reason: Feature value is null." + ) + + def _load_dataset_inside_interleave(filename, features): + _dataset = tf.data.Dataset.from_tensor_slices([filename]) + _dataset = _dataset.interleave( + lambda filename: ATDSDataset( + filenames=filename, + batch_size=2, + drop_remainder=True, + features=features, + reader_buffer_size=262144, + shuffle_buffer_size=10000, + num_parallel_calls=4, + ), + cycle_length=1, + ) + return _dataset + + with pytest.raises(errors.InvalidArgumentError, match=error_message): + indices = tf.data.Dataset.range(1) + dataset = indices.interleave( + map_func=lambda x: _load_dataset_inside_interleave(filename, features), + cycle_length=1, + num_parallel_calls=tf.data.experimental.AUTOTUNE, + deterministic=False, + ) + iterator = iter(dataset) + for _ in range(4): + next(iterator) + + +@pytest.mark.parametrize( + [ + "filenames", + "batch_size", + "reader_buffer_size", + "shuffle_buffer_size", + "num_parallel_calls", + "error_message", + ], + [ + ( + [["file_1"], ["file_2"]], + 2, + 1024, + 1024, + 1, + "`filenames` must be a scalar or a vector.", + ), + ( + ["filename"], + 0, + 1024, + 1024, + 1, + "`batch_size` must be greater than 0 but found 0", + ), + ( + ["filename"], + -1, + 1024, + 1024, + 1, + "`batch_size` must be greater than 0 but found -1", + ), + ( + ["filename"], + 1, + 0, + 1024, + 1, + "`reader_buffer_size` must be greater than 0 but found 0", + ), + ( + ["filename"], + 1, + -2, + 1024, + 1, + "`reader_buffer_size` must be greater than 0 but found -2", + ), + ( + ["filename"], + 1, + 1024, + -5, + 1, + "`shuffle_buffer_size` must be greater than or equal to 0 but found -5", + ), + ( + ["filename"], + 1, + 1024, + 1024, + -2, + "`num_parallel_calls` must be a positive integer or tf.data.AUTOTUNE, got -2", + ), + ], +) +def test_ATDS_dataset_creation_with_invalid_argument( + filenames, + batch_size, + reader_buffer_size, + shuffle_buffer_size, + num_parallel_calls, + error_message, +): + with pytest.raises(errors.InvalidArgumentError, match=re.escape(error_message)): + ATDSDataset( + filenames=filenames, + batch_size=batch_size, + features={"x": DenseFeature([], tf.dtypes.int32)}, + reader_buffer_size=reader_buffer_size, + shuffle_buffer_size=shuffle_buffer_size, + num_parallel_calls=num_parallel_calls, + ) + + +@pytest.mark.parametrize( + ["filenames", "batch_size", "features", "error_message"], + [ + ( + None, + 1, + 
{"x": DenseFeature([], tf.int32)}, + r"Attempt to convert a value \(None\) with an unsupported type .*", + ), + ( + tf.data.Dataset.from_tensor_slices(["filename"]), + 1, + {"x": DenseFeature([], tf.int32)}, + r"Attempt to convert a value .* with an unsupported type .*", + ), + ( + ["filename"], + None, + {"x": DenseFeature([], tf.int32)}, + r"Attempt to convert a value \(None\) with an unsupported type .*", + ), + ( + ["filename"], + 1, + {"featureA": ([], tf.int32)}, + r"Unknown ATDS feature configuration \(\[\], tf\.int32\)\. Only .* are supported\.", + ), + ( + ["filename"], + 1, + {}, + "Features dict cannot be empty and should have at least one feature.", + ), + ( + ["filename"], + 1, + None, + r"Features can only be a dict with feature name as key and " + r"ATDS feature configuration as value but found None\. " + r"Available feature configuration are .*", + ), + ( + ["filename"], + 1, + ([], tf.int32), + r"Features can only be a dict with feature name as key and " + r"ATDS feature configuration as value but found \(\[\], tf\.int32\)\. " + r"Available feature configuration are .*", + ), + ], +) +def test_ATDS_dataset_creation_with_value_error( + filenames, batch_size, features, error_message +): + with pytest.raises(ValueError, match=error_message): + ATDSDataset(filenames=filenames, batch_size=batch_size, features=features) + + +@pytest.mark.parametrize( + [ + "feature_keys", + "feature_types", + "sparse_dtypes", + "sparse_shapes", + "output_dtypes", + "output_shapes", + "error_message", + ], + [ + ( + ["feature_1"], + ["dense", "sparse"], + [], + [], + [tf.int32], + [[]], + "The length of feature_keys must equal to the length of " + "feature_types. [1 != 2]", + ), + ( + ["feature_1", "feature_2"], + ["dense", "dense"], + [], + [], + [tf.int32], + [[], []], + "The length of feature_keys must equal to the length of " + "output_dtypes. [2 != 1]", + ), + ( + ["feature_1"], + ["dense"], + [], + [], + [tf.int32], + [[], []], + "The length of feature_keys must equal to the length of " + "output_shapes. [1 != 2]", + ), + ( + ["feature_1"], + ["dense"], + [tf.int32], + [], + [tf.int32], + [[]], + "The length of sparse_dtypes must equal to the number of " + "sparse features configured in feature_types. [1 != 0]", + ), + ( + ["feature_1"], + ["sparse"], + [tf.int32], + [[1], []], + [tf.int32], + [[1]], + "The length of sparse_shapes must equal to the number of " + "sparse features configured in feature_types. [2 != 1]", + ), + ( + ["feature_1"], + ["ragged"], + [], + [], + [tf.int32], + [[1]], + "Invalid feature_type, 'ragged'. 
Only dense, sparse, and " + "varlen are supported.", + ), + ], +) +def test_atds_dataset_invalid_attribute( + feature_keys, + feature_types, + sparse_dtypes, + sparse_shapes, + output_dtypes, + output_shapes, + error_message, +): + with pytest.raises(errors.InvalidArgumentError, match=re.escape(error_message)): + core_ops.io_atds_dataset( + filenames="filename", + batch_size=1, + drop_remainder=False, + reader_buffer_size=1024, + shuffle_buffer_size=0, + num_parallel_calls=1, + feature_keys=feature_keys, + feature_types=feature_types, + sparse_dtypes=sparse_dtypes, + sparse_shapes=sparse_shapes, + output_dtypes=output_dtypes, + output_shapes=output_shapes, + ) + + +@pytest.mark.parametrize( + ["record_data", "feature_name"], + [ + ( + [ + { + "dense": 0, + "varlen": [1, 2], + "sparse": {"indices0": [0], "values": [0]}, + }, + { + "dense": None, + "varlen": [], + "sparse": {"indices0": [0], "values": [0]}, + }, + { + "dense": 0, + "varlen": [1, 2], + "sparse": {"indices0": [0], "values": [0]}, + }, + ], + "dense", + ), + ( + [ + { + "dense": 0, + "varlen": None, + "sparse": {"indices0": [0], "values": [0]}, + }, + { + "dense": 1, + "varlen": [1, 2], + "sparse": {"indices0": [0], "values": [0]}, + }, + { + "dense": 0, + "varlen": [1, 2], + "sparse": {"indices0": [0], "values": [0]}, + }, + ], + "varlen", + ), + ( + [ + {"dense": 0, "varlen": [], "sparse": {"indices0": [0], "values": [0]}}, + {"dense": 1, "varlen": [1, 2], "sparse": None}, + { + "dense": 0, + "varlen": [1, 2], + "sparse": {"indices0": [0], "values": [0]}, + }, + ], + "sparse", + ), + ], +) +def test_ATDS_dataset_with_null_value(record_data, feature_name): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "dense", + "type": ["null", "int"]}, + { + "name": "varlen", + "type": [ + {"type": "array", "items": "int"}, + "null" + ] + }, + { + "name": "sparse", + "type" : [ { + "type" : "record", + "name" : "IntSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "int" } + } ] + }, "null" ] + } + ]}""" + schema = avro.schema.Parse(writer_schema) + filename = os.path.join(tempfile.gettempdir(), "test.avro") + with open(filename, "wb") as f: + writer = DataFileWriter(f, DatumWriter(), schema) + for record in record_data: + writer.append(record) + writer.close() + + features = { + "dense": DenseFeature([], tf.int32), + "varlen": VarlenFeature([-1], tf.int32), + "sparse": SparseFeature([1], tf.int32), + } + error_message = ( + f"Failed to decode feature {feature_name}. " f"Reason: Feature value is null." 
+ ) + with pytest.raises(errors.InvalidArgumentError, match=error_message): + dataset = ATDSDataset(filename, features=features, batch_size=2) + iterator = iter(dataset) + next(iterator) + + +@pytest.mark.parametrize("shuffle_buffer_size", [0, 1, 3, 5, 10, 30, 50, 100, 200]) +@pytest.mark.parametrize("batch_size", [2, 5, 10]) +@pytest.mark.parametrize("num_parallel_calls", [tf.data.AUTOTUNE, 10]) +def test_valid_shuffle(shuffle_buffer_size, batch_size, num_parallel_calls): + def list_from_dataset(dataset): + as_numpy_array = [elem["x"] for elem in list(dataset.as_numpy_iterator())] + return list(itertools.chain(*as_numpy_array)) + + data_size = 100 + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "x", "type": "int"} + ]}""" + schema = avro.schema.Parse(writer_schema) + filename = os.path.join(tempfile.gettempdir(), "test.avro") + record_data = [{"x": x} for x in range(0, data_size)] + # Generate an avro file with 10 avro blocks. + with open(filename, "wb") as f: + writer = DataFileWriter(f, DatumWriter(), schema) + for i in range(len(record_data)): + writer.append(record_data[i]) + if (i + 1) % 10 == 0: + writer.sync() # Dump every 10 records into an avro block. + writer.close() + + features = { + "x": DenseFeature([], tf.dtypes.int32), + } + # Generates a list of 100 epochs and check if each dataset has a different order + list_of_lists = [] + list_of_sets = [] + num_epochs = 100 + for i in range(0, num_epochs): + li = list_from_dataset( + ATDSDataset( + filenames=filename, + features=features, + shuffle_buffer_size=shuffle_buffer_size, + batch_size=batch_size, + num_parallel_calls=num_parallel_calls, + ) + ) + list_of_lists.append(li) + list_of_sets.append(set(li)) + + for i in range(0, num_epochs): + for j in range(0, num_epochs): + assert ( + list_of_sets[i] == list_of_sets[j] + ), f"Set {list_of_sets[i]} must include the elements of {list_of_sets[j]}" + if i != j and shuffle_buffer_size > 0: + assert ( + list_of_lists[i] != list_of_lists[j] + ), f"result {list_of_lists[i]} must be shuffled, and should not be identical to expected_data {list_of_lists[j]}" + else: + assert ( + list_of_lists[i] == list_of_lists[j] + ), f"result {list_of_lists[i]} is shuffled, it should be identical to expected_data {list_of_lists[j]}" + + +def test_empty_sparse_buffer(): + """Tests the empty sparse buffer for dense, varlen, and sparse features.""" + data_size = 100 + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense", "type": "int"}, + {"name": "varlen", "type": {"type": "array", "items": "float"} }, + { + "name": "sparse", + "type" : { + "type" : "record", + "name" : "IntSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "int" } + } ] + } + } + ]}""" + schema = avro.schema.parse(writer_schema) + filename = os.path.join(tempfile.gettempdir(), "test.avro") + record_data = [ + { + "dense": random.randint(0, 100), + "varlen": np.random.rand(random.randint(0, 100)).tolist(), + "sparse": { + "indices0": [random.randint(0, 4), random.randint(4, 9)], + "values": [2 * x, 5 * x], + }, + } + for x in range(0, data_size) + ] + # Generate an avro file with 10 avro blocks. + with open(filename, "wb") as f: + writer = DataFileWriter(f, DatumWriter(), schema) + for i in range(len(record_data)): + writer.append(record_data[i]) + if (i + 1) % 10 == 0: + writer.sync() # Dump every 10 records into an avro block. 
+ writer.close() + + features = { + "dense": DenseFeature([], tf.int32), + "sparse": SparseFeature([10], tf.dtypes.int32), + "varlen": VarlenFeature([-1], tf.dtypes.float32), + } + + # ATDSReader is parallelized along blocks. + # This test ensures that there are enough + # threads to gaurantee a few empty sparse buffers + dataset = ATDSDataset( + filenames=filename, + features=features, + shuffle_buffer_size=100, + batch_size=2, + num_parallel_calls=15, + ) + + for _ in dataset: + pass + + +def test_dataset_terminate(): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + ]}""" + record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] + features = {"int_value": DenseFeature([], tf.dtypes.int32)} + + def itr(dataset): + iter(dataset) + + dataset = create_atds_dataset( + writer_schema=writer_schema, + record_data=record_data, + features=features, + batch_size=2, + drop_remainder=False, + ) + # Create the internal iterator and then let it get out of scope/destroyed + # This will fail if the destructor is waiting to delete the non-existent + # prefetch thread. + itr(dataset) + itr(dataset) + + +class ATDSDatasetTest(AvroDatasetTestBase): + """ATDSDatasetTest""" + + @parameterized.expand([("null"), ("deflate"), ("snappy")]) + def test_decompression(self, codec): + data_size = 100 + data_dimension = 100 + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_1d", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + int_list = np.random.randint( + low=-100, high=100, size=data_dimension, dtype=int + ).tolist() + record_data = [{"int_1d": int_list} for _ in range(0, data_size)] + + features = { + "int_1d": DenseFeature([data_dimension], tf.dtypes.int32), + } + expected_data = [ + { + "int_1d": tf.convert_to_tensor( + list(itertools.repeat(int_list, data_size)) + ), + } + ] + dataset = create_atds_dataset( + writer_schema=writer_schema, + record_data=record_data, + features=features, + batch_size=data_size, + codec=codec, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + @parameterized.expand([("null"), ("deflate"), ("snappy")]) + def test_decompression_with_auto_tune(self, codec): + """Test cost model and auto thread tuning.""" + data_size = 128 + data_dimension = 4096 + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_1d", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + int_list = np.ones(data_dimension, dtype=int).tolist() + record_data = [{"int_1d": int_list} for _ in range(0, data_size)] + + features = { + "int_1d": DenseFeature([data_dimension], tf.dtypes.int32), + } + dataset = create_atds_dataset( + writer_schema=writer_schema, + record_data=record_data, + features=features, + batch_size=16, + codec=codec, + num_parallel_calls=tf.data.AUTOTUNE, + ) + + for _ in dataset: + pass + + def test_sparse_feature_with_various_dtypes(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_1d", + "type" : { + "type" : "record", + "name" : "IntSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "int" } + } ] + } + }, + { + "name": "long_2d", + "type" : { + "type" : "record", + "name" : "LongSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : 
{ "type" : "array", "items" : "long" } + }, { + "name" : "indices1", + "type" : { "type" : "array", "items" : "long" } + } ] + } + }, + { + "name": "float_1d", + "type" : { + "type" : "record", + "name" : "FloatSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "float" } + } ] + } + }, + { + "name": "double_3d", + "type" : { + "type" : "record", + "name" : "DoubleSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "double" } + }, { + "name" : "indices2", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "indices1", + "type" : { "type" : "array", "items" : "long" } + } ] + } + }, + { + "name": "string_1d", + "type" : { + "type" : "record", + "name" : "StringSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "string" } + } ] + } + }, + { + "name": "bytes_1d", + "type" : { + "type" : "record", + "name" : "ByteSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "bytes" } + } ] + } + }, + { + "name": "bool_1d", + "type" : { + "type" : "record", + "name" : "BoolSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "boolean" } + } ] + } + } + ]}""" + s1 = bytes("abc", "utf-8") + s2 = bytes("def", "utf-8") + s3 = bytes("ijk", "utf-8") + s4 = bytes("lmn", "utf-8") + s5 = bytes("opq", "utf-8") + s6 = bytes("qrs", "utf-8") + s7 = bytes("tuv", "utf-8") + record_data = [ + { + "int_1d": {"indices0": [7, 9], "values": [2, 5]}, + "long_2d": {"indices0": [0], "values": [6], "indices1": [0]}, + "float_1d": {"indices0": [0, 1], "values": [0.5, -0.5]}, + "double_3d": { + "indices0": [0, 0, 0], + "indices1": [0, 0, 0], + "indices2": [0, 1, 2], + "values": [0.5, -0.5, 1.0], + }, + "string_1d": {"indices0": [2, 5, 8, 9], "values": ["A", "B", "C", "D"]}, + "bytes_1d": {"indices0": [2, 5, 8, 9], "values": [s1, s2, s3, s4]}, + "bool_1d": {"indices0": [100], "values": [False]}, + }, + { + "int_1d": {"indices0": [1], "values": [1]}, + "long_2d": {"indices0": [], "values": [], "indices1": []}, + "float_1d": {"indices0": [0], "values": [9.8]}, + "double_3d": { + "indices0": [0, 0, 0], + "indices1": [0, 1, 2], + "indices2": [0, 0, 0], + "values": [6.5, -1.5, 4.0], + }, + "string_1d": {"indices0": [2], "values": ["E"]}, + "bytes_1d": {"indices0": [2], "values": [s5]}, + "bool_1d": {"indices0": [88, 97], "values": [True, True]}, + }, + { + "int_1d": {"indices0": [2, 4], "values": [6, 8]}, + "long_2d": {"indices0": [0, 0], "values": [7, 8], "indices1": [3, 5]}, + "float_1d": {"indices0": [], "values": []}, + "double_3d": { + "indices0": [0, 1, 2], + "indices1": [0, 0, 0], + "indices2": [0, 0, 0], + "values": [3.5, -4.5, 7.0], + }, + "string_1d": {"indices0": [8, 9], "values": ["F", "G"]}, + "bytes_1d": {"indices0": [8, 9], "values": [s6, s7]}, + "bool_1d": {"indices0": [], "values": []}, + }, + ] + features = { + "int_1d": SparseFeature([10], dtype=tf.dtypes.int32), + "long_2d": SparseFeature([1, 6], dtype=tf.dtypes.int64), + "float_1d": SparseFeature([5], dtype=tf.dtypes.float32), + "double_3d": 
SparseFeature([3, 3, 3], dtype=tf.dtypes.float64), + "string_1d": SparseFeature([-1], dtype=tf.dtypes.string), + "bytes_1d": SparseFeature([-1], dtype=tf.dtypes.string), + "bool_1d": SparseFeature([101], dtype=tf.dtypes.bool), + } + expected_data = [ + { + "int_1d": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 7], + [0, 9], + [1, 1], + [2, 2], + [2, 4], + ], + values=[2, 5, 1, 6, 8], + dense_shape=[3, 10], + ), + "long_2d": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0], + [2, 0, 3], + [2, 0, 5], + ], + values=np.array([6, 7, 8], dtype=np.int64), + dense_shape=[3, 1, 6], + ), + "float_1d": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0], + [0, 1], + [1, 0], + ], + values=np.array([0.5, -0.5, 9.8], dtype=np.float32), + dense_shape=[3, 5], + ), + "double_3d": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0, 0], + [0, 0, 0, 1], + [0, 0, 0, 2], + [1, 0, 0, 0], + [1, 0, 1, 0], + [1, 0, 2, 0], + [2, 0, 0, 0], + [2, 1, 0, 0], + [2, 2, 0, 0], + ], + values=np.array( + [0.5, -0.5, 1.0, 6.5, -1.5, 4.0, 3.5, -4.5, 7.0], + dtype=np.float64, + ), + dense_shape=[3, 3, 3, 3], + ), + "string_1d": tf.compat.v1.SparseTensorValue( + indices=[[0, 2], [0, 5], [0, 8], [0, 9], [1, 2], [2, 8], [2, 9]], + values=["A", "B", "C", "D", "E", "F", "G"], + dense_shape=[3, 10], + ), + "bytes_1d": tf.compat.v1.SparseTensorValue( + indices=[[0, 2], [0, 5], [0, 8], [0, 9], [1, 2], [2, 8], [2, 9]], + values=[s1, s2, s3, s4, s5, s6, s7], + dense_shape=[3, 10], + ), + "bool_1d": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 100], + [1, 88], + [1, 97], + ], + values=[False, True, True], + dense_shape=[3, 101], + ), + } + ] + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=3, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + self.assertEqual( + dataset.element_spec, + { + "int_1d": tf.SparseTensorSpec([None, 10], dtype=tf.int32), + "long_2d": tf.SparseTensorSpec([None, 1, 6], dtype=tf.int64), + "float_1d": tf.SparseTensorSpec([None, 5], dtype=tf.float32), + "double_3d": tf.SparseTensorSpec([None, 3, 3, 3], dtype=tf.float64), + "string_1d": tf.SparseTensorSpec([None, None], dtype=tf.string), + "bytes_1d": tf.SparseTensorSpec([None, None], dtype=tf.string), + "bool_1d": tf.SparseTensorSpec([None, 101], dtype=tf.bool), + }, + ) + + def test_sparse_with_empty_tensor(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "float" } + }, { + "name" : "indices1", + "type" : { "type" : "array", "items" : "int" } + } ] + } + } + ]}""" + record_data = [ + {"sparse_key": {"indices0": [], "indices1": [], "values": []}}, + ] + features = { + "sparse_key": SparseFeature(shape=[10, 10], dtype=tf.dtypes.float32) + } + expected_data = [ + { + "sparse_key": tf.compat.v1.SparseTensorValue( + indices=np.array([], dtype=np.int64).reshape((0, 3)), + values=[], + dense_shape=[1, 10, 10], + ) + } + ] + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=1, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_batching_without_dropping_remainder(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + 
]}""" + record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] + features = {"int_value": DenseFeature([], tf.dtypes.int32)} + expected_data = [ + {"int_value": tf.convert_to_tensor([0, 1])}, + {"int_value": tf.convert_to_tensor([2])}, + ] + dataset = create_atds_dataset( + writer_schema=writer_schema, + record_data=record_data, + features=features, + batch_size=2, + drop_remainder=False, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + self.assertEqual( + dataset.element_spec, {"int_value": tf.TensorSpec([None], dtype=tf.int32)} + ) + + def test_batching_with_dropping_remainder(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense", "type": "int"}, + {"name": "varlen", "type": {"type": "array", "items": "int"} }, + { + "name": "sparse", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "int" } + } ] + } + } + ]}""" + record_data = [ + {"dense": 0, "sparse": {"indices0": [0], "values": [1]}, "varlen": [2]}, + {"dense": 1, "sparse": {"indices0": [0], "values": [2]}, "varlen": [3, 4]}, + {"dense": 2, "sparse": {"indices0": [0], "values": [3]}, "varlen": []}, + ] + features = { + "dense": DenseFeature([], tf.dtypes.int32), + "sparse": SparseFeature([1], tf.dtypes.int32), + "varlen": VarlenFeature([-1], tf.dtypes.int32), + } + expected_data = [ + { + "dense": tf.convert_to_tensor([0, 1]), + "sparse": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0]], + values=[1, 2], + dense_shape=[2, 1], + ), + "varlen": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [1, 1]], + values=[2, 3, 4], + dense_shape=[2, 2], + ), + }, + ] + dataset = create_atds_dataset( + writer_schema=writer_schema, + record_data=record_data, + features=features, + batch_size=2, + drop_remainder=True, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + self.assertEqual( + dataset.element_spec, + { + "dense": tf.TensorSpec([2], dtype=tf.int32), + "sparse": tf.SparseTensorSpec([2, 1], dtype=tf.int32), + "varlen": tf.SparseTensorSpec([2, None], dtype=tf.int32), + }, + ) + + def test_sparse_with_single_indices(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "float" } + } ] + } + } + ]}""" + record_data = [ + {"sparse_key": {"indices0": [0, 1], "values": [0.5, -0.5]}}, + {"sparse_key": {"indices0": [7], "values": [-1.5]}}, + {"sparse_key": {"indices0": [6, 8], "values": [1.5, -2.5]}}, + ] + features = {"sparse_key": SparseFeature(dtype=tf.dtypes.float32, shape=[10])} + expected_data = [ + { + "sparse_key": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 1], [1, 7], [2, 6], [2, 8]], + values=[0.5, -0.5, -1.5, 1.5, -2.5], + dense_shape=[3, 10], + ) + } + ] + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=3, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_sparse_with_int_indices(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_key", + "type" : { + "type" : "record", + "name" : "SparseTensor", + 
"fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "float" } + }, { + "name" : "indices1", + "type" : { "type" : "array", "items" : "int" } + } ] + } + } + ]}""" + record_data = [ + { + "sparse_key": { + "indices0": [0, 0], + "indices1": [1, 2], + "values": [0.5, -0.5], + } + }, + {"sparse_key": {"indices0": [7], "indices1": [0], "values": [-1.5]}}, + { + "sparse_key": { + "indices0": [6, 8], + "indices1": [9, 2], + "values": [1.5, -2.5], + } + }, + ] + features = { + "sparse_key": SparseFeature(dtype=tf.dtypes.float32, shape=[10, 10]) + } + expected_data = [ + { + "sparse_key": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 1], + [0, 0, 2], + [1, 7, 0], + [2, 6, 9], + [2, 8, 2], + ], + values=[0.5, -0.5, -1.5, 1.5, -2.5], + dense_shape=[3, 10, 10], + ) + } + ] + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=3, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_dense_feature_with_various_dtype(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_1d", + "type": { + "type": "array", + "items": "int" + } + }, + { + "name": "long_0d", + "type": "long" + }, + { + "name": "float_1d", + "type": { + "type": "array", + "items": "float" + } + }, + { + "name": "double_3d", + "type": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "array", + "items": "double" + } + } + } + }, + { + "name": "string_2d", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "bytes_0d", + "type": "bytes" + }, + { + "name": "bytes_2d", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "bytes" + } + } + }, + { + "name": "bool_0d", + "type": "boolean" + } + ]}""" + s1 = bytes("abc", "utf-8") + s2 = bytes("def", "utf-8") + s3 = bytes("ijk", "utf-8") + + record_data = [ + { + "int_1d": [0, 1, 2], + "long_0d": 7, + "float_1d": [0.1], + "double_3d": [[[0.9], [0.8]]], + "string_2d": [["abc"], ["de"]], + "bytes_0d": s1, + "bytes_2d": [[s1], [s2]], + "bool_0d": False, + }, + { + "int_1d": [3, 4, 5], + "long_0d": 8, + "float_1d": [0.2], + "double_3d": [[[-0.9], [-0.8]]], + "string_2d": [["XX"], ["YZ"]], + "bytes_0d": s2, + "bytes_2d": [[s2], [s3]], + "bool_0d": True, + }, + { + "int_1d": [6, 7, 8], + "long_0d": 9, + "float_1d": [0.3], + "double_3d": [[[1.5e10], [1.1e20]]], + "string_2d": [["CK"], [""]], + "bytes_0d": s3, + "bytes_2d": [[s3], [s1]], + "bool_0d": False, + }, + ] + features = { + "int_1d": DenseFeature([3], tf.dtypes.int32), + "long_0d": DenseFeature([], tf.dtypes.int64), + "float_1d": DenseFeature([1], tf.dtypes.float32), + "double_3d": DenseFeature([1, 2, 1], tf.dtypes.float64), + "string_2d": DenseFeature([2, 1], tf.dtypes.string), + "bytes_0d": DenseFeature([], tf.dtypes.string), + "bytes_2d": DenseFeature([2, 1], tf.dtypes.string), + "bool_0d": DenseFeature([], tf.dtypes.bool), + } + expected_data = [ + { + "int_1d": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]]), + "long_0d": tf.convert_to_tensor(np.array([7, 8, 9], dtype=np.int64)), + "float_1d": tf.convert_to_tensor( + np.array([[0.1], [0.2], [0.3]], dtype=np.float32) + ), + "double_3d": tf.convert_to_tensor( + np.array( + [[[[0.9], [0.8]]], [[[-0.9], [-0.8]]], [[[1.5e10], [1.1e20]]]], + dtype=np.float64, + ) + ), + "string_2d": tf.convert_to_tensor( + [[["abc"], ["de"]], [["XX"], 
["YZ"]], [["CK"], [""]]] + ), + "bytes_0d": tf.convert_to_tensor([s1, s2, s3]), + "bytes_2d": tf.convert_to_tensor( + [[[s1], [s2]], [[s2], [s3]], [[s3], [s1]]] + ), + "bool_0d": tf.convert_to_tensor([False, True, False]), + }, + ] + + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=3, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + self.assertEqual( + dataset.element_spec, + { + "int_1d": tf.TensorSpec([None, 3], dtype=tf.int32), + "long_0d": tf.TensorSpec([None], dtype=tf.int64), + "float_1d": tf.TensorSpec([None, 1], dtype=tf.float32), + "double_3d": tf.TensorSpec([None, 1, 2, 1], dtype=tf.float64), + "string_2d": tf.TensorSpec([None, 2, 1], dtype=tf.string), + "bytes_0d": tf.TensorSpec([None], dtype=tf.string), + "bytes_2d": tf.TensorSpec([None, 2, 1], dtype=tf.string), + "bool_0d": tf.TensorSpec([None], dtype=tf.bool), + }, + ) + + def test_skipping_opaque_contextual_columns(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "opaque_contextual_column_1", + "type": { + "type": "array", + "items": "int" + } + }, + { + "name": "opaque_contextual_column_3", + "type": "string" + }, + { + "name": "feature", + "type": { + "type": "array", + "items": "float" + } + }, + { + "name": "opaque_contextual_column_2", + "type" : { + "type" : "record", + "name" : "TermValues", + "fields" : [ { + "name" : "term", + "type" : { "type" : "array", "items" : "string" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "float" } + } ] + } + } + ]}""" + record_data = [ + { + "opaque_contextual_column_1": [0, 1, 2], + "feature": [0.1], + "opaque_contextual_column_3": "ABC", + "opaque_contextual_column_2": { + "term": ["A", "B"], + "values": [0.5, -0.5], + }, + }, + { + "opaque_contextual_column_1": [], + "feature": [0.2], + "opaque_contextual_column_3": "DEF", + "opaque_contextual_column_2": {"term": ["C"], "values": [1.0]}, + }, + { + "opaque_contextual_column_1": [135], + "feature": [0.3], + "opaque_contextual_column_3": "GH", + "opaque_contextual_column_2": {"term": [], "values": [1.8]}, + }, + { + "opaque_contextual_column_1": [-2, -3], + "feature": [0.4], + "opaque_contextual_column_3": "I", + "opaque_contextual_column_2": { + "term": ["A", "B", "C"], + "values": [0.5], + }, + }, + ] + features = { + "feature": DenseFeature([1], tf.dtypes.float32), + } + expected_data = [ + { + "feature": tf.convert_to_tensor( + np.array([[0.1], [0.2], [0.3], [0.4]], dtype=np.float32) + ), + }, + ] + + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=4, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_varlen_feature_with_various_dtypes(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_feature", + "type": { + "type": "array", + "items": "int" + } + }, + { + "name": "long_feature", + "type": "long" + }, + { + "name": "float_feature", + "type": { + "type": "array", + "items": "float" + } + }, + { + "name": "double_feature", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "double" + } + } + }, + { + "name": "string_feature", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "bytes_feature", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "bytes" + } + } + }, + { + "name": "bool_feature", + "type": { + "type": 
"array", + "items": { + "type": "array", + "items": { + "type": "array", + "items": "boolean" + } + } + } + } + ]}""" + s1 = bytes("abc", "utf-8") + s2 = bytes("def", "utf-8") + s3 = bytes("ijk", "utf-8") + s4 = bytes("lmn", "utf-8") + s5 = bytes("opq", "utf-8") + s6 = bytes("qrs", "utf-8") + record_data = [ + { + "int_feature": [0], + "long_feature": 1, + "float_feature": [1.5, -2.7], + "double_feature": [[3.9], [-1.0, 1.0]], + "string_feature": [["abc"], ["de"]], + "bytes_feature": [[s1], [s2]], + "bool_feature": [[[True]], [[False, False], [True]]], + }, + { + "int_feature": [], + "long_feature": -1, + "float_feature": [2.0, 3.0], + "double_feature": [[], [7.0]], + "string_feature": [["fg"], ["hi"], ["jk"]], + "bytes_feature": [[s3], [s4], [s5]], + "bool_feature": [[[False]], [[False, True, True]]], + }, + { + "int_feature": [1, 2], + "long_feature": 2, + "float_feature": [5.5, 6.5], + "double_feature": [[], []], + "string_feature": [["lmn"]], + "bytes_feature": [[s6]], + "bool_feature": [[[True], [False]]], + }, + ] + features = { + "int_feature": VarlenFeature([-1], tf.dtypes.int32), + "long_feature": VarlenFeature([], tf.dtypes.int64), + "float_feature": VarlenFeature([2], tf.dtypes.float32), + "double_feature": VarlenFeature([-1, -1], tf.dtypes.float64), + "string_feature": VarlenFeature([-1, 1], tf.dtypes.string), + "bytes_feature": VarlenFeature([-1, 1], tf.dtypes.string), + "bool_feature": VarlenFeature([-1, -1, -1], tf.dtypes.bool), + } + expected_data = [ + { + "int_feature": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [2, 0], [2, 1]], + values=[0, 1, 2], + dense_shape=[3, 2], + ), + "long_feature": tf.compat.v1.SparseTensorValue( + indices=[[0], [1], [2]], + values=np.array([1, -1, 2], dtype=np.int64), + dense_shape=[3], + ), + "float_feature": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1]], + values=np.array([1.5, -2.7, 2.0, 3.0, 5.5, 6.5], dtype=np.float32), + dense_shape=[3, 2], + ), + "double_feature": tf.compat.v1.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 1, 1], [1, 1, 0]], + values=np.array([3.9, -1.0, 1.0, 7.0], dtype=np.float64), + dense_shape=[3, 2, 2], + ), + "string_feature": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0], + [0, 1, 0], + [1, 0, 0], + [1, 1, 0], + [1, 2, 0], + [2, 0, 0], + ], + values=["abc", "de", "fg", "hi", "jk", "lmn"], + dense_shape=[3, 3, 1], + ), + "bytes_feature": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0], + [0, 1, 0], + [1, 0, 0], + [1, 1, 0], + [1, 2, 0], + [2, 0, 0], + ], + values=[s1, s2, s3, s4, s5, s6], + dense_shape=[3, 3, 1], + ), + "bool_feature": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0, 0], + [0, 1, 0, 0], + [0, 1, 0, 1], + [0, 1, 1, 0], + [1, 0, 0, 0], + [1, 1, 0, 0], + [1, 1, 0, 1], + [1, 1, 0, 2], + [2, 0, 0, 0], + [2, 0, 1, 0], + ], + values=[ + True, + False, + False, + True, + False, + False, + True, + True, + True, + False, + ], + dense_shape=[3, 2, 2, 3], + ), + }, + ] + + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=3, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + self.assertEqual( + dataset.element_spec, + { + "int_feature": tf.SparseTensorSpec([None, None], dtype=tf.int32), + "long_feature": tf.SparseTensorSpec([None], dtype=tf.int64), + "float_feature": tf.SparseTensorSpec([None, 2], dtype=tf.float32), + "double_feature": tf.SparseTensorSpec( + [None, None, None], dtype=tf.float64 + ), + "string_feature": 
tf.SparseTensorSpec([None, None, 1], dtype=tf.string), + "bytes_feature": tf.SparseTensorSpec([None, None, 1], dtype=tf.string), + "bool_feature": tf.SparseTensorSpec( + [None, None, None, None], dtype=tf.bool + ), + }, + ) + + def test_sparse_feature_serialization_deserialization(self): + schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "x", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [ + {"x": [0]}, + {"x": []}, + {"x": [1, 2]}, + ] + features = { + "x": VarlenFeature([-1], tf.dtypes.int32), + } + expected_data = [ + { + "x": tf.compat.v1.SparseTensorValue( + indices=[[0, 0]], + values=[0], + dense_shape=[2, 1], + ), + }, + { + "x": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=[1, 2], + dense_shape=[1, 2], + ), + }, + ] + + dataset = create_atds_dataset( + writer_schema=schema, + record_data=record_data, + features=features, + batch_size=2, + ) + dataset = dataset.map(lambda d: {"x": tf.io.serialize_many_sparse(d["x"])}) + dataset = dataset.map( + lambda d: {"x": tf.io.deserialize_many_sparse(d["x"], dtype=tf.int32)} + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_ATDS_dataset_with_interleave(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + ]}""" + record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] + features = {"int_value": DenseFeature([], tf.dtypes.int32)} + expected_data = [ + {"int_value": tf.convert_to_tensor([0, 1])}, + {"int_value": tf.convert_to_tensor([2])}, + ] + filenames = AvroDatasetTestBase._setup_files( + writer_schema=writer_schema, records=record_data + ) + dataset = tf.data.Dataset.from_tensor_slices(filenames) + dataset = dataset.interleave( + lambda x: ATDSDataset(x, features=features, batch_size=2) + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_ATDS_dataset_with_file_not_existed(self): + filename = "file_not_exist" + error_message = f".*{filename}.*" + with pytest.raises(errors.NotFoundError, match=error_message): + dataset = ATDSDataset( + filename, features={"x": DenseFeature([], tf.int32)}, batch_size=2 + ) + iterator = iter(dataset) + next(iterator) + + def test_ATDS_dataset_with_feature_not_existed(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + ]}""" + record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] + filenames = AvroDatasetTestBase._setup_files( + writer_schema=writer_schema, records=record_data + ) + + feature_name = "feature_not_existed" + features = {feature_name: DenseFeature([], tf.dtypes.int32)} + error_message = ( + f"User defined feature '{feature_name}' cannot be found" + f" in the input data. 
Input data schema: .*" + ) + with pytest.raises(errors.InvalidArgumentError, match=error_message): + dataset = ATDSDataset(filenames, features=features, batch_size=2) + iterator = iter(dataset) + next(iterator) + + def test_ATDS_dataset_with_null_schema(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense_0d", "type": ["null", "int"]}, + { + "name": "dense_1d", + "type": [ + {"type": "array", "items": "int"}, + "null" + ] + }, + { + "name": "sparse", + "type" : [ { + "type" : "record", + "name" : "IntSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "int" } + } ] + }, "null" ] + }, + {"name": "non_null", "type": "int"} + ]}""" + record_data = [ + { + "dense_0d": 0, + "dense_1d": [1], + "non_null": 1, + "sparse": {"indices0": [0], "values": [1]}, + }, + { + "dense_0d": 1, + "dense_1d": [2], + "non_null": 2, + "sparse": {"indices0": [0], "values": [2]}, + }, + { + "dense_0d": 2, + "dense_1d": [3], + "non_null": 3, + "sparse": {"indices0": [0], "values": [3]}, + }, + ] + features = { + "dense_0d": DenseFeature([], tf.int32), + "dense_1d": DenseFeature([1], tf.int32), + "sparse": SparseFeature([1], tf.int32), + "non_null": DenseFeature([], tf.int32), + } + expected_data = [ + { + "dense_0d": tf.convert_to_tensor([0, 1, 2]), + "dense_1d": tf.convert_to_tensor([[1], [2], [3]]), + "sparse": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [2, 0]], + values=[1, 2, 3], + dense_shape=[3, 1], + ), + "non_null": tf.convert_to_tensor([1, 2, 3]), + } + ] + + dataset = create_atds_dataset( + writer_schema=writer_schema, + record_data=record_data, + features=features, + batch_size=3, + ) + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_ATDS_dataset_with_multithreading(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense", "type": {"type": "array", "items": "int"}}, + {"name": "varlen", "type": {"type": "array", "items": "int"} }, + { + "name": "sparse", + "type" : { + "type" : "record", + "name" : "IntSparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "int" } + } ] + } + } + ]}""" + schema = avro.schema.Parse(writer_schema) + filename = os.path.join(tempfile.gettempdir(), "test.avro") + record_data = [ + { + "dense": [0, 1, 2], + "sparse": {"indices0": [0], "values": [1]}, + "varlen": [2], + }, + { + "dense": [3, 4, 5], + "sparse": {"indices0": [1], "values": [2]}, + "varlen": [3], + }, + { + "dense": [6, 7, 8], + "sparse": {"indices0": [2], "values": [3]}, + "varlen": [], + }, + { + "dense": [9, 10, 11], + "sparse": {"indices0": [3], "values": [10]}, + "varlen": [5], + }, + { + "dense": [12, 13, 14], + "sparse": {"indices0": [4], "values": [1000]}, + "varlen": [6, 7, 8], + }, + ] + + # Generate an avro file with 5 avro blocks. + with open(filename, "wb") as f: + writer = DataFileWriter(f, DatumWriter(), schema) + for record in record_data: + writer.append(record) + writer.sync() # Dump the current record into an avro block. 
+ writer.close() + + features = { + "dense": DenseFeature([3], tf.dtypes.int32), + "sparse": SparseFeature([1], tf.dtypes.int32), + "varlen": VarlenFeature([-1], tf.dtypes.int32), + } + + expected_data = [ + { + "dense": tf.convert_to_tensor( + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 13, 14]] + ), + "sparse": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]], + values=[1, 2, 3, 10, 1000], + dense_shape=[5, 1], + ), + "varlen": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [3, 0], [4, 0], [4, 1], [4, 2]], + values=[2, 3, 5, 6, 7, 8], + dense_shape=[5, 3], + ), + } + ] + dataset = ATDSDataset( + filenames=filename, + features=features, + batch_size=5, + num_parallel_calls=3, # Process 5 blocks with 3 threads concurrently + ) + # Result should have deterministic order. + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_ATDS_dataset_processing_multiple_files(self): + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense", "type": "int"}, + {"name": "varlen", "type": {"type": "array", "items": "float"} }, + { + "name": "sparse", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "long" } + } ] + } + } + ]}""" + record_data = [ + {"dense": 0, "sparse": {"indices0": [0], "values": [1]}, "varlen": [2.0]}, + { + "dense": 1, + "sparse": {"indices0": [1], "values": [2]}, + "varlen": [3.0, 4.0], + }, + {"dense": 2, "sparse": {"indices0": [2], "values": [3]}, "varlen": []}, + {"dense": 3, "sparse": {"indices0": [3], "values": [4]}, "varlen": [5.0]}, + { + "dense": 4, + "sparse": {"indices0": [4], "values": [5]}, + "varlen": [6.0, 7.0, 8.0], + }, + {"dense": 5, "sparse": {"indices0": [5], "values": [6]}, "varlen": [9.0]}, + ] + + schema = avro.schema.Parse(writer_schema) + # Generate 3 avro files with 2 records in each file. + temp_dir = tempfile.gettempdir() + filenames = [] + for i in range(3): + filename = os.path.join(temp_dir, f"test-{i}.avro") + with open(filename, "wb") as f: + writer = DataFileWriter(f, DatumWriter(), schema) + for r in range(2): + writer.append(record_data[i * 2 + r]) + writer.close() + filenames.append(filename) + + features = { + "dense": DenseFeature([], tf.dtypes.int32), + "sparse": SparseFeature([10], tf.dtypes.int64), + "varlen": VarlenFeature([-1], tf.dtypes.float32), + } + expected_data = [ + { + "dense": tf.convert_to_tensor([0, 1, 2]), + "sparse": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 1], [2, 2]], + values=[1, 2, 3], + dense_shape=[3, 10], + ), + "varlen": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [1, 1]], + values=[2.0, 3.0, 4.0], + dense_shape=[3, 2], + ), + }, + { + "dense": tf.convert_to_tensor([3, 4, 5]), + "sparse": tf.compat.v1.SparseTensorValue( + indices=[[0, 3], [1, 4], [2, 5]], + values=[4, 5, 6], + dense_shape=[3, 10], + ), + "varlen": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [1, 1], [1, 2], [2, 0]], + values=[5.0, 6.0, 7.0, 8.0, 9.0], + dense_shape=[3, 3], + ), + }, + ] + dataset = ATDSDataset(filenames=filenames, features=features, batch_size=3) + # Result should have deterministic order. + self._verify_output(expected_data=expected_data, actual_dataset=dataset) + + def test_ATDS_dataset_processing_multiple_files_with_different_schema(self): + # Generate 2 avro files with different schema. 
+ writer_schema_1 = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense", "type": "int"}, + {"name": "varlen", "type": {"type": "array", "items": "float"} }, + { + "name": "sparse", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "long" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "long" } + } ] + } + } + ]}""" + record_data_1 = [ + {"dense": 0, "sparse": {"indices0": [0], "values": [1]}, "varlen": [2.0]}, + { + "dense": 1, + "sparse": {"indices0": [1], "values": [2]}, + "varlen": [3.0, 4.0], + }, + {"dense": 2, "sparse": {"indices0": [2], "values": [3]}, "varlen": []}, + ] + filenames_1 = AvroDatasetTestBase._setup_files( + writer_schema=writer_schema_1, records=record_data_1 + ) + + writer_schema_2 = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "dense", "type": "int"}, + {"name": "varlen", "type": {"type": "array", "items": "float"} }, + {"name": "unused", "type": {"type": "array", "items": "float"} }, + { + "name": "sparse", + "type" : { + "type" : "record", + "name" : "SparseTensor", + "fields" : [ { + "name" : "indices0", + "type" : { "type" : "array", "items" : "int" } + }, { + "name" : "values", + "type" : { "type" : "array", "items" : "long" } + } ] + } + } + ]}""" + record_data_2 = [ + { + "dense": 3, + "sparse": {"indices0": [3], "values": [4]}, + "varlen": [5.0], + "unused": [1.0], + }, + { + "dense": 4, + "sparse": {"indices0": [4], "values": [5]}, + "varlen": [6.0, 7.0, 8.0], + "unused": [], + }, + { + "dense": 5, + "sparse": {"indices0": [5], "values": [6]}, + "varlen": [9.0], + "unused": [-1.0, 2.0], + }, + ] + filenames_2 = AvroDatasetTestBase._setup_files( + writer_schema=writer_schema_2, records=record_data_2 + ) + filenames = filenames_1 + filenames_2 + + features = { + "dense": DenseFeature([], tf.dtypes.int32), + "sparse": SparseFeature([10], tf.dtypes.int64), + "varlen": VarlenFeature([-1], tf.dtypes.float32), + } + error_message = ( + "Avro schema should be consistent for all input files. " + "Schema in file .* varies from the schema in file .*" + ) + with pytest.raises(errors.InvalidArgumentError, match=error_message): + dataset = ATDSDataset(filenames=filenames, features=features, batch_size=3) + iterator = iter(dataset) + next(iterator) # load first file + next(iterator) # load second file diff --git a/tests/test_atds_avro/test_atds_feature_eager.py b/tests/test_atds_avro/test_atds_feature_eager.py new file mode 100644 index 000000000..e360b734b --- /dev/null +++ b/tests/test_atds_avro/test_atds_feature_eager.py @@ -0,0 +1,107 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import pytest +import re +import tensorflow as tf + +from tensorflow_io.python.experimental.atds.features import ( + DenseFeature, + SparseFeature, + VarlenFeature, + ATDS_SUPPORTED_DTYPES, +) + + +@pytest.mark.parametrize( + ["shape", "dtype", "error_message"], + [ + (None, tf.int32, "Shape cannot be None."), + ([1, 2], None, "dtype cannot be None."), + ( + [1, None], + tf.int32, + "Dimension in shape cannot be None or 0 but found [1, None].", + ), + ([1, 0], tf.int32, "Dimension in shape cannot be None or 0 but found [1, 0]."), + ( + [1, 2], + tf.float16, + f" is not supported in ATDS. Available dtypes are {ATDS_SUPPORTED_DTYPES}.", + ), + ( + [-1, 2], + tf.int32, + "Each dimension should be greater than 0 in DenseFeature but found [-1, 2].", + ), + ], +) +def test_atds_dense_feature(shape, dtype, error_message): + """test DenseFeature creation""" + with pytest.raises(ValueError, match=re.escape(error_message)): + DenseFeature(shape, dtype) + + +@pytest.mark.parametrize( + ["shape", "dtype", "error_message"], + [ + (None, tf.int32, "Shape cannot be None."), + ([1, 2], None, "dtype cannot be None."), + ( + [1, None], + tf.int32, + "Dimension in shape cannot be None or 0 but found [1, None].", + ), + ([3, 0], tf.int32, "Dimension in shape cannot be None or 0 but found [3, 0]."), + ( + [1, 2], + tf.float16, + f" is not supported in ATDS. Available dtypes are {ATDS_SUPPORTED_DTYPES}.", + ), + ([], tf.int64, "SparseFeature cannot be scalar."), + ], +) +def test_atds_sparse_feature(shape, dtype, error_message): + """test SparseFeature creation""" + with pytest.raises(ValueError, match=re.escape(error_message)): + SparseFeature(shape, dtype) + + +@pytest.mark.parametrize( + ["shape", "dtype", "error_message"], + [ + (None, tf.int32, "Shape cannot be None."), + ([1, 2], None, "dtype cannot be None."), + ( + [1, None], + tf.int32, + "Dimension in shape cannot be None or 0 but found [1, None].", + ), + ([0, 1], tf.int32, "Dimension in shape cannot be None or 0 but found [0, 1]."), + ( + [-1, 2], + tf.float16, + f" is not supported in ATDS. Available dtypes are {ATDS_SUPPORTED_DTYPES}.", + ), + ( + [-2, 5], + tf.int32, + "Each dimension should be greater than 0 or -1 in VarlenFeature but found [-2, 5].", + ), + ], +) +def test_atds_ragged_feature(shape, dtype, error_message): + """test VarlenFeature creation""" + with pytest.raises(ValueError, match=re.escape(error_message)): + VarlenFeature(shape, dtype) diff --git a/tests/test_atds_avro/test_atds_writer.py b/tests/test_atds_avro/test_atds_writer.py new file mode 100644 index 000000000..ee86c89b0 --- /dev/null +++ b/tests/test_atds_avro/test_atds_writer.py @@ -0,0 +1,396 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================== +"""Tests for ATDSWriter""" + +import os +import glob +import pytest +import shutil +import tempfile +import tensorflow as tf + +from tests.test_atds_avro.utils import file_writer +from tests.test_atds_avro.utils.data_source import DataSource +from tests.test_atds_avro.utils.generator.tensor_generator import ( + IntTensorGenerator, + FloatTensorGenerator, + WordTensorGenerator, + BoolTensorGenerator, +) +from tests.test_atds_avro.utils.generator.sparse_tensor_generator import ( + ValueDistribution, + IntSparseTensorGenerator, + FloatSparseTensorGenerator, + WordSparseTensorGenerator, + BoolSparseTensorGenerator, +) +from tests.test_atds_avro.utils.generator.varlen_tensor_generator import ( + IntVarLenTensorGenerator, + FloatVarLenTensorGenerator, + WordVarLenTensorGenerator, + BoolVarLenTensorGenerator, +) +from tests.test_atds_avro.utils.atds_writer import ATDSWriter +from tests.test_atds_avro.utils.tf_record_writer import TFRecordWriter +from tests.test_atds_avro.utils.atds_benchmark_utils import ( + get_dataset, + get_features_from_data_source, +) +from tests.test_atds_avro.utils.generator.mock_generator import MockGenerator +from tests.test_parse_avro_eager import AvroFileToRecords + + +@pytest.mark.parametrize(["num_records", "partitions"], [(10, 1), (23, 3), (5, 6)]) +def test_expected_num_records_and_partitions(num_records, partitions): + feature_name = "feature" + scenario = { + feature_name: IntTensorGenerator(tf.TensorSpec(shape=[3], dtype=tf.int64)) + } + data_source = DataSource( + scenario=scenario, num_records=num_records, partitions=partitions + ) + + with ATDSWriter() as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + + dataset = tf.data.Dataset.list_files(pattern) + assert dataset.cardinality().numpy() == partitions + files = glob.glob(pattern) + schema = writer.scenario_to_avro_schema(data_source.scenario) + counts = 0 + for fname in files: + assert os.path.isfile(fname), f"file does not exist: {fname}" + assert os.stat(fname).st_size > 0, f"file is empty: {fname}" + counts += len(AvroFileToRecords(fname, reader_schema=schema).get_records()) + assert counts == num_records + + +@pytest.mark.parametrize( + ["generator"], + [ + (IntTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.int32)),), + (IntTensorGenerator(tf.TensorSpec(shape=[3], dtype=tf.int32)),), + (IntTensorGenerator(tf.TensorSpec(shape=[3, 8], dtype=tf.int32)),), + (IntTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.int64)),), + (IntTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.int64)),), + (IntTensorGenerator(tf.TensorSpec(shape=[1, 2], dtype=tf.int64)),), + (FloatTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.float32)),), + (FloatTensorGenerator(tf.TensorSpec(shape=[10], dtype=tf.float32)),), + (FloatTensorGenerator(tf.TensorSpec(shape=[2, 4, 6], dtype=tf.float32)),), + (FloatTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.float64)),), + (FloatTensorGenerator(tf.TensorSpec(shape=[1], dtype=tf.float64)),), + (FloatTensorGenerator(tf.TensorSpec(shape=[2, 4], dtype=tf.float64)),), + (WordTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.string)),), + (WordTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.string)),), + (WordTensorGenerator(tf.TensorSpec(shape=[2, 1], dtype=tf.string)),), + (BoolTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.bool)),), + (BoolTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.bool)),), + (BoolTensorGenerator(tf.TensorSpec(shape=[2, 1, 
3], dtype=tf.bool)),), + ], +) +def test_dense_tensor_with_various_spec(generator): + feature_name = "feature" + num_records = 10 + data = [generator.generate() for _ in range(num_records)] + + mock_generator = MockGenerator( + spec=generator.spec, data=data, generator_cls=type(generator) + ) + data_source = DataSource( + scenario={feature_name: mock_generator}, + num_records=num_records, + ) + + dtype = generator.spec.dtype + with ATDSWriter() as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + dataset = get_dataset( + glob.glob(pattern), get_features_from_data_source(writer, data_source) + ) + for i, features in enumerate(dataset): + actual = features[feature_name] + if dtype in [tf.float32, tf.float64]: + tf.debugging.assert_near(actual, data[i], atol=1e-6) + else: + tf.debugging.assert_equal(actual, data[i]) + + +@pytest.mark.parametrize( + ["generator"], + [ + ( + IntSparseTensorGenerator( + tf.SparseTensorSpec(shape=[10], dtype=tf.int32), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + IntSparseTensorGenerator( + tf.SparseTensorSpec(shape=[2, 5], dtype=tf.int32), + num_values=ValueDistribution.SINGLE_VALUE, + ), + ), + ( + IntSparseTensorGenerator( + tf.SparseTensorSpec(shape=[100], dtype=tf.int64), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + IntSparseTensorGenerator( + tf.SparseTensorSpec(shape=[20, 500], dtype=tf.int64), + num_values=ValueDistribution.LARGE_NUM_VALUE, + ), + ), + ( + FloatSparseTensorGenerator( + tf.SparseTensorSpec(shape=[20], dtype=tf.float32), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + FloatSparseTensorGenerator( + tf.SparseTensorSpec(shape=[1, 10], dtype=tf.float32), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + FloatSparseTensorGenerator( + tf.SparseTensorSpec(shape=[50000], dtype=tf.float64), + num_values=ValueDistribution.LARGE_NUM_VALUE, + ), + ), + ( + FloatSparseTensorGenerator( + tf.SparseTensorSpec(shape=[2, 2], dtype=tf.float64), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + WordSparseTensorGenerator( + tf.SparseTensorSpec(shape=[5], dtype=tf.string), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + WordSparseTensorGenerator( + tf.SparseTensorSpec(shape=[10, 3], dtype=tf.string), + num_values=ValueDistribution.SMALL_NUM_VALUE, + ), + ), + ( + BoolSparseTensorGenerator( + tf.SparseTensorSpec(shape=[1], dtype=tf.bool), + num_values=ValueDistribution.SINGLE_VALUE, + ), + ), + ( + BoolSparseTensorGenerator( + tf.SparseTensorSpec(shape=[1, 1], dtype=tf.bool), + num_values=ValueDistribution.SINGLE_VALUE, + ), + ), + ], +) +def test_sparse_tensor_with_various_spec(generator): + feature_name = "feature" + num_records = 10 + data = [generator.generate() for _ in range(num_records)] + + mock_generator = MockGenerator( + spec=generator.spec, data=data, generator_cls=type(generator) + ) + data_source = DataSource( + scenario={feature_name: mock_generator}, + num_records=num_records, + ) + + dtype = generator.spec.dtype + with ATDSWriter() as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + dataset = get_dataset( + glob.glob(pattern), get_features_from_data_source(writer, data_source) + ) + dataset = dataset.unbatch() + for i, features in enumerate(dataset): + sparse_tensor = features[feature_name] + tf.debugging.assert_equal(sparse_tensor.indices, data[i].indices) + tf.debugging.assert_equal(sparse_tensor.dense_shape, data[i].dense_shape) + 
+ if dtype in [tf.float32, tf.float64]: + tf.debugging.assert_near( + sparse_tensor.values, data[i].values, atol=1e-6 + ) + else: + tf.debugging.assert_equal(sparse_tensor.values, data[i].values) + + +@pytest.mark.parametrize( + ["generator"], + [ + (IntVarLenTensorGenerator(tf.SparseTensorSpec(shape=[None], dtype=tf.int32)),), + (IntVarLenTensorGenerator(tf.SparseTensorSpec(shape=[None], dtype=tf.int64)),), + ( + FloatVarLenTensorGenerator( + tf.SparseTensorSpec(shape=[20, None], dtype=tf.float32) + ), + ), + ( + FloatVarLenTensorGenerator( + tf.SparseTensorSpec(shape=[1, 2], dtype=tf.float64) + ), + ), + ( + WordVarLenTensorGenerator( + tf.SparseTensorSpec(shape=[None, None, None], dtype=tf.string) + ), + ), + ( + BoolVarLenTensorGenerator( + tf.SparseTensorSpec(shape=[None, 1], dtype=tf.bool) + ), + ), + ], +) +def test_varlen_tensor_with_various_spec(generator): + feature_name = "feature" + num_records = 10 + data = [generator.generate() for _ in range(num_records)] + + mock_generator = MockGenerator( + spec=generator.spec, data=data, generator_cls=type(generator) + ) + data_source = DataSource( + scenario={feature_name: mock_generator}, + num_records=num_records, + ) + + dtype = generator.spec.dtype + with ATDSWriter() as writer: + dir_path = writer.write(data_source) + pattern = os.path.join(dir_path, f"*.{writer.extension}") + dataset = get_dataset( + glob.glob(pattern), get_features_from_data_source(writer, data_source) + ) + dataset = dataset.unbatch() + for i, features in enumerate(dataset): + sparse_tensor = features[feature_name] + tf.debugging.assert_equal(sparse_tensor.indices, data[i].indices) + tf.debugging.assert_equal(sparse_tensor.dense_shape, data[i].dense_shape) + + if dtype in [tf.float32, tf.float64]: + tf.debugging.assert_near( + sparse_tensor.values, data[i].values, atol=1e-6 + ) + else: + tf.debugging.assert_equal(sparse_tensor.values, data[i].values) + + +def test_hash_code(): + writer = ATDSWriter(codec="null") + null_codec_writer = ATDSWriter(codec="null") + deflate_codec_writer = ATDSWriter(codec="deflate") + snappy_codec_writer = ATDSWriter(codec="snappy") + + assert writer.hash_code() == null_codec_writer.hash_code() + assert writer.hash_code() != deflate_codec_writer.hash_code() + assert writer.hash_code() != snappy_codec_writer.hash_code() + assert deflate_codec_writer.hash_code() != snappy_codec_writer.hash_code() + + +@pytest.mark.parametrize( + ["generator", "num_records", "partitions"], + [ + (IntTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.int32)), 10, 3), + (IntTensorGenerator(tf.TensorSpec(shape=[3], dtype=tf.int32)), 10, 1), + (IntTensorGenerator(tf.TensorSpec(shape=[3, 8], dtype=tf.int32)), 5, 2), + (IntTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.int64)), 20, 3), + (IntTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.int64)), 10, 1), + (IntTensorGenerator(tf.TensorSpec(shape=[1, 2], dtype=tf.int64)), 10, 1), + (FloatTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.float32)), 15, 2), + (FloatTensorGenerator(tf.TensorSpec(shape=[10], dtype=tf.float32)), 10, 1), + (FloatTensorGenerator(tf.TensorSpec(shape=[2, 4, 6], dtype=tf.float32)), 3, 3), + (FloatTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.float64)), 2, 1), + (FloatTensorGenerator(tf.TensorSpec(shape=[1], dtype=tf.float64)), 10, 1), + (FloatTensorGenerator(tf.TensorSpec(shape=[2, 4], dtype=tf.float64)), 20, 3), + (WordTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.string)), 50, 1), + (WordTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.string)), 40, 3), + 
(WordTensorGenerator(tf.TensorSpec(shape=[2, 1], dtype=tf.string)), 20, 2), + (BoolTensorGenerator(tf.TensorSpec(shape=[], dtype=tf.bool)), 30, 3), + (BoolTensorGenerator(tf.TensorSpec(shape=[5], dtype=tf.bool)), 10, 1), + (BoolTensorGenerator(tf.TensorSpec(shape=[2, 1, 3], dtype=tf.bool)), 10, 1), + ], +) +def test_read_from_tf_record_cache(generator, num_records, partitions): + feature_name = "feature" + data = [generator.generate() for _ in range(num_records)] + + mock_generator = MockGenerator( + spec=generator.spec, data=data, generator_cls=type(generator) + ) + data_source = DataSource( + scenario={feature_name: mock_generator}, + num_records=num_records, + partitions=partitions, + ) + + dtype = generator.spec.dtype + data_source_cache_dir = tempfile.mkdtemp() + count = 0 + try: + os.environ[file_writer.TF_IO_BENCHMARK_DATA_CACHE] = data_source_cache_dir + with ATDSWriter() as atds_writer, TFRecordWriter() as tf_writer: + atds_path = atds_writer.write(data_source) + tf_path = os.path.join(atds_path, os.pardir, tf_writer.hash_code()) + parser_fn = tf_writer.create_tf_example_parser_fn(data_source) + for file_index in range(partitions): + partition_length = len(str(partitions)) + index_name = str(file_index).zfill(partition_length) + atds_filename = os.path.join( + atds_path, f"part-{index_name}.{atds_writer.extension}" + ) + tf_filename = os.path.join( + tf_path, f"part-{index_name}.{tf_writer.extension}" + ) + atds_dataset = get_dataset( + atds_filename, + get_features_from_data_source(atds_writer, data_source), + ) + atds_dataset = atds_dataset.unbatch() + tf_dataset = tf.data.Dataset.list_files(tf_filename) + tf_dataset = tf.data.TFRecordDataset(tf_dataset) + tf_dataset = tf_dataset.map(parser_fn) + for atds_record, tf_record in zip(atds_dataset, tf_dataset): + actual = tf.cast(tf_record[feature_name], dtype) + if dtype in [tf.float32, tf.float64]: + tf.debugging.assert_near( + actual, atds_record[feature_name], atol=1e-6 + ) + tf.debugging.assert_near( + data[count], atds_record[feature_name], atol=1e-6 + ) + else: + tf.debugging.assert_equal(actual, atds_record[feature_name]) + tf.debugging.assert_equal( + data[count], atds_record[feature_name] + ) + count += 1 + assert count == num_records + finally: + del os.environ[file_writer.TF_IO_BENCHMARK_DATA_CACHE] + shutil.rmtree(data_source_cache_dir) diff --git a/tests/test_atds_avro/utils/generator/mock_generator.py b/tests/test_atds_avro/utils/generator/mock_generator.py new file mode 100644 index 000000000..2c66d1f7f --- /dev/null +++ b/tests/test_atds_avro/utils/generator/mock_generator.py @@ -0,0 +1,69 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+# ==============================================================================
+"""MockGenerator"""
+
+import tensorflow as tf
+
+from tests.test_atds_avro.utils.generator.generator_base import Generator
+
+
+class MockGenerator(Generator):
+ """MockGenerator is a test utility class that generates tensors based on
+ the given data."""
+
+ def __init__(self, spec, data, generator_cls):
+ """Create a new MockGenerator.
+
+ MockGenerator generates tensors by returning tensors from the given data.
+
+ Args:
+ spec: A tf.TensorSpec that describes the output tensor.
+ data: A list of tensors to generate.
+ generator_cls: Class of the wrapped generator object.
+
+ Raises:
+ ValueError: If spec is not compatible with data or data is empty.
+ """
+ super().__init__(spec)
+
+ for index, tensor in enumerate(data):
+ if not spec.is_compatible_with(tensor):
+ raise ValueError(
+ "Input spec and data are not compatible. "
+ f"Tensor {tensor} at the {index}th location is not "
+ f"compatible with input spec {spec}"
+ )
+ if not data:
+ raise ValueError("Input data should not be empty.")
+
+ self._data = data
+ self._index = 0
+ self._generator_cls = generator_cls
+
+ def generate(self):
+ """Generate an output tensor by returning tensors from the input data.
+ Output tensors are returned in the order they appear in the input data.
+ When all tensors have been returned, the generator restarts from the beginning.
+ """
+ if self._index >= len(self._data):
+ self._index = 0
+
+ tensor = self._data[self._index]
+ self._index += 1
+ return tensor
+
+ def get_generator_cls(self):
+ """Get the generator class which this object represents."""
+ return self._generator_cls
diff --git a/tests/test_parse_avro_eager.py b/tests/test_parse_avro_eager.py
new file mode 100644
index 000000000..00f3036ba
--- /dev/null
+++ b/tests/test_parse_avro_eager.py
@@ -0,0 +1,2417 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== +"""AvroDatasetTest""" +# pylint: disable=line-too-long +# see https://github.com/tensorflow/io/pull/962#issuecomment-632346602 + +import sys +from functools import reduce +import os +import tempfile +from io import BytesIO +import pytest +import numpy as np + +import tensorflow as tf +from avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder +from avro.datafile import DataFileReader, DataFileWriter +from avro.schema import Parse as parse +import tensorflow_io as tfio + +if sys.platform == "darwin": + pytest.skip("TODO: skip macOS", allow_module_level=True) + + +class AvroRecordsToFile: + """AvroRecordsToFile""" + + def __init__(self, filename, writer_schema, codec="deflate"): + """ + + :param filename: + :param writer_schema: + :param codec: + """ + self.schema = AvroParser(writer_schema).get_schema_object() + self.filename = filename + self.codec = codec + + def write_records(self, records): + with open(self.filename, "wb") as out: + writer = DataFileWriter(out, DatumWriter(), self.schema, codec=self.codec) + for record in records: + writer.append(record) + writer.close() + + +class AvroFileToRecords: + """AvroFileToRecords""" + + def __init__(self, filename, reader_schema=None): + """ + Reads records as strings where each row is serialized separately + + :param filename: The filename from where to load the records + :param reader_schema: Schema used for reading + + :return: An array of serialized string with one string per record + """ + self.records = [] + + with open(filename, "rb") as file_handle: + datum_reader = ( + DatumReader(reader_schema=AvroParser(reader_schema).get_schema_object()) + if reader_schema + else DatumReader() + ) + reader = DataFileReader(file_handle, datum_reader) + + self.records += list(reader) + + def get_records(self): + return self.records + + +class AvroSchemaReader: + """AvroSchemaReader""" + + def __init__(self, filename): + """ + Reads the schema from a file into json string + """ + with open(filename, "rb") as file_handle: + reader = DataFileReader(file_handle, DatumReader()) + self.schema_json = "" + self.schema_json = str(reader.datum_reader.writer_schema) + + def get_schema_json(self): + return self.schema_json + + +class AvroParser: + """AvroParser""" + + def __init__(self, schema_json): + """ + Create an avro parser mostly to abstract away the API change between + avro and avro-python3 + + :param schema_json: + """ + self.schema_object = parse(schema_json) + + def get_schema_object(self): + return self.schema_object + + +class AvroDeserializer: + """AvroDeserializer""" + + def __init__(self, schema_json): + """ + Create an avro deserializer. + + :param schema_json: Json string of the schema. + """ + schema_object = AvroParser(schema_json).get_schema_object() + # No schema resolution + self.datum_reader = DatumReader(schema_object, schema_object) + + def deserialize(self, serialized_bytes): + """ + Deserialize an avro record from bytes. + + :param serialized_bytes: The serialized bytes input. + + :return: The de-serialized record structure in python as map-list object. + """ + return self.datum_reader.read(BinaryDecoder(BytesIO(serialized_bytes))) + + +class AvroSerializer: + """AvroSerializer""" + + def __init__(self, schema_json): + """ + Create an avro serializer. + + :param schema_json: Json string of the schema. 
+        """
+        self.datum_writer = DatumWriter(AvroParser(schema_json).get_schema_object())
+
+    def serialize(self, datum):
+        """
+        Serialize a datum into an Avro-formatted string.
+
+        :param datum: The avro datum.
+
+        :return: The serialized bytes.
+        """
+        writer = BytesIO()
+        self.datum_writer.write(datum, BinaryEncoder(writer))
+        return writer.getvalue()
+
+
+class AvroDatasetTestBase(tf.test.TestCase):
+    """AvroDatasetTestBase"""
+
+    @staticmethod
+    def _setup_files(writer_schema, records):
+        """setup_files"""
+        # Write test records into temporary output directory
+        filename = os.path.join(tempfile.mkdtemp(), "test.avro")
+        writer = AvroRecordsToFile(filename=filename, writer_schema=writer_schema)
+        writer.write_records(records)
+
+        return [filename]
+
+    def assert_values_equal(self, expected, actual):
+        """Asserts that two values are equal."""
+        if isinstance(expected, dict):
+            self.assertItemsEqual(list(expected.keys()), list(actual.keys()))
+            for k in expected.keys():
+                self.assert_values_equal(expected[k], actual[k])
+        elif isinstance(expected, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
+            self.assertAllEqual(expected.indices, actual.indices)
+            self.assertAllEqual(expected.values, actual.values)
+            self.assertAllEqual(expected.dense_shape, actual.dense_shape)
+        else:
+            self.assertAllEqual(expected, actual)
+
+    def assert_data_equal(self, expected, actual):
+        """assert_data_equal"""
+
+        def _assert_equal(expected, actual):
+            for name, datum in expected.items():
+                self.assert_values_equal(expected=datum, actual=actual[name])
+
+        if isinstance(expected, tuple):
+            assert isinstance(
+                actual, tuple
+            ), f"Found type {type(actual)} but expected type {tuple}"
+            assert (
+                len(expected) == 2
+            ), "Found {} components in expected dataset but must have {}".format(
+                len(expected), 2
+            )
+
+            assert (
+                len(actual) == 2
+            ), "Found {} components in actual dataset but expected {}".format(
+                len(actual), 2
+            )
+
+            expected_features, expected_labels = expected
+            actual_features, actual_labels = actual
+
+            _assert_equal(expected_features, actual_features)
+            _assert_equal(expected_labels, actual_labels)
+
+        else:
+            _assert_equal(expected, actual)
+
+    def _verify_output(self, expected_data, actual_dataset):
+
+        next_data = iter(actual_dataset)
+
+        for expected in expected_data:
+            self.assert_data_equal(expected=expected, actual=next(next_data))
+
+
+class AvroRecordDatasetTest(AvroDatasetTestBase):
+    """AvroRecordDatasetTest"""
+
+    @staticmethod
+    def _load_records_as_tensors(filenames, schema):
+        serializer = AvroSerializer(schema)
+        return map(
+            lambda s: tf.convert_to_tensor(
+                serializer.serialize(s), dtype=tf.dtypes.string
+            ),
+            reduce(
+                lambda a, b: a + b,
+                [AvroFileToRecords(filename).get_records() for filename in filenames],
+            ),
+        )
+
+    def _test_pass_dataset(self, writer_schema, record_data, **kwargs):
+        """test_pass_dataset"""
+        filenames = AvroRecordDatasetTest._setup_files(
+            writer_schema=writer_schema, records=record_data
+        )
+        expected_data = AvroRecordDatasetTest._load_records_as_tensors(
+            filenames, writer_schema
+        )
+        actual_dataset = tfio.experimental.columnar.AvroRecordDataset(
+            filenames=filenames,
+            num_parallel_reads=kwargs.get("num_parallel_reads", 1),
+            reader_schema=kwargs.get("reader_schema"),
+        )
+        data = iter(actual_dataset)
+        for expected in expected_data:
+            self.assert_values_equal(expected=expected, actual=next(data))
+
+    def _test_pass_dataset_resolved(
+        self, writer_schema, reader_schema, record_data, **kwargs
+    ):
+        """test_pass_dataset_resolved"""
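+        # Note, records are written with writer_schema and read back with a
+        # different reader_schema, so this helper exercises Avro schema
+        # resolution (e.g. column projection and int->long / long->double type
+        # promotion) rather than a plain round trip.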
filenames = AvroRecordDatasetTest._setup_files( + writer_schema=writer_schema, records=record_data + ) + expected_data = AvroRecordDatasetTest._load_records_as_tensors( + filenames, reader_schema + ) + actual_dataset = tfio.experimental.columnar.AvroRecordDataset( + filenames=filenames, + num_parallel_reads=kwargs.get("num_parallel_reads", 1), + reader_schema=reader_schema, + ) + + data = iter(actual_dataset) + for expected in expected_data: + self.assert_values_equal(expected=expected, actual=next(data)) + + def test_wout_reader_schema(self): + """test_wout_reader_schema""" + writer_schema = """{ + "type": "record", + "name": "dataTypes", + "fields": [ + { + "name":"index", + "type":"int" + }, + { + "name":"string_value", + "type":"string" + } + ]}""" + record_data = [ + {"index": 0, "string_value": ""}, + {"index": 1, "string_value": "SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"}, + { + "index": 2, + "string_value": "ABCDEFGHIJKLMNOPQRSTUVW" + + "Zabcdefghijklmnopqrstuvwz0123456789", + }, + ] + self._test_pass_dataset(writer_schema=writer_schema, record_data=record_data) + + @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") + def test_with_schema_projection(self): + """test_with_schema_projection""" + writer_schema = """{ + "type": "record", + "name": "dataTypes", + "fields": [ + { + "name":"index", + "type":"int" + }, + { + "name":"string_value", + "type":"string" + } + ]}""" + # Test projection + reader_schema = """{ + "type": "record", + "name": "dataTypes", + "fields": [ + { + "name":"string_value", + "type":"string" + } + ]}""" + record_data = [ + {"index": 0, "string_value": ""}, + {"index": 1, "string_value": "SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"}, + { + "index": 2, + "string_value": "ABCDEFGHIJKLMNOPQRSTUVWZabcde" + + "fghijklmnopqrstuvwz0123456789", + }, + ] + self._test_pass_dataset_resolved( + writer_schema=writer_schema, + reader_schema=reader_schema, + record_data=record_data, + ) + + def test_schema_type_promotion(self): + """test_schema_type_promotion""" + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"}, + {"name": "long_value", "type": "long"} + ]}""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "long"}, + {"name": "long_value", "type": "double"} + ]}""" + record_data = [ + {"int_value": 0, "long_value": 111}, + {"int_value": 1, "long_value": 222}, + ] + self._test_pass_dataset_resolved( + writer_schema=writer_schema, + reader_schema=reader_schema, + record_data=record_data, + ) + + +class MakeAvroRecordDatasetTest(AvroDatasetTestBase): + """MakeAvroRecordDatasetTest""" + + def _test_pass_dataset( + self, + writer_schema, + record_data, + expected_data, + features, + reader_schema, + batch_size, + **kwargs + ): + """_test_pass_dataset""" + filenames = AvroDatasetTestBase._setup_files( + writer_schema=writer_schema, records=record_data + ) + + actual_dataset = tfio.experimental.columnar.make_avro_record_dataset( + file_pattern=filenames, + features=features, + batch_size=batch_size, + reader_schema=reader_schema, + shuffle=kwargs.get("shuffle", None), + num_epochs=kwargs.get("num_epochs", None), + ) + + self._verify_output(expected_data=expected_data, actual_dataset=actual_dataset) + + def test_variable_length_failed_with_wrong_rank(self): + """test_variable_length_failed_with_wrong_rank""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + 
"type": "array", + "items": "int" + } + } + } + ]}""" + record_data = [ + {"int_list_list": [[1, 2], [3, 4, 5]]}, + {"int_list_list": [[6]]}, + {"int_list_list": [[6]]}, + ] + features = { + "int_list_list[*][*]": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.int32 + ) + } + expected_data = [ + { + "int_list_list[*][*]": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 1], + [0, 1, 2], + [1, 0, 0], + [2, 0, 0], + ], + values=[1, 2, 3, 4, 5, 6, 6], + dense_shape=[3, 2, 3], + ) + } + ] + with self.assertRaises(Exception) as context: + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + writer_schema=reader_schema, + batch_size=3, + num_epochs=1, + ) + self.assertTrue( + "is not compatible with supplied shape" in context.exception + ) + + def test_variable_length_passed_with_rank(self): + """test_variable_length_passed_with_rank""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""" + record_data = [ + {"int_list_list": [[1, 2], [3, 4, 5]]}, + {"int_list_list": [[6]]}, + {"int_list_list": [[6]]}, + ] + features = { + "int_list_list[*][*]": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.int32, 2 + ) + } + expected_data = [ + { + "int_list_list[*][*]": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 1], + [0, 1, 2], + [1, 0, 0], + [2, 0, 0], + ], + values=[1, 2, 3, 4, 5, 6, 6], + dense_shape=[3, 2, 3], + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + writer_schema=reader_schema, + batch_size=3, + num_epochs=1, + ) + + def test_batching(self): + """test_batching""" + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + ]}""" + record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] + features = {"int_value": tf.io.FixedLenFeature([], tf.dtypes.int32)} + expected_data = [ + {"int_value": tf.convert_to_tensor([0, 1])}, + {"int_value": tf.convert_to_tensor([2])}, + ] + self._test_pass_dataset( + writer_schema=writer_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + reader_schema=writer_schema, + batch_size=2, + num_epochs=1, + ) + + def test_fixed_length_list(self): + """test_fixed_length_list""" + writer_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [ + {"int_list": [0, 1, 2]}, + {"int_list": [3, 4, 5]}, + {"int_list": [6, 7, 8]}, + ] + features = {"int_list[*]": tf.io.FixedLenFeature([3], tf.dtypes.int32)} + expected_data = [ + {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])} + ] + + self._test_pass_dataset( + writer_schema=writer_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + reader_schema=writer_schema, + batch_size=3, + num_epochs=1, + ) + + +class ParseAvroDatasetTest(AvroDatasetTestBase): + """AvroDatasetTest""" + + def assert_data_equal(self, expected, actual): + """assert_data_equal""" + for name, datum in expected.items(): + self.assert_values_equal(expected=datum, actual=actual[name]) + + @staticmethod + def _batcher(iterable, step): + n = 
len(iterable) + for ndx in range(0, n, step): + yield iterable[ndx : min(ndx + step, n)] + + def _test_pass_dataset( + self, reader_schema, record_data, expected_data, features, batch_size + ): + """_test_pass_dataset""" + # Note, The batch size could be inferred from the expected data but found it better to be + # explicit here + serializer = AvroSerializer(reader_schema) + for expected_datum, actual_records in zip( + expected_data, ParseAvroDatasetTest._batcher(record_data, batch_size) + ): + # Get any key out of expected datum + actual_datum = tfio.experimental.columnar.parse_avro( + serialized=[ + tf.convert_to_tensor(serializer.serialize(r)) + for r in actual_records + ], + reader_schema=reader_schema, + features=features, + ) + self.assert_data_equal(expected=expected_datum, actual=actual_datum) + + def _test_fail_dataset( + self, reader_schema, record_data, features, batch_size, **kwargs + ): + parser_schema = kwargs.get("parser_schema", reader_schema) + serializer = AvroSerializer(reader_schema) + for actual_records in ParseAvroDatasetTest._batcher(record_data, batch_size): + # Get any key out of expected datum + with self.assertRaises(tf.errors.OpError): + _ = tfio.experimental.columnar.parse_avro( + serialized=[ + tf.convert_to_tensor(serializer.serialize(r)) + for r in actual_records + ], + reader_schema=parser_schema, + features=features, + ) + + @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") + def test_primitive_types(self): + """test_primitive_types""" + reader_schema = """{ + "type": "record", + "name": "dataTypes", + "fields": [ + { + "name":"string_value", + "type":"string" + }, + { + "name":"bytes_value", + "type":"bytes" + }, + { + "name":"double_value", + "type":"double" + }, + { + "name":"float_value", + "type":"float" + }, + { + "name":"long_value", + "type":"long" + }, + { + "name":"int_value", + "type":"int" + }, + { + "name":"boolean_value", + "type":"boolean" + } + ]}""" + record_data = [ + { + "string_value": "", + "bytes_value": b"", + "double_value": 0.0, + "float_value": 0.0, + "long_value": 0, + "int_value": 0, + "boolean_value": False, + }, + { + "string_value": "SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?", + "bytes_value": b"SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?", + "double_value": -1.0, + "float_value": -1.0, + "long_value": 9223372036854775807, + "int_value": 2147483648 - 1, + "boolean_value": True, + }, + { + "string_value": "ABCDEFGHIJKLMNOPQRSTUVWZabcdefghi" + + "jklmnopqrstuvwz0123456789", + "bytes_value": b"ABCDEFGHIJKLMNOPQRSTUVWZab" + + "cdefghijklmnopqrstuvwz0123456789", + "double_value": 1.0, + "float_value": 1.0, + "long_value": -9223372036854775807 - 1, + "int_value": -2147483648, + "boolean_value": False, + }, + ] + features = { + "string_value": tf.io.FixedLenFeature([], tf.dtypes.string), + "bytes_value": tf.io.FixedLenFeature([], tf.dtypes.string), + "double_value": tf.io.FixedLenFeature([], tf.dtypes.float64), + "float_value": tf.io.FixedLenFeature([], tf.dtypes.float32), + "long_value": tf.io.FixedLenFeature([], tf.dtypes.int64), + "int_value": tf.io.FixedLenFeature([], tf.dtypes.int32), + "boolean_value": tf.io.FixedLenFeature([], tf.dtypes.bool), + } + expected_data = [ + { + "string_value": tf.convert_to_tensor( + [ + tf.compat.as_bytes(""), + tf.compat.as_bytes("SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"), + tf.compat.as_bytes( + "ABCDEFGHIJKLMNOPQRSTUVWZabcdefghijklmnopqrstuvwz0123456789" + ), + ] + ), + "bytes_value": tf.convert_to_tensor( + [ + tf.compat.as_bytes(""), + 
tf.compat.as_bytes("SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"), + tf.compat.as_bytes( + "ABCDEFGHIJKLMNOPQRSTUVWZabcdefghijklmnopqrstuvwz0123456789" + ), + ] + ), + # Note, conversion utils `ops.EagerTensor` only seems to support single precision. + # Proper values for double precision are 1.7976931348623157e+308, -1.7976931348623157e+308 + # In addition, precision is not maintained by the conversion, thus, I simplify set 1.0 + # and -1.0 instead of proper values 3.40282306074e+38 and -3.40282306074e+38. + "double_value": tf.convert_to_tensor([0.0, -1.0, 1.0]), + "float_value": tf.convert_to_tensor([0.0, -1.0, 1.0]), + "long_value": tf.convert_to_tensor( + [0, 9223372036854775807, -9223372036854775807 - 1] + ), + "int_value": tf.convert_to_tensor([0, 2147483648 - 1, -2147483648]), + "boolean_value": tf.convert_to_tensor([False, True, False]), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_fixed_enum_types(self): + """test_fixed_enum_types""" + reader_schema = """{ + "type": "record", + "name": "dataTypes", + "fields": [ + { + "name":"fixed_value", + "type": { + "name": "TenBytes", + "type": "fixed", + "size": 10 + } + }, + { + "name":"enum_value", + "type":{ + "name": "Color", + "type": "enum", + "symbols": ["BLUE", "GREEN", "BROWN"] + } + } + ]}""" + record_data = [ + {"fixed_value": b"0123456789", "enum_value": "BLUE"}, + {"fixed_value": b"1234567890", "enum_value": "GREEN"}, + {"fixed_value": b"2345678901", "enum_value": "BROWN"}, + ] + features = { + "fixed_value": tf.io.FixedLenFeature([], tf.dtypes.string), + "enum_value": tf.io.FixedLenFeature([], tf.dtypes.string), + } + expected_data = [ + { + "fixed_value": tf.convert_to_tensor( + [ + tf.compat.as_bytes("0123456789"), + tf.compat.as_bytes("1234567890"), + tf.compat.as_bytes("2345678901"), + ] + ), + "enum_value": tf.convert_to_tensor([b"BLUE", b"GREEN", b"BROWN"]), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_batching(self): + """test_batching""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + ]}""" + record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] + features = {"int_value": tf.io.FixedLenFeature([], tf.dtypes.int32)} + expected_data = [ + {"int_value": tf.convert_to_tensor([0, 1])}, + {"int_value": tf.convert_to_tensor([2])}, + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_padding_from_default(self): + """test_padding_from_default""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "fixed_len", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [ + {"fixed_len": [0]}, + {"fixed_len": [1]}, + {"fixed_len": [2]}, + {"fixed_len": [3]}, + ] + features = { + "fixed_len[*]": tf.io.FixedLenFeature( + [2], tf.dtypes.int32, default_value=[0, 1] + ) + } + # Note, last batch is NOT dropped + expected_data = [ + {"fixed_len[*]": tf.convert_to_tensor([[0, 1], [1, 1], [2, 1]])}, + 
{"fixed_len[*]": tf.convert_to_tensor([[3, 1]])}, + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_batching_with_default(self): + """test_batching_with_default""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "fixed_len", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [ + {"fixed_len": [0, 1, 2]}, + {"fixed_len": [3, 4, 5]}, + {"fixed_len": [6, 7, 8]}, + ] + features = { + "fixed_len[*]": tf.io.FixedLenFeature( + [None, 3], tf.dtypes.int32, default_value=[0, 1, 2] + ) + } + expected_data = [ + {"fixed_len[*]": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5]])}, + {"fixed_len[*]": tf.convert_to_tensor([[6, 7, 8]])}, + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + def test_union_with_null(self): + reader_schema = """{ + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "possible_float_type", + "type": [ + "null", + "float" + ] + } + ] + } + """ + record_data = [ + {"possible_float_type": 1.0}, + {"possible_float_type": None}, + {"possible_float_type": -1.0}, + ] + features = { + "possible_float_type:float": tf.io.FixedLenFeature( + [], tf.dtypes.float32, default_value=0.0 + ) + } + # If we have a default, then we use that in the place of the None + expected_data = [ + {"possible_float_type:float": tf.convert_to_tensor([1.0, 0.0, -1.0])} + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + def test_null_union_primitive_type(self): + reader_schema = """{ + "type":"record", + "name":"data_row", + "fields":[ + { + "name":"multi_type", + "type":[ + "null", + "boolean", + "int", + "long", + "float", + "double", + "string" + ] + } + ] + } + """ + record_data = [ + {"multi_type": None}, + {"multi_type": True}, # written as double(1.0) + {"multi_type": int(1)}, # written as double(1.0) + {"multi_type": 2}, # written as double(2.0) + {"multi_type": float(3.0)}, # written as double(3.0) + {"multi_type": 4.0}, # written as double (4.0) + {"multi_type": "abc"}, + ] + features = { + "multi_type:boolean": tf.io.FixedLenFeature( + [], tf.dtypes.bool, default_value=False + ), + "multi_type:int": tf.io.FixedLenFeature( + [], tf.dtypes.int32, default_value=int(0) + ), + "multi_type:long": tf.io.FixedLenFeature( + [], tf.dtypes.int64, default_value=0 + ), + "multi_type:float": tf.io.FixedLenFeature( + [], tf.dtypes.float32, default_value=float(0.0) + ), + "multi_type:double": tf.io.FixedLenFeature( + [], tf.dtypes.float64, default_value=0.0 + ), + "multi_type:string": tf.io.FixedLenFeature( + [], tf.dtypes.string, default_value="" + ), + } + expected_data = [ + { + "multi_type:boolean": tf.convert_to_tensor( + [False, False, False, False, False, False, False], + dtype=tf.dtypes.bool, + ), + "multi_type:int": tf.convert_to_tensor( + [0, 0, 0, 0, 0, 0, 0], dtype=tf.dtypes.int32 + ), + "multi_type:long": tf.convert_to_tensor( + [0, 0, 0, 0, 0, 0, 0], dtype=tf.dtypes.int64 + ), + "multi_type:float": tf.convert_to_tensor( + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=tf.dtypes.float32 + ), + "multi_type:double": tf.convert_to_tensor( + [0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 0.0], dtype=tf.dtypes.float64 + ), + 
"multi_type:string": tf.convert_to_tensor( + [ + tf.compat.as_bytes(""), + tf.compat.as_bytes(""), + tf.compat.as_bytes(""), + tf.compat.as_bytes(""), + tf.compat.as_bytes(""), + tf.compat.as_bytes(""), + tf.compat.as_bytes("abc"), + ] + ), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=7, + ) + + def test_union_without_default(self): + reader_schema = """{ + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "possible_float_type", + "type": [ + "null", + "float" + ] + } + ] + } + """ + record_data = [{"possible_float_type": None}] + features = { + "possible_float_type:float": tf.io.FixedLenFeature([], tf.dtypes.float32) + } + self._test_fail_dataset(reader_schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_fixed_length_list(self): + """test_fixed_length_list""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [ + {"int_list": [0, 1, 2]}, + {"int_list": [3, 4, 5]}, + {"int_list": [6, 7, 8]}, + ] + features = {"int_list[*]": tf.io.FixedLenFeature([3], tf.dtypes.int32)} + expected_data = [ + {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])} + ] + + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_fixed_length_with_default_vector(self): + """test_fixed_length_with_default_vector""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [{"int_list": [0, 1, 2]}, {"int_list": [3]}, {"int_list": [6, 7]}] + features = { + "int_list[*]": tf.io.FixedLenFeature( + [None, 3], tf.dtypes.int32, default_value=[0, 1, 2] + ) + } + expected_data = [ + {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 1, 2], [6, 7, 2]])} + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_fixed_length_with_default_scalar(self): + """test_fixed_length_with_default_scalar""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [{"int_list": [0, 1, 2]}, {"int_list": [3]}, {"int_list": [6, 7]}] + features = { + "int_list[*]": tf.io.FixedLenFeature( + [None], tf.dtypes.int32, default_value=0 + ) + } + expected_data = [ + {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 0, 0], [6, 7, 0]])} + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_dense_2d(self): + """test_dense_2d""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": + { + "name" : "name", + "type" : "record", + "fields" : [ + { + "name": "nested_int_list", + "type": + { + "type": "array", + "items": "int" 
+ } + } + ] + } + } + } + ]}""" + record_data = [ + { + "int_list": [ + {"nested_int_list": [1, 2, 3]}, + {"nested_int_list": [4, 5, 6]}, + ] + }, + { + "int_list": [ + {"nested_int_list": [7, 8, 9]}, + {"nested_int_list": [10, 11, 12]}, + ] + }, + ] + features = { + "int_list[*].nested_int_list[*]": tf.io.FixedLenFeature( + [2, 3], tf.dtypes.int32 + ) + } + expected_data = [ + { + "int_list[*].nested_int_list[*]": tf.convert_to_tensor( + [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_dense_array_3d(self): + """test_dense_array_3d""" + # Here we use arrays directly for the nesting + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ] + }""" + record_data = [ + {"int_list": [[0, 1, 2], [10, 11, 12], [20, 21, 22]]}, + {"int_list": [[1, 2, 3], [11, 12, 13], [21, 22, 23]]}, + ] + # Note, need to at least define the rank of the data, dimension can be unknown + # This is a limitation inside TensorFlow where shape ranks need to be known + # inside _from_compatible_tensor_list + features = { + "int_list[*][*]": tf.io.FixedLenFeature([None, None], tf.dtypes.int32) + } + # Note, the outer dimension is the batch dimension + expected_data = [ + { + "int_list[*][*]": tf.convert_to_tensor( + [ + [[0, 1, 2], [10, 11, 12], [20, 21, 22]], + [[1, 2, 3], [11, 12, 13], [21, 22, 23]], + ] + ) + }, + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") + def test_sparse_feature(self): + """test_sparse_feature""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "sparse_type", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "sparse_triplet", + "fields": [ + { + "name":"index", + "type":"long" + }, + { + "name":"value", + "type":"float" + } + ] + } + } + } + ]}""" + record_data = [ + {"sparse_type": [{"index": 0, "value": 5.0}, {"index": 3, "value": 2.0}]}, + {"sparse_type": [{"index": 2, "value": 7.0}]}, + {"sparse_type": [{"index": 1, "value": 6.0}]}, + {"sparse_type": [{"index": 3, "value": 3.0}]}, + ] + features = { + "sparse_type": tf.io.SparseFeature( + index_key="index", value_key="value", dtype=tf.dtypes.float32, size=4 + ) + } + expected_data = [ + { + "sparse_type": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 3], [1, 2]], + values=[5.0, 2.0, 7.0], + dense_shape=[2, 4], + ) + }, + { + "sparse_type": tf.compat.v1.SparseTensorValue( + indices=[[0, 1], [1, 3]], values=[6.0, 3.0], dense_shape=[2, 4] + ) + }, + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") + def test_type_reuse(self): + """test_type_reuse""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "first_value", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "Tuple", + "fields": [ + { + "name":"index", + "type":"long" + }, + { + "name":"value", + "type":"float" + } + ] + } + } 
+ }, + { + "name": "second_value", + "type": { + "type": "array", + "items": "Tuple" + } + } + ] + }""" + record_data = [ + { + "first_value": [{"index": 0, "value": 5.0}, {"index": 3, "value": 2.0}], + "second_value": [{"index": 2, "value": 7.0}], + }, + { + "first_value": [{"index": 0, "value": 2.0}], + "second_value": [{"index": 1, "value": 2.0}], + }, + ] + features = { + "first_value": tf.io.SparseFeature( + index_key="index", value_key="value", dtype=tf.dtypes.float32, size=4 + ), + "second_value": tf.io.SparseFeature( + index_key="index", value_key="value", dtype=tf.dtypes.float32, size=3 + ), + } + expected_data = [ + { + "first_value": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 3], [1, 0]], + values=[5.0, 2.0, 2.0], + dense_shape=[2, 4], + ), + "second_value": tf.compat.v1.SparseTensorValue( + indices=[[0, 2], [1, 1]], values=[7.0, 2.0], dense_shape=[2, 3] + ), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_variable_length(self): + """test_variable_length""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list", + "type": { + "type": "array", + "items": "int" + } + } + ]}""" + record_data = [{"int_list": [1, 2]}, {"int_list": [3, 4, 5]}, {"int_list": [6]}] + features = { + "int_list[*]": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.int32, 1 + ) + } + expected_data = [ + { + "int_list[*]": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]], + values=[1, 2, 3, 4, 5, 6], + dense_shape=[3, 3], + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_variable_length_2d(self): + """test_variable_length_2d""" + reader_schema = """{ + "type": "record", + "name": "row", + "fields": [ + { + "name": "int_list_list", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "int" + } + } + } + ]}""" + record_data = [ + {"int_list_list": [[1, 2], [3, 4, 5]]}, + {"int_list_list": [[6]]}, + {"int_list_list": [[6]]}, + ] + features = { + "int_list_list[*][*]": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.int32, 2 + ) + } + expected_data = [ + { + "int_list_list[*][*]": tf.compat.v1.SparseTensorValue( + indices=[ + [0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [0, 1, 1], + [0, 1, 2], + [1, 0, 0], + [2, 0, 0], + ], + values=[1, 2, 3, 4, 5, 6, 6], + dense_shape=[3, 2, 3], + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_nesting(self): + """test_nesting""" + reader_schema = """{ + "type": "record", + "name": "nesting", + "fields": [ + { + "name": "nested_record", + "type": { + "type": "record", + "name": "nested_values", + "fields": [ + { + "name": "nested_int", + "type": "int" + }, + { + "name": "nested_float_list", + "type": { + "type": "array", + "items": "float" + } + } + ] + } + }, + { + "name": "list_of_records", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name": "first_name", + "type": "string" + 
}, + { + "name": "age", + "type": "int" + } + ] + } + } + } + ] + } + """ + record_data = [ + { + "nested_record": {"nested_int": 0, "nested_float_list": [0.0, 10.0]}, + "list_of_records": [{"first_name": "Herbert", "age": 70}], + }, + { + "nested_record": {"nested_int": 5, "nested_float_list": [-2.0, 7.0]}, + "list_of_records": [ + {"first_name": "Doug", "age": 55}, + {"first_name": "Jess", "age": 66}, + {"first_name": "Julia", "age": 30}, + ], + }, + { + "nested_record": {"nested_int": 7, "nested_float_list": [3.0, 4.0]}, + "list_of_records": [{"first_name": "Karl", "age": 32}], + }, + ] + features = { + "nested_record.nested_int": tf.io.FixedLenFeature([], tf.dtypes.int32), + "nested_record.nested_float_list[*]": tf.io.FixedLenFeature( + [2], tf.dtypes.float32 + ), + "list_of_records[0].first_name": tf.io.FixedLenFeature( + [], tf.dtypes.string + ), + } + expected_data = [ + { + "nested_record.nested_int": tf.convert_to_tensor([0, 5, 7]), + "nested_record.nested_float_list[*]": tf.convert_to_tensor( + [[0.0, 10.0], [-2.0, 7.0], [3.0, 4.0]] + ), + "list_of_records[0].first_name": tf.convert_to_tensor( + [ + tf.compat.as_bytes("Herbert"), + tf.compat.as_bytes("Doug"), + tf.compat.as_bytes("Karl"), + ] + ), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_parse_map_entry(self): + """test_parse_map_entry""" + reader_schema = """ + { + "type": "record", + "name": "nesting", + "fields": [ + { + "name": "map_of_records", + "type": { + "type": "map", + "values": { + "type": "record", + "name": "secondPerson", + "fields": [ + { + "name": "first_name", + "type": "string" + }, + { + "name": "age", + "type": "int" + } + ] + } + } + } + ] + } + """ + record_data = [ + { + "map_of_records": { + "first": {"first_name": "Herbert", "age": 70}, + "second": {"first_name": "Julia", "age": 30}, + } + }, + { + "map_of_records": { + "first": {"first_name": "Doug", "age": 55}, + "second": {"first_name": "Jess", "age": 66}, + } + }, + { + "map_of_records": { + "first": {"first_name": "Karl", "age": 32}, + "second": {"first_name": "Joan", "age": 21}, + } + }, + ] + # TODO(fraudies): Using FixedLenFeature([1], tf.dtypes.int32) this segfaults + features = { + "map_of_records['second'].age": tf.io.FixedLenFeature([], tf.dtypes.int32) + } + expected_data = [ + {"map_of_records['second'].age": tf.convert_to_tensor([30, 66, 21])} + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=3, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_parse_int_as_long_fail(self): + """test_parse_int_as_long_fail""" + schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "index", + "type": "int" + } + ] + } + """ + record_data = [{"index": 0}] + features = {"index": tf.io.FixedLenFeature([], tf.dtypes.int64)} + self._test_fail_dataset(schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_parse_int_as_sparse_type_fail(self): + """test_parse_int_as_sparse_type_fail""" + schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "index", + "type": "int" + } + ] + } + """ + record_data = [{"index": 5}] + features = { + "index": tf.io.SparseFeature( + index_key="index", 
value_key="value", dtype=tf.dtypes.float32, size=10 + ) + } + self._test_fail_dataset(schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_parse_float_as_double_fail(self): + """test_parse_float_as_double_fail""" + schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "weight", + "type": "float" + } + ] + } + """ + record_data = [{"weight": 0.5}] + features = {"weight": tf.io.FixedLenFeature([], tf.dtypes.float64)} + self._test_fail_dataset(schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_fixed_length_without_proper_default_fail(self): + """test_fixed_length_without_proper_default_fail""" + schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "int_list_type", + "type": { + "type":"array", + "items":"int" + } + } + ] + } + """ + record_data = [{"int_list_type": [0, 1, 2]}, {"int_list_type": [0, 1]}] + features = {"int_list_type": tf.io.FixedLenFeature([], tf.dtypes.int32)} + self._test_fail_dataset(schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_wrong_spelling_of_feature_name_fail(self): + """test_wrong_spelling_of_feature_name_fail""" + schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + {"name": "int_type", "type": "int"} + ] + }""" + record_data = [{"int_type": 0}] + features = {"wrong_spelling": tf.io.FixedLenFeature([], tf.dtypes.int32)} + self._test_fail_dataset(schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_wrong_index(self): + """test_wrong_index""" + schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "list_of_records", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name": "first_name", + "type": "string" + } + ] + } + } + } + ] + } + """ + record_data = [{"list_of_records": [{"first_name": "My name"}]}] + features = { + "list_of_records[2].name": tf.io.FixedLenFeature([], tf.dtypes.string) + } + self._test_fail_dataset(schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_with_variable_length(self): + """test_filter_with_variable_length""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name": "name", + "type": "string" + }, + { + "name": "gender", + "type": "string" + } + ] + } + } + } + ] + } + """ + record_data = [ + { + "guests": [ + {"name": "Hans", "gender": "male"}, + {"name": "Mary", "gender": "female"}, + {"name": "July", "gender": "female"}, + ] + }, + { + "guests": [ + {"name": "Joel", "gender": "male"}, + {"name": "JoAn", "gender": "female"}, + {"name": "Marc", "gender": "male"}, + ] + }, + ] + features = { + "guests[gender='male'].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ), + "guests[gender='female'].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ), + } + expected_data = [ + { + "guests[gender='male'].name": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [1, 1]], + values=[ + tf.compat.as_bytes("Hans"), + tf.compat.as_bytes("Joel"), + tf.compat.as_bytes("Marc"), + ], + dense_shape=[2, 2], + ), + 
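+                # Note, the key guests[gender='female'].name keeps, per record, only
+                # the array entries whose gender field equals the filter value, so
+                # this sparse tensor holds just the female names.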
"guests[gender='female'].name": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 1], [1, 0]], + values=[ + tf.compat.as_bytes("Mary"), + tf.compat.as_bytes("July"), + tf.compat.as_bytes("JoAn"), + ], + dense_shape=[2, 2], + ), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_with_empty_result(self): + """test_filter_with_empty_result""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name":"name", + "type":"string" + }, + { + "name":"gender", + "type":"string" + } + ] + } + } + } + ] + } + """ + record_data = [ + {"guests": [{"name": "Hans", "gender": "male"}]}, + {"guests": [{"name": "Joel", "gender": "male"}]}, + ] + features = { + "guests[gender='wrong_value'].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ) + } + expected_data = [ + { + "guests[gender='wrong_value'].name": tf.compat.v1.SparseTensorValue( + indices=np.empty(shape=[0, 2], dtype=np.int64), + values=np.empty(shape=[0], dtype=np.str), + dense_shape=np.asarray([2, 0]), + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_with_wrong_key_fail(self): + """test_filter_with_wrong_key_fail""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name":"name", + "type":"string" + } + ] + } + } + } + ] + } + """ + record_data = [{"guests": [{"name": "Hans"}]}] + features = { + "guests[wrong_key='female'].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ) + } + self._test_fail_dataset(reader_schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_with_wrong_pair_fail(self): + """test_filter_with_wrong_pair_fail""" + reader_schema = """ + { + "type":"record", + "name":"data_row", + "fields":[ + { + "name":"guests", + "type":{ + "type":"array", + "items":{ + "type":"record", + "name":"person", + "fields":[ + { + "name":"name", + "type":"string" + } + ] + } + } + } + ] + } + """ + record_data = [{"guests": [{"name": "Hans"}]}] + features = { + "guests[forgot_the_separator].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ) + } + self._test_fail_dataset(reader_schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_with_too_many_separators_fail(self): + """test_filter_with_too_many_separators_fail""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type":"record", + "name":"person", + "fields":[ + { + "name":"name", + "type":"string" + } + ] + } + } + } + ] + } + """ + record_data = [{"guests": [{"name": "Hans"}]}] + features = { + "guests[used=too=many=separators].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ) + } + 
self._test_fail_dataset(reader_schema, record_data, features, 1) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_for_nested_record(self): + """test_filter_for_nested_record""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name": "name", + "type": "string" + }, + { + "name": "gender", + "type": "string" + }, + { + "name": "address", + "type": { + "type": "record", + "name": "postal", + "fields": [ + { + "name":"street", + "type":"string" + }, + { + "name":"zip", + "type":"int" + }, + { + "name":"state", + "type":"string" + } + ] + } + } + ] + } + } + } + ] + } + """ + record_data = [ + { + "guests": [ + { + "name": "Hans", + "gender": "male", + "address": { + "street": "California St", + "zip": 94040, + "state": "CA", + }, + }, + { + "name": "Mary", + "gender": "female", + "address": {"street": "Ellis St", "zip": 29040, "state": "MA"}, + }, + ] + } + ] + features = { + "guests[gender='female'].address.street": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ) + } + expected_data = [ + { + "guests[gender='female']" + + ".address.street": tf.compat.v1.SparseTensorValue( + indices=[[0, 0]], + values=[tf.compat.as_bytes("Ellis St")], + dense_shape=[1, 1], + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_filter_with_bytes_as_type(self): + """test_filter_with_bytes_as_type""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name":"name", + "type":"bytes" + }, + { + "name":"gender", + "type":"bytes" + } + ] + } + } + } + ] + } + """ + record_data = [ + { + "guests": [ + {"name": b"Hans", "gender": b"male"}, + {"name": b"Mary", "gender": b"female"}, + {"name": b"July", "gender": b"female"}, + ] + }, + { + "guests": [ + {"name": b"Joel", "gender": b"male"}, + {"name": b"JoAn", "gender": b"female"}, + {"name": b"Marc", "gender": b"male"}, + ] + }, + ] + features = { + "guests[gender='male'].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ), + "guests[gender='female'].name": tfio.experimental.columnar.VarLenFeatureWithRank( + tf.dtypes.string + ), + } + expected_data = [ + { + "guests[gender='male'].name": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [1, 0], [1, 1]], + values=[ + tf.compat.as_bytes("Hans"), + tf.compat.as_bytes("Joel"), + tf.compat.as_bytes("Marc"), + ], + dense_shape=[2, 2], + ), + "guests[gender='female'].name": tf.compat.v1.SparseTensorValue( + indices=[[0, 0], [0, 1], [1, 0]], + values=[ + tf.compat.as_bytes("Mary"), + tf.compat.as_bytes("July"), + tf.compat.as_bytes("JoAn"), + ], + dense_shape=[2, 2], + ), + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + # @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_ignore_namespace(self): + """test_namespace""" + reader_schema = """ + { + "namespace": "com.test", + "type": "record", + "name": "simple", + "fields": [ + { + "name":"string_value", + "type":"string" 
+ } + ] + }""" + features = {"string_value": tf.io.FixedLenFeature([], tf.dtypes.string)} + record_data = [{"string_value": "a"}, {"string_value": "bb"}] + expected_data = [ + { + "string_value": tf.convert_to_tensor( + [tf.compat.as_bytes("a"), tf.compat.as_bytes("bb")] + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_broken_schema_fail(self): + """test_broken_schema_fail""" + valid_schema = """ + { + "type": "record", + "name": "row", + "fields": [ + {"name": "int_value", "type": "int"} + ] + }""" + record_data = [{"int_value": 0}] + broken_schema = """ + { + "type": "record", + "name": "row", + "fields": [ + {"name": "index", "type": "int"}, + {"name": "boolean_type"} + ] + }""" + features = {"index": tf.io.FixedLenFeature([], tf.dtypes.int64)} + self._test_fail_dataset( + valid_schema, record_data, features, 1, parser_schema=broken_schema + ) + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") + def test_some_optimization_broke_string_repeats_in_batch(self): + """test_some_optimization_broke_string_repeats_in_batch""" + # In the past this test failed but now passes + reader_schema = """ + { + "type": "record", + "name": "simple", + "fields": [ + { + "name":"string_value", + "type":"string" + } + ] + }""" + features = {"string_value": tf.io.FixedLenFeature([], tf.dtypes.string)} + record_data = [{"string_value": "aa"}, {"string_value": "bb"}] + expected_data = [ + { + "string_value": np.asarray( + [tf.compat.as_bytes("aa"), tf.compat.as_bytes("bb")] + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") + # Note current filters resolve to single item and we remove the dimension introduced by that + def test_filter_of_sparse_feature(self): + """test_filter_of_sparse_feature""" + reader_schema = """ + { + "type": "record", + "name": "data_row", + "fields": [ + { + "name": "guests", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "person", + "fields": [ + { + "name": "name", + "type": "string" + }, + { + "name": "gender", + "type": "string" + }, + { + "name": "address", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "postal", + "fields": [ + { + "name":"street", + "type":"string" + }, + { + "name":"zip", + "type":"long" + }, + { + "name":"street_no", + "type":"int" + } + ] + } + } + } + ] + } + } + } + ] + } + """ + record_data = [ + { + "guests": [ + { + "name": "Hans", + "gender": "male", + "address": [ + { + "street": "California St", + "zip": 94040, + "state": "CA", + "street_no": 1, + }, + { + "street": "New York St", + "zip": 32012, + "state": "NY", + "street_no": 2, + }, + ], + }, + { + "name": "Mary", + "gender": "female", + "address": [ + { + "street": "Ellis St", + "zip": 29040, + "state": "MA", + "street_no": 3, + } + ], + }, + ] + } + ] + features = { + "guests[gender='female'].address": tf.io.SparseFeature( + index_key="zip", + value_key="street_no", + dtype=tf.dtypes.int32, + size=94040, + ) + } + # Note, the filter introduces an additional index, + # because filters can have multiple items + expected_data = [ + { + "guests[gender='female'].address": tf.compat.v1.SparseTensorValue( + np.asarray([[0, 0, 29040]]), + 
np.asarray([3]), + np.asarray([1, 1, 94040]), + ) + } + ] + self._test_pass_dataset( + reader_schema=reader_schema, + record_data=record_data, + expected_data=expected_data, + features=features, + batch_size=2, + ) + + +if __name__ == "__main__": + test.main() From b6183fc830d37940a5656a2792b29f1790348b1d Mon Sep 17 00:00:00 2001 From: Lijuan Zhang Date: Mon, 1 May 2023 17:50:33 +0000 Subject: [PATCH 2/6] fix linter --- tests/test_parse_avro_eager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parse_avro_eager.py b/tests/test_parse_avro_eager.py index 00f3036ba..7410aa245 100644 --- a/tests/test_parse_avro_eager.py +++ b/tests/test_parse_avro_eager.py @@ -388,7 +388,7 @@ def _test_pass_dataset( features, reader_schema, batch_size, - **kwargs + **kwargs, ): """_test_pass_dataset""" filenames = AvroDatasetTestBase._setup_files( From 9d449547acf46a54a3a3c62c5f9c702d01f6a242 Mon Sep 17 00:00:00 2001 From: Lijuan Zhang Date: Wed, 3 May 2023 20:39:22 +0000 Subject: [PATCH 3/6] delete test_parse_avro_eager.py --- .../test_atds_avro/test_atds_dataset_eager.py | 2 +- tests/test_parse_avro_eager.py | 2417 ----------------- 2 files changed, 1 insertion(+), 2418 deletions(-) delete mode 100644 tests/test_parse_avro_eager.py diff --git a/tests/test_atds_avro/test_atds_dataset_eager.py b/tests/test_atds_avro/test_atds_dataset_eager.py index 85aa03d5c..7490970d7 100644 --- a/tests/test_atds_avro/test_atds_dataset_eager.py +++ b/tests/test_atds_avro/test_atds_dataset_eager.py @@ -28,7 +28,7 @@ from avro.io import DatumWriter from parameterized import parameterized from tensorflow.python.framework import errors -from tests.test_parse_avro_eager import AvroDatasetTestBase, AvroRecordsToFile +from tests.test_parse_avro import AvroDatasetTestBase, AvroRecordsToFile from tensorflow_io.python.ops import core_ops from tensorflow_io.python.experimental.atds.dataset import ATDSDataset from tensorflow_io.python.experimental.atds.features import ( diff --git a/tests/test_parse_avro_eager.py b/tests/test_parse_avro_eager.py deleted file mode 100644 index 7410aa245..000000000 --- a/tests/test_parse_avro_eager.py +++ /dev/null @@ -1,2417 +0,0 @@ -# Copyright 2023 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""AvroDatasetTest""" -# pylint: disable=line-too-long -# see https://github.com/tensorflow/io/pull/962#issuecomment-632346602 - -import sys -from functools import reduce -import os -import tempfile -from io import BytesIO -import pytest -import numpy as np - -import tensorflow as tf -from avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder -from avro.datafile import DataFileReader, DataFileWriter -from avro.schema import Parse as parse -import tensorflow_io as tfio - -if sys.platform == "darwin": - pytest.skip("TODO: skip macOS", allow_module_level=True) - - -class AvroRecordsToFile: - """AvroRecordsToFile""" - - def __init__(self, filename, writer_schema, codec="deflate"): - """ - - :param filename: - :param writer_schema: - :param codec: - """ - self.schema = AvroParser(writer_schema).get_schema_object() - self.filename = filename - self.codec = codec - - def write_records(self, records): - with open(self.filename, "wb") as out: - writer = DataFileWriter(out, DatumWriter(), self.schema, codec=self.codec) - for record in records: - writer.append(record) - writer.close() - - -class AvroFileToRecords: - """AvroFileToRecords""" - - def __init__(self, filename, reader_schema=None): - """ - Reads records as strings where each row is serialized separately - - :param filename: The filename from where to load the records - :param reader_schema: Schema used for reading - - :return: An array of serialized string with one string per record - """ - self.records = [] - - with open(filename, "rb") as file_handle: - datum_reader = ( - DatumReader(reader_schema=AvroParser(reader_schema).get_schema_object()) - if reader_schema - else DatumReader() - ) - reader = DataFileReader(file_handle, datum_reader) - - self.records += list(reader) - - def get_records(self): - return self.records - - -class AvroSchemaReader: - """AvroSchemaReader""" - - def __init__(self, filename): - """ - Reads the schema from a file into json string - """ - with open(filename, "rb") as file_handle: - reader = DataFileReader(file_handle, DatumReader()) - self.schema_json = "" - self.schema_json = str(reader.datum_reader.writer_schema) - - def get_schema_json(self): - return self.schema_json - - -class AvroParser: - """AvroParser""" - - def __init__(self, schema_json): - """ - Create an avro parser mostly to abstract away the API change between - avro and avro-python3 - - :param schema_json: - """ - self.schema_object = parse(schema_json) - - def get_schema_object(self): - return self.schema_object - - -class AvroDeserializer: - """AvroDeserializer""" - - def __init__(self, schema_json): - """ - Create an avro deserializer. - - :param schema_json: Json string of the schema. - """ - schema_object = AvroParser(schema_json).get_schema_object() - # No schema resolution - self.datum_reader = DatumReader(schema_object, schema_object) - - def deserialize(self, serialized_bytes): - """ - Deserialize an avro record from bytes. - - :param serialized_bytes: The serialized bytes input. - - :return: The de-serialized record structure in python as map-list object. - """ - return self.datum_reader.read(BinaryDecoder(BytesIO(serialized_bytes))) - - -class AvroSerializer: - """AvroSerializer""" - - def __init__(self, schema_json): - """ - Create an avro serializer. - - :param schema_json: Json string of the schema. 
- """ - self.datum_writer = DatumWriter(AvroParser(schema_json).get_schema_object()) - - def serialize(self, datum): - """ - Serialize a datum into a avro formatted string. - - :param datum: The avro datum. - - :return: The serialized bytes. - """ - writer = BytesIO() - self.datum_writer.write(datum, BinaryEncoder(writer)) - return writer.getvalue() - - -class AvroDatasetTestBase(tf.test.TestCase): - """AvroDatasetTestBase""" - - @staticmethod - def _setup_files(writer_schema, records): - """setup_files""" - # Write test records into temporary output directory - filename = os.path.join(tempfile.mkdtemp(), "test.avro") - writer = AvroRecordsToFile(filename=filename, writer_schema=writer_schema) - writer.write_records(records) - - return [filename] - - def assert_values_equal(self, expected, actual): - """Asserts that two values are equal.""" - if isinstance(expected, dict): - self.assertItemsEqual(list(expected.keys()), list(actual.keys())) - for k in expected.keys(): - self.assert_values_equal(expected[k], actual[k]) - elif isinstance(expected, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)): - self.assertAllEqual(expected.indices, actual.indices) - self.assertAllEqual(expected.values, actual.values) - self.assertAllEqual(expected.dense_shape, actual.dense_shape) - else: - self.assertAllEqual(expected, actual) - - def assert_data_equal(self, expected, actual): - """assert_data_equal""" - - def _assert_equal(expected, actual): - for name, datum in expected.items(): - self.assert_values_equal(expected=datum, actual=actual[name]) - - if isinstance(expected, tuple): - assert isinstance( - expected, tuple - ), f"Found type {type(actual)} but expected type {tuple}" - assert ( - len(expected) == 2 - ), "Found {} components in expected dataset but must have {}".format( - len(expected), 2 - ) - - assert ( - len(actual) == 2 - ), "Found {} components in actual dataset but expected {}".format( - len(actual), 2 - ) - - expected_features, expected_labels = expected - actual_features, actual_labels = actual - - _assertEqual(expected_features, actual_features) - _assertEqual(expected_labels, actual_labels) - - else: - _assert_equal(expected, actual) - - def _verify_output(self, expected_data, actual_dataset): - - next_data = iter(actual_dataset) - - for expected in expected_data: - self.assert_data_equal(expected=expected, actual=next(next_data)) - - -class AvroRecordDatasetTest(AvroDatasetTestBase): - """AvroRecordDatasetTest""" - - @staticmethod - def _load_records_as_tensors(filenames, schema): - serializer = AvroSerializer(schema) - return map( - lambda s: tf.convert_to_tensor( - serializer.serialize(s), dtype=tf.dtypes.string - ), - reduce( - lambda a, b: a + b, - [AvroFileToRecords(filename).get_records() for filename in filenames], - ), - ) - - def _test_pass_dataset(self, writer_schema, record_data, **kwargs): - """test_pass_dataset""" - filenames = AvroRecordDatasetTest._setup_files( - writer_schema=writer_schema, records=record_data - ) - expected_data = AvroRecordDatasetTest._load_records_as_tensors( - filenames, writer_schema - ) - actual_dataset = tfio.experimental.columnar.AvroRecordDataset( - filenames=filenames, - num_parallel_reads=kwargs.get("num_parallel_reads", 1), - reader_schema=kwargs.get("reader_schema"), - ) - data = iter(actual_dataset) - for expected in expected_data: - self.assert_values_equal(expected=expected, actual=next(data)) - - def _test_pass_dataset_resolved( - self, writer_schema, reader_schema, record_data, **kwargs - ): - """test_pass_dataset_resolved""" - 
filenames = AvroRecordDatasetTest._setup_files( - writer_schema=writer_schema, records=record_data - ) - expected_data = AvroRecordDatasetTest._load_records_as_tensors( - filenames, reader_schema - ) - actual_dataset = tfio.experimental.columnar.AvroRecordDataset( - filenames=filenames, - num_parallel_reads=kwargs.get("num_parallel_reads", 1), - reader_schema=reader_schema, - ) - - data = iter(actual_dataset) - for expected in expected_data: - self.assert_values_equal(expected=expected, actual=next(data)) - - def test_wout_reader_schema(self): - """test_wout_reader_schema""" - writer_schema = """{ - "type": "record", - "name": "dataTypes", - "fields": [ - { - "name":"index", - "type":"int" - }, - { - "name":"string_value", - "type":"string" - } - ]}""" - record_data = [ - {"index": 0, "string_value": ""}, - {"index": 1, "string_value": "SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"}, - { - "index": 2, - "string_value": "ABCDEFGHIJKLMNOPQRSTUVW" - + "Zabcdefghijklmnopqrstuvwz0123456789", - }, - ] - self._test_pass_dataset(writer_schema=writer_schema, record_data=record_data) - - @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") - def test_with_schema_projection(self): - """test_with_schema_projection""" - writer_schema = """{ - "type": "record", - "name": "dataTypes", - "fields": [ - { - "name":"index", - "type":"int" - }, - { - "name":"string_value", - "type":"string" - } - ]}""" - # Test projection - reader_schema = """{ - "type": "record", - "name": "dataTypes", - "fields": [ - { - "name":"string_value", - "type":"string" - } - ]}""" - record_data = [ - {"index": 0, "string_value": ""}, - {"index": 1, "string_value": "SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"}, - { - "index": 2, - "string_value": "ABCDEFGHIJKLMNOPQRSTUVWZabcde" - + "fghijklmnopqrstuvwz0123456789", - }, - ] - self._test_pass_dataset_resolved( - writer_schema=writer_schema, - reader_schema=reader_schema, - record_data=record_data, - ) - - def test_schema_type_promotion(self): - """test_schema_type_promotion""" - writer_schema = """{ - "type": "record", - "name": "row", - "fields": [ - {"name": "int_value", "type": "int"}, - {"name": "long_value", "type": "long"} - ]}""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - {"name": "int_value", "type": "long"}, - {"name": "long_value", "type": "double"} - ]}""" - record_data = [ - {"int_value": 0, "long_value": 111}, - {"int_value": 1, "long_value": 222}, - ] - self._test_pass_dataset_resolved( - writer_schema=writer_schema, - reader_schema=reader_schema, - record_data=record_data, - ) - - -class MakeAvroRecordDatasetTest(AvroDatasetTestBase): - """MakeAvroRecordDatasetTest""" - - def _test_pass_dataset( - self, - writer_schema, - record_data, - expected_data, - features, - reader_schema, - batch_size, - **kwargs, - ): - """_test_pass_dataset""" - filenames = AvroDatasetTestBase._setup_files( - writer_schema=writer_schema, records=record_data - ) - - actual_dataset = tfio.experimental.columnar.make_avro_record_dataset( - file_pattern=filenames, - features=features, - batch_size=batch_size, - reader_schema=reader_schema, - shuffle=kwargs.get("shuffle", None), - num_epochs=kwargs.get("num_epochs", None), - ) - - self._verify_output(expected_data=expected_data, actual_dataset=actual_dataset) - - def test_variable_length_failed_with_wrong_rank(self): - """test_variable_length_failed_with_wrong_rank""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list_list", - "type": { - "type": "array", - "items": { - 
"type": "array", - "items": "int" - } - } - } - ]}""" - record_data = [ - {"int_list_list": [[1, 2], [3, 4, 5]]}, - {"int_list_list": [[6]]}, - {"int_list_list": [[6]]}, - ] - features = { - "int_list_list[*][*]": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.int32 - ) - } - expected_data = [ - { - "int_list_list[*][*]": tf.compat.v1.SparseTensorValue( - indices=[ - [0, 0, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 1], - [0, 1, 2], - [1, 0, 0], - [2, 0, 0], - ], - values=[1, 2, 3, 4, 5, 6, 6], - dense_shape=[3, 2, 3], - ) - } - ] - with self.assertRaises(Exception) as context: - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - writer_schema=reader_schema, - batch_size=3, - num_epochs=1, - ) - self.assertTrue( - "is not compatible with supplied shape" in context.exception - ) - - def test_variable_length_passed_with_rank(self): - """test_variable_length_passed_with_rank""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list_list", - "type": { - "type": "array", - "items": { - "type": "array", - "items": "int" - } - } - } - ]}""" - record_data = [ - {"int_list_list": [[1, 2], [3, 4, 5]]}, - {"int_list_list": [[6]]}, - {"int_list_list": [[6]]}, - ] - features = { - "int_list_list[*][*]": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.int32, 2 - ) - } - expected_data = [ - { - "int_list_list[*][*]": tf.compat.v1.SparseTensorValue( - indices=[ - [0, 0, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 1], - [0, 1, 2], - [1, 0, 0], - [2, 0, 0], - ], - values=[1, 2, 3, 4, 5, 6, 6], - dense_shape=[3, 2, 3], - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - writer_schema=reader_schema, - batch_size=3, - num_epochs=1, - ) - - def test_batching(self): - """test_batching""" - writer_schema = """{ - "type": "record", - "name": "row", - "fields": [ - {"name": "int_value", "type": "int"} - ]}""" - record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] - features = {"int_value": tf.io.FixedLenFeature([], tf.dtypes.int32)} - expected_data = [ - {"int_value": tf.convert_to_tensor([0, 1])}, - {"int_value": tf.convert_to_tensor([2])}, - ] - self._test_pass_dataset( - writer_schema=writer_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - reader_schema=writer_schema, - batch_size=2, - num_epochs=1, - ) - - def test_fixed_length_list(self): - """test_fixed_length_list""" - writer_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [ - {"int_list": [0, 1, 2]}, - {"int_list": [3, 4, 5]}, - {"int_list": [6, 7, 8]}, - ] - features = {"int_list[*]": tf.io.FixedLenFeature([3], tf.dtypes.int32)} - expected_data = [ - {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])} - ] - - self._test_pass_dataset( - writer_schema=writer_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - reader_schema=writer_schema, - batch_size=3, - num_epochs=1, - ) - - -class ParseAvroDatasetTest(AvroDatasetTestBase): - """AvroDatasetTest""" - - def assert_data_equal(self, expected, actual): - """assert_data_equal""" - for name, datum in expected.items(): - self.assert_values_equal(expected=datum, actual=actual[name]) - - @staticmethod - def _batcher(iterable, step): - n = 
len(iterable) - for ndx in range(0, n, step): - yield iterable[ndx : min(ndx + step, n)] - - def _test_pass_dataset( - self, reader_schema, record_data, expected_data, features, batch_size - ): - """_test_pass_dataset""" - # Note, The batch size could be inferred from the expected data but found it better to be - # explicit here - serializer = AvroSerializer(reader_schema) - for expected_datum, actual_records in zip( - expected_data, ParseAvroDatasetTest._batcher(record_data, batch_size) - ): - # Get any key out of expected datum - actual_datum = tfio.experimental.columnar.parse_avro( - serialized=[ - tf.convert_to_tensor(serializer.serialize(r)) - for r in actual_records - ], - reader_schema=reader_schema, - features=features, - ) - self.assert_data_equal(expected=expected_datum, actual=actual_datum) - - def _test_fail_dataset( - self, reader_schema, record_data, features, batch_size, **kwargs - ): - parser_schema = kwargs.get("parser_schema", reader_schema) - serializer = AvroSerializer(reader_schema) - for actual_records in ParseAvroDatasetTest._batcher(record_data, batch_size): - # Get any key out of expected datum - with self.assertRaises(tf.errors.OpError): - _ = tfio.experimental.columnar.parse_avro( - serialized=[ - tf.convert_to_tensor(serializer.serialize(r)) - for r in actual_records - ], - reader_schema=parser_schema, - features=features, - ) - - @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") - def test_primitive_types(self): - """test_primitive_types""" - reader_schema = """{ - "type": "record", - "name": "dataTypes", - "fields": [ - { - "name":"string_value", - "type":"string" - }, - { - "name":"bytes_value", - "type":"bytes" - }, - { - "name":"double_value", - "type":"double" - }, - { - "name":"float_value", - "type":"float" - }, - { - "name":"long_value", - "type":"long" - }, - { - "name":"int_value", - "type":"int" - }, - { - "name":"boolean_value", - "type":"boolean" - } - ]}""" - record_data = [ - { - "string_value": "", - "bytes_value": b"", - "double_value": 0.0, - "float_value": 0.0, - "long_value": 0, - "int_value": 0, - "boolean_value": False, - }, - { - "string_value": "SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?", - "bytes_value": b"SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?", - "double_value": -1.0, - "float_value": -1.0, - "long_value": 9223372036854775807, - "int_value": 2147483648 - 1, - "boolean_value": True, - }, - { - "string_value": "ABCDEFGHIJKLMNOPQRSTUVWZabcdefghi" - + "jklmnopqrstuvwz0123456789", - "bytes_value": b"ABCDEFGHIJKLMNOPQRSTUVWZab" - + "cdefghijklmnopqrstuvwz0123456789", - "double_value": 1.0, - "float_value": 1.0, - "long_value": -9223372036854775807 - 1, - "int_value": -2147483648, - "boolean_value": False, - }, - ] - features = { - "string_value": tf.io.FixedLenFeature([], tf.dtypes.string), - "bytes_value": tf.io.FixedLenFeature([], tf.dtypes.string), - "double_value": tf.io.FixedLenFeature([], tf.dtypes.float64), - "float_value": tf.io.FixedLenFeature([], tf.dtypes.float32), - "long_value": tf.io.FixedLenFeature([], tf.dtypes.int64), - "int_value": tf.io.FixedLenFeature([], tf.dtypes.int32), - "boolean_value": tf.io.FixedLenFeature([], tf.dtypes.bool), - } - expected_data = [ - { - "string_value": tf.convert_to_tensor( - [ - tf.compat.as_bytes(""), - tf.compat.as_bytes("SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"), - tf.compat.as_bytes( - "ABCDEFGHIJKLMNOPQRSTUVWZabcdefghijklmnopqrstuvwz0123456789" - ), - ] - ), - "bytes_value": tf.convert_to_tensor( - [ - tf.compat.as_bytes(""), - 
tf.compat.as_bytes("SpecialChars@!#$%^&*()-_=+{}[]|/`~\\'?"), - tf.compat.as_bytes( - "ABCDEFGHIJKLMNOPQRSTUVWZabcdefghijklmnopqrstuvwz0123456789" - ), - ] - ), - # Note, conversion utils `ops.EagerTensor` only seems to support single precision. - # Proper values for double precision are 1.7976931348623157e+308, -1.7976931348623157e+308 - # In addition, precision is not maintained by the conversion, thus, I simplify set 1.0 - # and -1.0 instead of proper values 3.40282306074e+38 and -3.40282306074e+38. - "double_value": tf.convert_to_tensor([0.0, -1.0, 1.0]), - "float_value": tf.convert_to_tensor([0.0, -1.0, 1.0]), - "long_value": tf.convert_to_tensor( - [0, 9223372036854775807, -9223372036854775807 - 1] - ), - "int_value": tf.convert_to_tensor([0, 2147483648 - 1, -2147483648]), - "boolean_value": tf.convert_to_tensor([False, True, False]), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_fixed_enum_types(self): - """test_fixed_enum_types""" - reader_schema = """{ - "type": "record", - "name": "dataTypes", - "fields": [ - { - "name":"fixed_value", - "type": { - "name": "TenBytes", - "type": "fixed", - "size": 10 - } - }, - { - "name":"enum_value", - "type":{ - "name": "Color", - "type": "enum", - "symbols": ["BLUE", "GREEN", "BROWN"] - } - } - ]}""" - record_data = [ - {"fixed_value": b"0123456789", "enum_value": "BLUE"}, - {"fixed_value": b"1234567890", "enum_value": "GREEN"}, - {"fixed_value": b"2345678901", "enum_value": "BROWN"}, - ] - features = { - "fixed_value": tf.io.FixedLenFeature([], tf.dtypes.string), - "enum_value": tf.io.FixedLenFeature([], tf.dtypes.string), - } - expected_data = [ - { - "fixed_value": tf.convert_to_tensor( - [ - tf.compat.as_bytes("0123456789"), - tf.compat.as_bytes("1234567890"), - tf.compat.as_bytes("2345678901"), - ] - ), - "enum_value": tf.convert_to_tensor([b"BLUE", b"GREEN", b"BROWN"]), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_batching(self): - """test_batching""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - {"name": "int_value", "type": "int"} - ]}""" - record_data = [{"int_value": 0}, {"int_value": 1}, {"int_value": 2}] - features = {"int_value": tf.io.FixedLenFeature([], tf.dtypes.int32)} - expected_data = [ - {"int_value": tf.convert_to_tensor([0, 1])}, - {"int_value": tf.convert_to_tensor([2])}, - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_padding_from_default(self): - """test_padding_from_default""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "fixed_len", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [ - {"fixed_len": [0]}, - {"fixed_len": [1]}, - {"fixed_len": [2]}, - {"fixed_len": [3]}, - ] - features = { - "fixed_len[*]": tf.io.FixedLenFeature( - [2], tf.dtypes.int32, default_value=[0, 1] - ) - } - # Note, last batch is NOT dropped - expected_data = [ - {"fixed_len[*]": tf.convert_to_tensor([[0, 1], [1, 1], [2, 1]])}, - 
{"fixed_len[*]": tf.convert_to_tensor([[3, 1]])}, - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_batching_with_default(self): - """test_batching_with_default""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "fixed_len", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [ - {"fixed_len": [0, 1, 2]}, - {"fixed_len": [3, 4, 5]}, - {"fixed_len": [6, 7, 8]}, - ] - features = { - "fixed_len[*]": tf.io.FixedLenFeature( - [None, 3], tf.dtypes.int32, default_value=[0, 1, 2] - ) - } - expected_data = [ - {"fixed_len[*]": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5]])}, - {"fixed_len[*]": tf.convert_to_tensor([[6, 7, 8]])}, - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - def test_union_with_null(self): - reader_schema = """{ - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "possible_float_type", - "type": [ - "null", - "float" - ] - } - ] - } - """ - record_data = [ - {"possible_float_type": 1.0}, - {"possible_float_type": None}, - {"possible_float_type": -1.0}, - ] - features = { - "possible_float_type:float": tf.io.FixedLenFeature( - [], tf.dtypes.float32, default_value=0.0 - ) - } - # If we have a default, then we use that in the place of the None - expected_data = [ - {"possible_float_type:float": tf.convert_to_tensor([1.0, 0.0, -1.0])} - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - def test_null_union_primitive_type(self): - reader_schema = """{ - "type":"record", - "name":"data_row", - "fields":[ - { - "name":"multi_type", - "type":[ - "null", - "boolean", - "int", - "long", - "float", - "double", - "string" - ] - } - ] - } - """ - record_data = [ - {"multi_type": None}, - {"multi_type": True}, # written as double(1.0) - {"multi_type": int(1)}, # written as double(1.0) - {"multi_type": 2}, # written as double(2.0) - {"multi_type": float(3.0)}, # written as double(3.0) - {"multi_type": 4.0}, # written as double (4.0) - {"multi_type": "abc"}, - ] - features = { - "multi_type:boolean": tf.io.FixedLenFeature( - [], tf.dtypes.bool, default_value=False - ), - "multi_type:int": tf.io.FixedLenFeature( - [], tf.dtypes.int32, default_value=int(0) - ), - "multi_type:long": tf.io.FixedLenFeature( - [], tf.dtypes.int64, default_value=0 - ), - "multi_type:float": tf.io.FixedLenFeature( - [], tf.dtypes.float32, default_value=float(0.0) - ), - "multi_type:double": tf.io.FixedLenFeature( - [], tf.dtypes.float64, default_value=0.0 - ), - "multi_type:string": tf.io.FixedLenFeature( - [], tf.dtypes.string, default_value="" - ), - } - expected_data = [ - { - "multi_type:boolean": tf.convert_to_tensor( - [False, False, False, False, False, False, False], - dtype=tf.dtypes.bool, - ), - "multi_type:int": tf.convert_to_tensor( - [0, 0, 0, 0, 0, 0, 0], dtype=tf.dtypes.int32 - ), - "multi_type:long": tf.convert_to_tensor( - [0, 0, 0, 0, 0, 0, 0], dtype=tf.dtypes.int64 - ), - "multi_type:float": tf.convert_to_tensor( - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=tf.dtypes.float32 - ), - "multi_type:double": tf.convert_to_tensor( - [0.0, 1.0, 1.0, 2.0, 3.0, 4.0, 0.0], dtype=tf.dtypes.float64 - ), - 
"multi_type:string": tf.convert_to_tensor( - [ - tf.compat.as_bytes(""), - tf.compat.as_bytes(""), - tf.compat.as_bytes(""), - tf.compat.as_bytes(""), - tf.compat.as_bytes(""), - tf.compat.as_bytes(""), - tf.compat.as_bytes("abc"), - ] - ), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=7, - ) - - def test_union_without_default(self): - reader_schema = """{ - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "possible_float_type", - "type": [ - "null", - "float" - ] - } - ] - } - """ - record_data = [{"possible_float_type": None}] - features = { - "possible_float_type:float": tf.io.FixedLenFeature([], tf.dtypes.float32) - } - self._test_fail_dataset(reader_schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_fixed_length_list(self): - """test_fixed_length_list""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [ - {"int_list": [0, 1, 2]}, - {"int_list": [3, 4, 5]}, - {"int_list": [6, 7, 8]}, - ] - features = {"int_list[*]": tf.io.FixedLenFeature([3], tf.dtypes.int32)} - expected_data = [ - {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])} - ] - - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_fixed_length_with_default_vector(self): - """test_fixed_length_with_default_vector""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [{"int_list": [0, 1, 2]}, {"int_list": [3]}, {"int_list": [6, 7]}] - features = { - "int_list[*]": tf.io.FixedLenFeature( - [None, 3], tf.dtypes.int32, default_value=[0, 1, 2] - ) - } - expected_data = [ - {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 1, 2], [6, 7, 2]])} - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_fixed_length_with_default_scalar(self): - """test_fixed_length_with_default_scalar""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [{"int_list": [0, 1, 2]}, {"int_list": [3]}, {"int_list": [6, 7]}] - features = { - "int_list[*]": tf.io.FixedLenFeature( - [None], tf.dtypes.int32, default_value=0 - ) - } - expected_data = [ - {"int_list[*]": tf.convert_to_tensor([[0, 1, 2], [3, 0, 0], [6, 7, 0]])} - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_dense_2d(self): - """test_dense_2d""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": - { - "name" : "name", - "type" : "record", - "fields" : [ - { - "name": "nested_int_list", - "type": - { - "type": "array", - "items": "int" 
- } - } - ] - } - } - } - ]}""" - record_data = [ - { - "int_list": [ - {"nested_int_list": [1, 2, 3]}, - {"nested_int_list": [4, 5, 6]}, - ] - }, - { - "int_list": [ - {"nested_int_list": [7, 8, 9]}, - {"nested_int_list": [10, 11, 12]}, - ] - }, - ] - features = { - "int_list[*].nested_int_list[*]": tf.io.FixedLenFeature( - [2, 3], tf.dtypes.int32 - ) - } - expected_data = [ - { - "int_list[*].nested_int_list[*]": tf.convert_to_tensor( - [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_dense_array_3d(self): - """test_dense_array_3d""" - # Here we use arrays directly for the nesting - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": { - "type": "array", - "items": "int" - } - } - } - ] - }""" - record_data = [ - {"int_list": [[0, 1, 2], [10, 11, 12], [20, 21, 22]]}, - {"int_list": [[1, 2, 3], [11, 12, 13], [21, 22, 23]]}, - ] - # Note, need to at least define the rank of the data, dimension can be unknown - # This is a limitation inside TensorFlow where shape ranks need to be known - # inside _from_compatible_tensor_list - features = { - "int_list[*][*]": tf.io.FixedLenFeature([None, None], tf.dtypes.int32) - } - # Note, the outer dimension is the batch dimension - expected_data = [ - { - "int_list[*][*]": tf.convert_to_tensor( - [ - [[0, 1, 2], [10, 11, 12], [20, 21, 22]], - [[1, 2, 3], [11, 12, 13], [21, 22, 23]], - ] - ) - }, - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") - def test_sparse_feature(self): - """test_sparse_feature""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "sparse_type", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "sparse_triplet", - "fields": [ - { - "name":"index", - "type":"long" - }, - { - "name":"value", - "type":"float" - } - ] - } - } - } - ]}""" - record_data = [ - {"sparse_type": [{"index": 0, "value": 5.0}, {"index": 3, "value": 2.0}]}, - {"sparse_type": [{"index": 2, "value": 7.0}]}, - {"sparse_type": [{"index": 1, "value": 6.0}]}, - {"sparse_type": [{"index": 3, "value": 3.0}]}, - ] - features = { - "sparse_type": tf.io.SparseFeature( - index_key="index", value_key="value", dtype=tf.dtypes.float32, size=4 - ) - } - expected_data = [ - { - "sparse_type": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [0, 3], [1, 2]], - values=[5.0, 2.0, 7.0], - dense_shape=[2, 4], - ) - }, - { - "sparse_type": tf.compat.v1.SparseTensorValue( - indices=[[0, 1], [1, 3]], values=[6.0, 3.0], dense_shape=[2, 4] - ) - }, - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") - def test_type_reuse(self): - """test_type_reuse""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "first_value", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Tuple", - "fields": [ - { - "name":"index", - "type":"long" - }, - { - "name":"value", - "type":"float" - } - ] - } - } 
- }, - { - "name": "second_value", - "type": { - "type": "array", - "items": "Tuple" - } - } - ] - }""" - record_data = [ - { - "first_value": [{"index": 0, "value": 5.0}, {"index": 3, "value": 2.0}], - "second_value": [{"index": 2, "value": 7.0}], - }, - { - "first_value": [{"index": 0, "value": 2.0}], - "second_value": [{"index": 1, "value": 2.0}], - }, - ] - features = { - "first_value": tf.io.SparseFeature( - index_key="index", value_key="value", dtype=tf.dtypes.float32, size=4 - ), - "second_value": tf.io.SparseFeature( - index_key="index", value_key="value", dtype=tf.dtypes.float32, size=3 - ), - } - expected_data = [ - { - "first_value": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [0, 3], [1, 0]], - values=[5.0, 2.0, 2.0], - dense_shape=[2, 4], - ), - "second_value": tf.compat.v1.SparseTensorValue( - indices=[[0, 2], [1, 1]], values=[7.0, 2.0], dense_shape=[2, 3] - ), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_variable_length(self): - """test_variable_length""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list", - "type": { - "type": "array", - "items": "int" - } - } - ]}""" - record_data = [{"int_list": [1, 2]}, {"int_list": [3, 4, 5]}, {"int_list": [6]}] - features = { - "int_list[*]": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.int32, 1 - ) - } - expected_data = [ - { - "int_list[*]": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0]], - values=[1, 2, 3, 4, 5, 6], - dense_shape=[3, 3], - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_variable_length_2d(self): - """test_variable_length_2d""" - reader_schema = """{ - "type": "record", - "name": "row", - "fields": [ - { - "name": "int_list_list", - "type": { - "type": "array", - "items": { - "type": "array", - "items": "int" - } - } - } - ]}""" - record_data = [ - {"int_list_list": [[1, 2], [3, 4, 5]]}, - {"int_list_list": [[6]]}, - {"int_list_list": [[6]]}, - ] - features = { - "int_list_list[*][*]": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.int32, 2 - ) - } - expected_data = [ - { - "int_list_list[*][*]": tf.compat.v1.SparseTensorValue( - indices=[ - [0, 0, 0], - [0, 0, 1], - [0, 1, 0], - [0, 1, 1], - [0, 1, 2], - [1, 0, 0], - [2, 0, 0], - ], - values=[1, 2, 3, 4, 5, 6, 6], - dense_shape=[3, 2, 3], - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_nesting(self): - """test_nesting""" - reader_schema = """{ - "type": "record", - "name": "nesting", - "fields": [ - { - "name": "nested_record", - "type": { - "type": "record", - "name": "nested_values", - "fields": [ - { - "name": "nested_int", - "type": "int" - }, - { - "name": "nested_float_list", - "type": { - "type": "array", - "items": "float" - } - } - ] - } - }, - { - "name": "list_of_records", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name": "first_name", - "type": "string" - 
}, - { - "name": "age", - "type": "int" - } - ] - } - } - } - ] - } - """ - record_data = [ - { - "nested_record": {"nested_int": 0, "nested_float_list": [0.0, 10.0]}, - "list_of_records": [{"first_name": "Herbert", "age": 70}], - }, - { - "nested_record": {"nested_int": 5, "nested_float_list": [-2.0, 7.0]}, - "list_of_records": [ - {"first_name": "Doug", "age": 55}, - {"first_name": "Jess", "age": 66}, - {"first_name": "Julia", "age": 30}, - ], - }, - { - "nested_record": {"nested_int": 7, "nested_float_list": [3.0, 4.0]}, - "list_of_records": [{"first_name": "Karl", "age": 32}], - }, - ] - features = { - "nested_record.nested_int": tf.io.FixedLenFeature([], tf.dtypes.int32), - "nested_record.nested_float_list[*]": tf.io.FixedLenFeature( - [2], tf.dtypes.float32 - ), - "list_of_records[0].first_name": tf.io.FixedLenFeature( - [], tf.dtypes.string - ), - } - expected_data = [ - { - "nested_record.nested_int": tf.convert_to_tensor([0, 5, 7]), - "nested_record.nested_float_list[*]": tf.convert_to_tensor( - [[0.0, 10.0], [-2.0, 7.0], [3.0, 4.0]] - ), - "list_of_records[0].first_name": tf.convert_to_tensor( - [ - tf.compat.as_bytes("Herbert"), - tf.compat.as_bytes("Doug"), - tf.compat.as_bytes("Karl"), - ] - ), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_parse_map_entry(self): - """test_parse_map_entry""" - reader_schema = """ - { - "type": "record", - "name": "nesting", - "fields": [ - { - "name": "map_of_records", - "type": { - "type": "map", - "values": { - "type": "record", - "name": "secondPerson", - "fields": [ - { - "name": "first_name", - "type": "string" - }, - { - "name": "age", - "type": "int" - } - ] - } - } - } - ] - } - """ - record_data = [ - { - "map_of_records": { - "first": {"first_name": "Herbert", "age": 70}, - "second": {"first_name": "Julia", "age": 30}, - } - }, - { - "map_of_records": { - "first": {"first_name": "Doug", "age": 55}, - "second": {"first_name": "Jess", "age": 66}, - } - }, - { - "map_of_records": { - "first": {"first_name": "Karl", "age": 32}, - "second": {"first_name": "Joan", "age": 21}, - } - }, - ] - # TODO(fraudies): Using FixedLenFeature([1], tf.dtypes.int32) this segfaults - features = { - "map_of_records['second'].age": tf.io.FixedLenFeature([], tf.dtypes.int32) - } - expected_data = [ - {"map_of_records['second'].age": tf.convert_to_tensor([30, 66, 21])} - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=3, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_parse_int_as_long_fail(self): - """test_parse_int_as_long_fail""" - schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "index", - "type": "int" - } - ] - } - """ - record_data = [{"index": 0}] - features = {"index": tf.io.FixedLenFeature([], tf.dtypes.int64)} - self._test_fail_dataset(schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_parse_int_as_sparse_type_fail(self): - """test_parse_int_as_sparse_type_fail""" - schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "index", - "type": "int" - } - ] - } - """ - record_data = [{"index": 5}] - features = { - "index": tf.io.SparseFeature( - index_key="index", 
value_key="value", dtype=tf.dtypes.float32, size=10 - ) - } - self._test_fail_dataset(schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_parse_float_as_double_fail(self): - """test_parse_float_as_double_fail""" - schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "weight", - "type": "float" - } - ] - } - """ - record_data = [{"weight": 0.5}] - features = {"weight": tf.io.FixedLenFeature([], tf.dtypes.float64)} - self._test_fail_dataset(schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_fixed_length_without_proper_default_fail(self): - """test_fixed_length_without_proper_default_fail""" - schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "int_list_type", - "type": { - "type":"array", - "items":"int" - } - } - ] - } - """ - record_data = [{"int_list_type": [0, 1, 2]}, {"int_list_type": [0, 1]}] - features = {"int_list_type": tf.io.FixedLenFeature([], tf.dtypes.int32)} - self._test_fail_dataset(schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_wrong_spelling_of_feature_name_fail(self): - """test_wrong_spelling_of_feature_name_fail""" - schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - {"name": "int_type", "type": "int"} - ] - }""" - record_data = [{"int_type": 0}] - features = {"wrong_spelling": tf.io.FixedLenFeature([], tf.dtypes.int32)} - self._test_fail_dataset(schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_wrong_index(self): - """test_wrong_index""" - schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "list_of_records", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name": "first_name", - "type": "string" - } - ] - } - } - } - ] - } - """ - record_data = [{"list_of_records": [{"first_name": "My name"}]}] - features = { - "list_of_records[2].name": tf.io.FixedLenFeature([], tf.dtypes.string) - } - self._test_fail_dataset(schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_with_variable_length(self): - """test_filter_with_variable_length""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name": "name", - "type": "string" - }, - { - "name": "gender", - "type": "string" - } - ] - } - } - } - ] - } - """ - record_data = [ - { - "guests": [ - {"name": "Hans", "gender": "male"}, - {"name": "Mary", "gender": "female"}, - {"name": "July", "gender": "female"}, - ] - }, - { - "guests": [ - {"name": "Joel", "gender": "male"}, - {"name": "JoAn", "gender": "female"}, - {"name": "Marc", "gender": "male"}, - ] - }, - ] - features = { - "guests[gender='male'].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ), - "guests[gender='female'].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ), - } - expected_data = [ - { - "guests[gender='male'].name": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [1, 0], [1, 1]], - values=[ - tf.compat.as_bytes("Hans"), - tf.compat.as_bytes("Joel"), - tf.compat.as_bytes("Marc"), - ], - dense_shape=[2, 2], - ), - 
"guests[gender='female'].name": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [0, 1], [1, 0]], - values=[ - tf.compat.as_bytes("Mary"), - tf.compat.as_bytes("July"), - tf.compat.as_bytes("JoAn"), - ], - dense_shape=[2, 2], - ), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_with_empty_result(self): - """test_filter_with_empty_result""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name":"name", - "type":"string" - }, - { - "name":"gender", - "type":"string" - } - ] - } - } - } - ] - } - """ - record_data = [ - {"guests": [{"name": "Hans", "gender": "male"}]}, - {"guests": [{"name": "Joel", "gender": "male"}]}, - ] - features = { - "guests[gender='wrong_value'].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ) - } - expected_data = [ - { - "guests[gender='wrong_value'].name": tf.compat.v1.SparseTensorValue( - indices=np.empty(shape=[0, 2], dtype=np.int64), - values=np.empty(shape=[0], dtype=np.str), - dense_shape=np.asarray([2, 0]), - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_with_wrong_key_fail(self): - """test_filter_with_wrong_key_fail""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name":"name", - "type":"string" - } - ] - } - } - } - ] - } - """ - record_data = [{"guests": [{"name": "Hans"}]}] - features = { - "guests[wrong_key='female'].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ) - } - self._test_fail_dataset(reader_schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_with_wrong_pair_fail(self): - """test_filter_with_wrong_pair_fail""" - reader_schema = """ - { - "type":"record", - "name":"data_row", - "fields":[ - { - "name":"guests", - "type":{ - "type":"array", - "items":{ - "type":"record", - "name":"person", - "fields":[ - { - "name":"name", - "type":"string" - } - ] - } - } - } - ] - } - """ - record_data = [{"guests": [{"name": "Hans"}]}] - features = { - "guests[forgot_the_separator].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ) - } - self._test_fail_dataset(reader_schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_with_too_many_separators_fail(self): - """test_filter_with_too_many_separators_fail""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type":"record", - "name":"person", - "fields":[ - { - "name":"name", - "type":"string" - } - ] - } - } - } - ] - } - """ - record_data = [{"guests": [{"name": "Hans"}]}] - features = { - "guests[used=too=many=separators].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ) - } - 
self._test_fail_dataset(reader_schema, record_data, features, 1) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_for_nested_record(self): - """test_filter_for_nested_record""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name": "name", - "type": "string" - }, - { - "name": "gender", - "type": "string" - }, - { - "name": "address", - "type": { - "type": "record", - "name": "postal", - "fields": [ - { - "name":"street", - "type":"string" - }, - { - "name":"zip", - "type":"int" - }, - { - "name":"state", - "type":"string" - } - ] - } - } - ] - } - } - } - ] - } - """ - record_data = [ - { - "guests": [ - { - "name": "Hans", - "gender": "male", - "address": { - "street": "California St", - "zip": 94040, - "state": "CA", - }, - }, - { - "name": "Mary", - "gender": "female", - "address": {"street": "Ellis St", "zip": 29040, "state": "MA"}, - }, - ] - } - ] - features = { - "guests[gender='female'].address.street": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ) - } - expected_data = [ - { - "guests[gender='female']" - + ".address.street": tf.compat.v1.SparseTensorValue( - indices=[[0, 0]], - values=[tf.compat.as_bytes("Ellis St")], - dense_shape=[1, 1], - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_filter_with_bytes_as_type(self): - """test_filter_with_bytes_as_type""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name":"name", - "type":"bytes" - }, - { - "name":"gender", - "type":"bytes" - } - ] - } - } - } - ] - } - """ - record_data = [ - { - "guests": [ - {"name": b"Hans", "gender": b"male"}, - {"name": b"Mary", "gender": b"female"}, - {"name": b"July", "gender": b"female"}, - ] - }, - { - "guests": [ - {"name": b"Joel", "gender": b"male"}, - {"name": b"JoAn", "gender": b"female"}, - {"name": b"Marc", "gender": b"male"}, - ] - }, - ] - features = { - "guests[gender='male'].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ), - "guests[gender='female'].name": tfio.experimental.columnar.VarLenFeatureWithRank( - tf.dtypes.string - ), - } - expected_data = [ - { - "guests[gender='male'].name": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [1, 0], [1, 1]], - values=[ - tf.compat.as_bytes("Hans"), - tf.compat.as_bytes("Joel"), - tf.compat.as_bytes("Marc"), - ], - dense_shape=[2, 2], - ), - "guests[gender='female'].name": tf.compat.v1.SparseTensorValue( - indices=[[0, 0], [0, 1], [1, 0]], - values=[ - tf.compat.as_bytes("Mary"), - tf.compat.as_bytes("July"), - tf.compat.as_bytes("JoAn"), - ], - dense_shape=[2, 2], - ), - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - # @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_ignore_namespace(self): - """test_namespace""" - reader_schema = """ - { - "namespace": "com.test", - "type": "record", - "name": "simple", - "fields": [ - { - "name":"string_value", - "type":"string" 
- } - ] - }""" - features = {"string_value": tf.io.FixedLenFeature([], tf.dtypes.string)} - record_data = [{"string_value": "a"}, {"string_value": "bb"}] - expected_data = [ - { - "string_value": tf.convert_to_tensor( - [tf.compat.as_bytes("a"), tf.compat.as_bytes("bb")] - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_broken_schema_fail(self): - """test_broken_schema_fail""" - valid_schema = """ - { - "type": "record", - "name": "row", - "fields": [ - {"name": "int_value", "type": "int"} - ] - }""" - record_data = [{"int_value": 0}] - broken_schema = """ - { - "type": "record", - "name": "row", - "fields": [ - {"name": "index", "type": "int"}, - {"name": "boolean_type"} - ] - }""" - features = {"index": tf.io.FixedLenFeature([], tf.dtypes.int64)} - self._test_fail_dataset( - valid_schema, record_data, features, 1, parser_schema=broken_schema - ) - - @pytest.mark.skipif(sys.platform == "darwin", reason="macOS fails now") - def test_some_optimization_broke_string_repeats_in_batch(self): - """test_some_optimization_broke_string_repeats_in_batch""" - # In the past this test failed but now passes - reader_schema = """ - { - "type": "record", - "name": "simple", - "fields": [ - { - "name":"string_value", - "type":"string" - } - ] - }""" - features = {"string_value": tf.io.FixedLenFeature([], tf.dtypes.string)} - record_data = [{"string_value": "aa"}, {"string_value": "bb"}] - expected_data = [ - { - "string_value": np.asarray( - [tf.compat.as_bytes("aa"), tf.compat.as_bytes("bb")] - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - @pytest.mark.skip(reason="failed with tf 2.2 rc3 on linux") - # Note current filters resolve to single item and we remove the dimension introduced by that - def test_filter_of_sparse_feature(self): - """test_filter_of_sparse_feature""" - reader_schema = """ - { - "type": "record", - "name": "data_row", - "fields": [ - { - "name": "guests", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "person", - "fields": [ - { - "name": "name", - "type": "string" - }, - { - "name": "gender", - "type": "string" - }, - { - "name": "address", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "postal", - "fields": [ - { - "name":"street", - "type":"string" - }, - { - "name":"zip", - "type":"long" - }, - { - "name":"street_no", - "type":"int" - } - ] - } - } - } - ] - } - } - } - ] - } - """ - record_data = [ - { - "guests": [ - { - "name": "Hans", - "gender": "male", - "address": [ - { - "street": "California St", - "zip": 94040, - "state": "CA", - "street_no": 1, - }, - { - "street": "New York St", - "zip": 32012, - "state": "NY", - "street_no": 2, - }, - ], - }, - { - "name": "Mary", - "gender": "female", - "address": [ - { - "street": "Ellis St", - "zip": 29040, - "state": "MA", - "street_no": 3, - } - ], - }, - ] - } - ] - features = { - "guests[gender='female'].address": tf.io.SparseFeature( - index_key="zip", - value_key="street_no", - dtype=tf.dtypes.int32, - size=94040, - ) - } - # Note, the filter introduces an additional index, - # because filters can have multiple items - expected_data = [ - { - "guests[gender='female'].address": tf.compat.v1.SparseTensorValue( - np.asarray([[0, 0, 29040]]), - 
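-                    # values: street_no of the single matching address; dense_shape
-                    # covers [records, filter matches, sparse size].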
np.asarray([3]), - np.asarray([1, 1, 94040]), - ) - } - ] - self._test_pass_dataset( - reader_schema=reader_schema, - record_data=record_data, - expected_data=expected_data, - features=features, - batch_size=2, - ) - - -if __name__ == "__main__": - test.main() From c8e01fc8866dba7b484ce34d5306d504f92a0a9d Mon Sep 17 00:00:00 2001 From: Lijuan Zhang Date: Wed, 3 May 2023 20:44:49 +0000 Subject: [PATCH 4/6] update mixed benchmark --- tests/test_atds_avro/benchmark/test_mixed_benchmark.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py index fd2dc26ed..1e6c9099d 100644 --- a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py +++ b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py @@ -12,7 +12,7 @@ # License for the specific language governing permissions and limitations under # the License. # ============================================================================== -"""ATDS benchmark for jrps schema.""" +"""ATDS benchmark for schema with mixed data types.""" import glob import os @@ -37,10 +37,7 @@ get_features_from_data_source, ) - -@pytest.mark.benchmark( - group="jrps", -) +@pytest.mark.benchmark(group="mixed",) def test_jrps_benchmark_data(): scenario = { "sparse_1d_float_small_1": FloatSparseTensorGenerator( From d36a68ca635ba72817917cab3ce819b02eaf5b53 Mon Sep 17 00:00:00 2001 From: Lijuan Zhang Date: Wed, 3 May 2023 22:45:40 +0000 Subject: [PATCH 5/6] linter --- tests/test_atds_avro/benchmark/test_mixed_benchmark.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py index 1e6c9099d..643ebfe41 100644 --- a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py +++ b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py @@ -37,7 +37,10 @@ get_features_from_data_source, ) -@pytest.mark.benchmark(group="mixed",) + +@pytest.mark.benchmark( + group="mixed", +) def test_jrps_benchmark_data(): scenario = { "sparse_1d_float_small_1": FloatSparseTensorGenerator( From a652c0d45f5dfdd65fad1be5febfe0cdc5bb1892 Mon Sep 17 00:00:00 2001 From: Lijuan Zhang Date: Thu, 4 May 2023 16:27:37 +0000 Subject: [PATCH 6/6] remove jrps reference in tests --- tests/test_atds_avro/benchmark/test_mixed_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py index 643ebfe41..22dea2d2e 100644 --- a/tests/test_atds_avro/benchmark/test_mixed_benchmark.py +++ b/tests/test_atds_avro/benchmark/test_mixed_benchmark.py @@ -41,7 +41,7 @@ @pytest.mark.benchmark( group="mixed", ) -def test_jrps_benchmark_data(): +def test_mixed_benchmark_data(): scenario = { "sparse_1d_float_small_1": FloatSparseTensorGenerator( tf.SparseTensorSpec([3], tf.dtypes.float32), ValueDistribution.SINGLE_VALUE