From fd57123cb28717199cdc9bf49f9495cea9990c63 Mon Sep 17 00:00:00 2001 From: Richard Tia Date: Fri, 6 Sep 2024 10:40:50 -0700 Subject: [PATCH] fix: separate groupby and grouping set --- .../functional/aggregate_relation_configs.py | 6 + .../sql/relations/aggregate_relations.py | 18 +- .../aggregate_with_group_by_plan.json | 124 +++++++++ .../aggregate_with_grouping_set_plan.json | 30 ++- .../aggregate_with_group_by_plan.json | 236 ++++++++++++++++++ .../aggregate_with_grouping_set_plan.json | 28 ++- .../aggregate_with_group_by_plan.json | 212 ++++++++++++++++ .../aggregate_with_grouping_set_plan.json | 41 ++- .../aggregate_with_group_by_result.txt | 14 ++ .../aggregate_with_grouping_set_result.txt | 10 + 10 files changed, 706 insertions(+), 13 deletions(-) create mode 100644 substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json create mode 100644 substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json create mode 100644 substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json create mode 100644 substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_group_by_result.txt diff --git a/substrait_consumer/functional/aggregate_relation_configs.py b/substrait_consumer/functional/aggregate_relation_configs.py index c3ecd8b..98e1daa 100644 --- a/substrait_consumer/functional/aggregate_relation_configs.py +++ b/substrait_consumer/functional/aggregate_relation_configs.py @@ -38,6 +38,12 @@ "sql_query": AGGREGATE_RELATIONS["aggregate_in_subquery"], "ibis_expr": None }, + { + "test_name": "aggregate_with_group_by", + "file_names": ['lineitem_small.parquet'], + "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by"], + "ibis_expr": None + }, { "test_name": "aggregate_with_grouping_set", "file_names": ['lineitem_small.parquet'], diff --git a/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py b/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py index f9b0fe6..edd0bd7 100644 --- a/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py @@ -47,12 +47,26 @@ """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), + "aggregate_with_group_by": ( + """ + + SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER + FROM '{}' + GROUP BY L_LINENUMBER + """, + [DuckDBProducer, DataFusionProducer, IsthmusProducer], + ), "aggregate_with_grouping_set": ( """ - SELECT SUM(l_extendedprice), l_linenumber + SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER, L_ORDERKEY FROM '{}' - GROUP BY l_linenumber + GROUP BY GROUPING SETS + ( + (L_LINENUMBER), + (L_ORDERKEY) + ) + ORDER BY L_LINENUMBER """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json new file mode 100644 index 0000000..4ec3769 --- /dev/null +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json @@ -0,0 +1,124 @@ +{ + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 4294967295, + "name": "sum" + } + } + ], + "relations": [ + { + "root": { + "input": { + "project": { + "input": { + "aggregate": { + "input": { + "read": { + "baseSchema": { + "names": [ + "l_orderkey", + "l_partkey", + "l_suppkey", + "l_linenumber", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_tax", + "l_returnflag", + "l_linestatus", + "l_shipdate", + "l_commitdate", + "l_receiptdate", + "l_shipinstruct", + "l_shipmode", + "l_comment" + ] + }, + "projection": { + "select": { + "structItems": [ + { + "field": 3 + }, + { + "field": 5 + } + ] + } + }, + "namedTable": { + "names": [ + "lineitem_small" + ] + } + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": {} + } + } + } + ] + } + ], + "measures": [ + { + "measure": { + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + }, + { + "selection": { + "directReference": { + "structField": {} + } + } + } + ] + } + }, + "names": [ + "sum(lineitem_small.l_extendedprice)", + "l_linenumber" + ] + } + } + ], + "version": { + "minorNumber": 51, + "producer": "datafusion" + } +} \ No newline at end of file diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json index 4ec3769..4efc869 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json @@ -39,6 +39,7 @@ "projection": { "select": { "structItems": [ + {}, { "field": 3 }, @@ -56,6 +57,19 @@ } }, "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + ] + }, { "groupingExpressions": [ { @@ -78,7 +92,7 @@ "selection": { "directReference": { "structField": { - "field": 1 + "field": 2 } } } @@ -95,7 +109,7 @@ "selection": { "directReference": { "structField": { - "field": 1 + "field": 2 } } } @@ -106,13 +120,23 @@ "structField": {} } } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } } ] } }, "names": [ "sum(lineitem_small.l_extendedprice)", - "l_linenumber" + "l_linenumber", + "l_orderkey" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json new file mode 100644 index 0000000..121708f --- /dev/null +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json @@ -0,0 +1,236 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 1, + "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "sum:decimal" + } + } + ], + "relations": [ + { + "root": { + "input": { + "project": { + "input": { + "aggregate": { + "input": { + "read": { + "baseSchema": { + "names": [ + "l_orderkey", + "l_partkey", + "l_suppkey", + "l_linenumber", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_tax", + "l_returnflag", + "l_linestatus", + "l_shipdate", + "l_commitdate", + "l_receiptdate", + "l_shipinstruct", + "l_shipmode", + "l_comment" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "projection": { + "select": { + "structItems": [ + { + "field": 3 + }, + { + "field": 5 + } + ] + }, + "maintainSingularStruct": true + }, + "namedTable": { + "names": [ + "lineitem_small" + ] + } + } + }, + "groupings": [ + { + "groupingExpressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + ], + "measures": [ + { + "measure": { + "functionReference": 1, + "outputType": { + "decimal": { + "scale": 2, + "precision": 38, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + }, + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + ] + } + }, + "names": [ + "sum(l_extendedprice)", + "l_linenumber" + ] + } + } + ], + "version": { + "minorNumber": 48, + "producer": "DuckDB" + } +} \ No newline at end of file diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json index 121708f..53cc057 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json @@ -142,6 +142,7 @@ { "field": 3 }, + {}, { "field": 5 } @@ -166,6 +167,16 @@ }, "rootReference": {} } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } } ] } @@ -187,7 +198,7 @@ "selection": { "directReference": { "structField": { - "field": 1 + "field": 2 } }, "rootReference": {} @@ -205,7 +216,7 @@ "selection": { "directReference": { "structField": { - "field": 1 + "field": 2 } }, "rootReference": {} @@ -218,13 +229,24 @@ }, "rootReference": {} } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } } ] } }, "names": [ "sum(l_extendedprice)", - "l_linenumber" + "l_linenumber", + "l_orderkey" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json new file mode 100644 index 0000000..444ab7b --- /dev/null +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json @@ -0,0 +1,212 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_arithmetic_decimal.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [2, 3] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17] + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { + "nullability": "NULLABILITY_REQUIRED" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM_SMALL"] + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + } + }, + "rootReference": { + } + } + }] + } + }, + "names": ["EXPR$0", "L_LINENUMBER"] + } + }] +} \ No newline at end of file diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json index 444ab7b..61445e9 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json @@ -15,7 +15,7 @@ "project": { "common": { "emit": { - "outputMapping": [2, 3] + "outputMapping": [3, 4, 5] } }, "input": { @@ -28,7 +28,7 @@ "project": { "common": { "emit": { - "outputMapping": [16, 17] + "outputMapping": [16, 17, 18] } }, "input": { @@ -131,6 +131,15 @@ "rootReference": { } } + }, { + "selection": { + "directReference": { + "structField": { + } + }, + "rootReference": { + } + } }, { "selection": { "directReference": { @@ -155,6 +164,18 @@ } } }] + }, { + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] }], "measures": [{ "measure": { @@ -172,7 +193,7 @@ "selection": { "directReference": { "structField": { - "field": 1 + "field": 2 } }, "rootReference": { @@ -188,7 +209,7 @@ "selection": { "directReference": { "structField": { - "field": 1 + "field": 2 } }, "rootReference": { @@ -203,10 +224,20 @@ "rootReference": { } } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } }] } }, - "names": ["EXPR$0", "L_LINENUMBER"] + "names": ["EXPR$0", "L_LINENUMBER", "L_ORDERKEY"] } }] } \ No newline at end of file diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_group_by_result.txt b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_group_by_result.txt new file mode 100644 index 0000000..2d983d6 --- /dev/null +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_group_by_result.txt @@ -0,0 +1,14 @@ +15317.00 +32436.00 +7208.00 +25228.00 +21624.00 +28832.00 + +1 +2 +3 +4 +5 +6 + \ No newline at end of file diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_grouping_set_result.txt b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_grouping_set_result.txt index 2d983d6..bf5b4b9 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_grouping_set_result.txt +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/relation_test_results/aggregate_with_grouping_set_result.txt @@ -4,6 +4,7 @@ 25228.00 21624.00 28832.00 +130645.00 1 2 @@ -11,4 +12,13 @@ 4 5 6 +None + +None +None +None +None +None +None +1 \ No newline at end of file