Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add aggregate relation tests #103

Merged
merged 5 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions substrait_consumer/functional/aggregate_relation_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from substrait_consumer.functional.queries.sql.relations.aggregate_relations import (
AGGREGATE_RELATIONS)

# (test name, parquet files) pairs, in the order the test cases are emitted.
# NOTE(review): the number of files appears to match the number of '{}'
# placeholders in the corresponding SQL query -- confirm against the harness.
_AGGREGATE_TEST_FILES = (
    ("single_measure_aggregate", ('lineitem_small.parquet',)),
    ("multiple_measure_aggregate", ('orders_small.parquet',)),
    ("aggregate_with_computation", ('orders_small.parquet',)),
    ("compute_within_aggregate", ('orders_small.parquet',)),
    ("computation_between_aggregates", ('orders_small.parquet',)),
    ("aggregate_in_subquery", ('orders_small.parquet', 'orders_small.parquet')),
    ("aggregate_with_group_by", ('lineitem_small.parquet',)),
    ("aggregate_with_group_by_cube", ('lineitem_small.parquet',)),
    ("aggregate_with_group_by_rollup", ('lineitem_small.parquet',)),
    ("aggregate_with_grouping_set", ('lineitem_small.parquet',)),
)

# Test-case configurations for the aggregate relation tests.
#
# Each entry is a dict with:
#   "test_name"  -- the key used to look the query up in AGGREGATE_RELATIONS
#   "file_names" -- a fresh list of parquet file names for the test
#   "sql_query"  -- the AGGREGATE_RELATIONS entry for that test name
#   "ibis_expr"  -- always None for these tests
AGGREGATE_RELATION_TESTS = tuple(
    {
        "test_name": case_name,
        "file_names": list(parquet_files),
        "sql_query": AGGREGATE_RELATIONS[case_name],
        "ibis_expr": None,
    }
    for case_name, parquet_files in _AGGREGATE_TEST_FILES
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from substrait_consumer.producers.duckdb_producer import DuckDBProducer
from substrait_consumer.producers.datafusion_producer import DataFusionProducer
from substrait_consumer.producers.isthmus_producer import IsthmusProducer

# SQL queries exercising aggregate relations, keyed by test name.
#
# Each value is a 2-tuple:
#   (SQL query template, list of producer classes associated with the query)
#
# The '{}' tokens in the SQL are placeholders for the table/file reference --
# presumably filled in by the test harness (e.g. via str.format); confirm
# against the caller. The SQL text is a runtime string: do not reformat it.
AGGREGATE_RELATIONS = {
# A single COUNT measure with no grouping.
"single_measure_aggregate": (
"""
SELECT COUNT(L_PARTKEY)
FROM '{}'
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# Three measures (MIN, MAX, AVG) over the same column.
"multiple_measure_aggregate": (
"""
SELECT MIN(O_TOTALPRICE), MAX(O_TOTALPRICE), AVG(O_TOTALPRICE)
FROM '{}'
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# Arithmetic applied to the result of an aggregate.
"aggregate_with_computation": (
"""
SELECT AVG(O_TOTALPRICE) * 10
FROM '{}'
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# Arithmetic applied to the column inside the aggregate's argument.
"compute_within_aggregate": (
"""
SELECT AVG(O_TOTALPRICE * 10)
FROM '{}'
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# Arithmetic combining the results of two different aggregates.
"computation_between_aggregates": (
"""
SELECT AVG(O_TOTALPRICE) + MAX(O_TOTALPRICE)
FROM '{}'
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# Aggregate used as a scalar subquery in the WHERE clause.
# Note: this template contains two '{}' placeholders.
"aggregate_in_subquery": (
"""

SELECT O_TOTALPRICE
FROM '{}'
WHERE O_TOTALPRICE <= (SELECT AVG(O_TOTALPRICE) FROM '{}')
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# SUM grouped by a single column, ordered by that column.
"aggregate_with_group_by": (
"""

SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER
FROM '{}'
GROUP BY L_LINENUMBER
ORDER BY L_LINENUMBER
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# SUM grouped with CUBE over two columns.
"aggregate_with_group_by_cube": (
"""

SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER, L_ORDERKEY
FROM '{}'
GROUP BY CUBE(L_LINENUMBER, L_ORDERKEY)
ORDER BY L_LINENUMBER, L_ORDERKEY
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# SUM grouped with ROLLUP over two columns.
"aggregate_with_group_by_rollup": (
"""

SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER, L_ORDERKEY
FROM '{}'
GROUP BY ROLLUP(L_LINENUMBER, L_ORDERKEY)
ORDER BY L_LINENUMBER, L_ORDERKEY
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
# SUM grouped with explicit GROUPING SETS: two single-column sets.
"aggregate_with_grouping_set": (
"""

SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER, L_ORDERKEY
FROM '{}'
GROUP BY GROUPING SETS
(
(L_LINENUMBER),
(L_ORDERKEY)
)
ORDER BY L_LINENUMBER
""",
[DuckDBProducer, DataFusionProducer, IsthmusProducer],
),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
{
"extensions": [
{
"extensionFunction": {
"extensionUriReference": 4294967295,
"name": "avg"
}
},
{
"extensionFunction": {
"extensionUriReference": 4294967295,
"functionAnchor": 1,
"name": "lte"
}
}
],
"relations": [
{
"root": {
"input": {
"project": {
"input": {
"join": {
"left": {
"read": {
"baseSchema": {
"names": [
"o_orderkey",
"o_custkey",
"o_orderstatus",
"o_totalprice",
"o_orderdate",
"o_orderpriority",
"o_clerk",
"o_shippriority",
"o_comment"
]
},
"projection": {
"select": {
"structItems": [
{
"field": 3
}
]
}
},
"namedTable": {
"names": [
"orders_small"
]
}
}
},
"right": {
"aggregate": {
"input": {
"read": {
"baseSchema": {
"names": [
"o_orderkey",
"o_custkey",
"o_orderstatus",
"o_totalprice",
"o_orderdate",
"o_orderpriority",
"o_clerk",
"o_shippriority",
"o_comment"
]
},
"projection": {
"select": {
"structItems": [
{
"field": 3
}
]
}
},
"namedTable": {
"names": [
"orders_small"
]
}
}
},
"groupings": [
{}
],
"measures": [
{
"measure": {
"invocation": "AGGREGATION_INVOCATION_ALL",
"arguments": [
{
"value": {
"selection": {
"directReference": {
"structField": {}
}
}
}
}
]
}
}
]
}
},
"expression": {
"scalarFunction": {
"functionReference": 1,
"arguments": [
{
"value": {
"cast": {
"type": {
"decimal": {
"scale": 6,
"precision": 19,
"nullability": "NULLABILITY_NULLABLE"
}
},
"input": {
"selection": {
"directReference": {
"structField": {}
}
}
}
}
}
},
{
"value": {
"selection": {
"directReference": {
"structField": {
"field": 1
}
}
}
}
}
]
}
},
"type": "JOIN_TYPE_INNER"
}
},
"expressions": [
{
"selection": {
"directReference": {
"structField": {}
}
}
}
]
}
},
"names": [
"o_totalprice"
]
}
}
],
"version": {
"minorNumber": 51,
"producer": "datafusion"
}
}
Loading
Loading