Skip to content

Commit

Permalink
feat: add tests for join relations
Browse files Browse the repository at this point in the history
  • Loading branch information
richtia committed Sep 10, 2024
1 parent b10aca9 commit 7869051
Show file tree
Hide file tree
Showing 44 changed files with 6,391 additions and 0 deletions.
83 changes: 83 additions & 0 deletions substrait_consumer/functional/join_relation_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from substrait_consumer.functional.queries.sql.relations.join_relations import (
JOIN_RELATIONS)

# Parquet fixtures referenced by the join-relation test cases.
_CUSTOMER = "customer_small.parquet"
_ORDERS = "orders_small.parquet"
_LINEITEM = "lineitem_small.parquet"

# One (join name, parquet files) pair per test case.  The join name doubles
# as the test name and as the lookup key into JOIN_RELATIONS.  The order of
# the files follows the textual order of the '{}' placeholders in the
# matching SQL (presumably they are substituted positionally by the test
# harness -- confirm against the caller).
_JOIN_TEST_INPUTS = (
    ("inner_join", (_CUSTOMER, _ORDERS)),
    ("left_join", (_CUSTOMER, _ORDERS)),
    ("right_join", (_CUSTOMER, _ORDERS)),
    ("full_join", (_CUSTOMER, _ORDERS)),
    ("cross_join", (_CUSTOMER, _ORDERS)),
    ("left_semi_join", (_CUSTOMER, _ORDERS)),
    ("right_semi_join", (_ORDERS, _CUSTOMER)),
    ("left_anti_join", (_CUSTOMER, _ORDERS)),
    ("right_anti_join", (_ORDERS, _LINEITEM)),
    ("left_single_join", (_CUSTOMER, _CUSTOMER)),
    ("right_single_join", (_CUSTOMER, _CUSTOMER)),
    ("left_mark_join", (_ORDERS, _CUSTOMER)),
    ("right_mark_join", (_CUSTOMER, _ORDERS)),
)

# Tuple of test-case dicts, one per join relation.  Every case carries the
# same four keys; "ibis_expr" is None for all of them because these cases
# are defined by SQL only.
JOIN_RELATION_TESTS = tuple(
    {
        "test_name": join_name,
        "file_names": list(parquet_files),
        "sql_query": JOIN_RELATIONS[join_name],
        "ibis_expr": None,
    }
    for join_name, parquet_files in _JOIN_TEST_INPUTS
)
217 changes: 217 additions & 0 deletions substrait_consumer/functional/queries/sql/relations/join_relations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
from substrait_consumer.producers.duckdb_producer import DuckDBProducer
from substrait_consumer.producers.datafusion_producer import DataFusionProducer
from substrait_consumer.producers.isthmus_producer import IsthmusProducer

# SQL text for each join-relation test, keyed by join name.
#
# Each query contains two '{}' placeholders for the input tables (presumably
# filled with parquet file paths by the test harness -- confirm against the
# caller).  Note that in the two mark-join queries the placeholder inside the
# EXISTS subquery appears first in the text, ahead of the outer FROM table.
#
# The SQL strings are kept exactly as written, including the
# "c1nationakey"/"c2nationakey" alias spelling and the absence of a trailing
# semicolon on the cross join.
_JOIN_SQL = {
    "inner_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME,
o.O_ORDERKEY
FROM
'{}' c
INNER JOIN
'{}' o
ON
c.C_CUSTKEY = o.O_CUSTKEY;
""",
    "left_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME,
o.O_ORDERKEY
FROM
'{}' c
LEFT JOIN
'{}' o
ON
c.C_CUSTKEY = o.O_CUSTKEY;
""",
    "right_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME,
o.O_ORDERKEY
FROM
'{}' c
RIGHT JOIN
'{}' o
ON
c.C_CUSTKEY = o.O_CUSTKEY;
""",
    "full_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME,
o.O_ORDERKEY
FROM
'{}' c
FULL JOIN
'{}' o
ON
c.C_CUSTKEY = o.O_CUSTKEY;
""",
    "cross_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME,
o.O_ORDERKEY
FROM
'{}' c
CROSS JOIN
'{}' o
""",
    "left_semi_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME
FROM
'{}' c
WHERE
EXISTS (
SELECT 1
FROM '{}' o
WHERE o.O_CUSTKEY = c.C_CUSTKEY
);
""",
    "right_semi_join": """
SELECT
o.O_ORDERKEY,
o.O_CUSTKEY
FROM
'{}' o
WHERE
EXISTS (
SELECT 1
FROM '{}' c
WHERE c.C_CUSTKEY = o.O_CUSTKEY
);
""",
    "left_anti_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME
FROM
'{}' c
WHERE
NOT EXISTS (
SELECT 1
FROM '{}' o
WHERE o.O_CUSTKEY = c.C_CUSTKEY
);
""",
    "right_anti_join": """
SELECT
o.O_ORDERKEY,
o.O_CUSTKEY
FROM
'{}' o
WHERE
NOT EXISTS (
SELECT 1
FROM '{}' l
WHERE l.L_ORDERKEY = o.O_ORDERKEY
);
""",
    "left_single_join": """
SELECT
c1.C_CUSTKEY AS c1key,
c1.C_NAME AS c1name,
c1.C_NATIONKEY AS c1nationakey,
c2.C_CUSTKEY AS c2key,
c2.C_NAME AS c2name,
c2.C_NATIONKEY AS c2nationakey
FROM
'{}' c1
LEFT JOIN
'{}' c2
ON
c1.C_NATIONKEY = c2.C_NATIONKEY
AND c1.C_CUSTKEY <> c2.C_CUSTKEY;
""",
    "right_single_join": """
SELECT
c1.C_CUSTKEY AS c1key,
c1.C_NAME AS c1name,
c1.C_NATIONKEY AS c1nationakey,
c2.C_CUSTKEY AS c2key,
c2.C_NAME AS c2name,
c2.C_NATIONKEY AS c2nationakey
FROM
'{}' c1
RIGHT JOIN
'{}' c2
ON
c1.C_NATIONKEY = c2.C_NATIONKEY
AND c1.C_CUSTKEY <> c2.C_CUSTKEY;
""",
    "left_mark_join": """
SELECT
c.C_CUSTKEY,
c.C_NAME,
CASE
WHEN EXISTS (
SELECT 1
FROM '{}' o
WHERE o.O_CUSTKEY = c.C_CUSTKEY
) THEN 'Marked'
ELSE 'Not Marked'
END
FROM
'{}' c;
""",
    "right_mark_join": """
SELECT
o.O_ORDERKEY,
o.O_CUSTKEY,
CASE
WHEN EXISTS (
SELECT 1
FROM '{}' c
WHERE c.C_CUSTKEY = o.O_CUSTKEY
) THEN 'Marked'
ELSE 'Not Marked'
END
FROM
'{}' o;
""",
}

# Maps each join name to a (sql, producers) tuple.  All join queries are
# paired with the same three producer classes; the list display is evaluated
# once per entry, so every entry owns its own independent list object.
JOIN_RELATIONS = {
    join_name: (sql, [DuckDBProducer, DataFusionProducer, IsthmusProducer])
    for join_name, sql in _JOIN_SQL.items()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"relations": [
{
"root": {
"input": {
"cross": {
"left": {
"read": {
"baseSchema": {
"names": [
"c_custkey",
"c_name",
"c_address",
"c_nationkey",
"c_phone",
"c_acctbal",
"c_mktsegment",
"c_comment"
]
},
"projection": {
"select": {
"structItems": [
{},
{
"field": 1
}
]
}
},
"namedTable": {
"names": [
"customer_small"
]
}
}
},
"right": {
"read": {
"baseSchema": {
"names": [
"o_orderkey",
"o_custkey",
"o_orderstatus",
"o_totalprice",
"o_orderdate",
"o_orderpriority",
"o_clerk",
"o_shippriority",
"o_comment"
]
},
"projection": {
"select": {
"structItems": [
{}
]
}
},
"namedTable": {
"names": [
"orders_small"
]
}
}
}
}
},
"names": [
"c_custkey",
"c_name",
"o_orderkey"
]
}
}
],
"version": {
"minorNumber": 51,
"producer": "datafusion"
}
}
Loading

0 comments on commit 7869051

Please sign in to comment.