From af236858a2495bc94cfbcc4e1d30ac678afc97c1 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Mon, 23 Sep 2024 07:05:50 +0200 Subject: [PATCH 1/5] Bump to latest extension_ci_tools --- .github/workflows/main_distribution.yml | 1 + extension-ci-tools | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main_distribution.yml b/.github/workflows/main_distribution.yml index b974a08..d39510a 100644 --- a/.github/workflows/main_distribution.yml +++ b/.github/workflows/main_distribution.yml @@ -22,6 +22,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: duckdb_version: main + ci_tools_version: main exclude_archs: "wasm_mvp;wasm_eh;wasm_threads;windows_amd64;windows_amd64_rtools" extension_name: substrait diff --git a/extension-ci-tools b/extension-ci-tools index 69ec500..2c849c7 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 69ec500f2330913633481af5602728a3cd4b7468 +Subproject commit 2c849c7a016eaf911d07ecb8eb9f6aed3d602e00 From 39a5117c391d5c8df1b8b8cf71304d9a4c104a52 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Mon, 23 Sep 2024 07:07:30 +0200 Subject: [PATCH 2/5] Add tpcds as build (=test) time dependency --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 36723f9..b7b8625 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ all: release EXT_NAME=substrait EXT_CONFIG=${PROJ_DIR}extension_config.cmake -CORE_EXTENSIONS='tpch;json' +CORE_EXTENSIONS='tpch;tpcds;json' # Set this flag during building to enable the benchmark runner ifeq (${BUILD_BENCHMARK}, 1) From b0475b00610805d11eb21a1314d56524587ce0f6 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Mon, 23 Sep 2024 07:08:06 +0200 Subject: [PATCH 3/5] Move Q72 to statement error --- test/sql/test_substrait_tpcds.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sql/test_substrait_tpcds.test 
b/test/sql/test_substrait_tpcds.test index 5892cf7..a7232bb 100644 --- a/test/sql/test_substrait_tpcds.test +++ b/test/sql/test_substrait_tpcds.test @@ -380,7 +380,7 @@ statement ok CALL get_substrait('SELECT i_brand_id brand_id, i_brand brand, t_hour, t_minute, Sum(ext_price) ext_price FROM item, (SELECT ws_ext_sales_price AS ext_price, ws_sold_date_sk AS sold_date_sk, ws_item_sk AS sold_item_sk, ws_sold_time_sk AS time_sk FROM web_sales, date_dim WHERE d_date_sk = ws_sold_date_sk AND d_moy = 11 AND d_year = 2001 UNION ALL SELECT cs_ext_sales_price AS ext_price, cs_sold_date_sk AS sold_date_sk, cs_item_sk AS sold_item_sk, cs_sold_time_sk AS time_sk FROM catalog_sales, date_dim WHERE d_date_sk = cs_sold_date_sk AND d_moy = 11 AND d_year = 2001 UNION ALL SELECT ss_ext_sales_price AS ext_price, ss_sold_date_sk AS sold_date_sk, ss_item_sk AS sold_item_sk, ss_sold_time_sk AS time_sk FROM store_sales, date_dim WHERE d_date_sk = ss_sold_date_sk AND d_moy = 11 AND d_year = 2001) AS tmp, time_dim WHERE sold_item_sk = i_item_sk AND i_manager_id = 1 AND time_sk = t_time_sk AND ( t_meal_time = ''breakfast'' OR t_meal_time = ''dinner'' ) GROUP BY i_brand, i_brand_id, t_hour, t_minute ORDER BY ext_price DESC, i_brand_id; ') #Q 72 (Ambiguous reference) -statement ok +statement error CALL get_substrait('SELECT i_item_desc, w_warehouse_name, d1.d_week_seq, Sum(CASE WHEN p_promo_sk IS NULL THEN 1 ELSE 0 END) no_promo, Sum(CASE WHEN p_promo_sk IS NOT NULL THEN 1 ELSE 0 END) promo, Count(*) total_cnt FROM catalog_sales JOIN inventory ON ( cs_item_sk = inv_item_sk ) JOIN warehouse ON ( w_warehouse_sk = inv_warehouse_sk ) JOIN item ON ( i_item_sk = cs_item_sk ) JOIN customer_demographics ON ( cs_bill_cdemo_sk = cd_demo_sk ) JOIN household_demographics ON ( cs_bill_hdemo_sk = hd_demo_sk ) JOIN date_dim d1 ON ( cs_sold_date_sk = d1.d_date_sk ) JOIN date_dim d2 ON ( inv_date_sk = d2.d_date_sk ) JOIN date_dim d3 ON ( cs_ship_date_sk = d3.d_date_sk ) LEFT OUTER JOIN promotion ON ( 
cs_promo_sk = p_promo_sk ) LEFT OUTER JOIN catalog_returns ON ( cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number ) WHERE d1.d_week_seq = d2.d_week_seq AND inv_quantity_on_hand < cs_quantity AND d3.d_date > d1.d_date + INTERVAL ''5'' day AND hd_buy_potential = ''501-1000'' AND d1.d_year = 2002 AND cd_marital_status = ''M'' GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq LIMIT 100; ') ---- Binder Error: Ambiguous reference to column name "d_week_seq" (use: "d1.d_week_seq" or "d2.d_week_seq") @@ -527,4 +527,4 @@ Not implemented Error: WINDOW statement error CALL get_substrait('SELECT Substr(w_warehouse_name, 1, 20), sm_type, cc_name, Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk <= 30 ) THEN 1 ELSE 0 END) AS "30 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 30 ) AND ( cs_ship_date_sk - cs_sold_date_sk <= 60 ) THEN 1 ELSE 0 END) AS "31-60 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 60 ) AND ( cs_ship_date_sk - cs_sold_date_sk <= 90 ) THEN 1 ELSE 0 END) AS "61-90 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 90 ) AND ( cs_ship_date_sk - cs_sold_date_sk <= 120 ) THEN 1 ELSE 0 END) AS "91-120 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 120 ) THEN 1 ELSE 0 END) AS ">120 days" FROM catalog_sales, warehouse, ship_mode, call_center, date_dim WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 AND cs_ship_date_sk = d_date_sk AND cs_warehouse_sk = w_warehouse_sk AND cs_ship_mode_sk = sm_ship_mode_sk AND cs_call_center_sk = cc_call_center_sk GROUP BY Substr(w_warehouse_name, 1, 20), sm_type, cc_name ORDER BY Substr(w_warehouse_name, 1, 20), sm_type, cc_name LIMIT 100; ') ---- -Not implemented Error: No expressions in groupings yet \ No newline at end of file +Not implemented Error: No expressions in groupings yet From 37a86d0bb9f048276c7d0db4ee9d8ab346b854ab Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Mon, 23 Sep 2024 07:24:28 +0200 
Subject: [PATCH 4/5] tpcds Q58: shorten checked error message --- test/sql/test_substrait_tpcds.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/test_substrait_tpcds.test b/test/sql/test_substrait_tpcds.test index a7232bb..7e0fc25 100644 --- a/test/sql/test_substrait_tpcds.test +++ b/test/sql/test_substrait_tpcds.test @@ -307,7 +307,7 @@ Not implemented Error: WINDOW statement error CALL get_substrait('WITH ss_items AS (SELECT i_item_id item_id, Sum(ss_ext_sales_price) ss_item_rev FROM store_sales, item, date_dim WHERE ss_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_date = ''2002-02-25'' )) AND ss_sold_date_sk = d_date_sk GROUP BY i_item_id), cs_items AS (SELECT i_item_id item_id, Sum(cs_ext_sales_price) cs_item_rev FROM catalog_sales, item, date_dim WHERE cs_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_date = ''2002-02-25'' )) AND cs_sold_date_sk = d_date_sk GROUP BY i_item_id), ws_items AS (SELECT i_item_id item_id, Sum(ws_ext_sales_price) ws_item_rev FROM web_sales, item, date_dim WHERE ws_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_date = ''2002-02-25'' )) AND ws_sold_date_sk = d_date_sk GROUP BY i_item_id) SELECT ss_items.item_id, ss_item_rev, ss_item_rev / ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 * 100 ss_dev, cs_item_rev, cs_item_rev / ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 * 100 cs_dev, ws_item_rev, ws_item_rev / ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 * 100 ws_dev, ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 average FROM ss_items, cs_items, ws_items WHERE ss_items.item_id = cs_items.item_id AND ss_items.item_id = ws_items.item_id AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev AND 
cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev ORDER BY item_id, ss_item_rev LIMIT 100; ') ---- -Binder Error: Ambiguous reference to column name "item_id" (use: "ws_items.item_id" or "ss_items.item_id") +Binder Error: Ambiguous reference to column name "item_id" #Q 59 statement ok From 7458d260e59ec52ac2e6661824882c7b7340c26f Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Mon, 23 Sep 2024 07:38:16 +0200 Subject: [PATCH 5/5] Shorten yet another error message --- test/sql/test_substrait_tpcds.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/test_substrait_tpcds.test b/test/sql/test_substrait_tpcds.test index 7e0fc25..6b99db8 100644 --- a/test/sql/test_substrait_tpcds.test +++ b/test/sql/test_substrait_tpcds.test @@ -383,7 +383,7 @@ CALL get_substrait('SELECT i_brand_id brand_id, i_brand brand, t_hour, t_minute, statement error CALL get_substrait('SELECT i_item_desc, w_warehouse_name, d1.d_week_seq, Sum(CASE WHEN p_promo_sk IS NULL THEN 1 ELSE 0 END) no_promo, Sum(CASE WHEN p_promo_sk IS NOT NULL THEN 1 ELSE 0 END) promo, Count(*) total_cnt FROM catalog_sales JOIN inventory ON ( cs_item_sk = inv_item_sk ) JOIN warehouse ON ( w_warehouse_sk = inv_warehouse_sk ) JOIN item ON ( i_item_sk = cs_item_sk ) JOIN customer_demographics ON ( cs_bill_cdemo_sk = cd_demo_sk ) JOIN household_demographics ON ( cs_bill_hdemo_sk = hd_demo_sk ) JOIN date_dim d1 ON ( cs_sold_date_sk = d1.d_date_sk ) JOIN date_dim d2 ON ( inv_date_sk = d2.d_date_sk ) JOIN date_dim d3 ON ( cs_ship_date_sk = d3.d_date_sk ) LEFT OUTER JOIN promotion ON ( cs_promo_sk = p_promo_sk ) LEFT OUTER JOIN catalog_returns ON ( cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number ) WHERE d1.d_week_seq = d2.d_week_seq AND inv_quantity_on_hand < cs_quantity AND d3.d_date > 
d1.d_date + INTERVAL ''5'' day AND hd_buy_potential = ''501-1000'' AND d1.d_year = 2002 AND cd_marital_status = ''M'' GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq LIMIT 100; ') ---- -Binder Error: Ambiguous reference to column name "d_week_seq" (use: "d1.d_week_seq" or "d2.d_week_seq") +Binder Error: Ambiguous reference to column name "d_week_seq" #Q 73 statement ok