From 1f8b60324eda383360198a5e9129c380400f67fd Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 30 Sep 2024 10:03:50 +0800 Subject: [PATCH 1/5] Temporarily disable a failed case to unblock nightly CI Signed-off-by: Chong Gao --- integration_tests/src/main/python/date_time_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 2b5876be20e..6b48951d069 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -461,6 +461,7 @@ def test_to_timestamp(parser_policy): # mm: minute; MM: month @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") +@pytest.mark.skip(reason="blocked by https://github.com/NVIDIA/spark-rapids/issues/11539, https://github.com/NVIDIA/spark-rapids/issues/11543") @pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn) # these regexps exclude zero year, python does not like zero year @pytest.mark.parametrize("data_gen_regexp", ['([0-9]{3}[1-9])([0-5][0-9])([0-3][0-9])', '([0-9]{3}[1-9])([0-9]{4})'], ids=idfn) From aa7e2767edcccfcd7ec5dbdd5e8158dc5d88cd53 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 30 Sep 2024 03:39:49 +0000 Subject: [PATCH 2/5] Update test case --- .../src/main/python/date_time_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 6b48951d069..4c8d016a045 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -459,24 +459,24 @@ def test_to_timestamp(parser_policy): .select(f.col("a"), f.to_timestamp(f.col("a"), "yyyy-MM-dd HH:mm:ss")), { "spark.sql.legacy.timeParserPolicy": parser_policy}) + # mm: minute; MM: month -@pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") -@pytest.mark.skip(reason="blocked by https://github.com/NVIDIA/spark-rapids/issues/11539, https://github.com/NVIDIA/spark-rapids/issues/11543") +@pytest.mark.skipif(not is_supported_time_zone(), + reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") @pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn) -# these regexps exclude zero year, python does not like zero year -@pytest.mark.parametrize("data_gen_regexp", ['([0-9]{3}[1-9])([0-5][0-9])([0-3][0-9])', '([0-9]{3}[1-9])([0-9]{4})'], ids=idfn) -def test_formats_for_legacy_mode(format, data_gen_regexp): - gen = StringGen(data_gen_regexp) +# Test years after 1900, refer to issues: https://github.com/NVIDIA/spark-rapids/issues/11543, https://github.com/NVIDIA/spark-rapids/issues/11539 +def test_formats_for_legacy_mode(format): + gen = StringGen('(19[0-9]{2}|[2-9][0-9]{3})([0-9]{4})') assert_gpu_and_cpu_are_equal_sql( - lambda spark : unary_op_df(spark, gen), + lambda spark: unary_op_df(spark, gen), "tab", '''select unix_timestamp(a, '{}'), from_unixtime(unix_timestamp(a, '{}'), '{}'), date_format(to_timestamp(a, '{}'), '{}') from tab '''.format(format, format, format, format, format), - { 'spark.sql.legacy.timeParserPolicy': 'LEGACY', - 'spark.rapids.sql.incompatibleDateFormats.enabled': True}) + {'spark.sql.legacy.timeParserPolicy': 'LEGACY', + 'spark.rapids.sql.incompatibleDateFormats.enabled': True}) @tz_sensitive_test @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") From 24be851b6227845bbadd8207e1db7b543a958421 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 8 Oct 2024 10:08:59 +0800 Subject: [PATCH 3/5] Format --- integration_tests/src/main/python/date_time_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 4c8d016a045..0b6ba20258c 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -461,8 +461,7 @@ def test_to_timestamp(parser_policy): # mm: minute; MM: month -@pytest.mark.skipif(not is_supported_time_zone(), - reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") +@pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") @pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn) # Test years after 1900, refer to issues: https://github.com/NVIDIA/spark-rapids/issues/11543, https://github.com/NVIDIA/spark-rapids/issues/11539 def test_formats_for_legacy_mode(format): From 711a047dd2fa46043576cec4935101ae5b862a56 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 8 Oct 2024 10:10:36 +0800 Subject: [PATCH 4/5] Format --- integration_tests/src/main/python/date_time_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 0b6ba20258c..c21618785bc 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -459,7 +459,6 @@ def test_to_timestamp(parser_policy): .select(f.col("a"), f.to_timestamp(f.col("a"), "yyyy-MM-dd HH:mm:ss")), { "spark.sql.legacy.timeParserPolicy": parser_policy}) - # mm: minute; MM: month @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") @pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn) @@ -467,7 +466,7 @@ def test_to_timestamp(parser_policy): def test_formats_for_legacy_mode(format): gen = StringGen('(19[0-9]{2}|[2-9][0-9]{3})([0-9]{4})') assert_gpu_and_cpu_are_equal_sql( - lambda spark: unary_op_df(spark, gen), + lambda spark : unary_op_df(spark, gen), "tab", '''select unix_timestamp(a, '{}'), from_unixtime(unix_timestamp(a, '{}'), '{}'), From 14b28b1eba1f86ed5bc71158f43171c347c645c8 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 8 Oct 2024 13:28:52 +0330 Subject: [PATCH 5/5] Manually pick code from branch-12.10 --- docs/compatibility.md | 5 +++-- integration_tests/src/main/python/date_time_test.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/compatibility.md b/docs/compatibility.md index e8812eb7d87..1cc0e80902a 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -662,9 +662,10 @@ LEGACY timeParserPolicy support has the following limitations when running on th - The proleptic Gregorian calendar is used instead of the hybrid Julian+Gregorian calendar that Spark uses in legacy mode - When format is `yyyyMMdd`, GPU only supports 8 digit strings. Spark supports like 7 digit - `2024101` string while GPU does not support. + `2024101` string while GPU does not support. Only tested `UTC` and `Asia/Shanghai` timezones. - When format is `yyyymmdd`, GPU only supports 8 digit strings. Spark supports like 7 digit - `2024101` string while GPU does not support. + `2024101` string while GPU does not support. Only tested `UTC` and `Asia/Shanghai` timezones. + ## Formatting dates and timestamps as strings diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index c21618785bc..a38cac3c0a7 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -463,6 +463,7 @@ def test_to_timestamp(parser_policy): @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") @pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn) # Test years after 1900, refer to issues: https://github.com/NVIDIA/spark-rapids/issues/11543, https://github.com/NVIDIA/spark-rapids/issues/11539 +@pytest.mark.skipif(get_test_tz() != "Asia/Shanghai" and get_test_tz() != "UTC", reason="https://github.com/NVIDIA/spark-rapids/issues/11562") def test_formats_for_legacy_mode(format): gen = StringGen('(19[0-9]{2}|[2-9][0-9]{3})([0-9]{4})') assert_gpu_and_cpu_are_equal_sql(