From 718415030f8ebcad14bc3fa10906beca6526c5e9 Mon Sep 17 00:00:00 2001 From: ben-freist <93315290+ben-freist@users.noreply.github.com> Date: Fri, 12 Jul 2024 22:40:04 +0200 Subject: [PATCH] GH-43095: [C++] Update bundled vendor/datetime to support building with libc++ and C++20 (#43094) ### Rationale for this change We can't build with libc++ and C++20: CMake command line: ```bash cmake -DARROW_ENABLE_THREADING=OFF \ -DARROW_JEMALLOC=OFF \ -DCMAKE_CXX_STANDARD=20 \ -DCXX_ONLY_FLAGS="-stdlib=libc++" \ -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake --preset ninja-debug-minimal ../cpp/ ``` Error log: ``` In file included from ~/.conan2/p/b/arrowe39f77e638649/b/src/cpp/src/arrow/vendored/datetime/tz.cpp:90: ~/.conan2/p/b/arrowe39f77e638649/b/src/cpp/src/arrow/vendored/datetime/tz_private.h:295:12: error: use of overloaded operator '<<' is ambiguous (with operand types 'std::ostream' (aka 'basic_ostream<char>') and 'const sys_seconds' (aka 'const time_point>>')) 295 | os << t.timepoint << "Z "; | ~~ ^ ~~~~~~~~~~~ /usr/lib/llvm-17/bin/../include/c++/v1/__chrono/ostream.h:46:1: note: candidate function [with _CharT = char, _Traits = std::char_traits<char>, _Duration = std::chrono::duration] 46 | operator<<(basic_ostream<_CharT, _Traits>& __os, const sys_time<_Duration> __tp) { | ^ ~/.conan2/p/b/arrowe39f77e638649/b/src/cpp/src/arrow/vendored/datetime/date.h:4214:1: note: candidate function [with CharT = char, Traits = std::char_traits<char>, Duration = std::chrono::duration] 4214 | operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp) ``` ### What changes are included in this PR? Update the bundled vendor/datetime because upstream has a fix for this case: https://github.com/HowardHinnant/date/pull/827 ### Are these changes tested? ### Are there any user-facing changes? 
* GitHub Issue: #43095 Authored-by: Benjamin Freist Signed-off-by: Sutou Kouhei --- cpp/src/arrow/vendored/datetime/README.md | 2 +- cpp/src/arrow/vendored/datetime/date.h | 27 ++++-- cpp/src/arrow/vendored/datetime/tz.cpp | 95 +++++++++++++++++--- cpp/src/arrow/vendored/datetime/tz.h | 4 +- cpp/src/arrow/vendored/datetime/tz_private.h | 3 +- 5 files changed, 108 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/vendored/datetime/README.md b/cpp/src/arrow/vendored/datetime/README.md index 0dd663c5e5acc..5a0993b7b4336 100644 --- a/cpp/src/arrow/vendored/datetime/README.md +++ b/cpp/src/arrow/vendored/datetime/README.md @@ -17,7 +17,7 @@ copies or substantial portions of the Software. Sources for datetime are adapted from Howard Hinnant's date library (https://github.com/HowardHinnant/date). -Sources are taken from changeset cc4685a21e4a4fdae707ad1233c61bbaff241f93 +Sources are taken from changeset 1ead6715dec030d340a316c927c877a3c4e5a00c of the above project. The following changes are made: diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h index fd2569c6de0f6..75e2624296672 100644 --- a/cpp/src/arrow/vendored/datetime/date.h +++ b/cpp/src/arrow/vendored/datetime/date.h @@ -4230,7 +4230,7 @@ inline std::basic_ostream& operator<<(std::basic_ostream& os, const local_time& ut) { - return (os << sys_time{ut.time_since_epoch()}); + return (date::operator<<(os, sys_time{ut.time_since_epoch()})); } namespace detail @@ -6353,7 +6353,10 @@ read_signed(std::basic_istream& is, unsigned m = 1, unsigned M = if (('0' <= c && c <= '9') || c == '-' || c == '+') { if (c == '-' || c == '+') + { (void)is.get(); + --M; + } auto x = static_cast(read_unsigned(is, std::max(m, 1u), M)); if (!is.fail()) { @@ -6526,7 +6529,14 @@ read(std::basic_istream& is, int a0, Args&& ...args) *e++ = static_cast(CharT(u % 10) + CharT{'0'}); u /= 10; } while (u > 0); +#if defined(__GNUC__) && __GNUC__ >= 11 +#pragma GCC diagnostic push +#pragma GCC 
diagnostic ignored "-Wstringop-overflow" +#endif std::reverse(buf, e); +#if defined(__GNUC__) && __GNUC__ >= 11 +#pragma GCC diagnostic pop +#endif for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p) read(is, *p); } @@ -6592,7 +6602,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, CONSTDATA int not_a_year = numeric_limits::min(); CONSTDATA int not_a_2digit_year = 100; - CONSTDATA int not_a_century = not_a_year / 100; + CONSTDATA int not_a_century = numeric_limits::min(); CONSTDATA int not_a_month = 0; CONSTDATA int not_a_day = 0; CONSTDATA int not_a_hour = numeric_limits::min(); @@ -7519,7 +7529,12 @@ from_stream(std::basic_istream& is, const CharT* fmt, { auto c = static_cast(Traits::to_char_type(ic)); if (c == '-') + { neg = true; + (void)is.get(); + } + else if (c == '+') + (void)is.get(); } if (modified == CharT{}) { @@ -7735,9 +7750,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) + weeks{U-1} + (weekday{static_cast(wd)} - Sunday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) + if (year{Y} != ymd_trial.year()) goto broken; if (m == not_a_month) m = static_cast(static_cast(ymd_trial.month())); @@ -7754,9 +7767,7 @@ from_stream(std::basic_istream& is, const CharT* fmt, year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) + weeks{W-1} + (weekday{static_cast(wd)} - Monday); - if (Y == not_a_year) - Y = static_cast(ymd_trial.year()); - else if (year{Y} != ymd_trial.year()) + if (year{Y} != ymd_trial.year()) goto broken; if (m == not_a_month) m = static_cast(static_cast(ymd_trial.month())); diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index e94c1bc8ae682..44c627775f3d7 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -96,6 +96,10 @@ # define TARGET_OS_SIMULATOR 0 #endif +#if defined(ANDROID) || 
defined(__ANDROID__) +#include +#endif + #if USE_OS_TZDB # include #endif @@ -2709,7 +2713,8 @@ operator<<(std::ostream& os, const time_zone& z) os.width(8); os << s.format_ << " "; os << s.until_year_ << ' ' << s.until_date_; - os << " " << s.until_utc_ << " UTC"; + os << " "; + date::operator<<(os, s.until_utc_) << " UTC"; os << " " << s.until_std_ << " STD"; os << " " << s.until_loc_; os << " " << make_time(s.initial_save_); @@ -2734,8 +2739,7 @@ operator<<(std::ostream& os, const time_zone& z) std::ostream& operator<<(std::ostream& os, const leap_second& x) { - using namespace date; - return os << x.date_ << " +"; + return date::operator<<(os, x.date_) << " +"; } #if USE_OS_TZDB @@ -3716,6 +3720,67 @@ get_tzdb() return get_tzdb_list().front(); } +namespace { + +class recursion_limiter +{ + unsigned depth_ = 0; + unsigned limit_; + + class restore_recursion_depth; + +public: + recursion_limiter(recursion_limiter const&) = delete; + recursion_limiter& operator=(recursion_limiter const&) = delete; + + explicit constexpr recursion_limiter(unsigned limit) noexcept; + + restore_recursion_depth count(); +}; + +class recursion_limiter::restore_recursion_depth +{ + recursion_limiter* rc_; + +public: + ~restore_recursion_depth(); + restore_recursion_depth(restore_recursion_depth&&) = default; + + explicit restore_recursion_depth(recursion_limiter* rc) noexcept; +}; + +inline +recursion_limiter::restore_recursion_depth::~restore_recursion_depth() +{ + --(rc_->depth_); +} + +inline +recursion_limiter::restore_recursion_depth::restore_recursion_depth(recursion_limiter* rc) + noexcept + : rc_{rc} +{} + +inline +constexpr +recursion_limiter::recursion_limiter(unsigned limit) noexcept + : limit_{limit} +{ +} + +inline +recursion_limiter::restore_recursion_depth +recursion_limiter::count() +{ + ++depth_; + if (depth_ > limit_) + throw std::runtime_error("recursion limit of " + + std::to_string(limit_) + " exceeded"); + return restore_recursion_depth{this}; +} + +} // unnamed 
namespace + const time_zone* #if HAS_STRING_VIEW tzdb::locate_zone(std::string_view tz_name) const @@ -3723,6 +3788,10 @@ tzdb::locate_zone(std::string_view tz_name) const tzdb::locate_zone(const std::string& tz_name) const #endif { + // If a link-to-link chain exceeds this limit, give up + thread_local recursion_limiter rc{10}; + auto restore_count = rc.count(); + auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name, #if HAS_STRING_VIEW [](const time_zone& z, const std::string_view& nm) @@ -3746,13 +3815,7 @@ tzdb::locate_zone(const std::string& tz_name) const }); if (li != links.end() && li->name() == tz_name) { - zi = std::lower_bound(zones.begin(), zones.end(), li->target(), - [](const time_zone& z, const std::string& nm) - { - return z.name() < nm; - }); - if (zi != zones.end() && zi->name() == li->target()) - return &*zi; + return locate_zone(li->target()); } #endif // !USE_OS_TZDB throw std::runtime_error(std::string(tz_name) + " not found in timezone database"); @@ -4038,6 +4101,18 @@ tzdb::current_zone() const if (!result.empty()) return locate_zone(result); #endif + // Fall through to try other means. + } + { + // On Android, it is not possible to use file based approach either, + // we have to ask the value of `persist.sys.timezone` system property +#if defined(ANDROID) || defined(__ANDROID__) + char sys_timezone[PROP_VALUE_MAX]; + if (__system_property_get("persist.sys.timezone", sys_timezone) > 0) + { + return locate_zone(sys_timezone); + } +#endif // defined(ANDROID) || defined(__ANDROID__) // Fall through to try other means. 
} { diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h index 467db6d199793..df6d1a851ac9d 100644 --- a/cpp/src/arrow/vendored/datetime/tz.h +++ b/cpp/src/arrow/vendored/datetime/tz.h @@ -239,8 +239,8 @@ nonexistent_local_time::make_msg(local_time tp, const local_info& i) << i.first.abbrev << " and\n" << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' ' << i.second.abbrev - << " which are both equivalent to\n" - << i.first.end << " UTC"; + << " which are both equivalent to\n"; + date::operator<<(os, i.first.end) << " UTC"; return os.str(); } diff --git a/cpp/src/arrow/vendored/datetime/tz_private.h b/cpp/src/arrow/vendored/datetime/tz_private.h index 6b7a91493e103..a6bb8fd30a0c7 100644 --- a/cpp/src/arrow/vendored/datetime/tz_private.h +++ b/cpp/src/arrow/vendored/datetime/tz_private.h @@ -291,8 +291,7 @@ struct transition std::ostream& operator<<(std::ostream& os, const transition& t) { - using date::operator<<; - os << t.timepoint << "Z "; + date::operator<<(os, t.timepoint) << "Z "; if (t.info->offset >= std::chrono::seconds{0}) os << '+'; os << make_time(t.info->offset);