From 3fce2f5f2f9790d76ef31571e641b167d25a6f3b Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 17 Oct 2024 16:44:55 +0200 Subject: [PATCH] Fix CopyFile and add test (+ nits) --- cpp/src/arrow/filesystem/s3_internal.h | 41 +++++++++++++++++----- cpp/src/arrow/filesystem/s3fs.cc | 12 ++++++- cpp/src/arrow/filesystem/s3fs.h | 24 ++++++++----- cpp/src/arrow/filesystem/s3fs_benchmark.cc | 2 +- cpp/src/arrow/filesystem/s3fs_test.cc | 18 ++++++++-- 5 files changed, 76 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h index e1af8aa880ebc..772387e5fb66e 100644 --- a/cpp/src/arrow/filesystem/s3_internal.h +++ b/cpp/src/arrow/filesystem/s3_internal.h @@ -58,7 +58,7 @@ #endif // !ARROW_AWS_SDK_VERSION_CHECK #if ARROW_AWS_SDK_VERSION_CHECK(1, 9, 201) -# define ARROW_S3_HAS_SSE_C +# define ARROW_S3_HAS_SSE_CUSTOMER_KEY #endif namespace arrow { @@ -339,19 +339,42 @@ inline Result CalculateSSECustomerKeyMD5( sse_customer_key_md5.GetLength())); } -template -Status SetSSECustomerKey(S3RequestType* request, const std::string& sse_customer_key) { +struct SSECustomerKeyHeaders { + std::string sse_customer_key; + std::string sse_customer_key_md5; + std::string sse_customer_algorithm; +}; + +inline Result> GetSSECustomerKeyHeaders( + const std::string& sse_customer_key) { if (sse_customer_key.empty()) { - return Status::OK(); // do nothing if the sse_customer_key is not configured + return std::nullopt; } -#ifdef ARROW_S3_HAS_SSE_C +#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY ARROW_ASSIGN_OR_RAISE(auto md5, internal::CalculateSSECustomerKeyMD5(sse_customer_key)); - request->SetSSECustomerKeyMD5(md5); - request->SetSSECustomerKey(arrow::util::base64_encode(sse_customer_key)); - request->SetSSECustomerAlgorithm("AES256"); + return SSECustomerKeyHeaders{arrow::util::base64_encode(sse_customer_key), md5, + "AES256"}; +#else + return Status::NotImplemented( + "SSE customer key not supported by this version of the AWS SDK"); +#endif +} + +template +Status SetSSECustomerKey(S3RequestType* request, const std::string& sse_customer_key) { + ARROW_ASSIGN_OR_RAISE(auto maybe_headers, GetSSECustomerKeyHeaders(sse_customer_key)); + if (!maybe_headers.has_value()) { + return Status::OK(); + } +#ifdef ARROW_S3_HAS_SSE_CUSTOMER_KEY + auto headers = std::move(maybe_headers).value(); + request->SetSSECustomerKey(headers.sse_customer_key); + request->SetSSECustomerKeyMD5(headers.sse_customer_key_md5); + request->SetSSECustomerAlgorithm(headers.sse_customer_algorithm); return Status::OK(); #else - return Status::NotImplemented("SSE-C is not supported"); + return Status::NotImplemented( + "SSE customer key not supported by this version of the AWS SDK"); #endif } diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 9ad99a890d0fb..ee47e1c702073 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -2367,8 +2367,18 @@ class S3FileSystem::Impl : public std::enable_shared_from_this retry_strategy; - /// The SSE-C customized key (raw 32 bytes key). + /// Optional customer-provided key for server-side encryption (SSE-C). + /// + /// This should be the 32-byte AES-256 key, unencoded. std::string sse_customer_key; - /// Path to a single PEM file holding all TLS CA certificates + /// Optional path to a single PEM file holding all TLS CA certificates /// - /// If empty, global filesystem options will be used, if the global filesystem options - /// is also empty, the underlying TLS library's defaults will be used. + /// If empty, global filesystem options will be used (see FileSystemGlobalOptions); + /// if the corresponding global filesystem option is also empty, the underlying + /// TLS library's defaults will be used. std::string tls_ca_file_path; - /// Path to a directory holding TLS CA certificates in individual PEM files + /// Optional path to a directory holding TLS CA + /// + /// The given directory should contain CA certificates as individual PEM files /// named along the OpenSSL "hashed" format. /// - /// If empty, global filesystem options will be used, if the global filesystem options - /// is also empty, the underlying TLS library's defaults will be used. + /// If empty, global filesystem options will be used (see FileSystemGlobalOptions); + /// if the corresponding global filesystem option is also empty, the underlying + /// TLS library's defaults will be used. std::string tls_ca_dir_path; - /// Whether to verify the S3 endpoint's TLS certificate, if the scheme is "https". + /// Whether to verify the S3 endpoint's TLS certificate + /// + /// This option applies if the scheme is "https". bool tls_verify_certificates = true; S3Options(); diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc b/cpp/src/arrow/filesystem/s3fs_benchmark.cc index 5a6753e84c70b..2867012b86616 100644 --- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc +++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc @@ -61,7 +61,7 @@ class MinioFixture : public benchmark::Fixture { public: void SetUp(const ::benchmark::State& state) override { minio_.reset(new MinioTestServer()); - ASSERT_OK(minio_->Start(false)); + ASSERT_OK(minio_->Start(/*enable_tls_if_supported=*/false)); const char* region_str = std::getenv(kEnvAwsRegion); if (region_str) { diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 16d63989b9780..3b83479888240 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -95,8 +95,8 @@ ::testing::Environment* s3_env = ::testing::AddGlobalTestEnvironment(new S3Envir ::testing::Environment* minio_env = ::testing::AddGlobalTestEnvironment(new MinioTestEnvironment); -::testing::Environment* minio_env_http = - ::testing::AddGlobalTestEnvironment(new MinioTestEnvironment(false)); +::testing::Environment* minio_env_http = ::testing::AddGlobalTestEnvironment( + new MinioTestEnvironment(/*enable_tls_if_supported=*/false)); MinioTestEnvironment* GetMinioEnv(bool enable_tls_if_supported) { if (enable_tls_if_supported) { @@ -1349,6 +1349,7 @@ TEST_F(TestS3FS, OpenInputFile) { ASSERT_RAISES(IOError, file->Seek(10)); } +// Minio only allows Server Side Encryption on HTTPS client connections. #ifdef MINIO_SERVER_WITH_TLS TEST_F(TestS3FS, SSECustomerKeyMatch) { // normal write/read with correct SSE-C key @@ -1400,6 +1401,18 @@ TEST_F(TestS3FS, SSECustomerKeyMissing) { ASSERT_OK(RestoreTestBucket()); } } + +TEST_F(TestS3FS, SSECustomerKeyCopyFile) { + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenOutputStream("bucket/newfile_with_sse_c")); + ASSERT_OK(stream->Write("some")); + ASSERT_OK(stream->Close()); + ASSERT_OK(fs_->CopyFile("bucket/newfile_with_sse_c", "bucket/copied_with_sse_c")); + + ASSERT_OK_AND_ASSIGN(auto file, fs_->OpenInputFile("bucket/copied_with_sse_c")); + ASSERT_OK_AND_ASSIGN(auto buf, file->Read(5)); + AssertBufferEqual(*buf, "some"); + ASSERT_OK(RestoreTestBucket()); +} #endif // MINIO_SERVER_WITH_TLS struct S3OptionsTestParameters { @@ -1697,5 +1710,6 @@ TEST(S3GlobalOptions, DefaultsLogLevel) { ASSERT_EQ(S3LogLevel::Fatal, arrow::fs::S3GlobalOptions::Defaults().log_level); } } + } // namespace fs } // namespace arrow