From 89e57d7a6e5d3114bcac900ecd3c0a905b849ea8 Mon Sep 17 00:00:00 2001 From: Jianyuan Jiang Date: Sat, 2 Dec 2023 14:22:19 +0800 Subject: [PATCH] redo(ticdc): use multi part s3 uploader in redo (#10227) close pingcap/tiflow#10226 --- cdc/api/v2/model.go | 3 +++ cdc/redo/writer/memory/file_worker.go | 20 ++++++++++++++++++- pkg/config/consistent.go | 1 + pkg/util/external_storage.go | 8 ++++++-- .../conf/changefeed.toml | 1 + 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/cdc/api/v2/model.go b/cdc/api/v2/model.go index e4d8edc460d..a181b65d020 100644 --- a/cdc/api/v2/model.go +++ b/cdc/api/v2/model.go @@ -271,6 +271,7 @@ func (c *ReplicaConfig) toInternalReplicaConfigWithOriginConfig( Storage: c.Consistent.Storage, UseFileBackend: c.Consistent.UseFileBackend, Compression: c.Consistent.Compression, + FlushConcurrency: c.Consistent.FlushConcurrency, } } if c.Sink != nil { @@ -765,6 +766,7 @@ func ToAPIReplicaConfig(c *config.ReplicaConfig) *ReplicaConfig { Storage: cloned.Consistent.Storage, UseFileBackend: cloned.Consistent.UseFileBackend, Compression: cloned.Consistent.Compression, + FlushConcurrency: cloned.Consistent.FlushConcurrency, } } if cloned.Mounter != nil { @@ -962,6 +964,7 @@ type ConsistentConfig struct { Storage string `json:"storage,omitempty"` UseFileBackend bool `json:"use_file_backend"` Compression string `json:"compression,omitempty"` + FlushConcurrency int `json:"flush_concurrency,omitempty"` } // ChangefeedSchedulerConfig is per changefeed scheduler settings. diff --git a/cdc/redo/writer/memory/file_worker.go b/cdc/redo/writer/memory/file_worker.go index dd2a22e45b0..d566ddf66c5 100644 --- a/cdc/redo/writer/memory/file_worker.go +++ b/cdc/redo/writer/memory/file_worker.go @@ -196,7 +196,12 @@ func (f *fileWorkerGroup) bgFlushFileCache(egCtx context.Context) error { if err := file.writer.Close(); err != nil { return errors.Trace(err) } - err := f.extStorage.WriteFile(egCtx, file.filename, file.writer.buf.Bytes()) + var err error + if f.cfg.FlushConcurrency <= 1 { + err = f.extStorage.WriteFile(egCtx, file.filename, file.writer.buf.Bytes()) + } else { + err = f.multiPartUpload(egCtx, file) + } f.metricFlushAllDuration.Observe(time.Since(start).Seconds()) if err != nil { return errors.Trace(err) @@ -210,6 +215,19 @@ func (f *fileWorkerGroup) bgFlushFileCache(egCtx context.Context) error { } } +func (f *fileWorkerGroup) multiPartUpload(ctx context.Context, file *fileCache) error { + multipartWrite, err := f.extStorage.Create(ctx, file.filename, &storage.WriterOption{ + Concurrency: f.cfg.FlushConcurrency, + }) + if err != nil { + return errors.Trace(err) + } + if _, err = multipartWrite.Write(ctx, file.writer.buf.Bytes()); err != nil { + return errors.Trace(err) + } + return errors.Trace(multipartWrite.Close(ctx)) +} + func (f *fileWorkerGroup) bgWriteLogs( egCtx context.Context, inputCh <-chan *polymorphicRedoEvent, ) (err error) { diff --git a/pkg/config/consistent.go b/pkg/config/consistent.go index 26f4950b4b3..c8f93c6eac8 100644 --- a/pkg/config/consistent.go +++ b/pkg/config/consistent.go @@ -34,6 +34,7 @@ type ConsistentConfig struct { Storage string `toml:"storage" json:"storage"` UseFileBackend bool `toml:"use-file-backend" json:"use-file-backend"` Compression string `toml:"compression" json:"compression"` + FlushConcurrency int `toml:"flush-concurrency" json:"flush-concurrency,omitempty"` } // ValidateAndAdjust validates the consistency config and adjusts it if necessary. diff --git a/pkg/util/external_storage.go b/pkg/util/external_storage.go index 6ff01993967..0d96c47045f 100644 --- a/pkg/util/external_storage.go +++ b/pkg/util/external_storage.go @@ -196,8 +196,12 @@ func (s *extStorageWithTimeout) WalkDir( func (s *extStorageWithTimeout) Create( ctx context.Context, path string, option *storage.WriterOption, ) (storage.ExternalFileWriter, error) { - ctx, cancel := context.WithTimeout(ctx, s.timeout) - defer cancel() + if option.Concurrency <= 1 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, s.timeout) + defer cancel() + } + // multipart uploading spawns a background goroutine, can't set timeout return s.ExternalStorage.Create(ctx, path, option) } diff --git a/tests/integration_tests/consistent_replicate_storage_s3/conf/changefeed.toml b/tests/integration_tests/consistent_replicate_storage_s3/conf/changefeed.toml index 7edf7be7d69..ff99477c9da 100644 --- a/tests/integration_tests/consistent_replicate_storage_s3/conf/changefeed.toml +++ b/tests/integration_tests/consistent_replicate_storage_s3/conf/changefeed.toml @@ -1,3 +1,4 @@ [consistent] level = "eventual" storage = "s3://logbucket/test-changefeed?endpoint=http://127.0.0.1:24927/" +flush-concurrency = 2