From 9620dfa18fefdb1aa23b2f2e0fe64e8a55071e05 Mon Sep 17 00:00:00 2001 From: Slach Date: Thu, 20 Jul 2023 18:14:56 +0500 Subject: [PATCH 01/12] add CopyObject implementation, TestIntegrationS3 passed --- .github/workflows/build.yaml | 5 +- pkg/backup/create.go | 99 +++++++++++- pkg/backup/delete.go | 123 +++++++++++++-- pkg/backup/restore.go | 136 ++++++++++++++-- pkg/clickhouse/clickhouse.go | 8 + pkg/clickhouse/utils.go | 8 - pkg/config/config.go | 20 +++ pkg/filesystemhelper/filesystemhelper.go | 4 +- pkg/metadata/metadata.go | 5 +- pkg/storage/azblob.go | 98 ++++++------ pkg/storage/cos.go | 9 ++ pkg/storage/ftp.go | 8 + pkg/storage/gcs.go | 26 +++- pkg/storage/general.go | 4 +- pkg/storage/object_disk/object_disk.go | 63 +++++++- pkg/storage/s3.go | 154 ++++++++++++++++++- pkg/storage/sftp.go | 8 + pkg/storage/structs.go | 2 + pkg/storage/utils.go | 2 +- test/integration/.env.example | 5 +- test/integration/config-azblob.yml | 1 + test/integration/config-gcs.yml | 1 + test/integration/config-s3.yml | 1 + test/integration/docker-compose_advanced.yml | 4 + test/integration/dynamic_settings.sh | 36 +++++ 25 files changed, 729 insertions(+), 101 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bc86495e..b22e5133 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -269,7 +269,10 @@ jobs: QA_AWS_SECRET_KEY: ${{ secrets.QA_AWS_SECRET_KEY }} QA_AWS_BUCKET: ${{ secrets.QA_AWS_BUCKET }} QA_AWS_REGION: ${{ secrets.QA_AWS_REGION }} - + # need for GCP over S3 + QA_GCS_OVER_S3_ACCESS_KEY: ${{ secrets.QA_GCS_OVER_S3_ACCESS_KEY }} + QA_GCS_OVER_S3_SECRET_KEY: ${{ secrets.QA_GCS_OVER_S3_SECRET_KEY }} + QA_GCS_OVER_S3_BUCKET: ${{ secrets.QA_GCS_OVER_S3_BUCKET }} run: | set -x echo "CLICKHOUSE_VERSION=${CLICKHOUSE_VERSION}" diff --git a/pkg/backup/create.go b/pkg/backup/create.go index 3532b1d3..cdbaf6fb 100644 --- a/pkg/backup/create.go +++ b/pkg/backup/create.go @@ -7,8 +7,11 @@ import ( "fmt" "github.com/Altinity/clickhouse-backup/pkg/partition" "github.com/Altinity/clickhouse-backup/pkg/status" + "github.com/Altinity/clickhouse-backup/pkg/storage" + "github.com/Altinity/clickhouse-backup/pkg/storage/object_disk" "os" "path" + "path/filepath" "strings" "time" @@ -102,16 +105,18 @@ func (b *Backuper) CreateBackup(backupName, tablePattern string, partitions []st return err } - diskMap := map[string]string{} + diskMap := make(map[string]string, len(disks)) + diskTypes := make(map[string]string, len(disks)) for _, disk := range disks { diskMap[disk.Name] = disk.Path + diskTypes[disk.Name] = disk.Type } partitionsIdMap, partitionsNameList := partition.ConvertPartitionsToIdsMapAndNamesList(ctx, b.ch, tables, nil, partitions) // create if b.cfg.ClickHouse.UseEmbeddedBackupRestore { - err = b.createBackupEmbedded(ctx, backupName, tablePattern, partitionsNameList, partitionsIdMap, schemaOnly, rbacOnly, configsOnly, tables, allDatabases, allFunctions, disks, diskMap, log, startBackup, version) + err = b.createBackupEmbedded(ctx, backupName, tablePattern, partitionsNameList, partitionsIdMap, schemaOnly, rbacOnly, configsOnly, tables, allDatabases, allFunctions, disks, diskMap, diskTypes, log, startBackup, version) } else { - err = b.createBackupLocal(ctx, backupName, partitionsIdMap, tables, doBackupData, schemaOnly, rbacOnly, configsOnly, version, disks, diskMap, allDatabases, allFunctions, log, startBackup) + err = b.createBackupLocal(ctx, backupName, partitionsIdMap, tables, doBackupData, schemaOnly, rbacOnly, configsOnly, version, disks, 
diskMap, diskTypes, allDatabases, allFunctions, log, startBackup) } if err != nil { return err @@ -124,7 +129,7 @@ func (b *Backuper) CreateBackup(backupName, tablePattern string, partitions []st return nil } -func (b *Backuper) createBackupLocal(ctx context.Context, backupName string, partitionsIdMap map[metadata.TableTitle]common.EmptyMap, tables []clickhouse.Table, doBackupData bool, schemaOnly bool, rbacOnly bool, configsOnly bool, version string, disks []clickhouse.Disk, diskMap map[string]string, allDatabases []clickhouse.Database, allFunctions []clickhouse.Function, log *apexLog.Entry, startBackup time.Time) error { +func (b *Backuper) createBackupLocal(ctx context.Context, backupName string, partitionsIdMap map[metadata.TableTitle]common.EmptyMap, tables []clickhouse.Table, doBackupData bool, schemaOnly bool, rbacOnly bool, configsOnly bool, version string, disks []clickhouse.Disk, diskMap, diskTypes map[string]string, allDatabases []clickhouse.Database, allFunctions []clickhouse.Function, log *apexLog.Entry, startBackup time.Time) error { // Create backup dir on all clickhouse disks for _, disk := range disks { if err := filesystemhelper.Mkdir(path.Join(disk.Path, "backup"), b.ch, disks); err != nil { @@ -236,14 +241,14 @@ func (b *Backuper) createBackupLocal(ctx context.Context, backupName string, par } backupMetaFile := path.Join(defaultPath, "backup", backupName, "metadata.json") - if err := b.createBackupMetadata(ctx, backupMetaFile, backupName, version, "regular", diskMap, disks, backupDataSize, backupMetadataSize, backupRBACSize, backupConfigSize, tableMetas, allDatabases, allFunctions, log); err != nil { + if err := b.createBackupMetadata(ctx, backupMetaFile, backupName, version, "regular", diskMap, diskTypes, disks, backupDataSize, backupMetadataSize, backupRBACSize, backupConfigSize, tableMetas, allDatabases, allFunctions, log); err != nil { return err } log.WithField("duration", utils.HumanizeDuration(time.Since(startBackup))).Info("done") return nil } -func (b *Backuper) createBackupEmbedded(ctx context.Context, backupName, tablePattern string, partitionsNameList map[metadata.TableTitle][]string, partitionsIdMap map[metadata.TableTitle]common.EmptyMap, schemaOnly, rbacOnly, configsOnly bool, tables []clickhouse.Table, allDatabases []clickhouse.Database, allFunctions []clickhouse.Function, disks []clickhouse.Disk, diskMap map[string]string, log *apexLog.Entry, startBackup time.Time, backupVersion string) error { +func (b *Backuper) createBackupEmbedded(ctx context.Context, backupName, tablePattern string, partitionsNameList map[metadata.TableTitle][]string, partitionsIdMap map[metadata.TableTitle]common.EmptyMap, schemaOnly, rbacOnly, configsOnly bool, tables []clickhouse.Table, allDatabases []clickhouse.Database, allFunctions []clickhouse.Function, disks []clickhouse.Disk, diskMap, diskTypes map[string]string, log *apexLog.Entry, startBackup time.Time, backupVersion string) error { if _, isBackupDiskExists := diskMap[b.cfg.ClickHouse.EmbeddedBackupDisk]; !isBackupDiskExists { return fmt.Errorf("backup disk `%s` not exists in system.disks", b.cfg.ClickHouse.EmbeddedBackupDisk) } @@ -358,7 +363,7 @@ func (b *Backuper) createBackupEmbedded(ctx context.Context, backupName, tablePa } } backupMetaFile := path.Join(diskMap[b.cfg.ClickHouse.EmbeddedBackupDisk], backupName, "metadata.json") - if err := b.createBackupMetadata(ctx, backupMetaFile, backupName, backupVersion, "embedded", diskMap, disks, backupDataSize[0].Size, backupMetadataSize, 0, 0, tableMetas, allDatabases, 
allFunctions, log); err != nil { + if err := b.createBackupMetadata(ctx, backupMetaFile, backupName, backupVersion, "embedded", diskMap, diskTypes, disks, backupDataSize[0].Size, backupMetadataSize, 0, 0, tableMetas, allDatabases, allFunctions, log); err != nil { return err } @@ -480,6 +485,28 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku } realSize := map[string]int64{} disksToPartsMap := map[string][]metadata.Part{} + needToUploadObjectDisk := false + for _, disk := range diskList { + if disk.Type == "s3" || disk.Type == "azure_blob_storage" { + needToUploadObjectDisk = true + break + } + } + if needToUploadObjectDisk { + b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) + if err != nil { + return nil, nil, err + } + if err := b.dst.Connect(ctx); err != nil { + return nil, nil, fmt.Errorf("can't connect to %s: %v", b.dst.Kind(), err) + } + defer func() { + if err := b.dst.Close(ctx); err != nil { + b.log.Warnf("uploadObjectDiskParts: can't close BackupDestination error: %v", err) + } + }() + } + for _, disk := range diskList { select { case <-ctx.Done(): @@ -503,6 +530,14 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku realSize[disk.Name] = size disksToPartsMap[disk.Name] = parts log.WithField("disk", disk.Name).Debug("shadow moved") + if disk.Type == "s3" || disk.Type == "azure_blob_storage" { + start := time.Now() + if size, err = b.uploadObjectDiskParts(ctx, backupName, backupShadowPath, disk); err != nil { + return disksToPartsMap, realSize, err + } + realSize[disk.Name] += size + log.WithField("disk", disk.Name).WithField("duration", utils.HumanizeDuration(time.Since(start))).Info("object_disk data uploaded") + } // Clean all the files under the shadowPath, cause UNFREEZE unavailable if version < 21004000 { if err := os.RemoveAll(shadowPath); err != nil { @@ -521,7 +556,54 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku return disksToPartsMap, realSize, nil } -func (b *Backuper) createBackupMetadata(ctx context.Context, backupMetaFile, backupName, version, tags string, diskMap map[string]string, disks []clickhouse.Disk, backupDataSize, backupMetadataSize, backupRBACSize, backupConfigSize uint64, tableMetas []metadata.TableTitle, allDatabases []clickhouse.Database, allFunctions []clickhouse.Function, log *apexLog.Entry) error { +func (b *Backuper) uploadObjectDiskParts(ctx context.Context, backupName, backupShadowPath string, disk clickhouse.Disk) (int64, error) { + var size int64 + var err error + if err = object_disk.InitCredentialsAndConnections(ctx, b.ch, b.cfg, disk.Name); err != nil { + return 0, err + } + + if err := filepath.Walk(backupShadowPath, func(fPath string, fInfo os.FileInfo, err error) error { + if err != nil { + return err + } + if fInfo.IsDir() { + return nil + } + objPartFileMeta, err := object_disk.ReadMetadataFromFile(fPath) + if err != nil { + return err + } + var realSize, objSize int64 + // @TODO think about parallelization here after test pass + for _, storageObject := range objPartFileMeta.StorageObjects { + srcDiskConnection, exists := object_disk.DisksConnections[disk.Name] + if !exists { + return fmt.Errorf("uploadObjectDiskParts: %s not present in object_disk.DisksConnections", disk.Name) + } + if objSize, err = b.dst.CopyObject( + ctx, + srcDiskConnection.GetRemoteBucket(), + path.Join(srcDiskConnection.GetRemotePath(), storageObject.ObjectRelativePath), + path.Join(backupName, disk.Name, 
storageObject.ObjectRelativePath), + ); err != nil { + return err + } + realSize += objSize + } + if realSize > objPartFileMeta.TotalSize { + size += realSize + } else { + size += objPartFileMeta.TotalSize + } + return nil + }); err != nil { + return 0, err + } + return size, nil +} + +func (b *Backuper) createBackupMetadata(ctx context.Context, backupMetaFile, backupName, version, tags string, diskMap, diskTypes map[string]string, disks []clickhouse.Disk, backupDataSize, backupMetadataSize, backupRBACSize, backupConfigSize uint64, tableMetas []metadata.TableTitle, allDatabases []clickhouse.Database, allFunctions []clickhouse.Function, log *apexLog.Entry) error { select { case <-ctx.Done(): return ctx.Err() @@ -529,6 +611,7 @@ func (b *Backuper) createBackupMetadata(ctx context.Context, backupMetaFile, bac backupMetadata := metadata.BackupMetadata{ BackupName: backupName, Disks: diskMap, + DiskTypes: diskTypes, ClickhouseBackupVersion: version, CreationDate: time.Now().UTC(), Tags: tags, diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index eb7dbcab..dbe5a850 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -7,6 +7,7 @@ import ( "os" "path" "path/filepath" + "regexp" "strings" "time" @@ -47,16 +48,16 @@ func (b *Backuper) Clean(ctx context.Context) error { } func (b *Backuper) cleanDir(dirName string) error { - if items, err := os.ReadDir(dirName); err != nil { + items, err := os.ReadDir(dirName) + if err != nil { if os.IsNotExist(err) { return nil } return err - } else { - for _, item := range items { - if err = os.RemoveAll(path.Join(dirName, item.Name())); err != nil { - return err - } + } + for _, item := range items { + if err = os.RemoveAll(path.Join(dirName, item.Name())); err != nil { + return err } } return nil @@ -126,6 +127,24 @@ func (b *Backuper) RemoveBackupLocal(ctx context.Context, backupName string, dis if err != nil { return err } + + if b.hasObjectDisks(backupList, backupName, disks) { + bd, err := storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) + if err != nil { + return err + } + err = bd.Connect(ctx) + if err != nil { + return fmt.Errorf("can't connect to remote storage: %v", err) + } + defer func() { + if err := bd.Close(ctx); err != nil { + b.log.Warnf("can't close BackupDestination error: %v", err) + } + }() + b.dst = bd + } + for _, backup := range backupList { if backup.BackupName == backupName { if strings.Contains(backup.Tags, "embedded") { @@ -134,14 +153,19 @@ func (b *Backuper) RemoveBackupLocal(ctx context.Context, backupName string, dis return err } } + for _, disk := range disks { backupPath := path.Join(disk.Path, "backup", backupName) if disk.IsBackup { backupPath = path.Join(disk.Path, backupName) } + if !disk.IsBackup && (disk.Type == "s3" || disk.Type == "azure_blob_storage") { + if err = b.cleanLocalBackupObjectDisk(ctx, backupName, backupPath, disk.Name); err != nil { + return err + } + } log.Debugf("remove '%s'", backupPath) - err = os.RemoveAll(backupPath) - if err != nil { + if err = os.RemoveAll(backupPath); err != nil { return err } } @@ -156,6 +180,46 @@ func (b *Backuper) RemoveBackupLocal(ctx context.Context, backupName string, dis return fmt.Errorf("'%s' is not found on local storage", backupName) } +func (b *Backuper) hasObjectDisks(backupList []LocalBackup, backupName string, disks []clickhouse.Disk) bool { + for _, backup := range backupList { + if backup.BackupName == backupName && !strings.Contains(backup.Tags, "embedded") { + for _, disk := range disks { + if !disk.IsBackup && (disk.Type == 
"s3" || disk.Type == "azure_blob_storage") { + return true + } + } + } + } + return false +} + +func (b *Backuper) cleanLocalBackupObjectDisk(ctx context.Context, backupName string, backupPath, diskName string) error { + _, err := os.Stat(backupPath) + if os.IsNotExist(err) { + apexLog.Warnf("%v", err) + return nil + } + err = filepath.Walk(backupPath, func(fPath string, fInfo os.FileInfo, err error) error { + if err != nil { + return err + } + if fInfo.IsDir() { + return nil + } + objMeta, err := object_disk.ReadMetadataFromFile(fPath) + if err != nil { + return err + } + for _, storageObject := range objMeta.StorageObjects { + if err = b.dst.DeleteFileFromObjectDiskBackup(ctx, path.Join(backupName, diskName, storageObject.ObjectRelativePath)); err != nil { + return err + } + } + return nil + }) + return err +} + func (b *Backuper) cleanLocalEmbedded(ctx context.Context, backup LocalBackup, disks []clickhouse.Disk) error { // skip if the same backup present in remote if b.cfg.General.RemoteStorage != "custom" && b.cfg.General.RemoteStorage != "none" { @@ -232,6 +296,8 @@ func (b *Backuper) RemoveBackupRemote(ctx context.Context, backupName string) er } }() + b.dst = bd + backupList, err := bd.BackupList(ctx, true, backupName) if err != nil { return err @@ -239,12 +305,15 @@ func (b *Backuper) RemoveBackupRemote(ctx context.Context, backupName string) er for _, backup := range backupList { if backup.BackupName == backupName { if strings.Contains(backup.Tags, "embedded") { - if err := b.cleanRemoteEmbedded(ctx, backup, bd); err != nil { + if err = b.cleanRemoteEmbedded(ctx, backup, bd); err != nil { log.Warnf("b.cleanRemoteEmbedded return error: %v", err) return err } } - if err := bd.RemoveBackup(ctx, backup); err != nil { + if err = b.cleanRemoteBackupObjectDisks(ctx, backup); err != nil { + return err + } + if err = bd.RemoveBackup(ctx, backup); err != nil { log.Warnf("bd.RemoveBackup return error: %v", err) return err } @@ -260,6 +329,40 @@ func (b *Backuper) RemoveBackupRemote(ctx context.Context, backupName string) er return fmt.Errorf("'%s' is not found on remote storage", backupName) } +func (b *Backuper) cleanRemoteBackupObjectDisks(ctx context.Context, backup storage.Backup) error { + if b.dst.Kind() != "azblob" && b.dst.Kind() != "s3" && b.dst.Kind() != "gcs" { + return nil + } + if !backup.Legacy && len(backup.Disks) > 0 && backup.DiskTypes != nil && len(backup.DiskTypes) < len(backup.Disks) { + return fmt.Errorf("RemoveRemoteBackupObjectDisks: invalid backup.DiskTypes=%#v, not correlated with backup.Disks=%#v", backup.DiskTypes, backup.Disks) + } + return b.dst.Walk(ctx, backup.BackupName+"/", true, func(ctx context.Context, f storage.RemoteFile) error { + fName := path.Join(backup.BackupName, f.Name()) + if !strings.HasPrefix(fName, path.Join(backup.BackupName, "/shadow/")) { + return nil + } + for diskName, diskType := range backup.DiskTypes { + if (diskType == "s3" || diskType == "azure_blob_storage") && regexp.MustCompile("/"+diskName+"[_/][^/]+$").MatchString(fName) { + objMetaReader, err := b.dst.GetFileReader(ctx, fName) + if err != nil { + return err + } + objMeta, err := object_disk.ReadMetadataFromReader(objMetaReader, fName) + if err != nil { + return err + } + for _, storageObject := range objMeta.StorageObjects { + err = b.dst.DeleteFileFromObjectDiskBackup(ctx, path.Join(backup.BackupName, diskName, storageObject.ObjectRelativePath)) + if err != nil { + return err + } + } + } + } + return nil + }) +} + func (b *Backuper) cleanRemoteEmbedded(ctx context.Context, 
backup storage.Backup, bd *storage.BackupDestination) error { // skip if the same backup present in local if localList, _, err := b.GetLocalBackups(ctx, nil); err != nil { diff --git a/pkg/backup/restore.go b/pkg/backup/restore.go index 86961d8c..f41f1215 100644 --- a/pkg/backup/restore.go +++ b/pkg/backup/restore.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "github.com/Altinity/clickhouse-backup/pkg/status" + "github.com/Altinity/clickhouse-backup/pkg/storage" "github.com/Altinity/clickhouse-backup/pkg/storage/object_disk" "io/fs" "net/url" @@ -149,9 +150,11 @@ func (b *Backuper) Restore(backupName, tablePattern string, databaseMapping, par } else { out, err = exec.CommandContext(ctx, cmd[0]).CombinedOutput() } - cancel() - log.Debug(string(out)) - return err + if err != nil { + log.Debug(string(out)) + cancel() + return err + } } if schemaOnly || (schemaOnly == dataOnly) { @@ -569,9 +572,21 @@ func (b *Backuper) RestoreData(ctx context.Context, backupName string, tablePatt return fmt.Errorf("can't restore: %v", err) } - diskMap := map[string]string{} + diskMap := make(map[string]string, len(disks)) + diskTypes := make(map[string]string, len(disks)) for _, disk := range disks { diskMap[disk.Name] = disk.Path + diskTypes[disk.Name] = disk.Type + } + for diskName := range backup.Disks { + if _, exists := diskMap[diskName]; !exists { + diskMap[diskName] = backup.Disks[diskName] + } + } + for diskName := range backup.DiskTypes { + if _, exists := diskTypes[diskName]; !exists { + diskTypes[diskName] = backup.DiskTypes[diskName] + } } var tablesForRestore ListOfTables var partitionsNameList map[metadata.TableTitle][]string @@ -594,7 +609,7 @@ func (b *Backuper) RestoreData(ctx context.Context, backupName string, tablePatt if b.isEmbedded { err = b.restoreDataEmbedded(ctx, backupName, tablesForRestore, metadataPath, partitionsNameList) } else { - err = b.restoreDataRegular(ctx, backupName, tablePattern, tablesForRestore, diskMap, disks, log) + err = b.restoreDataRegular(ctx, backupName, tablePattern, tablesForRestore, diskMap, diskTypes, disks, log) } if err != nil { return err @@ -607,7 +622,7 @@ func (b *Backuper) restoreDataEmbedded(ctx context.Context, backupName string, t return b.restoreEmbedded(ctx, backupName, false, tablesForRestore, metadataPath, partitionsNameList) } -func (b *Backuper) restoreDataRegular(ctx context.Context, backupName string, tablePattern string, tablesForRestore ListOfTables, diskMap map[string]string, disks []clickhouse.Disk, log *apexLog.Entry) error { +func (b *Backuper) restoreDataRegular(ctx context.Context, backupName string, tablePattern string, tablesForRestore ListOfTables, diskMap, diskTypes map[string]string, disks []clickhouse.Disk, log *apexLog.Entry) error { if len(b.cfg.General.RestoreDatabaseMapping) > 0 { tablePattern = b.changeTablePatternFromRestoreDatabaseMapping(tablePattern) } @@ -615,7 +630,7 @@ func (b *Backuper) restoreDataRegular(ctx context.Context, backupName string, ta if err != nil { return err } - disks = b.adjustDisksFromTablesWithSystemDisks(tablesForRestore, diskMap, log, disks) + disks = b.adjustDisksFromTablesWithSystemDisks(tablesForRestore, diskMap, diskTypes, disks, log) dstTablesMap := b.prepareDstTablesMap(chTables) missingTables := b.checkMissingTables(tablesForRestore, chTables) @@ -641,11 +656,11 @@ func (b *Backuper) restoreDataRegular(ctx context.Context, backupName string, ta } // https://github.com/Altinity/clickhouse-backup/issues/529 if b.cfg.ClickHouse.RestoreAsAttach { - if err = 
b.restoreDataRegularByAttach(ctx, backupName, table, disks, dstTable, log, tablesForRestore, i); err != nil { + if err = b.restoreDataRegularByAttach(ctx, backupName, table, diskMap, diskTypes, disks, dstTable, log, tablesForRestore, i); err != nil { return err } } else { - if err = b.restoreDataRegularByParts(backupName, table, disks, dstTable, log, tablesForRestore, i); err != nil { + if err = b.restoreDataRegularByParts(ctx, backupName, table, diskMap, diskTypes, disks, dstTable, log, tablesForRestore, i); err != nil { return err } } @@ -660,28 +675,118 @@ func (b *Backuper) restoreDataRegular(ctx context.Context, backupName string, ta return nil } -func (b *Backuper) restoreDataRegularByAttach(ctx context.Context, backupName string, table metadata.TableMetadata, disks []clickhouse.Disk, dstTable clickhouse.Table, log *apexLog.Entry, tablesForRestore ListOfTables, i int) error { +func (b *Backuper) restoreDataRegularByAttach(ctx context.Context, backupName string, table metadata.TableMetadata, diskMap, diskTypes map[string]string, disks []clickhouse.Disk, dstTable clickhouse.Table, log *apexLog.Entry, tablesForRestore ListOfTables, i int) error { if err := filesystemhelper.HardlinkBackupPartsToStorage(backupName, table, disks, dstTable.DataPaths, b.ch, false); err != nil { return fmt.Errorf("can't copy data to storage '%s.%s': %v", table.Database, table.Table, err) } - log.Debugf("data to 'storage' copied") + log.Debug("data to 'storage' copied") + if err := b.downloadObjectDiskParts(ctx, backupName, table, diskMap, diskTypes, dstTable); err != nil { + return fmt.Errorf("can't restore object_disk server-side copy data parts '%s.%s': %v", table.Database, table.Table, err) + } + if err := b.ch.AttachTable(ctx, tablesForRestore[i]); err != nil { return fmt.Errorf("can't attach table '%s.%s': %v", tablesForRestore[i].Database, tablesForRestore[i].Table, err) } return nil } -func (b *Backuper) restoreDataRegularByParts(backupName string, table metadata.TableMetadata, disks []clickhouse.Disk, dstTable clickhouse.Table, log *apexLog.Entry, tablesForRestore ListOfTables, i int) error { +func (b *Backuper) restoreDataRegularByParts(ctx context.Context, backupName string, table metadata.TableMetadata, diskMap, diskTypes map[string]string, disks []clickhouse.Disk, dstTable clickhouse.Table, log *apexLog.Entry, tablesForRestore ListOfTables, i int) error { if err := filesystemhelper.HardlinkBackupPartsToStorage(backupName, table, disks, dstTable.DataPaths, b.ch, true); err != nil { return fmt.Errorf("can't copy data to datached '%s.%s': %v", table.Database, table.Table, err) } - log.Debugf("data to 'detached' copied") + log.Debug("data to 'detached' copied") + if err := b.downloadObjectDiskParts(ctx, backupName, table, diskMap, diskTypes, dstTable); err != nil { + return fmt.Errorf("can't restore object_disk server-side copy data parts '%s.%s': %v", table.Database, table.Table, err) + } if err := b.ch.AttachDataParts(tablesForRestore[i], disks); err != nil { return fmt.Errorf("can't attach data parts for table '%s.%s': %v", tablesForRestore[i].Database, tablesForRestore[i].Table, err) } return nil } +func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName string, backupTable metadata.TableMetadata, diskMap, diskTypes map[string]string, dstTable clickhouse.Table) error { + log := apexLog.WithFields(apexLog.Fields{"operation": "downloadObjectDiskParts"}) + start := time.Now() + dbAndTableDir := path.Join(common.TablePathEncode(backupTable.Database), 
common.TablePathEncode(backupTable.Table))
+	var err error
+	needToDownloadObjectDisk := false
+	for diskName := range backupTable.Parts {
+		diskType, exists := diskTypes[diskName]
+		if !exists {
+			return fmt.Errorf("%s disk is not present in diskTypes: %v", diskName, diskTypes)
+		}
+		if diskType == "s3" || diskType == "azure_blob_storage" {
+			needToDownloadObjectDisk = true
+			break
+		}
+	}
+	if needToDownloadObjectDisk {
+		b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName)
+		if err != nil {
+			return err
+		}
+		if err = b.dst.Connect(ctx); err != nil {
+			return fmt.Errorf("can't connect to %s: %v", b.dst.Kind(), err)
+		}
+		defer func() {
+			if err := b.dst.Close(ctx); err != nil {
+				b.log.Warnf("downloadObjectDiskParts: can't close BackupDestination error: %v", err)
+			}
+		}()
+	}
+
+	for diskName, parts := range backupTable.Parts {
+		diskType, exists := diskTypes[diskName]
+		if !exists {
+			return fmt.Errorf("%s disk is not present in diskTypes: %v", diskName, diskTypes)
+		}
+		if diskType == "s3" || diskType == "azure_blob_storage" {
+			if err = object_disk.InitCredentialsAndConnections(ctx, b.ch, b.cfg, diskName); err != nil {
+				return err
+			}
+			for _, part := range parts {
+				partPath := path.Join(diskMap[diskName], "backup", backupName, "shadow", dbAndTableDir, diskName, part.Name)
+				if err = filepath.Walk(partPath, func(fPath string, fInfo fs.FileInfo, err error) error {
+					if err != nil {
+						return err
+					}
+					if fInfo.IsDir() {
+						return nil
+					}
+					objMeta, err := object_disk.ReadMetadataFromFile(fPath)
+					if err != nil {
+						return err
+					}
+					if objMeta.StorageObjectCount < 1 {
+						return fmt.Errorf("%s: invalid object_disk.Metadata: %#v", fPath, objMeta)
+					}
+					var srcBucket, srcKey string
+					for _, storageObject := range objMeta.StorageObjects {
+						if b.cfg.General.RemoteStorage == "s3" && diskType == "s3" {
+							srcBucket = b.cfg.S3.Bucket
+							srcKey = path.Join(b.cfg.S3.ObjectDiskPath, backupName, storageObject.ObjectRelativePath)
+						} else if b.cfg.General.RemoteStorage == "gcs" && diskType == "s3" {
+							srcBucket = b.cfg.GCS.Bucket
+							srcKey = path.Join(b.cfg.GCS.ObjectDiskPath, backupName, storageObject.ObjectRelativePath)
+						} else if b.cfg.General.RemoteStorage == "azblob" && diskType == "azure_blob_storage" {
+							srcBucket = b.cfg.AzureBlob.Container
+							srcKey = path.Join(b.cfg.AzureBlob.ObjectDiskPath, backupName, storageObject.ObjectRelativePath)
+						} else {
+							return fmt.Errorf("incompatible object_disk[%s].Type=%s and remote_storage: %s", diskName, diskType, b.cfg.General.RemoteStorage)
+						}
+						if err = object_disk.CopyObject(ctx, b.ch, b.cfg, diskName, srcBucket, srcKey, storageObject.ObjectRelativePath); err != nil {
+							return fmt.Errorf("object_disk.CopyObject error: %v", err)
+						}
+					}
+					return nil
+				}); err != nil {
+					return err
+				}
+			}
+		}
+	}
+	log.WithField("duration", utils.HumanizeDuration(time.Since(start))).Debugf("done")
+	return nil
+}
+
 func (b *Backuper) checkMissingTables(tablesForRestore ListOfTables, chTables []clickhouse.Table) []string {
 	var missingTables []string
 	for _, table := range tablesForRestore {
@@ -716,10 +821,13 @@ func (b *Backuper) prepareDstTablesMap(chTables []clickhouse.Table) map[metadata
 	return dstTablesMap
 }
 
-func (b *Backuper) adjustDisksFromTablesWithSystemDisks(tablesForRestore ListOfTables, diskMap map[string]string, log *apexLog.Entry, disks []clickhouse.Disk) []clickhouse.Disk {
+func (b *Backuper) adjustDisksFromTablesWithSystemDisks(tablesForRestore ListOfTables, diskMap, diskTypes map[string]string, disks []clickhouse.Disk, log *apexLog.Entry) []clickhouse.Disk {
 	for 
_, t := range tablesForRestore { for disk := range t.Parts { if _, diskExists := diskMap[disk]; !diskExists { + if diskTypes[disk] != diskTypes["default"] { + log.Fatalf("table '%s.%s' require disk '%s' that not found in clickhouse table system.disks, and have different diskType %s than `default` disk %s", t.Database, t.Table, disk, diskTypes[disk], diskTypes["default"]) + } log.Warnf("table '%s.%s' require disk '%s' that not found in clickhouse table system.disks, you can add nonexistent disks to `disk_mapping` in `clickhouse` config section, data will restored to %s", t.Database, t.Table, disk, diskMap["default"]) found := false for _, d := range disks { diff --git a/pkg/clickhouse/clickhouse.go b/pkg/clickhouse/clickhouse.go index 8f476718..0be82b3f 100644 --- a/pkg/clickhouse/clickhouse.go +++ b/pkg/clickhouse/clickhouse.go @@ -144,6 +144,14 @@ func (ch *ClickHouse) GetDisks(ctx context.Context, enrich bool) ([]Disk, error) if disks[i].Name == ch.Config.EmbeddedBackupDisk { disks[i].IsBackup = true } + // s3_plain disk could contains relative + if disks[i].Path != "" && !strings.HasPrefix(disks[i].Path, "/") { + for _, d := range disks { + if d.Name == "default" { + disks[i].Path = path.Join(d.Path, disks[i].Path) + "/" + } + } + } } if len(ch.Config.DiskMapping) == 0 { return disks, nil diff --git a/pkg/clickhouse/utils.go b/pkg/clickhouse/utils.go index a3d6c7c3..3d15e113 100644 --- a/pkg/clickhouse/utils.go +++ b/pkg/clickhouse/utils.go @@ -39,11 +39,3 @@ func GetDisksByPaths(disks []Disk, dataPaths []string) map[string]string { } return result } - -func ConvertToSlice(databases []Database) []string { - res := make([]string, len(databases)) - for idx, dbName := range databases { - res[idx] = dbName.Name - } - return res -} diff --git a/pkg/config/config.go b/pkg/config/config.go index 5fe2c62c..4494e108 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -67,6 +67,7 @@ type GCSConfig struct { CredentialsJSONEncoded string `yaml:"credentials_json_encoded" envconfig:"GCS_CREDENTIALS_JSON_ENCODED"` Bucket string `yaml:"bucket" envconfig:"GCS_BUCKET"` Path string `yaml:"path" envconfig:"GCS_PATH"` + ObjectDiskPath string `yaml:"object_disk_path" envconfig:"GCS_OBJECT_DISK_PATH"` CompressionLevel int `yaml:"compression_level" envconfig:"GCS_COMPRESSION_LEVEL"` CompressionFormat string `yaml:"compression_format" envconfig:"GCS_COMPRESSION_FORMAT"` Debug bool `yaml:"debug" envconfig:"GCS_DEBUG"` @@ -86,6 +87,7 @@ type AzureBlobConfig struct { UseManagedIdentity bool `yaml:"use_managed_identity" envconfig:"AZBLOB_USE_MANAGED_IDENTITY"` Container string `yaml:"container" envconfig:"AZBLOB_CONTAINER"` Path string `yaml:"path" envconfig:"AZBLOB_PATH"` + ObjectDiskPath string `yaml:"object_disk_path" envconfig:"AZBLOB_OBJECT_DISK_PATH"` CompressionLevel int `yaml:"compression_level" envconfig:"AZBLOB_COMPRESSION_LEVEL"` CompressionFormat string `yaml:"compression_format" envconfig:"AZBLOB_COMPRESSION_FORMAT"` SSEKey string `yaml:"sse_key" envconfig:"AZBLOB_SSE_KEY"` @@ -106,6 +108,7 @@ type S3Config struct { AssumeRoleARN string `yaml:"assume_role_arn" envconfig:"S3_ASSUME_ROLE_ARN"` ForcePathStyle bool `yaml:"force_path_style" envconfig:"S3_FORCE_PATH_STYLE"` Path string `yaml:"path" envconfig:"S3_PATH"` + ObjectDiskPath string `yaml:"object_disk_path" envconfig:"S3_OBJECT_DISK_PATH"` DisableSSL bool `yaml:"disable_ssl" envconfig:"S3_DISABLE_SSL"` CompressionLevel int `yaml:"compression_level" envconfig:"S3_COMPRESSION_LEVEL"` CompressionFormat string `yaml:"compression_format" 
envconfig:"S3_COMPRESSION_FORMAT"` @@ -147,6 +150,7 @@ type FTPConfig struct { Password string `yaml:"password" envconfig:"FTP_PASSWORD"` TLS bool `yaml:"tls" envconfig:"FTP_TLS"` Path string `yaml:"path" envconfig:"FTP_PATH"` + ObjectDiskPath string `yaml:"object_disk_path" envconfig:"FTP_OBJECT_DISK_PATH"` CompressionFormat string `yaml:"compression_format" envconfig:"FTP_COMPRESSION_FORMAT"` CompressionLevel int `yaml:"compression_level" envconfig:"FTP_COMPRESSION_LEVEL"` Concurrency uint8 `yaml:"concurrency" envconfig:"FTP_CONCURRENCY"` @@ -161,6 +165,7 @@ type SFTPConfig struct { Password string `yaml:"password" envconfig:"SFTP_PASSWORD"` Key string `yaml:"key" envconfig:"SFTP_KEY"` Path string `yaml:"path" envconfig:"SFTP_PATH"` + ObjectDiskPath string `yaml:"object_disk_path" envconfig:"SFTP_OBJECT_DISK_PATH"` CompressionFormat string `yaml:"compression_format" envconfig:"SFTP_COMPRESSION_FORMAT"` CompressionLevel int `yaml:"compression_level" envconfig:"SFTP_COMPRESSION_LEVEL"` Concurrency int `yaml:"concurrency" envconfig:"SFTP_CONCURRENCY"` @@ -413,6 +418,21 @@ func ValidateConfig(cfg *Config) error { cfg.General.FullDuration = duration } } + // @TODO add all other storage types + switch cfg.General.RemoteStorage { + case "s3": + if cfg.S3.ObjectDiskPath == "" || strings.HasPrefix(cfg.S3.Path, cfg.S3.ObjectDiskPath) { + return fmt.Errorf("invalid s3->object_disk_path, shall be not empty and shall not be prefix for `path`") + } + case "gcs": + if cfg.GCS.ObjectDiskPath == "" || strings.HasPrefix(cfg.GCS.Path, cfg.GCS.ObjectDiskPath) { + return fmt.Errorf("invalid gcs->object_disk_path, shall be not empty and shall not be prefix for `path`") + } + case "azblob": + if cfg.AzureBlob.ObjectDiskPath == "" || strings.HasPrefix(cfg.AzureBlob.Path, cfg.AzureBlob.ObjectDiskPath) { + return fmt.Errorf("invalid azblob->object_disk_path, shall be not empty and shall not be prefix for `path`") + } + } return nil } diff --git a/pkg/filesystemhelper/filesystemhelper.go b/pkg/filesystemhelper/filesystemhelper.go index 4af9dbc6..bf28a3d8 100644 --- a/pkg/filesystemhelper/filesystemhelper.go +++ b/pkg/filesystemhelper/filesystemhelper.go @@ -115,9 +115,10 @@ func MkdirAll(path string, ch *clickhouse.ClickHouse, disks []clickhouse.Disk) e // HardlinkBackupPartsToStorage - copy partitions for specific table to detached folder func HardlinkBackupPartsToStorage(backupName string, backupTable metadata.TableMetadata, disks []clickhouse.Disk, tableDataPaths []string, ch *clickhouse.ClickHouse, toDetached bool) error { - dstDataPaths := clickhouse.GetDisksByPaths(disks, tableDataPaths) log := apexLog.WithFields(apexLog.Fields{"operation": "HardlinkBackupPartsToStorage"}) start := time.Now() + dstDataPaths := clickhouse.GetDisksByPaths(disks, tableDataPaths) + dbAndTableDir := path.Join(common.TablePathEncode(backupTable.Database), common.TablePathEncode(backupTable.Table)) for _, backupDisk := range disks { backupDiskName := backupDisk.Name if len(backupTable.Parts[backupDiskName]) == 0 { @@ -146,7 +147,6 @@ func HardlinkBackupPartsToStorage(backupName string, backupTable metadata.TableM } else if !info.IsDir() { return fmt.Errorf("'%s' should be directory or absent", dstPartPath) } - dbAndTableDir := path.Join(common.TablePathEncode(backupTable.Database), common.TablePathEncode(backupTable.Table)) partPath := path.Join(backupDisk.Path, "backup", backupName, "shadow", dbAndTableDir, backupDisk.Name, part.Name) // Legacy backup support if _, err := os.Stat(partPath); os.IsNotExist(err) { diff --git 
a/pkg/metadata/metadata.go b/pkg/metadata/metadata.go index 8e63bb50..98942884 100644 --- a/pkg/metadata/metadata.go +++ b/pkg/metadata/metadata.go @@ -11,10 +11,11 @@ type TableTitle struct { type BackupMetadata struct { BackupName string `json:"backup_name"` - Disks map[string]string `json:"disks"` // "default": "/var/lib/clickhouse" + Disks map[string]string `json:"disks"` // "default": "/var/lib/clickhouse" + DiskTypes map[string]string `json:"disk_types"` // "default": "local" ClickhouseBackupVersion string `json:"version"` CreationDate time.Time `json:"creation_date"` - Tags string `json:"tags,omitempty"` // example "type=manual", "type=scheduled", "hostname": "", "shard=" + Tags string `json:"tags,omitempty"` // "regular,embedded" ClickHouseVersion string `json:"clickhouse_version,omitempty"` DataSize uint64 `json:"data_size,omitempty"` MetadataSize uint64 `json:"metadata_size"` diff --git a/pkg/storage/azblob.go b/pkg/storage/azblob.go index 921e3556..cf9a5819 100644 --- a/pkg/storage/azblob.go +++ b/pkg/storage/azblob.go @@ -29,22 +29,22 @@ type AzureBlob struct { Config *config.AzureBlobConfig } -func (s *AzureBlob) Kind() string { +func (a *AzureBlob) Kind() string { return "azblob" } // Connect - connect to Azure -func (s *AzureBlob) Connect(ctx context.Context) error { - if s.Config.EndpointSuffix == "" { +func (a *AzureBlob) Connect(ctx context.Context) error { + if a.Config.EndpointSuffix == "" { return fmt.Errorf("azblob endpoint suffix not set") } - if s.Config.Container == "" { + if a.Config.Container == "" { return fmt.Errorf("azblob container name not set") } - if s.Config.AccountName == "" { + if a.Config.AccountName == "" { return fmt.Errorf("azblob account name not set") } - if s.Config.AccountKey == "" && s.Config.SharedAccessSignature == "" && !s.Config.UseManagedIdentity { + if a.Config.AccountKey == "" && a.Config.SharedAccessSignature == "" && !a.Config.UseManagedIdentity { return fmt.Errorf("azblob account key or SAS or use_managed_identity must be set") } var ( @@ -52,20 +52,20 @@ func (s *AzureBlob) Connect(ctx context.Context) error { urlString string credential azblob.Credential ) - timeout, err := time.ParseDuration(s.Config.Timeout) + timeout, err := time.ParseDuration(a.Config.Timeout) if err != nil { return err } - if s.Config.AccountKey != "" { - credential, err = azblob.NewSharedKeyCredential(s.Config.AccountName, s.Config.AccountKey) + if a.Config.AccountKey != "" { + credential, err = azblob.NewSharedKeyCredential(a.Config.AccountName, a.Config.AccountKey) if err != nil { return err } - urlString = fmt.Sprintf("%s://%s.blob.%s", s.Config.EndpointSchema, s.Config.AccountName, s.Config.EndpointSuffix) - } else if s.Config.SharedAccessSignature != "" { + urlString = fmt.Sprintf("%a://%a.blob.%a", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix) + } else if a.Config.SharedAccessSignature != "" { credential = azblob.NewAnonymousCredential() - urlString = fmt.Sprintf("%s://%s.blob.%s?%s", s.Config.EndpointSchema, s.Config.AccountName, s.Config.EndpointSuffix, s.Config.SharedAccessSignature) - } else if s.Config.UseManagedIdentity { + urlString = fmt.Sprintf("%a://%a.blob.%a?%a", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix, a.Config.SharedAccessSignature) + } else if a.Config.UseManagedIdentity { azureEnv, err := azure.EnvironmentFromName("AZUREPUBLICCLOUD") if err != nil { return err @@ -94,26 +94,26 @@ func (s *AzureBlob) Connect(ctx context.Context) error { } credential = azblob.NewTokenCredential("", 
tokenRefresher) - urlString = fmt.Sprintf("%s://%s.blob.%s", s.Config.EndpointSchema, s.Config.AccountName, s.Config.EndpointSuffix) + urlString = fmt.Sprintf("%a://%a.blob.%a", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix) } u, err := url.Parse(urlString) if err != nil { return err } - // don't pollute syslog with expected 404's and other garbage logs + // don't pollute syslog with expected 404'a and other garbage logs pipeline.SetForceLogEnabled(false) select { case <-ctx.Done(): return ctx.Err() default: - s.Container = azblob.NewServiceURL(*u, azblob.NewPipeline(credential, azblob.PipelineOptions{ + a.Container = azblob.NewServiceURL(*u, azblob.NewPipeline(credential, azblob.PipelineOptions{ Retry: azblob.RetryOptions{ TryTimeout: timeout, }, - })).NewContainerURL(s.Config.Container) - _, err = s.Container.Create(ctx, azblob.Metadata{}, azblob.PublicAccessNone) + })).NewContainerURL(a.Config.Container) + _, err = a.Container.Create(ctx, azblob.Metadata{}, azblob.PublicAccessNone) if err != nil && !isContainerAlreadyExists(err) { return err } @@ -121,64 +121,70 @@ func (s *AzureBlob) Connect(ctx context.Context) error { if _, err := rand.Read(testName); err != nil { return errors.Wrapf(err, "azblob: failed to generate test blob name") } - testBlob := s.Container.NewBlockBlobURL(base64.URLEncoding.EncodeToString(testName)) + testBlob := a.Container.NewBlockBlobURL(base64.URLEncoding.EncodeToString(testName)) if _, err = testBlob.GetProperties(ctx, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}); err != nil { if se, ok := err.(azblob.StorageError); !ok || se.ServiceCode() != azblob.ServiceCodeBlobNotFound { - return errors.Wrapf(err, "azblob: failed to access container %s", s.Config.Container) + return errors.Wrapf(err, "azblob: failed to access container %a", a.Config.Container) } } - if s.Config.SSEKey != "" { - key, err := base64.StdEncoding.DecodeString(s.Config.SSEKey) + if a.Config.SSEKey != "" { + key, err := base64.StdEncoding.DecodeString(a.Config.SSEKey) if err != nil { return errors.Wrapf(err, "malformed SSE key, must be base64-encoded 256-bit key") } if len(key) != 32 { return fmt.Errorf("malformed SSE key, must be base64-encoded 256-bit key") } - b64key := s.Config.SSEKey + b64key := a.Config.SSEKey shakey := sha256.Sum256(key) b64sha := base64.StdEncoding.EncodeToString(shakey[:]) - s.CPK = azblob.NewClientProvidedKeyOptions(&b64key, &b64sha, nil) + a.CPK = azblob.NewClientProvidedKeyOptions(&b64key, &b64sha, nil) } return nil } } -func (s *AzureBlob) Close(ctx context.Context) error { +func (a *AzureBlob) Close(ctx context.Context) error { return nil } -func (s *AzureBlob) GetFileReader(ctx context.Context, key string) (io.ReadCloser, error) { - blob := s.Container.NewBlockBlobURL(path.Join(s.Config.Path, key)) - r, err := blob.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, s.CPK) +func (a *AzureBlob) GetFileReader(ctx context.Context, key string) (io.ReadCloser, error) { + blob := a.Container.NewBlockBlobURL(path.Join(a.Config.Path, key)) + r, err := blob.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, a.CPK) if err != nil { return nil, err } return r.Body(azblob.RetryReaderOptions{}), nil } -func (s *AzureBlob) GetFileReaderWithLocalPath(ctx context.Context, key, _ string) (io.ReadCloser, error) { - return s.GetFileReader(ctx, key) +func (a *AzureBlob) GetFileReaderWithLocalPath(ctx context.Context, key, _ string) (io.ReadCloser, error) { + return a.GetFileReader(ctx, key) } 
-func (s *AzureBlob) PutFile(ctx context.Context, key string, r io.ReadCloser) error { - blob := s.Container.NewBlockBlobURL(path.Join(s.Config.Path, key)) - bufferSize := s.Config.BufferSize // Configure the size of the rotating buffers that are used when uploading - maxBuffers := s.Config.MaxBuffers // Configure the number of rotating buffers that are used when uploading - _, err := x.UploadStreamToBlockBlob(ctx, r, blob, azblob.UploadStreamToBlockBlobOptions{BufferSize: bufferSize, MaxBuffers: maxBuffers}, s.CPK) +func (a *AzureBlob) PutFile(ctx context.Context, key string, r io.ReadCloser) error { + blob := a.Container.NewBlockBlobURL(path.Join(a.Config.Path, key)) + bufferSize := a.Config.BufferSize // Configure the size of the rotating buffers that are used when uploading + maxBuffers := a.Config.MaxBuffers // Configure the number of rotating buffers that are used when uploading + _, err := x.UploadStreamToBlockBlob(ctx, r, blob, azblob.UploadStreamToBlockBlobOptions{BufferSize: bufferSize, MaxBuffers: maxBuffers}, a.CPK) return err } -func (s *AzureBlob) DeleteFile(ctx context.Context, key string) error { - blob := s.Container.NewBlockBlobURL(path.Join(s.Config.Path, key)) +func (a *AzureBlob) DeleteFile(ctx context.Context, key string) error { + blob := a.Container.NewBlockBlobURL(path.Join(a.Config.Path, key)) _, err := blob.Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{}) return err } -func (s *AzureBlob) StatFile(ctx context.Context, key string) (RemoteFile, error) { - blob := s.Container.NewBlockBlobURL(path.Join(s.Config.Path, key)) - r, err := blob.GetProperties(ctx, azblob.BlobAccessConditions{}, s.CPK) +func (a *AzureBlob) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error { + blob := a.Container.NewBlockBlobURL(path.Join(a.Config.ObjectDiskPath, key)) + _, err := blob.Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{}) + return err +} + +func (a *AzureBlob) StatFile(ctx context.Context, key string) (RemoteFile, error) { + blob := a.Container.NewBlockBlobURL(path.Join(a.Config.Path, key)) + r, err := blob.GetProperties(ctx, azblob.BlobAccessConditions{}, a.CPK) if err != nil { if se, ok := err.(azblob.StorageError); !ok || se.ServiceCode() != azblob.ServiceCodeBlobNotFound { return nil, err @@ -192,8 +198,8 @@ func (s *AzureBlob) StatFile(ctx context.Context, key string) (RemoteFile, error }, nil } -func (s *AzureBlob) Walk(ctx context.Context, azPath string, recursive bool, process func(ctx context.Context, r RemoteFile) error) error { - prefix := path.Join(s.Config.Path, azPath) +func (a *AzureBlob) Walk(ctx context.Context, azPath string, recursive bool, process func(ctx context.Context, r RemoteFile) error) error { + prefix := path.Join(a.Config.Path, azPath) if prefix == "" || prefix == "/" { prefix = "" } else { @@ -209,7 +215,7 @@ func (s *AzureBlob) Walk(ctx context.Context, azPath string, recursive bool, pro } for mrk.NotDone() { if !recursive { - r, err := s.Container.ListBlobsHierarchySegment(ctx, mrk, delimiter, opt) + r, err := a.Container.ListBlobsHierarchySegment(ctx, mrk, delimiter, opt) if err != nil { return err } @@ -237,7 +243,7 @@ func (s *AzureBlob) Walk(ctx context.Context, azPath string, recursive bool, pro } mrk = r.NextMarker } else { - r, err := s.Container.ListBlobsFlatSegment(ctx, mrk, opt) + r, err := a.Container.ListBlobsFlatSegment(ctx, mrk, opt) if err != nil { return err } @@ -262,6 +268,10 @@ func (s *AzureBlob) Walk(ctx context.Context, azPath string, recursive 
bool, pro
 	return nil
 }
 
+func (a *AzureBlob) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) {
+	return 0, fmt.Errorf("CopyObject not implemented for %s", a.Kind())
+}
+
 type azureBlobFile struct {
 	size         int64
 	lastModified time.Time
diff --git a/pkg/storage/cos.go b/pkg/storage/cos.go
index 3feace67..31aa9944 100644
--- a/pkg/storage/cos.go
+++ b/pkg/storage/cos.go
@@ -2,6 +2,7 @@ package storage
 
 import (
 	"context"
+	"fmt"
 	"github.com/Altinity/clickhouse-backup/pkg/config"
 	"io"
 	"net/http"
@@ -145,6 +146,14 @@ func (c *COS) PutFile(ctx context.Context, key string, r io.ReadCloser) error {
 	return err
 }
 
+func (c *COS) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) {
+	return 0, fmt.Errorf("CopyObject not implemented for %s", c.Kind())
+}
+
+func (c *COS) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error {
+	return fmt.Errorf("DeleteFileFromObjectDiskBackup not implemented for %s", c.Kind())
+}
+
 type cosFile struct {
 	size         int64
 	lastModified time.Time
diff --git a/pkg/storage/ftp.go b/pkg/storage/ftp.go
index 85893f19..20a25831 100644
--- a/pkg/storage/ftp.go
+++ b/pkg/storage/ftp.go
@@ -208,6 +208,14 @@ func (f *FTP) PutFile(ctx context.Context, key string, r io.ReadCloser) error {
 	return client.Stor(k, r)
 }
 
+func (f *FTP) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) {
+	return 0, fmt.Errorf("CopyObject not implemented for %s", f.Kind())
+}
+
+func (f *FTP) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error {
+	return fmt.Errorf("DeleteFileFromObjectDiskBackup not implemented for %s", f.Kind())
+}
+
 type ftpFile struct {
 	size         int64
 	lastModified time.Time
diff --git a/pkg/storage/gcs.go b/pkg/storage/gcs.go
index 50b933d6..065117df 100644
--- a/pkg/storage/gcs.go
+++ b/pkg/storage/gcs.go
@@ -189,12 +189,34 @@ func (gcs *GCS) StatFile(ctx context.Context, key string) (RemoteFile, error) {
 	}, nil
 }
 
-func (gcs *GCS) DeleteFile(ctx context.Context, key string) error {
-	key = path.Join(gcs.Config.Path, key)
+func (gcs *GCS) deleteKey(ctx context.Context, key string) error {
 	object := gcs.client.Bucket(gcs.Config.Bucket).Object(key)
 	return object.Delete(ctx)
 }
 
+func (gcs *GCS) DeleteFile(ctx context.Context, key string) error {
+	key = path.Join(gcs.Config.Path, key)
+	return gcs.deleteKey(ctx, key)
+}
+
+func (gcs *GCS) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error {
+	key = path.Join(gcs.Config.ObjectDiskPath, key)
+	return gcs.deleteKey(ctx, key)
+}
+
+func (gcs *GCS) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) {
+	src := gcs.client.Bucket(srcBucket).Object(srcKey)
+	dst := gcs.client.Bucket(gcs.Config.Bucket).Object(path.Join(gcs.Config.ObjectDiskPath, dstKey))
+	attrs, err := src.Attrs(ctx)
+	if err != nil {
+		return 0, err
+	}
+	if _, err = dst.CopierFrom(src).Run(ctx); err != nil {
+		return 0, err
+	}
+	return attrs.Size, nil
+}
+
 type gcsFile struct {
 	size         int64
 	lastModified time.Time
diff --git a/pkg/storage/general.go b/pkg/storage/general.go
index 0260e3db..191cdadf 100644
--- a/pkg/storage/general.go
+++ b/pkg/storage/general.go
@@ -70,11 +70,11 @@ func (bd *BackupDestination) RemoveOldBackups(ctx context.Context, keep int) err
 	bd.Log.WithFields(apexLog.Fields{
 		"operation": "RemoveOldBackups",
 		"duration":  utils.HumanizeDuration(time.Since(start)),
-	}).Info("calculate backup list for delete")
+	}).Info("calculate backup list for deleteKey")
 	for _, backupToDelete := range backupsToDelete {
startDelete := time.Now() if err := bd.RemoveBackup(ctx, backupToDelete); err != nil { - bd.Log.Warnf("can't delete %s return error : %v", backupToDelete.BackupName, err) + bd.Log.Warnf("can't deleteKey %s return error : %v", backupToDelete.BackupName, err) } bd.Log.WithFields(apexLog.Fields{ "operation": "RemoveOldBackups", diff --git a/pkg/storage/object_disk/object_disk.go b/pkg/storage/object_disk/object_disk.go index 7b3d899c..bc3fc36a 100644 --- a/pkg/storage/object_disk/object_disk.go +++ b/pkg/storage/object_disk/object_disk.go @@ -183,6 +183,7 @@ type ObjectStorageCredentials struct { S3SecretKey string S3AssumeRole string S3Region string + S3StorageClass string AzureAccountName string AzureAccountKey string AzureContainerName string @@ -208,6 +209,28 @@ func (c *ObjectStorageConnection) GetRemoteStorage() storage.RemoteStorage { return nil } +func (c *ObjectStorageConnection) GetRemoteBucket() string { + switch c.Type { + case "s3": + return c.S3.Config.Bucket + case "azure_blob_storage": + return c.AzureBlob.Config.Container + } + apexLog.Fatalf("invalid ObjectStorageConnection.type %s", c.Type) + return "" +} + +func (c *ObjectStorageConnection) GetRemotePath() string { + switch c.Type { + case "s3": + return c.S3.Config.Path + case "azure_blob_storage": + return c.AzureBlob.Config.Path + } + apexLog.Fatalf("invalid ObjectStorageConnection.type %s", c.Type) + return "" +} + var DisksConnections map[string]ObjectStorageConnection var SystemDisks map[string]clickhouse.Disk @@ -312,6 +335,11 @@ func getObjectDisksCredentials(ctx context.Context, ch *clickhouse.ClickHouse) ( if regionNode := d.SelectElement("region"); regionNode != nil { creds.S3Region = strings.Trim(regionNode.InnerText(), "\r\n \t") } + if storageClassNode := d.SelectElement("s3_storage_class"); storageClassNode != nil { + creds.S3StorageClass = strings.Trim(storageClassNode.InnerText(), "\r\n \t") + } else { + creds.S3StorageClass = "STANDARD" + } accessKeyNode := d.SelectElement("access_key_id") secretKeyNode := d.SelectElement("secret_access_key") useEnvironmentCredentials := d.SelectElement("use_environment_credentials") @@ -357,6 +385,22 @@ func getObjectDisksCredentials(ctx context.Context, ch *clickhouse.ClickHouse) ( } } } + for _, d := range disks { + diskName := d.Data + if diskTypeNode := d.SelectElement("type"); diskTypeNode != nil { + diskType := diskTypeNode.InnerText() + switch diskType { + case "encrypted", "cache": + _, exists := credentials[diskName] + if !exists { + if diskNode := d.SelectElement("disk"); diskNode != nil { + childDiskName := diskNode.InnerText() + credentials[diskName] = credentials[childDiskName] + } + } + } + } + } return credentials, nil } @@ -397,6 +441,9 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf if creds.S3Region != "" { s3cfg.Region = creds.S3Region } + if creds.S3StorageClass != "" { + s3cfg.StorageClass = creds.S3StorageClass + } if creds.S3AssumeRole != "" { s3cfg.AssumeRoleARN = creds.S3AssumeRole } @@ -423,11 +470,12 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf s3cfg.Path = path.Join(pathItems[1:]...) 
s3cfg.ForcePathStyle = true } + // need for CopyObject + s3cfg.ObjectDiskPath = s3cfg.Path connection.S3 = &storage.S3{Config: &s3cfg, Log: apexLog.WithField("logger", "S3")} if err = connection.S3.Connect(ctx); err != nil { return nil, err } - break case "azblob": connection.Type = "azure_blob_storage" azureCfg := config.AzureBlobConfig{ @@ -453,6 +501,8 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf if azureCfg.AccountName != "" && strings.HasPrefix(azureCfg.Path, "/"+creds.AzureAccountName) { azureCfg.Path = strings.TrimPrefix(azureURL.Path, "/"+creds.AzureAccountName) } + // need for CopyObject + azureCfg.ObjectDiskPath = azureCfg.Path } if creds.AzureAccountKey != "" { azureCfg.AccountKey = creds.AzureAccountKey @@ -464,7 +514,6 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf if err = connection.AzureBlob.Connect(ctx); err != nil { return nil, err } - break } return &connection, nil } @@ -574,3 +623,13 @@ func GetFileSize(ctx context.Context, ch *clickhouse.ClickHouse, cfg *config.Con } return fileInfo.Size(), nil } + +func CopyObject(ctx context.Context, ch *clickhouse.ClickHouse, cfg *config.Config, diskName, srcBucket, srcKey, dstPath string) error { + if err := InitCredentialsAndConnections(ctx, ch, cfg, diskName); err != nil { + return err + } + connection := DisksConnections[diskName] + remoteStorage := connection.GetRemoteStorage() + _, err := remoteStorage.CopyObject(ctx, srcBucket, srcKey, dstPath) + return err +} diff --git a/pkg/storage/s3.go b/pkg/storage/s3.go index 4efd76f5..777a7656 100644 --- a/pkg/storage/s3.go +++ b/pkg/storage/s3.go @@ -7,11 +7,13 @@ import ( "github.com/Altinity/clickhouse-backup/pkg/config" "github.com/aws/smithy-go" awsV2http "github.com/aws/smithy-go/transport/http" + "golang.org/x/sync/semaphore" "io" "net/http" "os" "path" "strings" + "sync" "time" "golang.org/x/sync/errgroup" @@ -230,24 +232,34 @@ func (s *S3) PutFile(ctx context.Context, key string, r io.ReadCloser) error { return err } -func (s *S3) DeleteFile(ctx context.Context, key string) error { +func (s *S3) deleteKey(ctx context.Context, key string) error { params := &s3.DeleteObjectInput{ Bucket: aws.String(s.Config.Bucket), - Key: aws.String(path.Join(s.Config.Path, key)), + Key: aws.String(key), } if s.versioning { objVersion, err := s.getObjectVersion(ctx, key) if err != nil { - return errors.Wrapf(err, "DeleteFile, obtaining object version %+v", params) + return errors.Wrapf(err, "deleteKey, obtaining object version %+v", params) } params.VersionId = objVersion } if _, err := s.client.DeleteObject(ctx, params); err != nil { - return errors.Wrapf(err, "DeleteFile, deleting object %+v", params) + return errors.Wrapf(err, "deleteKey, deleting object %+v", params) } return nil } +func (s *S3) DeleteFile(ctx context.Context, key string) error { + key = path.Join(s.Config.Path, key) + return s.deleteKey(ctx, key) +} + +func (s *S3) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error { + key = path.Join(s.Config.ObjectDiskPath, key) + return s.deleteKey(ctx, key) +} + func (s *S3) isVersioningEnabled(ctx context.Context) bool { output, err := s.client.GetBucketVersioning(ctx, &s3.GetBucketVersioningInput{ Bucket: aws.String(s.Config.Bucket), @@ -348,6 +360,140 @@ func (s *S3) remotePager(ctx context.Context, s3Path string, recursive bool, pro return nil } +func (s *S3) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) { + dstKey = path.Join(s.Config.ObjectDiskPath, 
dstKey)
+	// Initiate a multipart upload
+	params := s3.CreateMultipartUploadInput{
+		Bucket:       aws.String(s.Config.Bucket),
+		Key:          aws.String(dstKey),
+		StorageClass: s3types.StorageClass(strings.ToUpper(s.Config.StorageClass)),
+	}
+	// https://github.com/Altinity/clickhouse-backup/issues/588
+	if len(s.Config.ObjectLabels) > 0 {
+		tags := ""
+		for k, v := range s.Config.ObjectLabels {
+			if tags != "" {
+				tags += "&"
+			}
+			tags += k + "=" + v
+		}
+		params.Tagging = aws.String(tags)
+	}
+	if s.Config.SSE != "" {
+		params.ServerSideEncryption = s3types.ServerSideEncryption(s.Config.SSE)
+	}
+	if s.Config.SSEKMSKeyId != "" {
+		params.SSEKMSKeyId = aws.String(s.Config.SSEKMSKeyId)
+	}
+	if s.Config.SSECustomerAlgorithm != "" {
+		params.SSECustomerAlgorithm = aws.String(s.Config.SSECustomerAlgorithm)
+	}
+	if s.Config.SSECustomerKey != "" {
+		params.SSECustomerKey = aws.String(s.Config.SSECustomerKey)
+	}
+	if s.Config.SSECustomerKeyMD5 != "" {
+		params.SSECustomerKeyMD5 = aws.String(s.Config.SSECustomerKeyMD5)
+	}
+	if s.Config.SSEKMSEncryptionContext != "" {
+		params.SSEKMSEncryptionContext = aws.String(s.Config.SSEKMSEncryptionContext)
+	}
+
+	// Get the size of the source object
+	sourceObjResp, err := s.client.HeadObject(ctx, &s3.HeadObjectInput{
+		Bucket: aws.String(srcBucket),
+		Key:    aws.String(srcKey),
+	})
+	if err != nil {
+		return 0, err
+	}
+	srcSize := sourceObjResp.ContentLength
+
+	initResp, err := s.client.CreateMultipartUpload(ctx, &params)
+	if err != nil {
+		return 0, err
+	}
+
+	// Get the upload ID
+	uploadID := initResp.UploadId
+
+	// Set the part size (e.g., 5 MB)
+	partSize := srcSize / s.Config.MaxPartsCount
+	if partSize < 5*1024*1024 {
+		partSize = 5 * 1024 * 1024
+	}
+
+	// Calculate the number of parts
+	numParts := (srcSize + partSize - 1) / partSize
+
+	copyPartSemaphore := semaphore.NewWeighted(int64(s.Config.Concurrency))
+	copyPartErrGroup, ctx := errgroup.WithContext(ctx)
+
+	var mu sync.Mutex
+	var parts []s3types.CompletedPart
+
+	// Copy each part of the object
+	for partNumber := int64(1); partNumber <= numParts; partNumber++ {
+		if err := copyPartSemaphore.Acquire(ctx, 1); err != nil {
+			apexLog.Errorf("can't acquire semaphore during CopyObject data parts: %v", err)
+			break
+		}
+		// Calculate the byte range for the part
+		start := (partNumber - 1) * partSize
+		end := partNumber * partSize
+		if end > srcSize {
+			end = srcSize
+		}
+		currentPartNumber := int32(partNumber)
+
+		copyPartErrGroup.Go(func() error {
+			defer copyPartSemaphore.Release(1)
+			// Copy the part
+			partResp, err := s.client.UploadPartCopy(ctx, &s3.UploadPartCopyInput{
+				Bucket:          aws.String(s.Config.Bucket),
+				Key:             aws.String(dstKey),
+				CopySource:      aws.String(srcBucket + "/" + srcKey),
+				CopySourceRange: aws.String(fmt.Sprintf("bytes=%d-%d", start, end-1)),
+				UploadId:        uploadID,
+				PartNumber:      currentPartNumber,
+			})
+			if err != nil {
+				return err
+			}
+			mu.Lock()
+			defer mu.Unlock()
+			parts = append(parts, s3types.CompletedPart{
+				ETag:       partResp.CopyPartResult.ETag,
+				PartNumber: currentPartNumber,
+			})
+			return nil
+		})
+	}
+	if err := copyPartErrGroup.Wait(); err != nil {
+		_, abortErr := s.client.AbortMultipartUpload(context.Background(), &s3.AbortMultipartUploadInput{
+			Bucket:   aws.String(s.Config.Bucket),
+			Key:      aws.String(dstKey),
+			UploadId: uploadID,
+		})
+		if abortErr != nil {
+			return 0, fmt.Errorf("aborting CopyObject multipart upload: %v, original error was: %v", abortErr, err)
+		}
+		return 0, fmt.Errorf("one of CopyObject goroutines returned an error: %v", err)
+	}
+
+	// Complete the multipart upload
+	_, err = s.client.CompleteMultipartUpload(context.Background(), &s3.CompleteMultipartUploadInput{
+		Bucket:          aws.String(s.Config.Bucket),
+		Key:             aws.String(dstKey),
+		UploadId:        uploadID,
+		MultipartUpload: &s3types.CompletedMultipartUpload{Parts: parts},
+	})
+	if err != nil {
+		return 0, fmt.Errorf("complete CopyObject multipart upload: %v", err)
+	}
+
+	return srcSize, nil
+}
+
 type s3File struct {
 	size         int64
 	lastModified time.Time
diff --git a/pkg/storage/sftp.go b/pkg/storage/sftp.go
index 9e5d1b54..9fe39cf4 100644
--- a/pkg/storage/sftp.go
+++ b/pkg/storage/sftp.go
@@ -238,6 +238,14 @@ func (sftp *SFTP) PutFile(ctx context.Context, key string, localFile io.ReadClos
 	return nil
 }
 
+func (sftp *SFTP) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) {
+	return 0, fmt.Errorf("CopyObject not implemented for %s", sftp.Kind())
+}
+
+func (sftp *SFTP) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error {
+	return fmt.Errorf("DeleteFileFromObjectDiskBackup not implemented for %s", sftp.Kind())
+}
+
 // Implement RemoteFile
 type sftpFile struct {
 	size         int64
diff --git a/pkg/storage/structs.go b/pkg/storage/structs.go
index 92fdd57d..d26b4c35 100644
--- a/pkg/storage/structs.go
+++ b/pkg/storage/structs.go
@@ -26,8 +26,10 @@ type RemoteStorage interface {
 	Close(ctx context.Context) error
 	StatFile(ctx context.Context, key string) (RemoteFile, error)
 	DeleteFile(ctx context.Context, key string) error
+	DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error
 	Walk(ctx context.Context, prefix string, recursive bool, fn func(context.Context, RemoteFile) error) error
 	GetFileReader(ctx context.Context, key string) (io.ReadCloser, error)
 	GetFileReaderWithLocalPath(ctx context.Context, key, localPath string) (io.ReadCloser, error)
 	PutFile(ctx context.Context, key string, r io.ReadCloser) error
+	CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error)
 }
diff --git a/pkg/storage/utils.go b/pkg/storage/utils.go
index 02517815..f91cdded 100644
--- a/pkg/storage/utils.go
+++ b/pkg/storage/utils.go
@@ -16,7 +16,7 @@ func GetBackupsToDelete(backups []Backup, keep int) []Backup {
 	sort.SliceStable(backups, func(i, j int) bool {
 		return backups[i].UploadDate.After(backups[j].UploadDate)
 	})
-	// KeepRemoteBackups should respect incremental backups sequences and don't delete required backups
+	// KeepRemoteBackups should respect incremental backups sequences and don't deleteKey required backups
 	// fix https://github.com/Altinity/clickhouse-backup/issues/111
 	// fix https://github.com/Altinity/clickhouse-backup/issues/385
 	// fix https://github.com/Altinity/clickhouse-backup/issues/525
diff --git a/test/integration/.env.example b/test/integration/.env.example
index 03ad8f9f..cbc57c36 100644
--- a/test/integration/.env.example
+++ b/test/integration/.env.example
@@ -7,9 +7,12 @@ export QA_AWS_SECRET_KEY=XXX
 export QA_AWS_BUCKET=XXX
 export QA_AWS_ENDPOINT=https://XXX/
 export QA_AWS_REGION=XXX
+export QA_GCS_OVER_S3_ACCESS_KEY=XXXX
+export QA_GCS_OVER_S3_SECRET_KEY=XXXX
+export QA_GCS_OVER_S3_BUCKET=XXX
 # QA_GCS_CRED_JSON='{ "type": "service_account", "project_id": "XXXX", "private_key_id": "XXXXX", "private_key": "SSH KEY XXXX", "client_email": "XXXX", "client_id": "XXXX", "auth_uri": "https://accounts.google.com/o/oauth2/auth", "token_uri": "https://oauth2.googleapis.com/token", "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", "client_x509_cert_url": 
"https://www.googleapis.com/robot/v1/metadata/x509/" }' export QA_GCS_CRED_JSON="" export QA_GCS_CRED_JSON_ENCODED=$(echo '{ "type": "service_account", "project_id": "XXXX", "private_key_id": "XXXXX", "private_key": "SSH KEY XXXX", "client_email": "XXXX", "client_id": "XXXX", "auth_uri": "https://accounts.google.com/o/oauth2/auth", "token_uri": "https://oauth2.googleapis.com/token", "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/" }' | base64 -w 0 ) export CLICKHOUSE_TESTS_DIR=${PWD}/test/testflows/clickhouse_backup -export GOCOVERDIR=${PWD}/test/testflows/_coverage_ \ No newline at end of file +export GOCOVERDIR=${PWD}/test/testflows/_coverage_ diff --git a/test/integration/config-azblob.yml b/test/integration/config-azblob.yml index f2d42cfd..ce9b8043 100644 --- a/test/integration/config-azblob.yml +++ b/test/integration/config-azblob.yml @@ -15,4 +15,5 @@ azblob: endpoint_schema: http container: container1 path: backup + object_disk_path: object_disks compression_format: tar diff --git a/test/integration/config-gcs.yml b/test/integration/config-gcs.yml index ff8ba7be..33a18bd0 100644 --- a/test/integration/config-gcs.yml +++ b/test/integration/config-gcs.yml @@ -11,6 +11,7 @@ clickhouse: gcs: bucket: altinity-qa-test path: backup + object_disk_path: object_disks credentials_file: /etc/clickhouse-backup/credentials.json compression_format: tar # endpoint: http://gcs:8080/storage/v1/ diff --git a/test/integration/config-s3.yml b/test/integration/config-s3.yml index 96d83df0..b44faf67 100644 --- a/test/integration/config-s3.yml +++ b/test/integration/config-s3.yml @@ -31,6 +31,7 @@ s3: acl: private force_path_style: true path: backup/{cluster}/{shard} + object_disk_path: object_disk/{cluster}/{shard} disable_ssl: true compression_format: tar allow_multipart_download: true diff --git a/test/integration/docker-compose_advanced.yml b/test/integration/docker-compose_advanced.yml index b81e1547..f4e5495b 100644 --- a/test/integration/docker-compose_advanced.yml +++ b/test/integration/docker-compose_advanced.yml @@ -170,6 +170,10 @@ services: # https://github.com/Altinity/clickhouse-backup/issues/691: AWS_ACCESS_KEY_ID: access-key AWS_SECRET_ACCESS_KEY: it-is-my-super-secret-key +# GCS over S3 object disk + QA_GCS_OVER_S3_ACCESS_KEY: ${QA_GCS_OVER_S3_ACCESS_KEY} + QA_GCS_OVER_S3_SECRET_KEY: ${QA_GCS_OVER_S3_SECRET_KEY} + QA_GCS_OVER_S3_BUCKET: ${QA_GCS_OVER_S3_BUCKET} volumes: - ./backup-user.xml:/etc/clickhouse-server/users.d/backup-user.xml - ./enable-access_management.xml:/etc/clickhouse-server/users.d/enable-access_management.xml diff --git a/test/integration/dynamic_settings.sh b/test/integration/dynamic_settings.sh index 62561582..e4f3b52a 100644 --- a/test/integration/dynamic_settings.sh +++ b/test/integration/dynamic_settings.sh @@ -111,6 +111,32 @@ cat < /etc/clickhouse-server/config.d/storage_configuration_s3.xml EOT +cat < /etc/clickhouse-server/config.d/storage_configuration_gcs.xml + + + + + s3 + https://storage.googleapis.com/${QA_GCS_OVER_S3_BUCKET}/clickhouse_backup_disk_gcs_over_s3/${HOSTNAME}/ + ${QA_GCS_OVER_S3_ACCESS_KEY} + ${QA_GCS_OVER_S3_SECRET_KEY} + true + false + + + + + + + disk_gcs_over_s3 + + + + + + +EOT + fi if [[ "${CLICKHOUSE_VERSION}" == "head" || "${CLICKHOUSE_VERSION}" =~ ^21\.12 || "${CLICKHOUSE_VERSION}" =~ ^2[2-9]\.[0-9]+ ]]; then @@ -249,6 +275,16 @@ cat < /etc/clickhouse-server/config.d/backup_storage_configuration_azure.x + + + + + azure + + + + + 
backups_s3 backups_s3_plain From da1bd993ffaaa72619c5076f87bc60363166af91 Mon Sep 17 00:00:00 2001 From: Slach Date: Fri, 21 Jul 2023 15:23:30 +0500 Subject: [PATCH 02/12] test CopyObject implementation, TestIntegrationGCS passed, fix https://github.com/Altinity/clickhouse-backup/issues/447 --- pkg/backup/delete.go | 134 ++++++++++++++++++--------- pkg/storage/gcs.go | 4 +- pkg/storage/s3.go | 2 +- test/integration/integration_test.go | 43 +++++---- 4 files changed, 122 insertions(+), 61 deletions(-) diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index dbe5a850..61818eab 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -147,8 +147,13 @@ func (b *Backuper) RemoveBackupLocal(ctx context.Context, backupName string, dis for _, backup := range backupList { if backup.BackupName == backupName { - if strings.Contains(backup.Tags, "embedded") { - if err := b.cleanLocalEmbedded(ctx, backup, disks); err != nil { + var skip bool + skip, err = b.skipIfTheSameRemoteBackupPresent(ctx, backup.BackupName, backup.Tags) + if err != nil { + return err + } + if !skip && strings.Contains(backup.Tags, "embedded") { + if err = b.cleanLocalEmbedded(ctx, backup, disks); err != nil { log.Warnf("b.cleanRemoteEmbedded return error: %v", err) return err } @@ -159,7 +164,7 @@ func (b *Backuper) RemoveBackupLocal(ctx context.Context, backupName string, dis if disk.IsBackup { backupPath = path.Join(disk.Path, backupName) } - if !disk.IsBackup && (disk.Type == "s3" || disk.Type == "azure_blob_storage") { + if !skip && !disk.IsBackup && (disk.Type == "s3" || disk.Type == "azure_blob_storage") && !strings.Contains(backup.Tags, "embedded") { if err = b.cleanLocalBackupObjectDisk(ctx, backupName, backupPath, disk.Name); err != nil { return err } @@ -196,8 +201,9 @@ func (b *Backuper) hasObjectDisks(backupList []LocalBackup, backupName string, d func (b *Backuper) cleanLocalBackupObjectDisk(ctx context.Context, backupName string, backupPath, diskName string) error { _, err := os.Stat(backupPath) if os.IsNotExist(err) { - apexLog.Warnf("%v", err) return nil + } else if err != nil { + return err } err = filepath.Walk(backupPath, func(fPath string, fInfo os.FileInfo, err error) error { if err != nil { @@ -221,18 +227,6 @@ func (b *Backuper) cleanLocalBackupObjectDisk(ctx context.Context, backupName st } func (b *Backuper) cleanLocalEmbedded(ctx context.Context, backup LocalBackup, disks []clickhouse.Disk) error { - // skip if the same backup present in remote - if b.cfg.General.RemoteStorage != "custom" && b.cfg.General.RemoteStorage != "none" { - if remoteList, err := b.GetRemoteBackups(ctx, true); err != nil { - return err - } else { - for _, remoteBackup := range remoteList { - if remoteBackup.BackupName == backup.BackupName && strings.Contains(remoteBackup.Tags, "embedded") { - return nil - } - } - } - } for _, disk := range disks { if disk.Name == b.cfg.ClickHouse.EmbeddedBackupDisk { if err := object_disk.InitCredentialsAndConnections(ctx, b.ch, b.cfg, disk.Name); err != nil { @@ -265,6 +259,23 @@ func (b *Backuper) cleanLocalEmbedded(ctx context.Context, backup LocalBackup, d return nil } +func (b *Backuper) skipIfTheSameRemoteBackupPresent(ctx context.Context, backupName, tags string) (bool, error) { + if b.cfg.General.RemoteStorage != "custom" && b.cfg.General.RemoteStorage != "none" { + if remoteList, err := b.GetRemoteBackups(ctx, true); err != nil { + return true, err + } else { + for _, remoteBackup := range remoteList { + if remoteBackup.BackupName == backupName { + if tags == "" || (tags != 
"" && strings.Contains(remoteBackup.Tags, tags)) { + return true, nil + } + } + } + } + } + return false, nil +} + func (b *Backuper) RemoveBackupRemote(ctx context.Context, backupName string) error { log := b.log.WithField("logger", "RemoveBackupRemote") backupName = utils.CleanBackupNameRE.ReplaceAllString(backupName, "") @@ -304,15 +315,20 @@ func (b *Backuper) RemoveBackupRemote(ctx context.Context, backupName string) er } for _, backup := range backupList { if backup.BackupName == backupName { - if strings.Contains(backup.Tags, "embedded") { - if err = b.cleanRemoteEmbedded(ctx, backup, bd); err != nil { - log.Warnf("b.cleanRemoteEmbedded return error: %v", err) + if skip, err := b.skipIfSameLocalBackupPresent(ctx, backup.BackupName, backup.Tags); err != nil { + return err + } else if !skip { + if strings.Contains(backup.Tags, "embedded") { + if err = b.cleanRemoteEmbedded(ctx, backup, bd); err != nil { + log.Warnf("b.cleanRemoteEmbedded return error: %v", err) + return err + } + } else if err = b.cleanRemoteBackupObjectDisks(ctx, backup); err != nil { + log.Warnf("b.cleanRemoteBackupObjectDisks return error: %v", err) return err } } - if err = b.cleanRemoteBackupObjectDisks(ctx, backup); err != nil { - return err - } + if err = bd.RemoveBackup(ctx, backup); err != nil { log.Warnf("bd.RemoveBackup return error: %v", err) return err @@ -342,20 +358,49 @@ func (b *Backuper) cleanRemoteBackupObjectDisks(ctx context.Context, backup stor return nil } for diskName, diskType := range backup.DiskTypes { - if (diskType == "s3" || diskType == "azure_blob_storage") && regexp.MustCompile("/"+diskName+"[_/][^/]+$").MatchString(fName) { - objMetaReader, err := b.dst.GetFileReader(ctx, fName) - if err != nil { - return err - } - objMeta, err := object_disk.ReadMetadataFromReader(objMetaReader, fName) - if err != nil { - return err - } - for _, storageObject := range objMeta.StorageObjects { - err = b.dst.DeleteFileFromObjectDiskBackup(ctx, path.Join(backup.BackupName, diskName, storageObject.ObjectRelativePath)) + if diskType == "s3" || diskType == "azure_blob_storage" { + compressedRE := regexp.MustCompile(`/shadow/([^/]+/[^/]+)/` + diskName + `_[^/]+$`) + // compressed remote object disk part + if matches := compressedRE.FindStringSubmatch(fName); len(matches) > 0 { + localPath := path.Join(backup.Disks[diskName], "backup", backup.BackupName, "shadow", matches[1], diskName) + if err := b.dst.DownloadCompressedStream(ctx, fName, localPath); err != nil { + return err + } + filepath.Walk(localPath, func(fPath string, fInfo fs.FileInfo, err error) error { + if err != nil { + return err + } + if fInfo.IsDir() { + return nil + } + objMeta, err := object_disk.ReadMetadataFromFile(fPath) + if err != nil { + return err + } + for _, storageObject := range objMeta.StorageObjects { + err = b.dst.DeleteFileFromObjectDiskBackup(ctx, path.Join(backup.BackupName, diskName, storageObject.ObjectRelativePath)) + if err != nil { + return err + } + } + return nil + }) + // non compressed remote object disk part + } else if regexp.MustCompile(`/shadow/[^/]+/[^/]+/` + diskName + `/.+$`).MatchString(fName) { + objMetaReader, err := b.dst.GetFileReader(ctx, fName) if err != nil { return err } + objMeta, err := object_disk.ReadMetadataFromReader(objMetaReader, fName) + if err != nil { + return err + } + for _, storageObject := range objMeta.StorageObjects { + err = b.dst.DeleteFileFromObjectDiskBackup(ctx, path.Join(backup.BackupName, diskName, storageObject.ObjectRelativePath)) + if err != nil { + return err + } + } } } } 
@@ -364,16 +409,6 @@ func (b *Backuper) cleanRemoteBackupObjectDisks(ctx context.Context, backup stor } func (b *Backuper) cleanRemoteEmbedded(ctx context.Context, backup storage.Backup, bd *storage.BackupDestination) error { - // skip if the same backup present in local - if localList, _, err := b.GetLocalBackups(ctx, nil); err != nil { - return err - } else { - for _, localBackup := range localList { - if localBackup.BackupName == backup.BackupName && strings.Contains(localBackup.Tags, "embedded") { - return nil - } - } - } if err := object_disk.InitCredentialsAndConnections(ctx, b.ch, b.cfg, b.cfg.ClickHouse.EmbeddedBackupDisk); err != nil { return err } @@ -398,6 +433,19 @@ func (b *Backuper) cleanRemoteEmbedded(ctx context.Context, backup storage.Backu }) } +func (b *Backuper) skipIfSameLocalBackupPresent(ctx context.Context, backupName, tags string) (bool, error) { + if localList, _, err := b.GetLocalBackups(ctx, nil); err != nil { + return true, err + } else { + for _, localBackup := range localList { + if localBackup.BackupName == backupName && strings.Contains(localBackup.Tags, tags) { + return true, nil + } + } + } + return false, nil +} + func (b *Backuper) CleanRemoteBroken(commandId int) error { ctx, cancel, err := status.Current.GetContextWithCancel(commandId) if err != nil { diff --git a/pkg/storage/gcs.go b/pkg/storage/gcs.go index 065117df..24fc6beb 100644 --- a/pkg/storage/gcs.go +++ b/pkg/storage/gcs.go @@ -205,8 +205,9 @@ func (gcs *GCS) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) } func (gcs *GCS) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) { + dstKey = path.Join(gcs.Config.ObjectDiskPath, dstKey) src := gcs.client.Bucket(srcBucket).Object(srcKey) - dst := gcs.client.Bucket(gcs.Config.Bucket).Object(path.Join(gcs.Config.ObjectDiskPath, dstKey)) + dst := gcs.client.Bucket(gcs.Config.Bucket).Object(dstKey) attrs, err := src.Attrs(ctx) if err != nil { return 0, err @@ -214,6 +215,7 @@ func (gcs *GCS) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string if _, err = dst.CopierFrom(src).Run(ctx); err != nil { return 0, err } + log.Debugf("GCS->CopyObject %s/%s -> %s/%s", srcBucket, srcKey, gcs.Config.Bucket, dstKey) return attrs.Size, nil } diff --git a/pkg/storage/s3.go b/pkg/storage/s3.go index 777a7656..67d8485c 100644 --- a/pkg/storage/s3.go +++ b/pkg/storage/s3.go @@ -490,7 +490,7 @@ func (s *S3) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) ( if err != nil { return 0, fmt.Errorf("complete CopyObject multipart upload: %v", err) } - + s.Log.Debugf("S3->CopyObject %s/%s -> %s/%s", srcBucket, srcKey, s.Config.Bucket, dstKey) return srcSize, nil } diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index a34ac6fe..f5eced12 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -1118,7 +1118,7 @@ func fullCleanup(r *require.Assertions, ch *TestClickHouse, backupNames, backupT func generateTestData(ch *TestClickHouse, r *require.Assertions, remoteStorageType string) { log.Infof("Generate test data %s", remoteStorageType) - generateTestDataWithDifferentStoragePolicy() + generateTestDataWithDifferentStoragePolicy(remoteStorageType) for _, data := range testData { if isTableSkip(ch, data, false) { continue @@ -1133,14 +1133,14 @@ func generateTestData(ch *TestClickHouse, r *require.Assertions, remoteStorageTy } } -func generateTestDataWithDifferentStoragePolicy() { +func 
generateTestDataWithDifferentStoragePolicy(remoteStorageType string) { for databaseName, databaseEngine := range map[string]string{dbNameOrdinary: "Ordinary", dbNameAtomic: "Atomic"} { - testDataEncrypted := TestDataStruct{ + testDataWithStoragePolicy := TestDataStruct{ Database: databaseName, DatabaseEngine: databaseEngine, Rows: func() []map[string]interface{} { - var result []map[string]interface{} + result := make([]map[string]interface{}, 100) for i := 0; i < 100; i++ { - result = append(result, map[string]interface{}{"id": uint64(i)}) + result[i] = map[string]interface{}{"id": uint64(i)} } return result }(), @@ -1150,32 +1150,43 @@ func generateTestDataWithDifferentStoragePolicy() { addTestDataIfNotExists := func() { found := false for _, data := range testData { - if data.Name == testDataEncrypted.Name && data.Database == testDataEncrypted.Database { + if data.Name == testDataWithStoragePolicy.Name && data.Database == testDataWithStoragePolicy.Database { found = true break } } if !found { - testData = append(testData, testDataEncrypted) + testData = append(testData, testDataWithStoragePolicy) } } //s3 disks support after 21.8 - if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") >= 0 { - testDataEncrypted.Name = "test_s3" - testDataEncrypted.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 's3_only'" + if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.8") >= 0 && remoteStorageType == "S3" { + testDataWithStoragePolicy.Name = "test_s3" + testDataWithStoragePolicy.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 's3_only'" addTestDataIfNotExists() } - //encrypted disks support after 21.10 if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.10") >= 0 { - testDataEncrypted.Name = "test_hdd3_encrypted" - testDataEncrypted.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 'hdd3_only_encrypted'" + testDataWithStoragePolicy.Name = "test_hdd3_encrypted" + testDataWithStoragePolicy.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 'hdd3_only_encrypted'" addTestDataIfNotExists() } //encrypted s3 disks support after 21.12 - if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.12") >= 0 { - testDataEncrypted.Name = "test_s3_encrypted" - testDataEncrypted.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 's3_only_encrypted'" + if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "21.12") >= 0 && remoteStorageType == "S3" { + testDataWithStoragePolicy.Name = "test_s3_encrypted" + testDataWithStoragePolicy.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 's3_only_encrypted'" + addTestDataIfNotExists() + } + //gcs over s3 support added in 22.6 + if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "22.6") >= 0 && remoteStorageType == "GCS" { + testDataWithStoragePolicy.Name = "test_gcs" + testDataWithStoragePolicy.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 'gcs_only'" + addTestDataIfNotExists() + } + //check azure_blob_storage only in 23.3+ (added in 22.1) + if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "23.3") >= 0 && remoteStorageType == "AZBLOB" { + testDataWithStoragePolicy.Name = "test_azure" + testDataWithStoragePolicy.Schema = "(id UInt64) Engine=MergeTree ORDER BY id SETTINGS storage_policy = 'azure_only'" addTestDataIfNotExists() } } From d693ade4899cfb189726cb753d6ae4972ac57601 Mon Sep 17 00:00:00 2001 From: Slach Date: Fri, 21 Jul 2023 21:17:10 +0500 Subject: [PATCH 03/12] 
test CopyObject implementation, TestIntegrationAzure passed, fix https://github.com/Altinity/clickhouse-backup/issues/447 --- pkg/backup/delete.go | 7 ++- pkg/backup/restore.go | 6 +- pkg/storage/azblob.go | 50 ++++++++++++--- test/integration/config-s3-fips.yml | 1 + test/integration/docker-compose_advanced.yml | 27 ++++---- test/integration/dynamic_settings.sh | 65 ++++++++++---------- 6 files changed, 98 insertions(+), 58 deletions(-) diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index 61818eab..3f5325c8 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -360,8 +360,8 @@ func (b *Backuper) cleanRemoteBackupObjectDisks(ctx context.Context, backup stor for diskName, diskType := range backup.DiskTypes { if diskType == "s3" || diskType == "azure_blob_storage" { compressedRE := regexp.MustCompile(`/shadow/([^/]+/[^/]+)/` + diskName + `_[^/]+$`) - // compressed remote object disk part if matches := compressedRE.FindStringSubmatch(fName); len(matches) > 0 { + // compressed remote object disk part localPath := path.Join(backup.Disks[diskName], "backup", backup.BackupName, "shadow", matches[1], diskName) if err := b.dst.DownloadCompressedStream(ctx, fName, localPath); err != nil { return err @@ -385,8 +385,11 @@ func (b *Backuper) cleanRemoteBackupObjectDisks(ctx context.Context, backup stor } return nil }) - // non compressed remote object disk part + if err := os.RemoveAll(localPath); err != nil { + return err + } } else if regexp.MustCompile(`/shadow/[^/]+/[^/]+/` + diskName + `/.+$`).MatchString(fName) { + // non compressed remote object disk part objMetaReader, err := b.dst.GetFileReader(ctx, fName) if err != nil { return err diff --git a/pkg/backup/restore.go b/pkg/backup/restore.go index f41f1215..911d3028 100644 --- a/pkg/backup/restore.go +++ b/pkg/backup/restore.go @@ -764,13 +764,13 @@ func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName strin for _, storageObject := range objMeta.StorageObjects { if b.cfg.General.RemoteStorage == "s3" && diskType == "s3" { srcBucket = b.cfg.S3.Bucket - srcKey = path.Join(b.cfg.S3.ObjectDiskPath, backupName, storageObject.ObjectRelativePath) + srcKey = path.Join(b.cfg.S3.ObjectDiskPath, backupName, diskName, storageObject.ObjectRelativePath) } else if b.cfg.General.RemoteStorage == "gcs" && diskType == "s3" { srcBucket = b.cfg.GCS.Bucket - srcKey = path.Join(b.cfg.GCS.ObjectDiskPath, backupName, storageObject.ObjectRelativePath) + srcKey = path.Join(b.cfg.GCS.ObjectDiskPath, backupName, diskName, storageObject.ObjectRelativePath) } else if b.cfg.General.RemoteStorage == "azblob" && diskType == "azure_blob_storage" { srcBucket = b.cfg.AzureBlob.Container - srcKey = path.Join(b.cfg.AzureBlob.ObjectDiskPath, backupName, storageObject.ObjectRelativePath) + srcKey = path.Join(b.cfg.AzureBlob.ObjectDiskPath, backupName, diskName, storageObject.ObjectRelativePath) } else { return fmt.Errorf("incompatible object_disk[%s].Type=%s amd remote_storage: %s", diskName, diskType, b.cfg.General.RemoteStorage) } diff --git a/pkg/storage/azblob.go b/pkg/storage/azblob.go index cf9a5819..44eca049 100644 --- a/pkg/storage/azblob.go +++ b/pkg/storage/azblob.go @@ -25,6 +25,7 @@ import ( // AzureBlob - presents methods for manipulate data on Azure type AzureBlob struct { Container azblob.ContainerURL + Pipeline pipeline.Pipeline CPK azblob.ClientProvidedKeyOptions Config *config.AzureBlobConfig } @@ -61,10 +62,10 @@ func (a *AzureBlob) Connect(ctx context.Context) error { if err != nil { return err } - urlString = 
fmt.Sprintf("%a://%a.blob.%a", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix) + urlString = fmt.Sprintf("%s://%s.blob.%s", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix) } else if a.Config.SharedAccessSignature != "" { credential = azblob.NewAnonymousCredential() - urlString = fmt.Sprintf("%a://%a.blob.%a?%a", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix, a.Config.SharedAccessSignature) + urlString = fmt.Sprintf("%s://%s.blob.%s?%s", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix, a.Config.SharedAccessSignature) } else if a.Config.UseManagedIdentity { azureEnv, err := azure.EnvironmentFromName("AZUREPUBLICCLOUD") if err != nil { @@ -94,7 +95,7 @@ func (a *AzureBlob) Connect(ctx context.Context) error { } credential = azblob.NewTokenCredential("", tokenRefresher) - urlString = fmt.Sprintf("%a://%a.blob.%a", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix) + urlString = fmt.Sprintf("%s://%s.blob.%s", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix) } u, err := url.Parse(urlString) @@ -108,11 +109,12 @@ func (a *AzureBlob) Connect(ctx context.Context) error { case <-ctx.Done(): return ctx.Err() default: - a.Container = azblob.NewServiceURL(*u, azblob.NewPipeline(credential, azblob.PipelineOptions{ + a.Pipeline = azblob.NewPipeline(credential, azblob.PipelineOptions{ Retry: azblob.RetryOptions{ TryTimeout: timeout, }, - })).NewContainerURL(a.Config.Container) + }) + a.Container = azblob.NewServiceURL(*u, a.Pipeline).NewContainerURL(a.Config.Container) _, err = a.Container.Create(ctx, azblob.Metadata{}, azblob.PublicAccessNone) if err != nil && !isContainerAlreadyExists(err) { return err @@ -124,7 +126,7 @@ func (a *AzureBlob) Connect(ctx context.Context) error { testBlob := a.Container.NewBlockBlobURL(base64.URLEncoding.EncodeToString(testName)) if _, err = testBlob.GetProperties(ctx, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}); err != nil { if se, ok := err.(azblob.StorageError); !ok || se.ServiceCode() != azblob.ServiceCodeBlobNotFound { - return errors.Wrapf(err, "azblob: failed to access container %a", a.Config.Container) + return errors.Wrapf(err, "azblob: failed to access container %s", a.Config.Container) } } @@ -269,7 +271,41 @@ func (a *AzureBlob) Walk(ctx context.Context, azPath string, recursive bool, pro } func (a *AzureBlob) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) { - return 0, fmt.Errorf("CopyObject not imlemented for %s", a.Kind()) + dstKey = path.Join(a.Config.ObjectDiskPath, dstKey) + srcURLString := fmt.Sprintf("%s://%s.%s/%s/%s", a.Config.EndpointSchema, a.Config.AccountName, a.Config.EndpointSuffix, srcBucket, srcKey) + srcURL, err := url.Parse(srcURLString) + if err != nil { + return 0, err + } + + sourceBlobURL := azblob.NewBlobURL(*srcURL, a.Pipeline) + destinationBlobURL := a.Container.NewBlobURL(dstKey) + + startCopy, err := destinationBlobURL.StartCopyFromURL(ctx, sourceBlobURL.URL(), nil, azblob.ModifiedAccessConditions{}, azblob.BlobAccessConditions{}, azblob.AccessTierNone, nil) + if err != nil { + return 0, fmt.Errorf("azblob->CopyObject failed to start copy operation: %v", err) + } + copyStatus := startCopy.CopyStatus() + copyStatusDesc := "" + var size int64 + pollCount := 1 + sleepDuration := time.Millisecond * 50 + for copyStatus == azblob.CopyStatusPending { + // @TODO think how to avoid polling GetProperties in AZBLOB during CopyObject + 
time.Sleep(sleepDuration * time.Duration(pollCount*2)) + dstMeta, err := destinationBlobURL.GetProperties(ctx, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) + if err != nil { + return 0, fmt.Errorf("azblob->CopyObject failed to destinationBlobURL.GetProperties operation: %v", err) + } + copyStatus = dstMeta.CopyStatus() + copyStatusDesc = dstMeta.CopyStatusDescription() + size = dstMeta.ContentLength() + pollCount++ + } + if copyStatus == azblob.CopyStatusFailed { + return 0, fmt.Errorf("azblob->CopyObject got CopyStatusFailed %s", copyStatusDesc) + } + return size, nil } type azureBlobFile struct { diff --git a/test/integration/config-s3-fips.yml b/test/integration/config-s3-fips.yml index 26223b75..528959e9 100644 --- a/test/integration/config-s3-fips.yml +++ b/test/integration/config-s3-fips.yml @@ -30,6 +30,7 @@ s3: acl: private force_path_style: false path: backup/{cluster}/{shard} + object_disk_path: object_disks/{cluster}/{shard} disable_ssl: false compression_format: tar allow_multipart_download: true diff --git a/test/integration/docker-compose_advanced.yml b/test/integration/docker-compose_advanced.yml index f4e5495b..4b17aa46 100644 --- a/test/integration/docker-compose_advanced.yml +++ b/test/integration/docker-compose_advanced.yml @@ -79,22 +79,23 @@ services: networks: - clickhouse-backup -# azure_init: -# image: mcr.microsoft.com/azure-cli:latest -# command: -# - /bin/sh -# - -xc + azure_init: + image: mcr.microsoft.com/azure-cli:latest + command: + - /bin/sh + - -xc + - sleep infinity # - | # az storage container create --debug --name azure-backup-disk && # az storage container create --debug --name azure-disk -# depends_on: -# azure: -# condition: service_healthy -# environment: -# # https://github.com/Azure/Azurite/blob/main/README.md#usage-with-azure-storage-sdks-or-tools -# AZURE_STORAGE_CONNECTION_STRING: DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azure:10000/devstoreaccount1; -# networks: -# - clickhouse-backup + depends_on: + azure: + condition: service_healthy + environment: + # https://github.com/Azure/Azurite/blob/main/README.md#usage-with-azure-storage-sdks-or-tools + AZURE_STORAGE_CONNECTION_STRING: DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azure:10000/devstoreaccount1; + networks: + - clickhouse-backup mysql: image: docker.io/mysql:${MYSQL_VERSION:-latest} diff --git a/test/integration/dynamic_settings.sh b/test/integration/dynamic_settings.sh index e4f3b52a..5c69534b 100644 --- a/test/integration/dynamic_settings.sh +++ b/test/integration/dynamic_settings.sh @@ -253,43 +253,42 @@ chown -R clickhouse /var/lib/clickhouse/disks/ cat < /etc/clickhouse-server/config.d/backup_storage_configuration_azure.xml - - - - azure_blob_storage - http://azure:10000/devstoreaccount1 - azure-disk - - devstoreaccount1 - Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== - false - - - azure_blob_storage - http://azure:10000/devstoreaccount1 - azure-backup-disk - - devstoreaccount1 - Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== - false - - - + + + + azure_blob_storage + http://azure:10000/devstoreaccount1 + azure-disk + + devstoreaccount1 + 
Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + false + + + azure_blob_storage + http://azure:10000/devstoreaccount1 + azure-backup-disk + + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + false + + - - - azure - - + + + azure + + - - - backups_s3 - backups_s3_plain - backups_azure - + + + backups_s3 + backups_s3_plain + backups_azure + EOT From d45f3f523b50f841fec90ccf3660efba6bbdd237 Mon Sep 17 00:00:00 2001 From: Slach Date: Sun, 23 Jul 2023 20:40:09 +0500 Subject: [PATCH 04/12] TestRBAC, TestConfigs was slow, TestIntegrationGCS failed, WTF? --- pkg/backup/create.go | 37 ++++++-------- pkg/backup/delete.go | 5 +- pkg/backup/restore.go | 53 +++++++++++--------- pkg/clickhouse/clickhouse.go | 5 +- pkg/storage/object_disk/object_disk.go | 16 ++++-- pkg/storage/s3.go | 2 +- test/integration/docker-compose_advanced.yml | 7 +-- test/integration/dynamic_settings.sh | 15 ++++-- test/integration/integration_test.go | 28 ++++++----- 9 files changed, 92 insertions(+), 76 deletions(-) diff --git a/pkg/backup/create.go b/pkg/backup/create.go index cdbaf6fb..08067c66 100644 --- a/pkg/backup/create.go +++ b/pkg/backup/create.go @@ -485,27 +485,6 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku } realSize := map[string]int64{} disksToPartsMap := map[string][]metadata.Part{} - needToUploadObjectDisk := false - for _, disk := range diskList { - if disk.Type == "s3" || disk.Type == "azure_blob_storage" { - needToUploadObjectDisk = true - break - } - } - if needToUploadObjectDisk { - b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) - if err != nil { - return nil, nil, err - } - if err := b.dst.Connect(ctx); err != nil { - return nil, nil, fmt.Errorf("can't connect to %s: %v", b.dst.Kind(), err) - } - defer func() { - if err := b.dst.Close(ctx); err != nil { - b.log.Warnf("uploadObjectDiskParts: can't close BackupDestination error: %v", err) - } - }() - } for _, disk := range diskList { select { @@ -530,8 +509,17 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku realSize[disk.Name] = size disksToPartsMap[disk.Name] = parts log.WithField("disk", disk.Name).Debug("shadow moved") - if disk.Type == "s3" || disk.Type == "azure_blob_storage" { + if disk.Type == "s3" || disk.Type == "azure_blob_storage" && len(parts) > 0 { start := time.Now() + if b.dst == nil { + b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) + if err != nil { + return nil, nil, err + } + if err := b.dst.Connect(ctx); err != nil { + return nil, nil, fmt.Errorf("can't connect to %s: %v", b.dst.Kind(), err) + } + } if size, err = b.uploadObjectDiskParts(ctx, backupName, backupShadowPath, disk); err != nil { return disksToPartsMap, realSize, err } @@ -552,6 +540,11 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku return disksToPartsMap, realSize, err } } + if b.dst != nil { + if err := b.dst.Close(ctx); err != nil { + b.log.Warnf("uploadObjectDiskParts: can't close BackupDestination error: %v", err) + } + } log.Debug("done") return disksToPartsMap, realSize, nil } diff --git a/pkg/backup/delete.go b/pkg/backup/delete.go index 3f5325c8..d0e5c73a 100644 --- a/pkg/backup/delete.go +++ b/pkg/backup/delete.go @@ -190,7 +190,10 @@ func (b *Backuper) hasObjectDisks(backupList []LocalBackup, backupName string, d if backup.BackupName == backupName && !strings.Contains(backup.Tags, "embedded") 
{ for _, disk := range disks { if !disk.IsBackup && (disk.Type == "s3" || disk.Type == "azure_blob_storage") { - return true + backupExists, err := os.ReadDir(path.Join(disk.Path, "backup", backup.BackupName)) + if err == nil && len(backupExists) > 0 { + return true + } } } } diff --git a/pkg/backup/restore.go b/pkg/backup/restore.go index 911d3028..b864c2f6 100644 --- a/pkg/backup/restore.go +++ b/pkg/backup/restore.go @@ -143,6 +143,7 @@ func (b *Backuper) Restore(backupName, tablePattern string, databaseMapping, par return err } ctx, cancel := context.WithTimeout(ctx, 180*time.Second) + defer cancel() log.Infof("run %s", b.ch.Config.RestartCommand) var out []byte if len(cmd) > 1 { @@ -152,7 +153,6 @@ func (b *Backuper) Restore(backupName, tablePattern string, databaseMapping, par } if err != nil { log.Debug(string(out)) - cancel() return err } } @@ -432,7 +432,7 @@ func (b *Backuper) restoreSchemaEmbedded(ctx context.Context, backupName string, }); err != nil { return err } - return b.restoreEmbedded(ctx, backupName, true, tablesForRestore, "", nil) + return b.restoreEmbedded(ctx, backupName, true, tablesForRestore, nil) } func (b *Backuper) restoreSchemaRegular(tablesForRestore ListOfTables, version int, log *apexLog.Entry) error { @@ -607,7 +607,7 @@ func (b *Backuper) RestoreData(ctx context.Context, backupName string, tablePatt } log.Debugf("found %d tables with data in backup", len(tablesForRestore)) if b.isEmbedded { - err = b.restoreDataEmbedded(ctx, backupName, tablesForRestore, metadataPath, partitionsNameList) + err = b.restoreDataEmbedded(ctx, backupName, tablesForRestore, partitionsNameList) } else { err = b.restoreDataRegular(ctx, backupName, tablePattern, tablesForRestore, diskMap, diskTypes, disks, log) } @@ -618,8 +618,8 @@ func (b *Backuper) RestoreData(ctx context.Context, backupName string, tablePatt return nil } -func (b *Backuper) restoreDataEmbedded(ctx context.Context, backupName string, tablesForRestore ListOfTables, metadataPath string, partitionsNameList map[metadata.TableTitle][]string) error { - return b.restoreEmbedded(ctx, backupName, false, tablesForRestore, metadataPath, partitionsNameList) +func (b *Backuper) restoreDataEmbedded(ctx context.Context, backupName string, tablesForRestore ListOfTables, partitionsNameList map[metadata.TableTitle][]string) error { + return b.restoreEmbedded(ctx, backupName, false, tablesForRestore, partitionsNameList) } func (b *Backuper) restoreDataRegular(ctx context.Context, backupName string, tablePattern string, tablesForRestore ListOfTables, diskMap, diskTypes map[string]string, disks []clickhouse.Disk, log *apexLog.Entry) error { @@ -680,7 +680,7 @@ func (b *Backuper) restoreDataRegularByAttach(ctx context.Context, backupName st return fmt.Errorf("can't copy data to storage '%s.%s': %v", table.Database, table.Table, err) } log.Debug("data to 'storage' copied") - if err := b.downloadObjectDiskParts(ctx, backupName, table, diskMap, diskTypes, dstTable); err != nil { + if err := b.downloadObjectDiskParts(ctx, backupName, table, diskMap, diskTypes); err != nil { return fmt.Errorf("can't restore object_disk server-side copy data parts '%s.%s': %v", table.Database, table.Table, err) } @@ -695,7 +695,7 @@ func (b *Backuper) restoreDataRegularByParts(ctx context.Context, backupName str return fmt.Errorf("can't copy data to datached '%s.%s': %v", table.Database, table.Table, err) } log.Debug("data to 'detached' copied") - if err := b.downloadObjectDiskParts(ctx, backupName, table, diskMap, diskTypes, dstTable); err != nil { + if 
err := b.downloadObjectDiskParts(ctx, backupName, table, diskMap, diskTypes); err != nil { return fmt.Errorf("can't restore object_disk server-side copy data parts '%s.%s': %v", table.Database, table.Table, err) } if err := b.ch.AttachDataParts(tablesForRestore[i], disks); err != nil { @@ -704,13 +704,13 @@ func (b *Backuper) restoreDataRegularByParts(ctx context.Context, backupName str return nil } -func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName string, backupTable metadata.TableMetadata, diskMap, diskTypes map[string]string, dstTable clickhouse.Table) error { +func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName string, backupTable metadata.TableMetadata, diskMap, diskTypes map[string]string) error { log := apexLog.WithFields(apexLog.Fields{"operation": "downloadObjectDiskParts"}) start := time.Now() dbAndTableDir := path.Join(common.TablePathEncode(backupTable.Database), common.TablePathEncode(backupTable.Table)) var err error needToDownloadObjectDisk := false - for diskName, _ := range backupTable.Parts { + for diskName := range backupTable.Parts { diskType, exists := diskTypes[diskName] if !exists { return fmt.Errorf("%s disk doesn't present in diskTypes: %v", diskName, diskTypes) @@ -720,20 +720,21 @@ func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName strin break } } - if needToDownloadObjectDisk { - b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) - if err != nil { - return err - } - if err = b.dst.Connect(ctx); err != nil { - return fmt.Errorf("can't connect to %s: %v", b.dst.Kind(), err) - } - defer func() { - if err := b.dst.Close(ctx); err != nil { - b.log.Warnf("downloadObjectDiskParts: can't close BackupDestination error: %v", err) - } - }() + if !needToDownloadObjectDisk { + return nil } + b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) + if err != nil { + return err + } + if err = b.dst.Connect(ctx); err != nil { + return fmt.Errorf("can't connect to %s: %v", b.dst.Kind(), err) + } + defer func() { + if err := b.dst.Close(ctx); err != nil { + b.log.Warnf("downloadObjectDiskParts: can't close BackupDestination error: %v", err) + } + }() for diskName, parts := range backupTable.Parts { diskType, exists := diskTypes[diskName] @@ -746,7 +747,7 @@ func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName strin } for _, part := range parts { partPath := path.Join(diskMap[diskName], "backup", backupName, "shadow", dbAndTableDir, diskName, part.Name) - filepath.Walk(partPath, func(fPath string, fInfo fs.FileInfo, err error) error { + if err := filepath.Walk(partPath, func(fPath string, fInfo fs.FileInfo, err error) error { if err != nil { return err } @@ -779,7 +780,9 @@ func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName strin } } return nil - }) + }); err != nil { + return err + } } } } @@ -871,7 +874,7 @@ func (b *Backuper) changeTablePatternFromRestoreDatabaseMapping(tablePattern str return tablePattern } -func (b *Backuper) restoreEmbedded(ctx context.Context, backupName string, restoreOnlySchema bool, tablesForRestore ListOfTables, metadataPath string, partitionsNameList map[metadata.TableTitle][]string) error { +func (b *Backuper) restoreEmbedded(ctx context.Context, backupName string, restoreOnlySchema bool, tablesForRestore ListOfTables, partitionsNameList map[metadata.TableTitle][]string) error { restoreSQL := "Disk(?,?)" tablesSQL := "" l := len(tablesForRestore) diff --git 
a/pkg/clickhouse/clickhouse.go b/pkg/clickhouse/clickhouse.go index 0be82b3f..7e36f413 100644 --- a/pkg/clickhouse/clickhouse.go +++ b/pkg/clickhouse/clickhouse.go @@ -144,11 +144,12 @@ func (ch *ClickHouse) GetDisks(ctx context.Context, enrich bool) ([]Disk, error) if disks[i].Name == ch.Config.EmbeddedBackupDisk { disks[i].IsBackup = true } - // s3_plain disk could contains relative + // s3_plain disk could contain relative remote disks path, need transform it to `/var/lib/clickhouse/disks/disk_name` if disks[i].Path != "" && !strings.HasPrefix(disks[i].Path, "/") { for _, d := range disks { if d.Name == "default" { - disks[i].Path = path.Join(d.Path, disks[i].Path) + "/" + disks[i].Path = path.Join(d.Path, "disks", disks[i].Name) + "/" + break } } } diff --git a/pkg/storage/object_disk/object_disk.go b/pkg/storage/object_disk/object_disk.go index bc3fc36a..aff4b4ec 100644 --- a/pkg/storage/object_disk/object_disk.go +++ b/pkg/storage/object_disk/object_disk.go @@ -432,14 +432,20 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf switch creds.Type { case "s3", "gcs": connection.Type = "s3" - s3cfg := config.S3Config{Debug: cfg.S3.Debug} + s3cfg := config.S3Config{Debug: cfg.S3.Debug, MaxPartsCount: cfg.S3.MaxPartsCount, Concurrency: 1} s3URL, err := url.Parse(creds.EndPoint) if err != nil { return nil, err } s3cfg.Endpoint = s3URL.Scheme + "://" + s3URL.Host + if cfg.S3.Concurrency > 0 { + s3cfg.Concurrency = cfg.S3.Concurrency + } + s3cfg.Region = "us-east-1" if creds.S3Region != "" { s3cfg.Region = creds.S3Region + } else if cfg.S3.Region != "" { + s3cfg.Region = cfg.S3.Region } if creds.S3StorageClass != "" { s3cfg.StorageClass = creds.S3StorageClass @@ -479,10 +485,10 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf case "azblob": connection.Type = "azure_blob_storage" azureCfg := config.AzureBlobConfig{ - Timeout: "15m", - BufferSize: 2 * 1024 * 1024, - MaxBuffers: 3, - MaxPartsCount: 5000, + Timeout: cfg.AzureBlob.Timeout, + BufferSize: cfg.AzureBlob.BufferSize, + MaxBuffers: cfg.AzureBlob.MaxBuffers, + MaxPartsCount: cfg.AzureBlob.MaxPartsCount, } azureURL, err := url.Parse(creds.EndPoint) if err != nil { diff --git a/pkg/storage/s3.go b/pkg/storage/s3.go index 67d8485c..8aa58451 100644 --- a/pkg/storage/s3.go +++ b/pkg/storage/s3.go @@ -112,7 +112,7 @@ func (s *S3) Connect(ctx context.Context) error { if s.Config.Debug { awsConfig.Logger = newS3Logger(s.Log) - awsConfig.ClientLogMode = aws.LogRetries | aws.LogRequest | aws.LogResponse + awsConfig.ClientLogMode = aws.LogRetries | aws.LogRequestWithBody | aws.LogResponseWithBody } if s.Config.DisableCertVerification { diff --git a/test/integration/docker-compose_advanced.yml b/test/integration/docker-compose_advanced.yml index 4b17aa46..b282aa80 100644 --- a/test/integration/docker-compose_advanced.yml +++ b/test/integration/docker-compose_advanced.yml @@ -172,9 +172,10 @@ services: AWS_ACCESS_KEY_ID: access-key AWS_SECRET_ACCESS_KEY: it-is-my-super-secret-key # GCS over S3 object disk - QA_GCS_OVER_S3_ACCESS_KEY: ${QA_GCS_OVER_S3_ACCESS_KEY} - QA_GCS_OVER_S3_SECRET_KEY: ${QA_GCS_OVER_S3_SECRET_KEY} - QA_GCS_OVER_S3_BUCKET: ${QA_GCS_OVER_S3_BUCKET} + QA_GCS_OVER_S3_ACCESS_KEY: "${QA_GCS_OVER_S3_ACCESS_KEY}" + QA_GCS_OVER_S3_SECRET_KEY: "${QA_GCS_OVER_S3_SECRET_KEY}" + QA_GCS_OVER_S3_BUCKET: "${QA_GCS_OVER_S3_BUCKET}" + AWS_EC2_METADATA_DISABLED: "true" volumes: - ./backup-user.xml:/etc/clickhouse-server/users.d/backup-user.xml - 
./enable-access_management.xml:/etc/clickhouse-server/users.d/enable-access_management.xml diff --git a/test/integration/dynamic_settings.sh b/test/integration/dynamic_settings.sh index 5c69534b..b7854b25 100644 --- a/test/integration/dynamic_settings.sh +++ b/test/integration/dynamic_settings.sh @@ -95,7 +95,8 @@ cat < /etc/clickhouse-server/config.d/storage_configuration_s3.xml it-is-my-super-secret-key --> 1 - true + + false @@ -120,7 +121,8 @@ cat < /etc/clickhouse-server/config.d/storage_configuration_gcs.xml https://storage.googleapis.com/${QA_GCS_OVER_S3_BUCKET}/clickhouse_backup_disk_gcs_over_s3/${HOSTNAME}/ ${QA_GCS_OVER_S3_ACCESS_KEY} ${QA_GCS_OVER_S3_SECRET_KEY} - true + + false false @@ -153,7 +155,8 @@ cat < /etc/clickhouse-server/config.d/storage_configuration_encrypted_s3.x it-is-my-super-secret-key --> 1 - true + + false encrypted @@ -163,7 +166,8 @@ cat < /etc/clickhouse-server/config.d/storage_configuration_encrypted_s3.x 00112233445566778899aabbccddeeff ffeeddccbbaa99887766554433221100 1 - true + + false @@ -193,7 +197,6 @@ cat < /etc/clickhouse-server/config.d/backup_storage_configuration_s3.xml - true s3 http://minio:9000/clickhouse/backups_s3/ 1 false + + false diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index f5eced12..2a27f9a6 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -54,7 +54,7 @@ type TestDataStruct struct { CheckDatabaseOnly bool } -var testData = []TestDataStruct{ +var defaultTestData = []TestDataStruct{ { Database: dbNameOrdinary, DatabaseEngine: "Ordinary", Name: ".inner.table1", @@ -301,7 +301,8 @@ var testData = []TestDataStruct{ }, } -var incrementData = []TestDataStruct{ +var testData = defaultTestData +var defaultIncrementData = []TestDataStruct{ { Database: dbNameOrdinary, DatabaseEngine: "Ordinary", Name: ".inner.table1", @@ -391,6 +392,7 @@ var incrementData = []TestDataStruct{ OrderBy: "id", }, } +var incrementData = defaultIncrementData func init() { log.SetHandler(logcli.New(os.Stdout)) @@ -526,7 +528,7 @@ func TestDoRestoreRBAC(t *testing.T) { ch := &TestClickHouse{} r := require.New(t) - ch.connectWithWait(r, 1*time.Second, 10*time.Second) + ch.connectWithWait(r, 1*time.Second, 1*time.Second) r.NoError(dockerCP("config-s3.yml", "clickhouse:/etc/clickhouse-backup/config.yml")) ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.test_rbac") @@ -559,7 +561,7 @@ func TestDoRestoreRBAC(t *testing.T) { ch.chbackend.Close() r.NoError(utils.ExecCmd(context.Background(), 180*time.Second, "docker-compose", "-f", os.Getenv("COMPOSE_FILE"), "restart", "clickhouse")) - ch.connectWithWait(r, 2*time.Second, 10*time.Second) + ch.connectWithWait(r, 2*time.Second, 8*time.Second) log.Info("download+restore RBAC") r.NoError(dockerExec("clickhouse", "ls", "-lah", "/var/lib/clickhouse/access")) @@ -570,7 +572,7 @@ func TestDoRestoreRBAC(t *testing.T) { // we can't restart clickhouse inside container, we need restart container ch.chbackend.Close() r.NoError(utils.ExecCmd(context.Background(), 180*time.Second, "docker-compose", "-f", os.Getenv("COMPOSE_FILE"), "restart", "clickhouse")) - ch.connectWithWait(r, 2*time.Second, 10*time.Second) + ch.connectWithWait(r, 2*time.Second, 8*time.Second) r.NoError(dockerExec("clickhouse", "ls", "-lah", "/var/lib/clickhouse/access")) @@ -630,7 +632,7 @@ func TestDoRestoreConfigs(t *testing.T) { r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", "test_configs_backup")) ch.chbackend.Close() - 
ch.connectWithWait(r, 2*time.Second, 10*time.Second) + ch.connectWithWait(r, 2*time.Second, 8*time.Second) selectEmptyResultForAggQuery := "SELECT value FROM system.settings WHERE name='empty_result_for_aggregation_by_empty_set'" @@ -642,7 +644,7 @@ func TestDoRestoreConfigs(t *testing.T) { r.NoError(dockerExec("clickhouse", "clickhouse-backup", "download", "test_configs_backup")) ch.chbackend.Close() - ch.connectWithWait(r, 2*time.Second, 10*time.Second) + ch.connectWithWait(r, 2*time.Second, 8*time.Second) settings = "" r.NoError(ch.chbackend.SelectSingleRowNoCtx(&settings, "SELECT value FROM system.settings WHERE name='empty_result_for_aggregation_by_empty_set'")) @@ -1118,6 +1120,7 @@ func fullCleanup(r *require.Assertions, ch *TestClickHouse, backupNames, backupT func generateTestData(ch *TestClickHouse, r *require.Assertions, remoteStorageType string) { log.Infof("Generate test data %s", remoteStorageType) + testData = defaultTestData generateTestDataWithDifferentStoragePolicy(remoteStorageType) for _, data := range testData { if isTableSkip(ch, data, false) { @@ -1194,6 +1197,7 @@ func generateTestDataWithDifferentStoragePolicy(remoteStorageType string) { func generateIncrementTestData(ch *TestClickHouse, r *require.Assertions) { log.Info("Generate increment test data") + incrementData = defaultIncrementData for _, data := range incrementData { if isTableSkip(ch, data, false) { continue @@ -2013,18 +2017,18 @@ func (ch *TestClickHouse) connectWithWait(r *require.Assertions, sleepBefore, ti r.NoError(utils.ExecCmd(context.Background(), 180*time.Second, "docker", "logs", "clickhouse")) out, dockerErr := dockerExecOut("clickhouse", "clickhouse-client", "--echo", "-q", "'SELECT version()'") r.NoError(dockerErr) - ch.chbackend.Log.Warnf(out) + ch.chbackend.Log.Debug(out) r.NoError(err) } if err != nil { - log.Warnf("clickhouse not ready %v, wait %d seconds", err, i*2) r.NoError(utils.ExecCmd(context.Background(), 180*time.Second, "docker", "ps", "-a")) if out, dockerErr := dockerExecOut("clickhouse", "clickhouse-client", "--echo", "-q", "SELECT version()"); dockerErr == nil { - log.Warnf(out) - } else { log.Info(out) + } else { + log.Warn(out) } - time.Sleep(time.Second * time.Duration(i*2)) + log.Warnf("clickhouse not ready %v, wait %v seconds", err, (time.Duration(i) * timeOut).Seconds()) + time.Sleep(time.Duration(i) * timeOut) } else { if compareVersion(os.Getenv("CLICKHOUSE_VERSION"), "20.8") == 1 { var count uint64 From aaa5c891552a497409dd9389b92dc119f8c4c61b Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 08:28:38 +0500 Subject: [PATCH 05/12] allow GCS over S3, remove Accept-Encoding header from sign https://stackoverflow.com/a/74382598/1204665, https://github.com/aws/aws-sdk-go-v2/issues/1816, GCS over S3 have no 5Gb restriction, TestIntegrationGCS pass again, TestIntegrationEmbedded need to fix --- pkg/storage/gcs.go | 1 + pkg/storage/object_disk/object_disk.go | 2 + pkg/storage/s3.go | 126 +++++++++++++++++++++---- 3 files changed, 109 insertions(+), 20 deletions(-) diff --git a/pkg/storage/gcs.go b/pkg/storage/gcs.go index 24fc6beb..7a01000f 100644 --- a/pkg/storage/gcs.go +++ b/pkg/storage/gcs.go @@ -64,6 +64,7 @@ func (gcs *GCS) Connect(ctx context.Context) error { clientOptions := make([]option.ClientOption, 0) clientOptions = append(clientOptions, option.WithTelemetryDisabled()) endpoint := "https://storage.googleapis.com/storage/v1/" + if gcs.Config.Endpoint != "" { endpoint = gcs.Config.Endpoint clientOptions = 
append([]option.ClientOption{option.WithoutAuthentication()}, clientOptions...) diff --git a/pkg/storage/object_disk/object_disk.go b/pkg/storage/object_disk/object_disk.go index aff4b4ec..9c00b9bf 100644 --- a/pkg/storage/object_disk/object_disk.go +++ b/pkg/storage/object_disk/object_disk.go @@ -449,6 +449,8 @@ func makeObjectDiskConnection(ctx context.Context, ch *clickhouse.ClickHouse, cf } if creds.S3StorageClass != "" { s3cfg.StorageClass = creds.S3StorageClass + } else { + s3cfg.StorageClass = cfg.S3.StorageClass } if creds.S3AssumeRole != "" { s3cfg.AssumeRoleARN = creds.S3AssumeRole diff --git a/pkg/storage/s3.go b/pkg/storage/s3.go index 8aa58451..cb373710 100644 --- a/pkg/storage/s3.go +++ b/pkg/storage/s3.go @@ -4,10 +4,6 @@ import ( "context" "crypto/tls" "fmt" - "github.com/Altinity/clickhouse-backup/pkg/config" - "github.com/aws/smithy-go" - awsV2http "github.com/aws/smithy-go/transport/http" - "golang.org/x/sync/semaphore" "io" "net/http" "os" @@ -16,20 +12,23 @@ import ( "sync" "time" - "golang.org/x/sync/errgroup" - + "github.com/Altinity/clickhouse-backup/pkg/config" apexLog "github.com/apex/log" "github.com/aws/aws-sdk-go-v2/aws" + v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4" awsV2Config "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/credentials/stscreds" - "github.com/aws/aws-sdk-go-v2/service/sts" - s3manager "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/aws/aws-sdk-go-v2/service/sts" + "github.com/aws/smithy-go" awsV2Logging "github.com/aws/smithy-go/logging" + awsV2http "github.com/aws/smithy-go/transport/http" "github.com/pkg/errors" + "golang.org/x/sync/errgroup" + "golang.org/x/sync/semaphore" ) type S3LogToApexLogAdapter struct { @@ -51,6 +50,39 @@ func (S3LogToApexLogAdapter S3LogToApexLogAdapter) Logf(severity awsV2Logging.Cl } } +// RecalculateV4Signature allow GCS over S3, remove Accept-Encoding header from sign https://stackoverflow.com/a/74382598/1204665, https://github.com/aws/aws-sdk-go-v2/issues/1816 +type RecalculateV4Signature struct { + next http.RoundTripper + signer *v4.Signer + awsConfig aws.Config +} + +func (lt *RecalculateV4Signature) RoundTrip(req *http.Request) (*http.Response, error) { + // store for later use + acceptEncodingValue := req.Header.Get("Accept-Encoding") + + // delete the header so the header doesn't account for in the signature + req.Header.Del("Accept-Encoding") + + // sign with the same date + timeString := req.Header.Get("X-Amz-Date") + timeDate, _ := time.Parse("20060102T150405Z", timeString) + + creds, err := lt.awsConfig.Credentials.Retrieve(req.Context()) + if err != nil { + return nil, err + } + err = lt.signer.SignHTTP(req.Context(), creds, req, v4.GetPayloadHash(req.Context()), "s3", lt.awsConfig.Region, timeDate) + if err != nil { + return nil, err + } + // Reset Accept-Encoding if desired + req.Header.Set("Accept-Encoding", acceptEncodingValue) + + // follows up the original round tripper + return lt.next.RoundTrip(req) +} + // S3 - presents methods for manipulate data on s3 type S3 struct { client *s3.Client @@ -115,11 +147,12 @@ func (s *S3) Connect(ctx context.Context) error { awsConfig.ClientLogMode = aws.LogRetries | aws.LogRequestWithBody | aws.LogResponseWithBody } + httpTransport := http.DefaultTransport if s.Config.DisableCertVerification { - tr := &http.Transport{ + httpTransport = &http.Transport{ TLSClientConfig: 
&tls.Config{InsecureSkipVerify: true}, } - awsConfig.HTTPClient = &http.Client{Transport: tr} + awsConfig.HTTPClient = &http.Client{Transport: httpTransport} } if s.Config.Endpoint != "" { @@ -134,6 +167,11 @@ func (s *S3) Connect(ctx context.Context) error { }) } + // allow GCS over S3, remove Accept-Encoding header from sign https://stackoverflow.com/a/74382598/1204665, https://github.com/aws/aws-sdk-go-v2/issues/1816 + if strings.Contains(s.Config.Endpoint, "storage.googleapis.com") { + // Assign custom client with our own transport + awsConfig.HTTPClient = &http.Client{Transport: &RecalculateV4Signature{httpTransport, v4.NewSigner(), awsConfig}} + } s.client = s3.NewFromConfig(awsConfig, func(o *s3.Options) { o.UsePathStyle = s.Config.ForcePathStyle o.EndpointOptions.DisableHTTPS = s.Config.DisableSSL @@ -362,6 +400,64 @@ func (s *S3) remotePager(ctx context.Context, s3Path string, recursive bool, pro func (s *S3) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) { dstKey = path.Join(s.Config.ObjectDiskPath, dstKey) + if strings.Contains(s.Config.Endpoint, "storage.googleapis.com") { + params := s3.CopyObjectInput{ + Bucket: aws.String(s.Config.Bucket), + Key: aws.String(dstKey), + CopySource: aws.String(path.Join(srcBucket, srcKey)), + StorageClass: s3types.StorageClass(strings.ToUpper(s.Config.StorageClass)), + } + // https://github.com/Altinity/clickhouse-backup/issues/588 + if len(s.Config.ObjectLabels) > 0 { + tags := "" + for k, v := range s.Config.ObjectLabels { + if tags != "" { + tags += "&" + } + tags += k + "=" + v + } + params.Tagging = aws.String(tags) + } + if s.Config.SSE != "" { + params.ServerSideEncryption = s3types.ServerSideEncryption(s.Config.SSE) + } + if s.Config.SSEKMSKeyId != "" { + params.SSEKMSKeyId = aws.String(s.Config.SSEKMSKeyId) + } + if s.Config.SSECustomerAlgorithm != "" { + params.SSECustomerAlgorithm = aws.String(s.Config.SSECustomerAlgorithm) + } + if s.Config.SSECustomerKey != "" { + params.SSECustomerKey = aws.String(s.Config.SSECustomerKey) + } + if s.Config.SSECustomerKeyMD5 != "" { + params.SSECustomerKeyMD5 = aws.String(s.Config.SSECustomerKeyMD5) + } + if s.Config.SSEKMSEncryptionContext != "" { + params.SSEKMSEncryptionContext = aws.String(s.Config.SSEKMSEncryptionContext) + } + _, err := s.client.CopyObject(ctx, ¶ms) + if err != nil { + return 0, err + } + dstObjResp, err := s.client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: aws.String(s.Config.Bucket), + Key: aws.String(dstKey), + }) + if err != nil { + return 0, err + } + return dstObjResp.ContentLength, nil + } + // Get the size of the source object + sourceObjResp, err := s.client.HeadObject(ctx, &s3.HeadObjectInput{ + Bucket: aws.String(srcBucket), + Key: aws.String(srcKey), + }) + if err != nil { + return 0, err + } + srcSize := sourceObjResp.ContentLength // Initiate a multipart upload params := s3.CreateMultipartUploadInput{ Bucket: aws.String(s.Config.Bucket), @@ -398,16 +494,6 @@ func (s *S3) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) ( params.SSEKMSEncryptionContext = aws.String(s.Config.SSEKMSEncryptionContext) } - // Get the size of the source object - sourceObjResp, err := s.client.HeadObject(ctx, &s3.HeadObjectInput{ - Bucket: aws.String(srcBucket), - Key: aws.String(srcKey), - }) - if err != nil { - return 0, err - } - srcSize := sourceObjResp.ContentLength - initResp, err := s.client.CreateMultipartUpload(ctx, ¶ms) if err != nil { return 0, err From 96fac09fa909b8a7251e6df45f0f3af1699a156f Mon Sep 17 00:00:00 2001 
From: Slach Date: Mon, 24 Jul 2023 11:05:55 +0500 Subject: [PATCH 06/12] add object_disk_path to some test configs, fix config validation when using embedded backup, cleanup integration_test.go move testBackupSpecifiedPartitions near runTestMainScenario, all tests passed 1400 seconds --- pkg/config/config.go | 26 +- test/integration/config-database-mapping.yml | 1 + test/integration/config-s3-nodelete.yml | 3 +- test/integration/config-s3-plain-embedded.yml | 1 + test/integration/integration_test.go | 228 +++++++++--------- 5 files changed, 132 insertions(+), 127 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 4494e108..f420f320 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -419,18 +419,20 @@ func ValidateConfig(cfg *Config) error { } } // @TODO add all other storage types - switch cfg.General.RemoteStorage { - case "s3": - if cfg.S3.ObjectDiskPath == "" || strings.HasPrefix(cfg.S3.Path, cfg.S3.ObjectDiskPath) { - return fmt.Errorf("invalid s3->object_disk_path, shall be not empty and shall not be prefix for `path`") - } - case "gcs": - if cfg.GCS.ObjectDiskPath == "" || strings.HasPrefix(cfg.GCS.Path, cfg.GCS.ObjectDiskPath) { - return fmt.Errorf("invalid gcs->object_disk_path, shall be not empty and shall not be prefix for `path`") - } - case "azblob": - if cfg.AzureBlob.ObjectDiskPath == "" || strings.HasPrefix(cfg.AzureBlob.Path, cfg.AzureBlob.ObjectDiskPath) { - return fmt.Errorf("invalid azblob->object_disk_path, shall be not empty and shall not be prefix for `path`") + if !cfg.ClickHouse.UseEmbeddedBackupRestore { + switch cfg.General.RemoteStorage { + case "s3": + if cfg.S3.ObjectDiskPath == "" || strings.HasPrefix(cfg.S3.Path, cfg.S3.ObjectDiskPath) { + return fmt.Errorf("invalid s3->object_disk_path, shall be not empty and shall not be prefix for s3->path") + } + case "gcs": + if cfg.GCS.ObjectDiskPath == "" || strings.HasPrefix(cfg.GCS.Path, cfg.GCS.ObjectDiskPath) { + return fmt.Errorf("invalid gcs->object_disk_path, shall be not empty and shall not be prefix for gcs->path") + } + case "azblob": + if cfg.AzureBlob.ObjectDiskPath == "" || strings.HasPrefix(cfg.AzureBlob.Path, cfg.AzureBlob.ObjectDiskPath) { + return fmt.Errorf("invalid azblob->object_disk_path, shall be not empty and shall not be prefix for gcs->path") + } } } return nil diff --git a/test/integration/config-database-mapping.yml b/test/integration/config-database-mapping.yml index 0c025aa7..ff6ce138 100644 --- a/test/integration/config-database-mapping.yml +++ b/test/integration/config-database-mapping.yml @@ -24,6 +24,7 @@ s3: acl: private force_path_style: true path: backup/{cluster}/{shard} + object_disk_path: object_disk/{cluster}/{shard} disable_ssl: true compression_format: tar api: diff --git a/test/integration/config-s3-nodelete.yml b/test/integration/config-s3-nodelete.yml index 72b58809..29eb97db 100644 --- a/test/integration/config-s3-nodelete.yml +++ b/test/integration/config-s3-nodelete.yml @@ -26,7 +26,8 @@ s3: endpoint: http://minio:9000 acl: private force_path_style: true - path: backup + path: backup/{cluster}/{shard} + object_disk_path: object_disk/{cluster}/{shard} disable_ssl: true compression_format: tar api: diff --git a/test/integration/config-s3-plain-embedded.yml b/test/integration/config-s3-plain-embedded.yml index e1f6bdc3..ee501cf7 100644 --- a/test/integration/config-s3-plain-embedded.yml +++ b/test/integration/config-s3-plain-embedded.yml @@ -29,6 +29,7 @@ s3: acl: private force_path_style: true path: backup/{cluster}/{shard} + 
object_disk_path: object_disk/{cluster}/{shard} disable_ssl: true compression_format: none api: diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index 2a27f9a6..902a48f1 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -1079,6 +1079,120 @@ func runMainIntegrationScenario(t *testing.T, remoteStorageType string) { } } +func testBackupSpecifiedPartitions(r *require.Assertions, ch *TestClickHouse, remoteStorageType string) { + log.Info("testBackupSpecifiedPartitions started") + var err error + var out string + var result, expectedCount uint64 + + partitionBackupName := fmt.Sprintf("partition_backup_%d", rand.Int()) + fullBackupName := fmt.Sprintf("full_backup_%d", rand.Int()) + // Create and fill tables + ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t1") + ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t2") + ch.queryWithNoError(r, "CREATE TABLE default.t1 (dt Date, v UInt64) ENGINE=MergeTree() PARTITION BY toYYYYMMDD(dt) ORDER BY dt") + ch.queryWithNoError(r, "CREATE TABLE default.t2 (dt String, v UInt64) ENGINE=MergeTree() PARTITION BY dt ORDER BY dt") + for _, dt := range []string{"2022-01-01", "2022-01-02", "2022-01-03", "2022-01-04"} { + ch.queryWithNoError(r, fmt.Sprintf("INSERT INTO default.t1 SELECT '%s', number FROM numbers(10)", dt)) + ch.queryWithNoError(r, fmt.Sprintf("INSERT INTO default.t2 SELECT '%s', number FROM numbers(10)", dt)) + } + + // check create_remote full > download + partitions > delete local > download > restore --partitions > restore + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "create_remote", "--tables=default.t*", fullBackupName)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", fullBackupName)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "download", "--partitions=('2022-01-02'),('2022-01-03')", fullBackupName)) + fullBackupDir := "/var/lib/clickhouse/backup/" + fullBackupName + "/shadow/default/t?/default/" + if strings.HasPrefix(remoteStorageType, "EMBEDDED") { + fullBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + fullBackupName + "/data/default/t?" + } + out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+fullBackupDir+" | wc -l") + r.NoError(err) + expectedLines := "13" + // custom storage doesn't support --partitions for upload / download now + // embedded storage contain hardLink files and will download additional data parts + if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { + expectedLines = "17" + } + r.Equal(expectedLines, strings.Trim(out, "\r\n\t ")) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", fullBackupName)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "download", fullBackupName)) + + fullBackupDir = "/var/lib/clickhouse/backup/" + fullBackupName + "/shadow/default/t?/default/" + if strings.HasPrefix(remoteStorageType, "EMBEDDED") { + fullBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + fullBackupName + "/data/default/t?" 
+ } + out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+fullBackupDir+"| wc -l") + r.NoError(err) + r.Equal("17", strings.Trim(out, "\r\n\t ")) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "restore", "--partitions=('2022-01-02'),('2022-01-03')", fullBackupName)) + result = 0 + r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT sum(c) FROM (SELECT count() AS c FROM default.t1 UNION ALL SELECT count() AS c FROM default.t2)")) + expectedCount = 40 + r.Equal(expectedCount, result, fmt.Sprintf("expect count=%d", expectedCount)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "restore", fullBackupName)) + result = 0 + r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT sum(c) FROM (SELECT count() AS c FROM default.t1 UNION ALL SELECT count() AS c FROM default.t2)")) + r.Equal(uint64(80), result, "expect count=80") + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "remote", fullBackupName)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", fullBackupName)) + + // check create + partitions + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "create", "--tables=default.t1", "--partitions=20220102,20220103", partitionBackupName)) + partitionBackupDir := "/var/lib/clickhouse/backup/" + partitionBackupName + "/shadow/default/t1/default/" + if strings.HasPrefix(remoteStorageType, "EMBEDDED") { + partitionBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + partitionBackupName + "/data/default/t1" + } + out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+partitionBackupDir+"| wc -l") + r.NoError(err) + r.Equal("5", strings.Trim(out, "\r\n\t ")) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", partitionBackupName)) + + // check create > upload + partitions + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "create", "--tables=default.t1", partitionBackupName)) + partitionBackupDir = "/var/lib/clickhouse/backup/" + partitionBackupName + "/shadow/default/t1/default/" + if strings.HasPrefix(remoteStorageType, "EMBEDDED") { + partitionBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + partitionBackupName + "/data/default/t1" + } + out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+partitionBackupDir+" | wc -l") + r.NoError(err) + r.Equal("7", strings.Trim(out, "\r\n\t ")) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "upload", "--tables=default.t1", "--partitions=20220102,20220103", partitionBackupName)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", partitionBackupName)) + + // restore partial uploaded + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "restore_remote", partitionBackupName)) + + // Check partial restored t1 + result = 0 + r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT count() FROM default.t1")) + + expectedCount = 20 + // custom and embedded doesn't support --partitions in upload and download + if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { + expectedCount = 40 + } + r.Equal(expectedCount, result, fmt.Sprintf("expect count=%d", expectedCount)) + + // Check only selected partitions restored + result = 0 + r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT count() FROM default.t1 WHERE dt NOT IN ('2022-01-02','2022-01-03')")) + expectedCount = 0 + // custom and embedded doesn't 
support --partitions in upload and download + if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { + expectedCount = 20 + } + r.Equal(expectedCount, result, "expect count=0") + + // DELETE backup. + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "remote", partitionBackupName)) + r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", partitionBackupName)) + + ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t1") + ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t2") + + log.Info("testBackupSpecifiedPartitions finish") +} + func checkResumeAlreadyProcessed(backupCmd, testBackupName, resumeKind string, r *require.Assertions, remoteStorageType string) { // backupCmd = fmt.Sprintf("%s & PID=$!; sleep 0.7; kill -9 $PID; cat /var/lib/clickhouse/backup/%s/upload.state; sleep 0.3; %s", backupCmd, testBackupName, backupCmd) if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { @@ -2347,117 +2461,3 @@ func installDebIfNotExists(r *require.Assertions, container string, pkgs ...stri ), )) } - -func testBackupSpecifiedPartitions(r *require.Assertions, ch *TestClickHouse, remoteStorageType string) { - log.Info("testBackupSpecifiedPartitions started") - var err error - var out string - var result, expectedCount uint64 - - partitionBackupName := fmt.Sprintf("partition_backup_%d", rand.Int()) - fullBackupName := fmt.Sprintf("full_backup_%d", rand.Int()) - // Create and fill tables - ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t1") - ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t2") - ch.queryWithNoError(r, "CREATE TABLE default.t1 (dt Date, v UInt64) ENGINE=MergeTree() PARTITION BY toYYYYMMDD(dt) ORDER BY dt") - ch.queryWithNoError(r, "CREATE TABLE default.t2 (dt String, v UInt64) ENGINE=MergeTree() PARTITION BY dt ORDER BY dt") - for _, dt := range []string{"2022-01-01", "2022-01-02", "2022-01-03", "2022-01-04"} { - ch.queryWithNoError(r, fmt.Sprintf("INSERT INTO default.t1 SELECT '%s', number FROM numbers(10)", dt)) - ch.queryWithNoError(r, fmt.Sprintf("INSERT INTO default.t2 SELECT '%s', number FROM numbers(10)", dt)) - } - - // check create_remote full > download + partitions > delete local > download > restore --partitions > restore - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "create_remote", "--tables=default.t*", fullBackupName)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", fullBackupName)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "download", "--partitions=('2022-01-02'),('2022-01-03')", fullBackupName)) - fullBackupDir := "/var/lib/clickhouse/backup/" + fullBackupName + "/shadow/default/t?/default/" - if strings.HasPrefix(remoteStorageType, "EMBEDDED") { - fullBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + fullBackupName + "/data/default/t?" 
- } - out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+fullBackupDir+" | wc -l") - r.NoError(err) - expectedLines := "13" - // custom storage doesn't support --partitions for upload / download now - // embedded storage contain hardLink files and will download additional data parts - if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { - expectedLines = "17" - } - r.Equal(expectedLines, strings.Trim(out, "\r\n\t ")) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", fullBackupName)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "download", fullBackupName)) - - fullBackupDir = "/var/lib/clickhouse/backup/" + fullBackupName + "/shadow/default/t?/default/" - if strings.HasPrefix(remoteStorageType, "EMBEDDED") { - fullBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + fullBackupName + "/data/default/t?" - } - out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+fullBackupDir+"| wc -l") - r.NoError(err) - r.Equal("17", strings.Trim(out, "\r\n\t ")) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "restore", "--partitions=('2022-01-02'),('2022-01-03')", fullBackupName)) - result = 0 - r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT sum(c) FROM (SELECT count() AS c FROM default.t1 UNION ALL SELECT count() AS c FROM default.t2)")) - expectedCount = 40 - r.Equal(expectedCount, result, fmt.Sprintf("expect count=%d", expectedCount)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "restore", fullBackupName)) - result = 0 - r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT sum(c) FROM (SELECT count() AS c FROM default.t1 UNION ALL SELECT count() AS c FROM default.t2)")) - r.Equal(uint64(80), result, "expect count=80") - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "remote", fullBackupName)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", fullBackupName)) - - // check create + partitions - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "create", "--tables=default.t1", "--partitions=20220102,20220103", partitionBackupName)) - partitionBackupDir := "/var/lib/clickhouse/backup/" + partitionBackupName + "/shadow/default/t1/default/" - if strings.HasPrefix(remoteStorageType, "EMBEDDED") { - partitionBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + partitionBackupName + "/data/default/t1" - } - out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+partitionBackupDir+"| wc -l") - r.NoError(err) - r.Equal("5", strings.Trim(out, "\r\n\t ")) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", partitionBackupName)) - - // check create > upload + partitions - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "create", "--tables=default.t1", partitionBackupName)) - partitionBackupDir = "/var/lib/clickhouse/backup/" + partitionBackupName + "/shadow/default/t1/default/" - if strings.HasPrefix(remoteStorageType, "EMBEDDED") { - partitionBackupDir = "/var/lib/clickhouse/disks/backups" + strings.ToLower(strings.TrimPrefix(remoteStorageType, "EMBEDDED")) + "/" + partitionBackupName + "/data/default/t1" - } - out, err = dockerExecOut("clickhouse", "bash", "-c", "ls -la "+partitionBackupDir+" | wc -l") - r.NoError(err) - r.Equal("7", strings.Trim(out, "\r\n\t ")) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "upload", 
"--tables=default.t1", "--partitions=20220102,20220103", partitionBackupName)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", partitionBackupName)) - - // restore partial uploaded - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "restore_remote", partitionBackupName)) - - // Check partial restored t1 - result = 0 - r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT count() FROM default.t1")) - - expectedCount = 20 - // custom and embedded doesn't support --partitions in upload and download - if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { - expectedCount = 40 - } - r.Equal(expectedCount, result, fmt.Sprintf("expect count=%d", expectedCount)) - - // Check only selected partitions restored - result = 0 - r.NoError(ch.chbackend.SelectSingleRowNoCtx(&result, "SELECT count() FROM default.t1 WHERE dt NOT IN ('2022-01-02','2022-01-03')")) - expectedCount = 0 - // custom and embedded doesn't support --partitions in upload and download - if remoteStorageType == "CUSTOM" || strings.HasPrefix(remoteStorageType, "EMBEDDED") { - expectedCount = 20 - } - r.Equal(expectedCount, result, "expect count=0") - - // DELETE backup. - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "remote", partitionBackupName)) - r.NoError(dockerExec("clickhouse", "clickhouse-backup", "delete", "local", partitionBackupName)) - - ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t1") - ch.queryWithNoError(r, "DROP TABLE IF EXISTS default.t2") - - log.Info("testBackupSpecifiedPartitions finish") -} From 0c22aa37831302422ede1992cd3b4f53d6940214 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 11:24:02 +0500 Subject: [PATCH 07/12] fix `make test` --- pkg/storage/ftp.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/ftp.go b/pkg/storage/ftp.go index 20a25831..a48198b2 100644 --- a/pkg/storage/ftp.go +++ b/pkg/storage/ftp.go @@ -209,7 +209,7 @@ func (f *FTP) PutFile(ctx context.Context, key string, r io.ReadCloser) error { } func (f *FTP) CopyObject(ctx context.Context, srcBucket, srcKey, dstKey string) (int64, error) { - return 0, fmt.Errorf("CopyObject not imlemented for %a", f.Kind()) + return 0, fmt.Errorf("CopyObject not imlemented for %s", f.Kind()) } func (f *FTP) DeleteFileFromObjectDiskBackup(ctx context.Context, key string) error { From 06cd6aded95596d1dfca53db203dfbc709b47bd7 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 11:56:19 +0500 Subject: [PATCH 08/12] remove docker-compose from requirements.txt to allow run inside GitHub actions docker-compose requires PyYAML<6,>=3.10 which failed to install ``` /tmp/pip-build-env-x3k7dqup/overlay/lib/python3.10/site-packages/setuptools/config/setupcfg.py:293: _DeprecatedConfig: Deprecated config in `setup.cfg` !! ******************************************************************************** The license_file parameter is deprecated, use license_files instead. By 2023-Oct-30, you need to update your project and remove deprecated calls or your builds will no longer be supported. 
``` --- test/testflows/requirements.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/testflows/requirements.txt b/test/testflows/requirements.txt index b4f29d02..1bc16424 100644 --- a/test/testflows/requirements.txt +++ b/test/testflows/requirements.txt @@ -1,5 +1,3 @@ -testflows==1.9.61 +testflows==1.9.71 requests -docker>=5.0.0 -docker-compose>=1.29.1 setuptools \ No newline at end of file From d7c0d4defd21306007a959fc1c43e47165e95063 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 12:55:01 +0500 Subject: [PATCH 09/12] move object_disk_path validation to `create` and `restore` to avoid unnecessary failures, fix testflows --- pkg/backup/create.go | 4 ++++ pkg/config/config.go | 17 ++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pkg/backup/create.go b/pkg/backup/create.go index 08067c66..ea4f1c74 100644 --- a/pkg/backup/create.go +++ b/pkg/backup/create.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/Altinity/clickhouse-backup/pkg/config" "github.com/Altinity/clickhouse-backup/pkg/partition" "github.com/Altinity/clickhouse-backup/pkg/status" "github.com/Altinity/clickhouse-backup/pkg/storage" @@ -510,6 +511,9 @@ func (b *Backuper) AddTableToBackup(ctx context.Context, backupName, shadowBacku disksToPartsMap[disk.Name] = parts log.WithField("disk", disk.Name).Debug("shadow moved") if disk.Type == "s3" || disk.Type == "azure_blob_storage" && len(parts) > 0 { + if err = config.ValidateObjectDiskConfig(b.cfg); err != nil { + return nil, nil, err + } start := time.Now() if b.dst == nil { b.dst, err = storage.NewBackupDestination(ctx, b.cfg, b.ch, false, backupName) diff --git a/pkg/config/config.go b/pkg/config/config.go index f420f320..0a16c084 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -418,20 +418,23 @@ func ValidateConfig(cfg *Config) error { cfg.General.FullDuration = duration } } - // @TODO add all other storage types + return nil +} + +func ValidateObjectDiskConfig(cfg *Config) error { if !cfg.ClickHouse.UseEmbeddedBackupRestore { switch cfg.General.RemoteStorage { case "s3": - if cfg.S3.ObjectDiskPath == "" || strings.HasPrefix(cfg.S3.Path, cfg.S3.ObjectDiskPath) { - return fmt.Errorf("invalid s3->object_disk_path, shall be not empty and shall not be prefix for s3->path") + if cfg.S3.Path != "" && (cfg.S3.ObjectDiskPath == "" || strings.HasPrefix(cfg.S3.Path, cfg.S3.ObjectDiskPath)) { + return fmt.Errorf("data in objects disks, invalid s3->object_disk_path config section, shall be not empty and shall not be prefix for s3->path") } case "gcs": - if cfg.GCS.ObjectDiskPath == "" || strings.HasPrefix(cfg.GCS.Path, cfg.GCS.ObjectDiskPath) { - return fmt.Errorf("invalid gcs->object_disk_path, shall be not empty and shall not be prefix for gcs->path") + if cfg.GCS.Path != "" && (cfg.GCS.ObjectDiskPath == "" || strings.HasPrefix(cfg.GCS.Path, cfg.GCS.ObjectDiskPath)) { + return fmt.Errorf("data in objects disks, invalid gcs->object_disk_path config section, shall be not empty and shall not be prefix for gcs->path") } case "azblob": - if cfg.AzureBlob.ObjectDiskPath == "" || strings.HasPrefix(cfg.AzureBlob.Path, cfg.AzureBlob.ObjectDiskPath) { - return fmt.Errorf("invalid azblob->object_disk_path, shall be not empty and shall not be prefix for gcs->path") + if cfg.AzureBlob.Path != "" && (cfg.AzureBlob.ObjectDiskPath == "" || strings.HasPrefix(cfg.AzureBlob.Path, cfg.AzureBlob.ObjectDiskPath)) { + return fmt.Errorf("data in objects disks, invalid azblob->object_disk_path config 
section, shall be not empty and shall not be prefix for gcs->path") } } } From b56ea6bba42d7499fb0bde44804ff4ffe4008df9 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 13:09:16 +0500 Subject: [PATCH 10/12] add PyYAML to requirement.txt, fix github actions Signed-off-by: Slach --- pkg/backup/restore.go | 4 ++++ .../clickhouse_backup/tests/snapshots/cli.py.cli.snapshot | 2 +- test/testflows/requirements.txt | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/backup/restore.go b/pkg/backup/restore.go index b864c2f6..35f6e00e 100644 --- a/pkg/backup/restore.go +++ b/pkg/backup/restore.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "github.com/Altinity/clickhouse-backup/pkg/config" "github.com/Altinity/clickhouse-backup/pkg/status" "github.com/Altinity/clickhouse-backup/pkg/storage" "github.com/Altinity/clickhouse-backup/pkg/storage/object_disk" @@ -742,6 +743,9 @@ func (b *Backuper) downloadObjectDiskParts(ctx context.Context, backupName strin return fmt.Errorf("%s disk doesn't present in diskTypes: %v", diskName, diskTypes) } if diskType == "s3" || diskType == "azure_blob_storage" { + if err = config.ValidateObjectDiskConfig(b.cfg); err != nil { + return err + } if err = object_disk.InitCredentialsAndConnections(ctx, b.ch, b.cfg, diskName); err != nil { return err } diff --git a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot index 959ecaae..7940675f 100644 --- a/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot +++ b/test/testflows/clickhouse_backup/tests/snapshots/cli.py.cli.snapshot @@ -1,4 +1,4 @@ -default_config = r"""'[\'general:\', \' remote_storage: none\', \' disable_progress_bar: true\', \' backups_to_keep_local: 0\', \' backups_to_keep_remote: 0\', \' log_level: info\', \' allow_empty_backups: false\', \' use_resumable_state: true\', \' restore_schema_on_cluster: ""\', \' upload_by_part: true\', \' download_by_part: true\', \' restore_database_mapping: {}\', \' retries_on_failure: 3\', \' retries_pause: 30s\', \' watch_interval: 1h\', \' full_interval: 24h\', \' watch_backup_name_template: shard{shard}-{type}-{time:20060102150405}\', \' retriesduration: 100ms\', \' watchduration: 1h0m0s\', \' fullduration: 24h0m0s\', \'clickhouse:\', \' username: default\', \' password: ""\', \' host: localhost\', \' port: 9000\', \' disk_mapping: {}\', \' skip_tables:\', \' - system.*\', \' - INFORMATION_SCHEMA.*\', \' - information_schema.*\', \' - _temporary_and_external_tables.*\', \' timeout: 5m\', \' freeze_by_part: false\', \' freeze_by_part_where: ""\', \' use_embedded_backup_restore: false\', \' embedded_backup_disk: ""\', \' backup_mutations: true\', \' restore_as_attach: false\', \' check_parts_columns: true\', \' secure: false\', \' skip_verify: false\', \' sync_replicated_tables: false\', \' log_sql_queries: true\', \' config_dir: /etc/clickhouse-server/\', \' restart_command: systemctl restart clickhouse-server\', \' ignore_not_exists_error_during_freeze: true\', \' check_replicas_before_attach: true\', \' tls_key: ""\', \' tls_cert: ""\', \' tls_ca: ""\', \' debug: false\', \'s3:\', \' access_key: ""\', \' secret_key: ""\', \' bucket: ""\', \' endpoint: ""\', \' region: us-east-1\', \' acl: private\', \' assume_role_arn: ""\', \' force_path_style: false\', \' path: ""\', \' disable_ssl: false\', \' compression_level: 1\', \' compression_format: tar\', \' sse: ""\', \' sse_kms_key_id: ""\', \' sse_customer_algorithm: ""\', \' 
sse_customer_key: ""\', \' sse_customer_key_md5: ""\', \' sse_kms_encryption_context: ""\', \' disable_cert_verification: false\', \' use_custom_storage_class: false\', \' storage_class: STANDARD\', \' custom_storage_class_map: {}\', \' part_size: 0\', \' allow_multipart_download: false\', \' object_labels: {}\', \' debug: false\', \'gcs:\', \' credentials_file: ""\', \' credentials_json: ""\', \' credentials_json_encoded: ""\', \' bucket: ""\', \' path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' debug: false\', \' endpoint: ""\', \' storage_class: STANDARD\', \' object_labels: {}\', \' custom_storage_class_map: {}\', \'cos:\', \' url: ""\', \' timeout: 2m\', \' secret_id: ""\', \' secret_key: ""\', \' path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'api:\', \' listen: localhost:7171\', \' enable_metrics: true\', \' enable_pprof: false\', \' username: ""\', \' password: ""\', \' secure: false\', \' certificate_file: ""\', \' private_key_file: ""\', \' ca_cert_file: ""\', \' ca_key_file: ""\', \' create_integration_tables: false\', \' integration_tables_host: ""\', \' allow_parallel: false\', \' complete_resumable_after_restart: true\', \'ftp:\', \' address: ""\', \' timeout: 2m\', \' username: ""\', \' password: ""\', \' tls: false\', \' path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'sftp:\', \' address: ""\', \' port: 22\', \' username: ""\', \' password: ""\', \' key: ""\', \' path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'azblob:\', \' endpoint_schema: https\', \' endpoint_suffix: core.windows.net\', \' account_name: ""\', \' account_key: ""\', \' sas: ""\', \' use_managed_identity: false\', \' container: ""\', \' path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' sse_key: ""\', \' buffer_size: 0\', \' buffer_count: 3\', \' timeout: 15m\', \'custom:\', \' upload_command: ""\', \' download_command: ""\', \' list_command: ""\', \' delete_command: ""\', \' command_timeout: 4h\', \' commandtimeoutduration: 4h0m0s\']'""" +default_config = r"""'[\'general:\', \' remote_storage: none\', \' disable_progress_bar: true\', \' backups_to_keep_local: 0\', \' backups_to_keep_remote: 0\', \' log_level: info\', \' allow_empty_backups: false\', \' use_resumable_state: true\', \' restore_schema_on_cluster: ""\', \' upload_by_part: true\', \' download_by_part: true\', \' restore_database_mapping: {}\', \' retries_on_failure: 3\', \' retries_pause: 30s\', \' watch_interval: 1h\', \' full_interval: 24h\', \' watch_backup_name_template: shard{shard}-{type}-{time:20060102150405}\', \' retriesduration: 100ms\', \' watchduration: 1h0m0s\', \' fullduration: 24h0m0s\', \'clickhouse:\', \' username: default\', \' password: ""\', \' host: localhost\', \' port: 9000\', \' disk_mapping: {}\', \' skip_tables:\', \' - system.*\', \' - INFORMATION_SCHEMA.*\', \' - information_schema.*\', \' - _temporary_and_external_tables.*\', \' timeout: 5m\', \' freeze_by_part: false\', \' freeze_by_part_where: ""\', \' use_embedded_backup_restore: false\', \' embedded_backup_disk: ""\', \' backup_mutations: true\', \' restore_as_attach: false\', \' check_parts_columns: true\', \' secure: false\', \' skip_verify: false\', \' sync_replicated_tables: false\', \' log_sql_queries: true\', \' config_dir: /etc/clickhouse-server/\', \' restart_command: systemctl restart clickhouse-server\', \' ignore_not_exists_error_during_freeze: true\', \' check_replicas_before_attach: 
true\', \' tls_key: ""\', \' tls_cert: ""\', \' tls_ca: ""\', \' debug: false\', \'s3:\', \' access_key: ""\', \' secret_key: ""\', \' bucket: ""\', \' endpoint: ""\', \' region: us-east-1\', \' acl: private\', \' assume_role_arn: ""\', \' force_path_style: false\', \' path: ""\', \' object_disk_path: ""\', \' disable_ssl: false\', \' compression_level: 1\', \' compression_format: tar\', \' sse: ""\', \' sse_kms_key_id: ""\', \' sse_customer_algorithm: ""\', \' sse_customer_key: ""\', \' sse_customer_key_md5: ""\', \' sse_kms_encryption_context: ""\', \' disable_cert_verification: false\', \' use_custom_storage_class: false\', \' storage_class: STANDARD\', \' custom_storage_class_map: {}\', \' part_size: 0\', \' allow_multipart_download: false\', \' object_labels: {}\', \' debug: false\', \'gcs:\', \' credentials_file: ""\', \' credentials_json: ""\', \' credentials_json_encoded: ""\', \' bucket: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' debug: false\', \' endpoint: ""\', \' storage_class: STANDARD\', \' object_labels: {}\', \' custom_storage_class_map: {}\', \'cos:\', \' url: ""\', \' timeout: 2m\', \' secret_id: ""\', \' secret_key: ""\', \' path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'api:\', \' listen: localhost:7171\', \' enable_metrics: true\', \' enable_pprof: false\', \' username: ""\', \' password: ""\', \' secure: false\', \' certificate_file: ""\', \' private_key_file: ""\', \' ca_cert_file: ""\', \' ca_key_file: ""\', \' create_integration_tables: false\', \' integration_tables_host: ""\', \' allow_parallel: false\', \' complete_resumable_after_restart: true\', \'ftp:\', \' address: ""\', \' timeout: 2m\', \' username: ""\', \' password: ""\', \' tls: false\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'sftp:\', \' address: ""\', \' port: 22\', \' username: ""\', \' password: ""\', \' key: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_format: tar\', \' compression_level: 1\', \' debug: false\', \'azblob:\', \' endpoint_schema: https\', \' endpoint_suffix: core.windows.net\', \' account_name: ""\', \' account_key: ""\', \' sas: ""\', \' use_managed_identity: false\', \' container: ""\', \' path: ""\', \' object_disk_path: ""\', \' compression_level: 1\', \' compression_format: tar\', \' sse_key: ""\', \' buffer_size: 0\', \' buffer_count: 3\', \' timeout: 15m\', \'custom:\', \' upload_command: ""\', \' download_command: ""\', \' list_command: ""\', \' delete_command: ""\', \' command_timeout: 4h\', \' commandtimeoutduration: 4h0m0s\']'""" help_flag = r"""'NAME:\n clickhouse-backup - Tool for easy backup of ClickHouse with cloud supportUSAGE:\n clickhouse-backup [-t, --tables=.] 
DESCRIPTION:\n Run as \'root\' or \'clickhouse\' userCOMMANDS:\n tables List of tables, exclude skip_tables\n create Create new backup\n create_remote Create and upload new backup\n upload Upload backup to remote storage\n list List of backups\n download Download backup from remote storage\n restore Create schema and restore data from backup\n restore_remote Download and restore\n delete Delete specific backup\n default-config Print default config\n print-config Print current config merged with environment variables\n clean Remove data in \'shadow\' folder from all \'path\' folders available from \'system.disks\'\n clean_remote_broken Remove all broken remote backups\n watch Run infinite loop which create full + incremental backup sequence to allow efficient backup sequences\n server Run API server\n help, h Shows a list of commands or help for one commandGLOBAL OPTIONS:\n --config value, -c value Config \'FILE\' name. (default: "/etc/clickhouse-backup/config.yml") [$CLICKHOUSE_BACKUP_CONFIG]\n --help, -h show help\n --version, -v print the version'""" diff --git a/test/testflows/requirements.txt b/test/testflows/requirements.txt index 1bc16424..541c9085 100644 --- a/test/testflows/requirements.txt +++ b/test/testflows/requirements.txt @@ -1,3 +1,4 @@ testflows==1.9.71 requests -setuptools \ No newline at end of file +setuptools +PyYAML \ No newline at end of file From d264f2555f074f9aa769241d09306659257cbba9 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 14:25:09 +0500 Subject: [PATCH 11/12] GCS over S3 added in 22.6+ --- test/integration/dynamic_settings.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/integration/dynamic_settings.sh b/test/integration/dynamic_settings.sh index b7854b25..e15f6bec 100644 --- a/test/integration/dynamic_settings.sh +++ b/test/integration/dynamic_settings.sh @@ -112,6 +112,10 @@ cat < /etc/clickhouse-server/config.d/storage_configuration_s3.xml EOT +fi + +if [[ "${CLICKHOUSE_VERSION}" == "head" || "${CLICKHOUSE_VERSION}" =~ ^22\.[6-9]+ || "${CLICKHOUSE_VERSION}" =~ ^22\.1[0-9]+ || "${CLICKHOUSE_VERSION}" =~ ^2[3-9]\.[1-9]+ ]]; then + cat < /etc/clickhouse-server/config.d/storage_configuration_gcs.xml From 884a664034be900db6eee6504c5eea6facc20ea2 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 24 Jul 2023 16:51:06 +0500 Subject: [PATCH 12/12] try to fix api.py to avoid race condition between GET /backup/actions and POST /backup/actions --- pkg/server/server.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/server/server.go b/pkg/server/server.go index 3d745307..26e3c390 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -405,8 +405,9 @@ func (api *APIServer) actionsAsyncCommandsHandler(command string, args []string, if !api.config.API.AllowParallel && status.Current.InProgress() { return actionsResults, ErrAPILocked } + // to avoid race condition between GET /backup/actions and POST /backup/actions + commandId, _ := status.Current.Start(row.Command) go func() { - commandId, _ := status.Current.Start(row.Command) err, _ := api.metrics.ExecuteWithMetrics(command, 0, func() error { return api.cliApp.Run(append([]string{"clickhouse-backup", "-c", api.configPath, "--command-id", strconv.FormatInt(int64(commandId), 10)}, args...)) })
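The server.go hunk above closes a race between POST /backup/actions and a subsequent GET /backup/actions: the command is now registered in the in-memory status list synchronously, before the goroutine that executes it starts, so a poll arriving immediately after the POST already sees the new command id. A small, self-contained sketch of the same ordering with simplified stand-in types (this is not the real status package API):

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// statusBoard is a toy stand-in for status.Current: a mutex-guarded list of commands.
type statusBoard struct {
	mu   sync.Mutex
	rows []string
}

// Start registers a command and returns its id; called before the worker goroutine starts.
func (s *statusBoard) Start(cmd string) int {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.rows = append(s.rows, cmd+": in progress")
	return len(s.rows) - 1
}

// Finish marks a previously registered command as done.
func (s *statusBoard) Finish(id int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.rows[id] += ", done"
}

// List returns a snapshot, like GET /backup/actions reading the current status.
func (s *statusBoard) List() []string {
	s.mu.Lock()
	defer s.mu.Unlock()
	return append([]string(nil), s.rows...)
}

func main() {
	status := &statusBoard{}
	// register synchronously, exactly as the patch moves status.Current.Start(row.Command)
	// out of the go func() closure
	id := status.Start("create_remote backup_1")
	go func() {
		time.Sleep(100 * time.Millisecond) // stand-in for running the actual backup command
		status.Finish(id)
	}()
	// a GET racing the POST still sees the freshly registered command
	fmt.Println(status.List())
	time.Sleep(200 * time.Millisecond)
	fmt.Println(status.List())
}
```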