From d1b6ab46d6974b177309df2ffdec218fc33a6f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B4=E6=9D=A8=E5=B8=86?= <39647285+leno23@users.noreply.github.com> Date: Sun, 17 May 2026 15:53:16 +0800 Subject: [PATCH 1/3] fix: allow resume without download state file Treat download.state2 as optional for --resume so an existing backup directory without state can be validated and resumed from scratch. --- pkg/backup/download.go | 18 +++++++++++------- pkg/backup/download_test.go | 12 ++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index c469e488a..b091f2b47 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -40,6 +40,16 @@ var ( ErrBackupIsAlreadyExists = errors.New("backup is already exists") ) +func (b *Backuper) resumeExistingBackup(backupName string) bool { + _, checkDownloadErr := os.Stat(path.Join(b.DefaultDataPath, "backup", backupName, "download.state2")) + if errors.Is(checkDownloadErr, os.ErrNotExist) { + log.Warn().Msgf("%s already exists but no download.state2 found, will resume download from scratch", backupName) + } else { + log.Warn().Msgf("%s already exists will try to resume download", backupName) + } + return true +} + func (b *Backuper) Download(backupName string, tablePattern string, partitions []string, schemaOnly, rbacOnly, configsOnly, namedCollectionsOnly, resume bool, hardlinkExistsFiles bool, backupVersion string, commandId int) error { if pidCheckErr := pidlock.CheckAndCreatePidFile(backupName, "download"); pidCheckErr != nil { return errors.WithMessage(pidCheckErr, "CheckAndCreatePidFile") @@ -87,14 +97,8 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ } if !b.resume { return ErrBackupIsAlreadyExists - } else { - _, checkDownloadErr := os.Stat(path.Join(b.DefaultDataPath, "backup", backupName, "download.state2")) - if errors.Is(checkDownloadErr, os.ErrNotExist) { - return ErrBackupIsAlreadyExists - } - isResumeExists = true - log.Warn().Msgf("%s already exists will try to resume download", backupName) } + isResumeExists = b.resumeExistingBackup(backupName) } } startDownload := time.Now() diff --git a/pkg/backup/download_test.go b/pkg/backup/download_test.go index 27ea7434e..ac7c5f7e0 100644 --- a/pkg/backup/download_test.go +++ b/pkg/backup/download_test.go @@ -1,6 +1,8 @@ package backup import ( + "os" + "path" "regexp" "testing" "time" @@ -91,6 +93,16 @@ var remoteBackup = storage.Backup{ UploadDate: time.Now(), } +func TestResumeExistingBackupAllowsMissingStateFile(t *testing.T) { + backupName := "test_resume_crash" + defaultDataPath := t.TempDir() + assert.NoError(t, os.MkdirAll(path.Join(defaultDataPath, "backup", backupName), 0o750)) + + backuper := &Backuper{DefaultDataPath: defaultDataPath} + + assert.True(t, backuper.resumeExistingBackup(backupName)) +} + func TestReBalanceTablesMetadataIfDiskNotExists_Files_NoErrors(t *testing.T) { remoteBackup.DataFormat = "tar" baseTable := metadata.TableMetadata{ From 7a4ed041c512b8c78ec76d4c9c57b9144370b0cb Mon Sep 17 00:00:00 2001 From: slach Date: Fri, 29 May 2026 15:10:39 +0400 Subject: [PATCH 2/3] fix review comments https://github.com/Altinity/clickhouse-backup/pull/1385#discussion_r3315157643 --- pkg/backup/download.go | 14 ++++++++------ pkg/backup/download_test.go | 22 +++++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index b091f2b47..ab454007f 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -40,14 +40,13 @@ var ( ErrBackupIsAlreadyExists = errors.New("backup is already exists") ) -func (b *Backuper) resumeExistingBackup(backupName string) bool { +func (b *Backuper) resumeExistingBackup(backupName string) error { _, checkDownloadErr := os.Stat(path.Join(b.DefaultDataPath, "backup", backupName, "download.state2")) if errors.Is(checkDownloadErr, os.ErrNotExist) { - log.Warn().Msgf("%s already exists but no download.state2 found, will resume download from scratch", backupName) - } else { - log.Warn().Msgf("%s already exists will try to resume download", backupName) + return fmt.Errorf("local backup '%s' exists but resumable state 'download.state2' is missing, so it is unknown which parts are complete and resuming on top of partial data risks silent corruption; run `clickhouse-backup delete local %s` and retry the download, or investigate why the resumable state was lost", backupName, backupName) } - return true + log.Warn().Msgf("%s already exists will try to resume download", backupName) + return nil } func (b *Backuper) Download(backupName string, tablePattern string, partitions []string, schemaOnly, rbacOnly, configsOnly, namedCollectionsOnly, resume bool, hardlinkExistsFiles bool, backupVersion string, commandId int) error { @@ -98,7 +97,10 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ if !b.resume { return ErrBackupIsAlreadyExists } - isResumeExists = b.resumeExistingBackup(backupName) + if resumeErr := b.resumeExistingBackup(backupName); resumeErr != nil { + return resumeErr + } + isResumeExists = true } } startDownload := time.Now() diff --git a/pkg/backup/download_test.go b/pkg/backup/download_test.go index ac7c5f7e0..8e70dd577 100644 --- a/pkg/backup/download_test.go +++ b/pkg/backup/download_test.go @@ -12,6 +12,7 @@ import ( "github.com/Altinity/clickhouse-backup/v2/pkg/metadata" "github.com/Altinity/clickhouse-backup/v2/pkg/storage" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var b = Backuper{ @@ -93,14 +94,21 @@ var remoteBackup = storage.Backup{ UploadDate: time.Now(), } -func TestResumeExistingBackupAllowsMissingStateFile(t *testing.T) { +func TestResumeExistingBackupMissingStateFileReturnsError(t *testing.T) { backupName := "test_resume_crash" defaultDataPath := t.TempDir() - assert.NoError(t, os.MkdirAll(path.Join(defaultDataPath, "backup", backupName), 0o750)) + backupDir := path.Join(defaultDataPath, "backup", backupName) + assert.NoError(t, os.MkdirAll(backupDir, 0o750)) backuper := &Backuper{DefaultDataPath: defaultDataPath} - assert.True(t, backuper.resumeExistingBackup(backupName)) + err := backuper.resumeExistingBackup(backupName) + require.Error(t, err) + assert.Contains(t, err.Error(), "download.state2") + assert.Contains(t, err.Error(), "delete local "+backupName) + + assert.NoError(t, os.WriteFile(path.Join(backupDir, "download.state2"), []byte("state"), 0o640)) + assert.NoError(t, backuper.resumeExistingBackup(backupName)) } func TestReBalanceTablesMetadataIfDiskNotExists_Files_NoErrors(t *testing.T) { @@ -246,7 +254,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { tableMetadataAfterDownloadRepacked[i] = tableMetadataAfterDownload[i] } err := b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, baseDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) assert.Contains(t, err.Error(), "disk: hdd2 not found in disk_types section map[string]string{\"default\":\"local\", \"s3\":\"s3\", \"s3_disk2\":\"s3\"} in Test/metadata.json", ) @@ -258,7 +266,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { tableMetadataAfterDownloadRepacked[i] = tableMetadataAfterDownload[i] } err = b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, baseDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) assert.Contains(t, err.Error(), "disk: hdd2, diskType: unknown not found in system.disks") // invalid storage_policy @@ -274,7 +282,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { invalidTable.Query = "CREATE TABLE default.test3(id UInt64) ENGINE=MergeTree() ORDER BY id SETTINGS storage_policy='invalid'" tableMetadataAfterDownloadRepacked = ListOfTables{&invalidTable} err = b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, baseDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) matched, matchErr := regexp.MatchString(`storagePolicy: invalid with diskType: \w+ not found in system.disks`, err.Error()) assert.NoError(t, matchErr) assert.True(t, matched) @@ -290,7 +298,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { } tableMetadataAfterDownloadRepacked = ListOfTables{&invalidTable} err = b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, invalidDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) assert.Contains(t, err.Error(), "250B free space, not found in system.disks with `local` type") } From c6162ae43129cbfa68862d4abc1583db868a3496 Mon Sep 17 00:00:00 2001 From: slach Date: Fri, 29 May 2026 16:11:10 +0400 Subject: [PATCH 3/3] fix TestServerAPI failures --- pkg/backup/download.go | 4 +++- pkg/backup/download_test.go | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index fba8f2e43..fe5342244 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -44,7 +44,9 @@ var ( func (b *Backuper) resumeExistingBackup(backupName string) error { _, checkDownloadErr := os.Stat(path.Join(b.DefaultDataPath, "backup", backupName, "download.state2")) if errors.Is(checkDownloadErr, os.ErrNotExist) { - return fmt.Errorf("local backup '%s' exists but resumable state 'download.state2' is missing, so it is unknown which parts are complete and resuming on top of partial data risks silent corruption; run `clickhouse-backup delete local %s` and retry the download, or investigate why the resumable state was lost", backupName, backupName) + // wrap ErrBackupIsAlreadyExists so RestoreFromRemote keeps reusing an already complete local backup (issue #625), + // while a standalone `download --resume` surfaces the guidance below instead of a bare "backup is already exists" + return fmt.Errorf("%w: local backup '%s' exists but resumable state 'download.state2' is missing, so it is unknown which parts are complete and resuming on top of partial data risks silent corruption; run `clickhouse-backup delete local %s` and retry the download, or investigate why the resumable state was lost", ErrBackupIsAlreadyExists, backupName, backupName) } log.Warn().Msgf("%s already exists will try to resume download", backupName) return nil diff --git a/pkg/backup/download_test.go b/pkg/backup/download_test.go index 8e70dd577..8a6691327 100644 --- a/pkg/backup/download_test.go +++ b/pkg/backup/download_test.go @@ -106,6 +106,8 @@ func TestResumeExistingBackupMissingStateFileReturnsError(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "download.state2") assert.Contains(t, err.Error(), "delete local "+backupName) + // must keep wrapping ErrBackupIsAlreadyExists so RestoreFromRemote reuses the local backup (issue #625) + assert.ErrorIs(t, err, ErrBackupIsAlreadyExists) assert.NoError(t, os.WriteFile(path.Join(backupDir, "download.state2"), []byte("state"), 0o640)) assert.NoError(t, backuper.resumeExistingBackup(backupName))