diff --git a/pkg/backup/download.go b/pkg/backup/download.go index 5e68697b..fe534224 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -41,6 +41,17 @@ var ( ErrBackupIsAlreadyExists = errors.New("backup is already exists") ) +func (b *Backuper) resumeExistingBackup(backupName string) error { + _, checkDownloadErr := os.Stat(path.Join(b.DefaultDataPath, "backup", backupName, "download.state2")) + if errors.Is(checkDownloadErr, os.ErrNotExist) { + // wrap ErrBackupIsAlreadyExists so RestoreFromRemote keeps reusing an already complete local backup (issue #625), + // while a standalone `download --resume` surfaces the guidance below instead of a bare "backup is already exists" + return fmt.Errorf("%w: local backup '%s' exists but resumable state 'download.state2' is missing, so it is unknown which parts are complete and resuming on top of partial data risks silent corruption; run `clickhouse-backup delete local %s` and retry the download, or investigate why the resumable state was lost", ErrBackupIsAlreadyExists, backupName, backupName) + } + log.Warn().Msgf("%s already exists will try to resume download", backupName) + return nil +} + func (b *Backuper) Download(backupName string, tablePattern string, partitions []string, schemaOnly, rbacOnly, configsOnly, namedCollectionsOnly, resume bool, hardlinkExistsFiles bool, backupVersion string, commandId int) error { if pidCheckErr := pidlock.CheckAndCreatePidFile(backupName, "download"); pidCheckErr != nil { return errors.WithMessage(pidCheckErr, "CheckAndCreatePidFile") @@ -88,14 +99,11 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ } if !b.resume { return ErrBackupIsAlreadyExists - } else { - _, checkDownloadErr := os.Stat(path.Join(b.DefaultDataPath, "backup", backupName, "download.state2")) - if errors.Is(checkDownloadErr, os.ErrNotExist) { - return ErrBackupIsAlreadyExists - } - isResumeExists = true - log.Warn().Msgf("%s already exists will try to resume download", backupName) } + if resumeErr := b.resumeExistingBackup(backupName); resumeErr != nil { + return resumeErr + } + isResumeExists = true } } startDownload := time.Now() diff --git a/pkg/backup/download_test.go b/pkg/backup/download_test.go index 27ea7434..8a669132 100644 --- a/pkg/backup/download_test.go +++ b/pkg/backup/download_test.go @@ -1,6 +1,8 @@ package backup import ( + "os" + "path" "regexp" "testing" "time" @@ -10,6 +12,7 @@ import ( "github.com/Altinity/clickhouse-backup/v2/pkg/metadata" "github.com/Altinity/clickhouse-backup/v2/pkg/storage" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var b = Backuper{ @@ -91,6 +94,25 @@ var remoteBackup = storage.Backup{ UploadDate: time.Now(), } +func TestResumeExistingBackupMissingStateFileReturnsError(t *testing.T) { + backupName := "test_resume_crash" + defaultDataPath := t.TempDir() + backupDir := path.Join(defaultDataPath, "backup", backupName) + assert.NoError(t, os.MkdirAll(backupDir, 0o750)) + + backuper := &Backuper{DefaultDataPath: defaultDataPath} + + err := backuper.resumeExistingBackup(backupName) + require.Error(t, err) + assert.Contains(t, err.Error(), "download.state2") + assert.Contains(t, err.Error(), "delete local "+backupName) + // must keep wrapping ErrBackupIsAlreadyExists so RestoreFromRemote reuses the local backup (issue #625) + assert.ErrorIs(t, err, ErrBackupIsAlreadyExists) + + assert.NoError(t, os.WriteFile(path.Join(backupDir, "download.state2"), []byte("state"), 0o640)) + assert.NoError(t, backuper.resumeExistingBackup(backupName)) +} + func TestReBalanceTablesMetadataIfDiskNotExists_Files_NoErrors(t *testing.T) { remoteBackup.DataFormat = "tar" baseTable := metadata.TableMetadata{ @@ -234,7 +256,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { tableMetadataAfterDownloadRepacked[i] = tableMetadataAfterDownload[i] } err := b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, baseDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) assert.Contains(t, err.Error(), "disk: hdd2 not found in disk_types section map[string]string{\"default\":\"local\", \"s3\":\"s3\", \"s3_disk2\":\"s3\"} in Test/metadata.json", ) @@ -246,7 +268,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { tableMetadataAfterDownloadRepacked[i] = tableMetadataAfterDownload[i] } err = b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, baseDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) assert.Contains(t, err.Error(), "disk: hdd2, diskType: unknown not found in system.disks") // invalid storage_policy @@ -262,7 +284,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { invalidTable.Query = "CREATE TABLE default.test3(id UInt64) ENGINE=MergeTree() ORDER BY id SETTINGS storage_policy='invalid'" tableMetadataAfterDownloadRepacked = ListOfTables{&invalidTable} err = b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, baseDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) matched, matchErr := regexp.MatchString(`storagePolicy: invalid with diskType: \w+ not found in system.disks`, err.Error()) assert.NoError(t, matchErr) assert.True(t, matched) @@ -278,7 +300,7 @@ func TestReBalanceTablesMetadataIfDiskNotExists_CheckErrors(t *testing.T) { } tableMetadataAfterDownloadRepacked = ListOfTables{&invalidTable} err = b.reBalanceTablesMetadataIfDiskNotExists(tableMetadataAfterDownloadRepacked, invalidDisks, invalidRemoteBackup) - assert.Error(t, err) + require.Error(t, err) assert.Contains(t, err.Error(), "250B free space, not found in system.disks with `local` type") }