Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verify cache on check #3747

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 17 additions & 5 deletions cmd/restic/cmd_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ var cmdCheck = &cobra.Command{
The "check" command tests the repository for errors and reports any errors it
finds. It can also be used to read all data and therefore simulate a restore.

By default, the "check" command will always load all data directly from the
repository and not use a local cache.
By default, the "check" command will use a local cache but verify it against
the repository. It is possible to switch to using a temporary cache.

EXIT STATUS
===========
Expand All @@ -46,7 +46,8 @@ type CheckOptions struct {
ReadData bool
ReadDataSubset string
CheckUnused bool
WithCache bool
NoCacheVerify bool
TemporaryCache bool
}

var checkOptions CheckOptions
Expand All @@ -58,7 +59,10 @@ func init() {
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read a `subset` of data packs, specified as 'n/t' for specific part, or either 'x%' or 'x.y%' or a size in bytes with suffixes k/K, m/M, g/G, t/T for a random subset")
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
f.BoolVar(&checkOptions.NoCacheVerify, "with-cache", false, "disable verification of local cache")
_ = f.MarkDeprecated("with-cache", "--with-cache is deprecated, use --no-cache-verify instead")
f.BoolVar(&checkOptions.NoCacheVerify, "no-cache-verify", false, "disable verification of local cache")
f.BoolVar(&checkOptions.TemporaryCache, "temporary-cache", false, "create a temporary cache")
}

func checkFlags(opts CheckOptions) error {
Expand Down Expand Up @@ -101,6 +105,9 @@ func checkFlags(opts CheckOptions) error {

}
}
if opts.NoCacheVerify && opts.TemporaryCache {
return errors.Fatal("check flags --no-cache-verify and --temporary-cache cannot be used together")
}

return nil
}
Expand Down Expand Up @@ -148,7 +155,7 @@ func parsePercentage(s string) (float64, error) {
// * if --temporary-cache is specified, we use a cache in a temporary directory that is deleted after the check
func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) {
cleanup = func() {}
if opts.WithCache {
if !opts.TemporaryCache {
// use the default cache, no setup needed
return cleanup
}
Expand Down Expand Up @@ -201,6 +208,11 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
return err
}

if repo.Cache != nil && !opts.NoCacheVerify {
// verify files in already existing cache
repo.Cache.EnableVerification()
}

if !gopts.NoLock {
Verbosef("create exclusive lock for repository\n")
lock, err := lockRepoExclusive(gopts.ctx, repo)
Expand Down
98 changes: 81 additions & 17 deletions internal/cache/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package cache

import (
"context"
"crypto/sha256"
"fmt"
"io"
"sync"

Expand Down Expand Up @@ -89,7 +91,7 @@ func (b *Backend) Save(ctx context.Context, h restic.Handle, rd restic.RewindRea
return nil
}

func (b *Backend) cacheFile(ctx context.Context, h restic.Handle) error {
func (b *Backend) tryToCacheFile(ctx context.Context, h restic.Handle) error {
finish := make(chan struct{})

b.inProgressMutex.Lock()
Expand All @@ -106,9 +108,24 @@ func (b *Backend) cacheFile(ctx context.Context, h restic.Handle) error {
return nil
}

defer func() {
// signal other waiting goroutines that the file may now be cached
close(finish)

// remove the finish channel from the map
b.inProgressMutex.Lock()
delete(b.inProgress, h)
b.inProgressMutex.Unlock()
}()

if b.hasToVerify(h) && b.Cache.Has(h) {
if err := b.verify(ctx, h); err != nil {
return err
}
}

// test again, maybe the file was cached in the meantime
if !b.Cache.Has(h) {

// nope, it's still not in the cache, pull it from the repo and save it

err := b.Backend.Load(ctx, h, 0, 0, func(rd io.Reader) error {
Expand All @@ -120,14 +137,7 @@ func (b *Backend) cacheFile(ctx context.Context, h restic.Handle) error {
}
}

// signal other waiting goroutines that the file may now be cached
close(finish)

// remove the finish channel from the map
b.inProgressMutex.Lock()
delete(b.inProgress, h)
b.inProgressMutex.Unlock()

b.markVerified(h)
return nil
}

Expand All @@ -148,6 +158,62 @@ func (b *Backend) loadFromCacheOrDelegate(ctx context.Context, h restic.Handle,
return rd.Close()
}

// hasToVerify reports whether h is absent from the cache's set of verified
// files. It always returns false when that set is nil, in which case the lock
// is not taken at all.
func (b *Backend) hasToVerify(h restic.Handle) bool {
	if b.Cache.verifiedFiles == nil {
		// no verified-files set exists, so nothing is tracked
		return false
	}

	b.Cache.verifiedFilesLock.Lock()
	defer b.Cache.verifiedFilesLock.Unlock()

	_, alreadyVerified := b.Cache.verifiedFiles[h]
	return !alreadyVerified
}

// markVerified records h in the cache's set of verified files. It is a no-op
// when that set is nil.
func (b *Backend) markVerified(h restic.Handle) {
	if b.Cache.verifiedFiles == nil {
		return
	}

	b.Cache.verifiedFilesLock.Lock()
	defer b.Cache.verifiedFilesLock.Unlock()

	b.Cache.verifiedFiles[h] = struct{}{}
}

// verify checks that the cached copy of h is correct, or at least not more
// broken than the version stored at the backend.
//
// Both the backend copy and the cached copy are hashed with SHA-256 and the
// results are compared:
//
//   - hashes are equal: nothing is done and nil is returned, even if neither
//     matches h.Name;
//   - only the backend hash matches h.Name: the cached file is removed so that
//     it can be fetched again, and nil is returned (or the removal error);
//   - only the cached hash matches h.Name: an error is returned telling the
//     user that the remote file is damaged;
//   - neither hash matches h.Name: an error is returned.
//
// Errors from reading either copy are returned unchanged.
func (b *Backend) verify(ctx context.Context, h restic.Handle) error {
	var remoteHash, localHash restic.ID

	// hash the copy stored in the backend
	err := b.Backend.Load(ctx, h, 0, 0, func(rd io.Reader) error {
		hash := sha256.New()
		if _, err := io.Copy(hash, rd); err != nil {
			return err
		}
		remoteHash = restic.IDFromHash(hash.Sum(nil))
		return nil
	})
	if err != nil {
		return err
	}

	// hash the copy stored in the local cache; this must not go through
	// b.Backend, otherwise the backend copy is just compared with itself
	rd, err := b.Cache.load(h, 0, 0)
	if err != nil {
		return err
	}
	hash := sha256.New()
	_, err = io.Copy(hash, rd)
	// always close the cache file, but report the copy error first
	if cerr := rd.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return err
	}
	localHash = restic.IDFromHash(hash.Sum(nil))

	if remoteHash == localHash {
		return nil
	}

	switch {
	case remoteHash.String() == h.Name:
		// the remote version is correct, but not the local version
		// delete the local version to repair the cache; if that fails the
		// damaged file would stay in the cache, so report the error
		if err := b.Cache.remove(h); err != nil {
			return fmt.Errorf("%v: removing damaged cache file: %w", h, err)
		}
		return nil
	case localHash.String() == h.Name:
		return fmt.Errorf("%v: remote file damaged, please re-upload the cached copy", h)
	default:
		return fmt.Errorf("%v: cached and remote file differ and are both invalid", h)
	}
}

// Load loads a file from the cache or the backend.
func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset int64, consumer func(rd io.Reader) error) error {
b.inProgressMutex.Lock()
Expand All @@ -160,7 +226,7 @@ func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset
debug.Log("downloading %v finished", h)
}

if b.Cache.Has(h) {
if b.Cache.Has(h) && !b.hasToVerify(h) {
debug.Log("Load(%v, %v, %v) from cache", h, length, offset)
rd, err := b.Cache.load(h, length, offset)
if err == nil {
Expand All @@ -181,13 +247,11 @@ func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset
}

debug.Log("auto-store %v in the cache", h)
err := b.cacheFile(ctx, h)
if err == nil {
return b.loadFromCacheOrDelegate(ctx, h, length, offset, consumer)
err := b.tryToCacheFile(ctx, h)
if err != nil {
return err
}

debug.Log("error caching %v: %v, falling back to backend", h, err)
return b.Backend.Load(ctx, h, length, offset, consumer)
return b.loadFromCacheOrDelegate(ctx, h, length, offset, consumer)
}

// Stat tests whether the backend has a file. If it does not exist but still
Expand Down
8 changes: 8 additions & 0 deletions internal/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"path/filepath"
"regexp"
"strconv"
"sync"
"time"

"github.com/pkg/errors"
Expand All @@ -20,6 +21,9 @@ type Cache struct {
path string
Base string
Created bool

verifiedFiles map[restic.Handle]struct{}
verifiedFilesLock sync.Mutex
}

const dirMode = 0700
Expand Down Expand Up @@ -158,6 +162,10 @@ func updateTimestamp(d string) error {
return fs.Chtimes(d, t, t)
}

// EnableVerification allocates the set used to record which cached files have
// been verified. Each call replaces the set with a new, empty one.
func (c *Cache) EnableVerification() {
c.verifiedFiles = make(map[restic.Handle]struct{})
}

// MaxCacheAge is the default age (30 days) after which cache directories are considered old.
const MaxCacheAge = 30 * 24 * time.Hour

Expand Down