Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backup --files-from-verbatim allow merging snapshots #3405

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions changelog/unreleased/pull-3405
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Enhancement: Add incremental backups

Restic could already back up files from a list passed as a parameter, which created
a new snapshot containing only the newly backed-up files. Now, with the --merge
option, the new snapshot also contains the files of the parent snapshot that are
not included in the passed list file.

https://github.com/restic/restic/issues/3118
https://forum.restic.net/t/backup-parent-behavior/3286
22 changes: 14 additions & 8 deletions cmd/restic/cmd_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type BackupOptions struct {
IgnoreInode bool
IgnoreCtime bool
UseFsSnapshot bool
Merge bool
DryRun bool
}

Expand All @@ -117,6 +118,7 @@ func init() {
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
f.BoolVar(&backupOptions.Merge, "merge", false, `merge the backuped files with the parent snapshot`)

f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually")
Expand Down Expand Up @@ -463,9 +465,12 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er
return nil, errors.Fatal("nothing to backup, please specify target files/dirs")
}

targets, err = filterExisting(targets)
if err != nil {
return nil, err
// Skip filter existing if merge option is true
if !opts.Merge {
targets, err = filterExisting(targets)
if err != nil {
return nil, err
}
}

return targets, nil
Expand Down Expand Up @@ -705,11 +710,12 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
}

snapshotOpts := archiver.SnapshotOptions{
Excludes: opts.Excludes,
Tags: opts.Tags.Flatten(),
Time: timeStamp,
Hostname: opts.Host,
ParentSnapshot: *parentSnapshotID,
Excludes: opts.Excludes,
Tags: opts.Tags.Flatten(),
Time: timeStamp,
Hostname: opts.Host,
ParentSnapshot: *parentSnapshotID,
MergeWithParent: opts.Merge,
}

if !gopts.JSON {
Expand Down
10 changes: 10 additions & 0 deletions doc/040_backup.rst
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,16 @@ You can combine all three options with each other and with the normal file argum
$ restic backup --files-from /tmp/files_to_backup /tmp/some_additional_file
$ restic backup --files-from /tmp/glob-pattern --files-from-raw /tmp/generated-list /tmp/some_additional_file

Merging Snapshots
*****************

With the option ``--files-from-verbatim``, restic creates a new snapshot containing only the files listed
in the file passed to that option.
The backup command has a ``--merge`` option that instructs restic to merge the files specified in the
list passed to ``--files-from-verbatim`` with a parent snapshot, which is either chosen automatically by restic or
specified manually through the ``--parent`` option. The paths of the merged snapshot will be the same as those of its parent.


Comparing Snapshots
*******************

Expand Down
147 changes: 120 additions & 27 deletions internal/archiver/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,58 @@ func (arch *Archiver) error(item string, fi os.FileInfo, err error) error {
return errf
}

// fileOrDirNotExist reports whether an Lstat of path on the archiver's
// filesystem fails with a "does not exist" error. It returns false when the
// Lstat succeeds and also when it fails for any other reason.
func (arch *Archiver) fileOrDirNotExist(path string) bool {
	if _, statErr := arch.FS.Lstat(path); statErr != nil {
		// Unwrap first so a wrapped error is still recognised by os.IsNotExist.
		return os.IsNotExist(errors.Cause(statErr))
	}
	return false
}

// fileOrDirExist reports whether an Lstat of path on the archiver's filesystem
// succeeds. Every Lstat error, not only "does not exist", yields false.
func (arch *Archiver) fileOrDirExist(path string) bool {
	if _, statErr := arch.FS.Lstat(path); statErr != nil {
		return false
	}
	return true
}

// mergeNodes combines two node lists into one list in which every node name
// occurs at most once; on a name clash the node from listNode2 wins.
//
// Nodes from listNode1 are only kept when join(path, node.Name) can still be
// Lstat'ed on the archiver's filesystem. A placeholder node in listNode2 is
// never added to the result; instead it removes any node of the same name
// taken from listNode1. The result is sorted with sort.Sort(restic.Nodes(...))
// and is nil when no node survives.
func (arch *Archiver) mergeNodes(listNode1, listNode2 []*restic.Node, path string) (mergedNodes []*restic.Node) {
	byName := make(map[string]*restic.Node)

	// Entries of the first list may belong to a directory that was deleted
	// and then recreated, so only keep those that are still present on disk.
	for _, old := range listNode1 {
		if !arch.fileOrDirExist(join(path, old.Name)) {
			continue
		}
		byName[old.Name] = old
	}

	for _, cur := range listNode2 {
		if !cur.IsPlaceholder() {
			// regular node: add it, or replace the entry from the first list
			byName[cur.Name] = cur
			continue
		}

		// placeholder: drop the entry if the first list contributed one
		debug.Log("removed : %s", cur.Path)
		delete(byName, cur.Name)
	}

	for _, kept := range byName {
		mergedNodes = append(mergedNodes, kept)
	}

	// map iteration order is random, so bring the nodes into sorted order
	sort.Sort(restic.Nodes(mergedNodes))
	return mergedNodes
}

// saveTree stores a tree in the repo. It checks the index and the known blobs
// before saving anything.
func (arch *Archiver) saveTree(ctx context.Context, t *restic.Tree) (restic.ID, ItemStats, error) {
var s ItemStats

buf, err := json.Marshal(t)
if err != nil {
return restic.ID{}, s, errors.Wrap(err, "MarshalJSON")
Expand Down Expand Up @@ -494,6 +542,15 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
return fn, false, nil
}

// SavePlaceholder returns a FutureNode whose node is the result of
// restic.NodePlaceholder(path, name) and whose snapshot path is path. Nothing
// is read from the filesystem here; excluded is always false and err is
// always nil.
func (arch *Archiver) SavePlaceholder(path, name string) (fn FutureNode, excluded bool, err error) {
	placeholder := restic.NodePlaceholder(path, name)

	fn.snPath = path
	fn.node = placeholder
	return fn, false, nil
}

// fileChanged tries to detect whether a file's content has changed compared
// to the contents of node, which describes the same path in the parent backup.
// It should only be run for regular files.
Expand Down Expand Up @@ -546,8 +603,9 @@ func (arch *Archiver) statDir(dir string) (os.FileInfo, error) {
}

// SaveTree stores a Tree in the repo, returned is the tree. snPath is the path
// within the current snapshot.
func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, previous *restic.Tree) (*restic.Tree, error) {
// within the current snapshot. If mergeWithPrevious is true, will merge the tree
// with the provided previous tree, keeping from the previous only files not included in atree.
func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, previous *restic.Tree, mergeWithPrevious bool) (*restic.Tree, error) {
debug.Log("%v (%v nodes), parent %v", snPath, len(atree.Nodes), previous)

tree := restic.NewTree()
Expand All @@ -565,7 +623,15 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree,

// this is a leaf node
if subatree.Leaf() {
fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, previous.Find(name))

var fn FutureNode
var excluded bool
var err error
if mergeWithPrevious && arch.fileOrDirNotExist(subatree.Path) {
fn, excluded, err = arch.SavePlaceholder(subatree.Path, name)
} else {
fn, excluded, err = arch.Save(ctx, join(snPath, name), subatree.Path, previous.Find(name))
}

if err != nil {
err = arch.error(subatree.Path, fn.fi, err)
Expand Down Expand Up @@ -599,36 +665,47 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree,
}

// not a leaf node, archive subtree
subtree, err := arch.SaveTree(ctx, join(snPath, name), &subatree, oldSubtree)
if err != nil {
return nil, err
}

id, nodeStats, err := arch.saveTree(ctx, subtree)
subtree, err := arch.SaveTree(ctx, join(snPath, name), &subatree, oldSubtree, mergeWithPrevious)
if err != nil {
return nil, err
}

if subatree.FileInfoPath == "" {
return nil, errors.Errorf("FileInfoPath for %v/%v is empty", snPath, name)
if mergeWithPrevious && oldSubtree != nil {
debug.Log("merging nodes")
subtree.Nodes = arch.mergeNodes(oldSubtree.Nodes, subtree.Nodes, subatree.FileInfoPath)
}

debug.Log("%v, saved subtree %v as %v", snPath, subtree, id.Str())

fi, err := arch.statDir(subatree.FileInfoPath)
if err != nil {
if err != nil && !mergeWithPrevious {
return nil, err
}

debug.Log("%v, dir node data loaded from %v", snPath, subatree.FileInfoPath)

node, err := arch.nodeFromFileInfo(subatree.FileInfoPath, fi)
if err != nil {
return nil, err
var node *restic.Node
var nodeStats ItemStats
var id restic.ID
if err != nil && mergeWithPrevious {
// the dir does not exist
node = restic.NodePlaceholder(subatree.FileInfoPath, name)
} else {
node, err = arch.nodeFromFileInfo(subatree.FileInfoPath, fi)
if err != nil {
return nil, err
}
id, nodeStats, err = arch.saveTree(ctx, subtree)
if err != nil {
return nil, err
}
node.Subtree = &id
}

node.Name = name
node.Subtree = &id

if subatree.FileInfoPath == "" {
return nil, errors.Errorf("FileInfoPath for %v/%v is empty", snPath, name)
}

debug.Log("%v, saved subtree %v as %v", snPath, subtree, id.Str())

err = tree.Insert(node)
if err != nil {
Expand Down Expand Up @@ -725,11 +802,12 @@ func resolveRelativeTargets(filesys fs.FS, targets []string) ([]string, error) {

// SnapshotOptions collect attributes for a new snapshot.
type SnapshotOptions struct {
Tags restic.TagList
Hostname string
Excludes []string
Time time.Time
ParentSnapshot restic.ID
Tags restic.TagList
Hostname string
Excludes []string
Time time.Time
ParentSnapshot restic.ID
MergeWithParent bool
}

// loadParentTree loads a tree referenced by snapshot id. If id is null, nil is returned.
Expand Down Expand Up @@ -781,7 +859,7 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
return nil, restic.ID{}, err
}

atree, err := NewTree(arch.FS, cleanTargets)
atree, err := NewTree(arch.FS, cleanTargets, opts.MergeWithParent)
if err != nil {
return nil, restic.ID{}, err
}
Expand All @@ -796,7 +874,8 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
arch.runWorkers(wctx, &t)

debug.Log("starting snapshot")
tree, err := arch.SaveTree(wctx, "/", atree, arch.loadParentTree(wctx, opts.ParentSnapshot))
parentTree := arch.loadParentTree(wctx, opts.ParentSnapshot)
tree, err := arch.SaveTree(wctx, "/", atree, parentTree, opts.MergeWithParent)
if err != nil {
return err
}
Expand All @@ -805,6 +884,11 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
return errors.New("snapshot is empty")
}

if opts.MergeWithParent && parentTree != nil {
debug.Log("merging nodes")
tree.Nodes = arch.mergeNodes(parentTree.Nodes, tree.Nodes, atree.FileInfoPath)
}

rootTreeID, stats, err = arch.saveTree(wctx, tree)
// trigger shutdown but don't set an error
t.Kill(nil)
Expand All @@ -826,7 +910,16 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
return nil, restic.ID{}, err
}

sn, err := restic.NewSnapshot(targets, opts.Tags, opts.Hostname, opts.Time)
paths := targets
if opts.MergeWithParent {
sn, err := restic.LoadSnapshot(ctx, arch.Repo, opts.ParentSnapshot)
if err != nil {
return nil, restic.ID{}, err
}
paths = sn.Paths
}

sn, err := restic.NewSnapshot(paths, opts.Tags, opts.Hostname, opts.Time)
if err != nil {
return nil, restic.ID{}, err
}
Expand Down