Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Back up and restore Windows Alternate Data Streams #4614

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
9 changes: 9 additions & 0 deletions changelog/unreleased/pull-4614
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Enhancement: Back up and restore Windows Alternate Data Streams

Restic did not back up Alternate Data Streams (ADS) on Windows. Restic now backs up ADS and restores them to the main files they belong to.
ADS are backed up like any other regular file, and the full name of the stream is stored as the name of the file.
During restore, the ADS are restored and attached to the original files as Alternate Data Streams.
In the progress and summary output, ADS are not included in the file counts, but the sizes of the ADS files are counted.

https://github.com/restic/restic/pull/4614
https://github.com/restic/restic/issues/1401
29 changes: 21 additions & 8 deletions internal/archiver/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,18 +236,20 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi
if err != nil {
return FutureNode{}, err
}
sort.Strings(names)
pathnames := arch.preProcessPaths(dir, names)
sort.Strings(pathnames)

nodes := make([]FutureNode, 0, len(names))
nodes := make([]FutureNode, 0, len(pathnames))

for _, name := range names {
for _, pathname := range pathnames {
// test if context has been cancelled
if ctx.Err() != nil {
debug.Log("context has been cancelled, aborting")
return FutureNode{}, ctx.Err()
}
name := getNameFromPathname(pathname)
pathname := arch.processPath(dir, pathname)

pathname := arch.FS.Join(dir, name)
oldNode := previous.Find(name)
snItem := join(snPath, name)
fn, excluded, err := arch.Save(ctx, snItem, pathname, oldNode)
Expand All @@ -260,7 +262,7 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi
continue
}

return FutureNode{}, err
return FutureNode{}, errors.Wrap(err, "error saving a target (file or directory)")
}

if excluded {
Expand Down Expand Up @@ -349,6 +351,11 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
if err != nil {
return FutureNode{}, false, err
}
// For Windows ADS files, the include/exclude checks use the main file that the ADS files are attached to.
// On Unix there are no ADS, so targetMain is always the same as target.
// After the exclusion checks, the file is processed under its full file name, including the ADS portion if any.
targetMain := fs.SanitizeMainFileName(target)
abstargetMain := fs.SanitizeMainFileName(abstarget)

// exclude files by path before running Lstat to reduce number of lstat calls
if !arch.SelectByName(abstarget) {
Expand All @@ -357,7 +364,7 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
}

// get file info and run remaining select functions that require file information
fi, err := arch.FS.Lstat(target)
fiMain, err := arch.FS.Lstat(targetMain)
if err != nil {
debug.Log("lstat() for %v returned error: %v", target, err)
err = arch.error(abstarget, err)
Expand All @@ -366,10 +373,15 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
}
return FutureNode{}, true, nil
}
if !arch.Select(abstarget, fi) {
if !arch.Select(abstargetMain, fiMain) {
debug.Log("%v is excluded", target)
return FutureNode{}, true, nil
}
var fi os.FileInfo
fi, shouldReturn, fn, excluded, err := arch.processTargets(target, targetMain, abstarget, fiMain)
if shouldReturn {
return fn, excluded, err
}

switch {
case fs.IsRegularFile(fi):
Expand Down Expand Up @@ -659,8 +671,9 @@ func readdirnames(filesystem fs.FS, dir string, flags int) ([]string, error) {
func resolveRelativeTargets(filesys fs.FS, targets []string) ([]string, error) {
debug.Log("targets before resolving: %v", targets)
result := make([]string, 0, len(targets))
preProcessTargets(filesys, &targets)
for _, target := range targets {
target = filesys.Clean(target)
target = processTarget(filesys, target)
pc, _ := pathComponents(filesys, target, false)
if len(pc) > 0 {
result = append(result, target)
Expand Down
48 changes: 48 additions & 0 deletions internal/archiver/archiver_notwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//go:build !windows
// +build !windows

package archiver

import (
"os"

"github.com/restic/restic/internal/fs"
)

// preProcessTargets is the hook that runs once before the targets are looped over.
// On non-Windows systems there is nothing to prepare up front: each target is
// handled individually inside the loop, so no additional pass over the
// targets is made here.
func preProcessTargets(_ fs.FS, _ *[]string) {
	// intentionally empty
}

// processTarget handles a single target inside the loop.
// On non-Windows systems it returns the target after cleaning it with the
// given filesystem.
func processTarget(filesys fs.FS, target string) string {
	cleaned := filesys.Clean(target)
	return cleaned
}

// preProcessPaths prepares the directory entries before they are looped over.
// On non-Windows systems the names are handed back untouched; the paths are
// built one at a time inside the loop, which avoids an extra pass here.
func (arch *Archiver) preProcessPaths(_ string, names []string) (paths []string) {
	paths = names
	return paths
}

// processPath builds the path for one entry inside the loop.
// On non-Windows systems the path is assembled here by joining dir and name
// with the archiver's filesystem.
func (arch *Archiver) processPath(dir string, name string) (path string) {
	path = arch.FS.Join(dir, name)
	return path
}

// getNameFromPathname derives the entry name from pathname.
// On non-Windows systems the pathname already is the name, so it is
// returned unchanged.
func getNameFromPathname(pathname string) (name string) {
	name = pathname
	return name
}

// processTargets is a no-op on non-Windows systems: it hands back the file
// info of the main file unchanged and never asks the caller to return early
// (shouldReturn and excluded are false, fn is empty and err is nil).
func (arch *Archiver) processTargets(_ string, _ string, _ string, fiMain os.FileInfo) (fi os.FileInfo, shouldReturn bool, fn FutureNode, excluded bool, err error) {
	fi = fiMain
	return fi, false, FutureNode{}, false, nil
}
102 changes: 102 additions & 0 deletions internal/archiver/archiver_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package archiver

import (
"os"
"path/filepath"

"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/fs"
)

// preProcessTargets performs preprocessing of the targets before the loop.
// On Windows, it looks up the Alternate Data Streams (ADS) of each cleaned
// target and appends the resulting ADS paths to the targets slice. The
// existing entries of the slice are not modified.
// The ADS are added as independent Nodes, with the full ADS name as the
// name of the file.
// During restore, the ADS files are restored using the ADS name, which
// automatically attaches them as ADS to the main file.
func preProcessTargets(filesys fs.FS, targets *[]string) {
for _, target := range *targets {
target = filesys.Clean(target)
addADSStreams(target, targets)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

couldn't we discover the ads stream just in time when a file/dir is backed up and thereby get rid of all this preprocessing? Shouldn't be too complex to add that to SaveDir.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the beginning, resolveRelativeTargets is called from scanner's Scan method and from archiver Snapshot method and the sorted cleanTargets obtained in that step are then passed to NewTree.
We need these ADS entries to be available before the Node creation in the tree.
We do add this in SaveDir as well, but it is also needed in resolveRelativeTargets.

}
}

// processTarget processes each target in the loop and returns it in cleaned
// form.
//
// preProcessTargets cleans only its loop-local copy of each target (it needs
// the cleaned value to look up the streams), so the entries stored in the
// targets slice are still exactly what the caller passed in. Cleaning here
// keeps the Windows behaviour in line with the non-Windows implementation,
// which also cleans every target inside the loop.
//
// The ADS paths appended by preProcessTargets are built from an already
// cleaned path, so cleaning them again is expected to leave them unchanged.
// NOTE(review): confirm this for stream names on Windows.
func processTarget(filesys fs.FS, target string) string {
	return filesys.Clean(target)
}

// getNameFromPathname derives the entry name from pathname.
// On Windows the pathname is a full path, so the name is its last element
// as reported by filepath.Base.
func getNameFromPathname(pathname string) (name string) {
	name = filepath.Base(pathname)
	return name
}

// preProcessPaths prepares the directory entries before they are looped over.
// On Windows the ADS paths have to be part of the list before it is sorted,
// so the full paths including ADS are collected here.
func (arch *Archiver) preProcessPaths(dir string, names []string) (paths []string) {
	paths = arch.getPathsIncludingADS(dir, names)
	return paths
}

// processPath returns the path for one entry inside the loop.
// On Windows the full paths were already prepared before the loop, so the
// given value is returned as is and the directory argument is ignored.
func (arch *Archiver) processPath(_ string, name string) (path string) {
	path = name
	return path
}

// getPathsIncludingADS joins every name with dir and returns the resulting
// full paths. Each full path is followed by the ADS paths that addADSStreams
// appends for it.
func (arch *Archiver) getPathsIncludingADS(dir string, names []string) []string {
	result := make([]string, 0, len(names))

	for i := range names {
		full := arch.FS.Join(dir, names[i])
		result = append(result, full)
		// Appends the stream paths of this entry, if any, right after it.
		addADSStreams(full, &result)
	}
	return result
}

// addADSStreams looks up the ADS stream names of pathname and appends one
// path per stream (pathname followed by the stream name) to paths.
// If the lookup does not succeed, nothing is appended; an accompanying error
// is only written to the debug log.
func addADSStreams(pathname string, paths *[]string) {
	ok, streams, err := fs.GetADStreamNames(pathname)
	if !ok {
		if err != nil {
			debug.Log("No ADS found for path: %s, err: %v", pathname, err)
		}
		return
	}
	if len(streams) == 0 {
		return
	}

	debug.Log("ADS Streams for file: %s, streams: %v", pathname, streams)
	for _, stream := range streams {
		*paths = append(*paths, pathname+stream)
	}
}

// processTargets provides the file info to use for target on Windows.
//
// When target equals targetMain, fiMain is returned as is. Otherwise the
// target is treated as an ADS file and gets its own Lstat call, since fiMain
// describes the main file. If that Lstat fails, the error is passed to
// arch.error with abstarget: a non-nil result is returned as the error (with
// a stack attached), a nil result marks the target as excluded. In both
// failure cases shouldReturn is true so the caller skips the remaining
// processing of the file.
func (arch *Archiver) processTargets(target string, targetMain string, abstarget string, fiMain os.FileInfo) (fi os.FileInfo, shouldReturn bool, fn FutureNode, excluded bool, err error) {
	if target == targetMain {
		// Not an ADS file: the info of the main file is the right one.
		return fiMain, false, FutureNode{}, false, nil
	}

	// ADS file: Lstat again to get the file info of the stream itself.
	adsInfo, statErr := arch.FS.Lstat(target)
	if statErr == nil {
		return adsInfo, false, FutureNode{}, false, nil
	}

	debug.Log("lstat() for %v returned error: %v", target, statErr)
	if handled := arch.error(abstarget, statErr); handled != nil {
		return nil, true, FutureNode{}, false, errors.WithStack(handled)
	}
	// The error was not propagated; report the ADS file as excluded.
	return nil, true, FutureNode{}, true, nil
}