extract: --skip-errors ignores corrupted chunks (w/ log message), see #840 #7481

Draft: wants to merge 1 commit into base: master
src/borg/archive.py (57 changes: 42 additions & 15 deletions)
@@ -788,6 +788,7 @@ def extract_item(
         hlm=None,
         pi=None,
         continue_extraction=False,
+        skip_integrity_errors=False,
     ):
         """
         Extract archive item.
@@ -800,6 +801,8 @@ def extract_item(
         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
         :param continue_extraction: continue a previously interrupted extraction of same archive
+        :param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError
+            (ignored for dry_run and stdout)
         """

         def same_item(item, st):
@@ -849,15 +852,15 @@ def same_item(item, st):
                            )
             if has_damaged_chunks:
                 raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
-            return
+            return True

         dest = self.cwd
         path = os.path.join(dest, item.path)
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.stat(path, follow_symlinks=False)
             if continue_extraction and same_item(item, st):
-                return  # done! we already have fully extracted this file in a previous run.
+                return True  # done! we already have fully extracted this file in a previous run.
             elif stat.S_ISDIR(st.st_mode):
                 os.rmdir(path)
             else:
@@ -878,20 +881,43 @@ def make_parent(path):
                 make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 with backup_io("open"):
                     fd = open(path, "wb")
                 with fd:
                     ids = [c.id for c in item.chunks]
-                    for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM):
+                    chunk_index = -1
+                    chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
+                    skipped_errors = False
+                    while True:
+                        try:
+                            chunk_index += 1
+                            data = next(chunk_iterator)
+                        except StopIteration:
+                            break
+                        except IntegrityError as err:
+                            if not skip_integrity_errors:
+                                raise
+                            c = item.chunks[chunk_index]
+                            size = c.size
+                            logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err)
+                            with backup_io("seek"):
+                                fd.seek(size, 1)
+                            skipped_errors = True
+                            # restart chunk data generator
+                            ids = [c.id for c in item.chunks[chunk_index + 1 :]]
+                            chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM)
+                        else:
+                            with backup_io("write"):
+                                size = len(data)
+                                if sparse and zeros.startswith(data):
+                                    # all-zero chunk: create a hole in a sparse file
+                                    fd.seek(size, 1)
+                                else:
+                                    fd.write(data)
                         if pi:
-                            pi.show(increase=len(data), info=[remove_surrogates(item.path)])
-                        with backup_io("write"):
-                            if sparse and zeros.startswith(data):
-                                # all-zero chunk: create a hole in a sparse file
-                                fd.seek(len(data), 1)
-                            else:
-                                fd.write(data)
+                            pi.show(increase=size, info=[remove_surrogates(item.path)])

                 with backup_io("truncate_and_attrs"):
                     pos = item_chunks_size = fd.tell()
                     fd.truncate(pos)
@@ -905,7 +931,7 @@ def make_parent(path):
                     )
                 if has_damaged_chunks:
                     raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
-            return
+            return not skipped_errors
         with backup_io:
             # No repository access beyond this point.
             if stat.S_ISDIR(mode):
@@ -919,7 +945,7 @@ def make_parent(path):
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
                     # unusual, but possible: this is a hardlinked symlink.
-                    return
+                    return True
                 target = item.target
                 try:
                     os.symlink(target, path)
@@ -930,18 +956,19 @@ def make_parent(path):
             make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 os.mkfifo(path)
                 self.restore_attrs(path, item)
         elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
             make_parent(path)
             with self.extract_helper(item, path, hlm) as hardlink_set:
                 if hardlink_set:
-                    return
+                    return True
                 os.mknod(path, item.mode, item.rdev)
                 self.restore_attrs(path, item)
         else:
             raise Exception("Unknown archive item type %r" % item.mode)
+        return True

     def restore_attrs(self, path, item, symlink=False, fd=None):
         """
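
The core of the archive.py change is the recovery loop above: when fetching a chunk raises IntegrityError, it logs the chunk id, seeks forward by the chunk's stored size (leaving a hole of zeroes), and restarts the fetch generator at the next chunk. Below is a minimal, self-contained sketch of that pattern; `Chunk`, `fetch_chunks`, and `extract` are hypothetical stand-ins for borg's pipeline, not its real API:

```python
import io
import logging
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


class IntegrityError(Exception):
    pass


@dataclass
class Chunk:
    id: bytes
    size: int
    data: Optional[bytes]  # None simulates a corrupted chunk


def fetch_chunks(chunks):
    """Yield chunk payloads; raise IntegrityError for a corrupted chunk."""
    for c in chunks:
        if c.data is None:
            raise IntegrityError(f"chunk {c.id.hex()} failed integrity check")
        yield c.data


def extract(chunks, fd, skip_integrity_errors=True):
    index = -1
    iterator = fetch_chunks(chunks)
    skipped = False
    while True:
        try:
            index += 1
            data = next(iterator)
        except StopIteration:
            break
        except IntegrityError as err:
            if not skip_integrity_errors:
                raise
            logger.warning("chunk %d: %s", index, err)
            fd.seek(chunks[index].size, 1)  # skip forward, leaving a hole
            skipped = True
            # a generator that raised is exhausted; rebuild it after the bad chunk
            iterator = fetch_chunks(chunks[index + 1 :])
        else:
            fd.write(data)
    return not skipped  # False tells the caller to set a warning exit code


chunks = [Chunk(b"\x01", 4, b"aaaa"), Chunk(b"\x02", 4, None), Chunk(b"\x03", 4, b"cccc")]
buf = io.BytesIO()
assert extract(chunks, buf) is False
assert buf.getvalue() == b"aaaa\x00\x00\x00\x00cccc"
```

Restarting the generator is not optional: once a generator raises from inside, it is exhausted and further next() calls only raise StopIteration, so the loop rebuilds it from the remaining chunk ids each time an error is skipped.
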
src/borg/archiver/extract_cmd.py (21 changes: 18 additions & 3 deletions)
@@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive):
         progress = args.progress
         output_list = args.output_list
         dry_run = args.dry_run
+        skip_errors = args.skip_errors
         stdout = args.stdout
         sparse = args.sparse
         strip_components = args.strip_components
@@ -76,9 +77,16 @@ def do_extract(self, args, repository, manifest, archive):
                     dirs.append(item)
                     archive.extract_item(item, stdout=stdout, restore_attrs=False)
                 else:
-                    archive.extract_item(
-                        item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
-                    )
+                    if not archive.extract_item(
+                        item,
+                        stdout=stdout,
+                        sparse=sparse,
+                        hlm=hlm,
+                        pi=pi,
+                        continue_extraction=continue_extraction,
+                        skip_integrity_errors=skip_errors,
+                    ):
+                        self.exit_code = EXIT_WARNING
             except (BackupOSError, BackupError) as e:
                 self.print_warning("%s: %s", remove_surrogates(orig_path), e)

@@ -175,6 +183,13 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
             action="store_true",
             help="continue a previously interrupted extraction of same archive",
         )
+        subparser.add_argument(
+            "--skip-errors",
+            dest="skip_errors",
+            action="store_true",
+            help="skip corrupted chunks with a log message (exit 1) instead of aborting "
+            "(no effect for --dry-run and --stdout)",
+        )
         subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
         subparser.add_argument(
             "paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
src/borg/testsuite/archiver/extract_cmd.py (22 changes: 22 additions & 0 deletions)
@@ -625,6 +625,28 @@ def test_overwrite(archivers, request):
     cmd(archiver, "extract", "test", exit_code=1)


+def test_extract_skip_errors(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", contents=b"a" * 280 + b"b" * 280)
+    cmd(archiver, "rcreate", "-e", "none")
+    cmd(archiver, "create", "--chunker-params", "7,9,8,128", "test", "input")
+    segment_files = sorted(os.listdir(os.path.join(archiver.repository_path, "data", "0")), reverse=True)
+    print(
+        ", ".join(
+            f"{fn}: {os.stat(os.path.join(archiver.repository_path, 'data', '0', fn)).st_size}b"
+            for fn in segment_files
+        )
+    )
+    name = segment_files[3]  # must be the segment file that has the file's chunks
+    with open(os.path.join(archiver.repository_path, "data", "0", name), "r+b") as fd:
+        fd.seek(100)
+        fd.write(b"XXXX")
+    with changedir("output"):
+        output = cmd(archiver, "extract", "--skip-errors", "test", exit_code=1)
+        assert "input/file1: chunk" in output
+        assert os.stat("input/file1").st_size == 560
+    cmd(archiver, "check", exit_code=1)
+
+
 # derived from test_extract_xattrs_errors()
 @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system, or this version of fakeroot")
 def test_do_not_fail_when_percent_is_in_xattr_name(archivers, request):
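
To run just the new test in a development checkout (assuming a working pytest setup for borg's test suite):

```console
$ python -m pytest -v -k test_extract_skip_errors src/borg/testsuite/archiver/extract_cmd.py
```
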