Skip to content

Commit

Permalink
extract: --skip-errors ignores corrupted chunks (w/ log message), see #840
Browse files Browse the repository at this point in the history

Forward port of a change implemented by @enkore back in 2016:

enkore@09b21b1
  • Loading branch information
ThomasWaldmann committed Mar 28, 2023
1 parent 80c08ab commit 33f823d
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 14 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Expand Up @@ -126,6 +126,7 @@ per_file_ignores =
src/borg/archiver/debug_cmd.py:F405
src/borg/archiver/delete_cmd.py:F405
src/borg/archiver/diff_cmd.py:F405
src/borg/archiver/extract_cmd.py:F405
src/borg/archiver/help_cmd.py:E501,F405
src/borg/archiver/key_cmds.py:F405
src/borg/archiver/prune_cmd.py:F405
Expand Down
51 changes: 39 additions & 12 deletions src/borg/archive.py
Expand Up @@ -791,6 +791,7 @@ def extract_item(
stripped_components=0,
original_path=None,
pi=None,
skip_integrity_errors=False,
):
"""
Extract archive item.
Expand All @@ -804,6 +805,8 @@ def extract_item(
:param stripped_components: stripped leading path components to correct hard link extraction
:param original_path: 'path' key as stored in archive
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
:param skip_integrity_errors: skip over corrupted chunks instead of raising IntegrityError
(ignored for dry_run and stdout)
"""
has_damaged_chunks = "chunks_healthy" in item
if dry_run or stdout:
Expand Down Expand Up @@ -832,7 +835,7 @@ def extract_item(
)
if has_damaged_chunks:
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
return
return True

original_path = original_path or item.path
dest = self.cwd
Expand Down Expand Up @@ -867,15 +870,38 @@ def make_parent(path):
fd = open(path, "wb")
with fd:
ids = [c.id for c in item.chunks]
for data in self.pipeline.fetch_many(ids, is_preloaded=True):
chunk_index = -1
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True)
skipped_errors = False
while True:
try:
chunk_index += 1
data = next(chunk_iterator)
except StopIteration:
break
except IntegrityError as err:
if not skip_integrity_errors:
raise
c = item.chunks[chunk_index]
size = c.size
logger.warning("%s: chunk %s: %s", remove_surrogates(item.path), bin_to_hex(c.id), err)
with backup_io("seek"):
fd.seek(size, 1)
skipped_errors = True
# restart chunk data generator
ids = [c.id for c in item.chunks[chunk_index + 1 :]]
chunk_iterator = self.pipeline.fetch_many(ids, is_preloaded=True)
else:
with backup_io("write"):
size = len(data)
if sparse and zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(size, 1)
else:
fd.write(data)
if pi:
pi.show(increase=len(data), info=[remove_surrogates(item.path)])
with backup_io("write"):
if sparse and zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
else:
fd.write(data)
pi.show(increase=size, info=[remove_surrogates(item.path)])

with backup_io("truncate_and_attrs"):
pos = item_chunks_size = fd.tell()
fd.truncate(pos)
Expand All @@ -889,7 +915,7 @@ def make_parent(path):
)
if has_damaged_chunks:
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
return
return not skipped_errors
with backup_io:
# No repository access beyond this point.
if stat.S_ISDIR(mode):
Expand All @@ -914,18 +940,19 @@ def make_parent(path):
make_parent(path)
with self.extract_helper(item, path, hlm) as hardlink_set:
if hardlink_set:
return
return True
os.mkfifo(path)
self.restore_attrs(path, item)
elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
make_parent(path)
with self.extract_helper(item, path, hlm) as hardlink_set:
if hardlink_set:
return
return True
os.mknod(path, item.mode, item.rdev)
self.restore_attrs(path, item)
else:
raise Exception("Unknown archive item type %r" % item.mode)
return True

def restore_attrs(self, path, item, symlink=False, fd=None):
"""
Expand Down
14 changes: 12 additions & 2 deletions src/borg/archiver/extract_cmd.py
Expand Up @@ -39,6 +39,7 @@ def do_extract(self, args, repository, manifest, archive):
progress = args.progress
output_list = args.output_list
dry_run = args.dry_run
skip_errors = args.skip_errors
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
Expand Down Expand Up @@ -75,15 +76,17 @@ def do_extract(self, args, repository, manifest, archive):
dirs.append(item)
archive.extract_item(item, stdout=stdout, restore_attrs=False)
else:
archive.extract_item(
if not archive.extract_item(
item,
stdout=stdout,
sparse=sparse,
hlm=hlm,
stripped_components=strip_components,
original_path=orig_path,
pi=pi,
)
skip_integrity_errors=skip_errors,
):
self.exit_code = EXIT_WARNING
except (BackupOSError, BackupError) as e:
self.print_warning("%s: %s", remove_surrogates(orig_path), e)

Expand Down Expand Up @@ -174,6 +177,13 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
action="store_true",
help="create holes in output sparse file from all-zero chunks",
)
subparser.add_argument(
"--skip-errors",
dest="skip_errors",
action="store_true",
help="skip corrupted chunks with a log message (exit 1) instead of aborting "
"(no effect for --dry-run and --stdout)",
)
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
subparser.add_argument(
"paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
Expand Down
20 changes: 20 additions & 0 deletions src/borg/testsuite/archiver/extract_cmd.py
Expand Up @@ -585,6 +585,26 @@ def test_overwrite(self):
with changedir("output"):
self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=1)

def test_extract_skip_errors(self):
    """extract --skip-errors: corrupted chunks are logged and skipped; exit code is 1 (warning)."""
    # 560 bytes of content; the tiny chunker params used on "create" below force
    # the file to be split into multiple chunks so a single chunk can be corrupted.
    self.create_regular_file("file1", contents=b"a" * 280 + b"b" * 280)
    # NOTE: "-e" "none" are adjacent string literals, concatenated to "-enone"
    # (argparse short option with attached value), i.e. an unencrypted repository.
    self.cmd(f"--repo={self.repository_location}", "rcreate", "-e" "none")
    self.cmd(f"--repo={self.repository_location}", "create", "--chunker-params", "7,9,8,128", "test", "input")
    segment_files = sorted(os.listdir(os.path.join(self.repository_path, "data", "0")), reverse=True)
    # Debug aid: print each segment file's size so a wrong pick below is easy to diagnose.
    print(
        ", ".join(
            f"{fn}: {os.stat(os.path.join(self.repository_path, 'data', '0', fn)).st_size}b" for fn in segment_files
        )
    )
    # assumes the 4th-newest segment file holds file1's data chunks — fragile,
    # depends on repository segment layout; TODO confirm if layout changes
    name = segment_files[3]  # must be the segment file that has the file's chunks
    with open(os.path.join(self.repository_path, "data", "0", name), "r+b") as fd:
        # Overwrite 4 bytes in place so at least one chunk fails its integrity check.
        fd.seek(100)
        fd.write(b"XXXX")
    with changedir("output"):
        # --skip-errors must log the bad chunk and continue, exiting with warning code 1.
        output = self.cmd(f"--repo={self.repository_location}", "extract", "--skip-errors", "test", exit_code=1)
        assert "input/file1: chunk" in output
        # The skipped chunk is seeked over, so the extracted file keeps its full size.
        assert os.stat("input/file1").st_size == 560
    # The repository really is damaged, so "check" must report an error as well.
    self.cmd(f"--repo={self.repository_location}", "check", exit_code=1)

# derived from test_extract_xattrs_errors()
@pytest.mark.skipif(
not xattr.XATTR_FAKEROOT, reason="xattr not supported on this system or on this version of fakeroot"
Expand Down

0 comments on commit 33f823d

Please sign in to comment.