Merge pull request #8468 from ThomasWaldmann/check-improvements
Check improvements
ThomasWaldmann authored Oct 9, 2024
2 parents 65d0ce6 + 7288f4f commit 3cae96c
Showing 3 changed files with 24 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/borg/archive.py
@@ -1660,7 +1660,9 @@ def check(
         self.check_all = not any((first, last, match, older, newer, oldest, newest))
         self.repair = repair
         self.repository = repository
-        self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, cache_immediately=not repair)
+        # Repository.check already did a full repository-level check and has built and cached a fresh chunkindex -
+        # we can use that here, so we don't disable the caches (also no need to cache immediately, again):
+        self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=False, cache_immediately=False)
         self.key = self.make_key(repository)
         self.repo_objs = RepoObj(self.key)
         if verify_data:
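Side by side, the intent of the old and new call (a sketch; the parameter meanings are inferred from their names and the comment above, and `repository`/`repair` stand in for the checker's attributes):

    # old: always rebuild the chunkindex from the repository, ignoring caches, and
    # cache it immediately unless a repair is about to change the contents anyway:
    chunks = build_chunkindex_from_repo(repository, disable_caches=True, cache_immediately=not repair)

    # new: Repository.check has just built and cached a fresh chunkindex (see the
    # repository.py hunks below), so load that cached copy and skip re-caching:
    chunks = build_chunkindex_from_repo(repository, disable_caches=False, cache_immediately=False)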
5 changes: 3 additions & 2 deletions src/borg/archiver/check_cmd.py
@@ -41,6 +41,7 @@ def do_check(self, args, repository):
             raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
+            # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
             # also, there is no max_duration support in the archives check code anyway.
             raise CommandError("--repository-only is required for --max-duration support.")
         if not args.archives_only:
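Spelled out, the combinations this guard accepts and rejects (illustrative comments only; the exact CLI syntax for selecting the repository is left out):

    # borg check --repository-only --max-duration 3600  -> ok: partial, resumable repo check
    # borg check --max-duration 3600                    -> CommandError: the archives check that
    #                                                      would follow needs a complete repo pass
    # borg check --repository-only                      -> ok: full repo check, no archives check
    # borg check                                        -> ok: full repo check + archives check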
@@ -77,8 +78,8 @@ def build_parser_check(self, subparsers, common_parser, mid_common_parser):
     the repository. The read data is checked by size and hash. Bit rot and other
     types of accidental damage can be detected this way. Running the repository
     check can be split into multiple partial checks using ``--max-duration``.
-    When checking a remote repository, please note that the checks run on the
-    server and do not cause significant network traffic.
+    When checking a ssh:// remote repository, please note that the checks run on
+    the server and do not cause significant network traffic.
 
     2. Checking consistency and correctness of the archive metadata and optionally
     archive data (requires ``--verify-data``). This includes ensuring that the
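A condensed sketch of how the two numbered phases above map onto do_check (hypothetical wiring; ArchiveChecker and the exact signatures are assumptions based on the src/borg/archive.py hunk in this diff):

    def do_check_sketch(args, repository):
        # phase 1: repository-level check of all stored objects (xxh64), optionally
        # split into partial runs via --max-duration, resuming where the last run stopped.
        if not args.archives_only:
            repository.check(repair=args.repair, max_duration=args.max_duration)
        # phase 2: archive metadata check (and data, with --verify-data); it needs the
        # complete ChunkIndex that a full phase-1 pass builds and caches.
        if not args.repo_only:
            ArchiveChecker().check(repository, verify_data=args.verify_data, repair=args.repair)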
18 changes: 18 additions & 0 deletions src/borg/repository.py
@@ -8,6 +8,7 @@

 from .checksums import xxh64
 from .constants import *  # NOQA
+from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Error, ErrorWithTraceback, IntegrityError
 from .helpers import Location
 from .helpers import bin_to_hex, hex_to_bin
@@ -306,6 +307,12 @@ def check_object(obj):
         t_start = time.monotonic()
         t_last_checkpoint = t_start
         objs_checked = objs_errors = 0
+        chunks = ChunkIndex()
+        # we don't do refcounting anymore, nor can we know here whether any archive
+        # is using this object, but we assume it is in use and set the refcount to
+        # MAX_VALUE. As we don't do garbage collection here, this is not a problem.
+        # We also don't know the plaintext size, so we set it to 0.
+        init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
         infos = self.store.list("data")
         try:
             for info in infos:
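What the placeholder entry means in practice (ChunkIndex and ChunkIndexEntry are the real imports added above; the 32-byte key below is a stand-in for illustration):

    from borg.hashindex import ChunkIndex, ChunkIndexEntry

    chunks = ChunkIndex()
    # "assume referenced": exact refcounts are no longer tracked, and check() does no
    # garbage collection, so overstating the refcount is harmless.
    init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
    chunk_id = bytes(32)  # stands in for hex_to_bin(info.name), the object's binary id
    chunks[chunk_id] = init_entry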
@@ -338,6 +345,12 @@ def check_object(obj):
                             self.store.delete(key)
                     else:
                         log_error("reloading did help, inconsistent behaviour detected!")
+                if not (obj_corrupted and repair):
+                    # add all existing objects to the index.
+                    # borg check: the index may have corrupted objects (we did not delete them)
+                    # borg check --repair: the index will only have non-corrupted objects.
+                    id = hex_to_bin(info.name)
+                    chunks[id] = init_entry
                 now = time.monotonic()
                 if now > t_last_checkpoint + 300:  # checkpoint every 5 mins
                     t_last_checkpoint = now
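The guard `not (obj_corrupted and repair)` amounts to this inclusion rule, written out:

    # obj_corrupted  repair -> object added to the ChunkIndex?
    # False          False  -> yes (healthy object)
    # False          True   -> yes (healthy object)
    # True           False  -> yes (plain check keeps corrupted objects, so index them)
    # True           True   -> no  (repair just deleted the object, keep it out)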
@@ -353,6 +366,11 @@ def check_object(obj):
self.store.delete("config/last-key-checked")
except StoreObjectNotFound:
pass
if not partial:
# if we did a full pass in one go, we built a complete, uptodate ChunkIndex, cache it!
from .cache import write_chunkindex_to_repo_cache

write_chunkindex_to_repo_cache(self, chunks, compact=True, clear=True, force_write=True)
except StoreObjectNotFound:
# it can be that there is no "data/" at all, then it crashes when iterating infos.
pass
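Why the cache write is gated on `not partial`: a `--max-duration` run only visits part of `data/`, so its index would be incomplete and must not be cached. A sketch of the failure this prevents (illustration only):

    # partial run stopped early at some key K (saved as config/last-key-checked):
    #   objects visited before K -> present in chunks
    #   objects after K          -> missing from chunks (never visited this run)
    # caching such an index would make the next archives check treat every
    # unvisited chunk as missing, so it is only written when partial is False.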