diff --git a/src/borg/archiver/__init__.py b/src/borg/archiver/__init__.py index 410ae846a8..6111199c45 100644 --- a/src/borg/archiver/__init__.py +++ b/src/borg/archiver/__init__.py @@ -125,6 +125,7 @@ class Archiver( def __init__(self, lock_wait=None, prog=None): self.lock_wait = lock_wait self.prog = prog + self.start_backup = None def print_warning(self, msg, *args, **kw): warning_code = kw.get("wc", EXIT_WARNING) # note: wc=None can be used to not influence exit code diff --git a/src/borg/archiver/_common.py b/src/borg/archiver/_common.py index 2b3920db78..f8195b6218 100644 --- a/src/borg/archiver/_common.py +++ b/src/borg/archiver/_common.py @@ -168,6 +168,7 @@ def wrapper(self, args, **kwargs): manifest_, progress=getattr(args, "progress", False), cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED), + start_backup=getattr(self, "start_backup", None), iec=getattr(args, "iec", False), ) as cache_: return method(self, args, repository=repository, cache=cache_, **kwargs) diff --git a/src/borg/archiver/create_cmd.py b/src/borg/archiver/create_cmd.py index 1fa83000d1..ce64795abd 100644 --- a/src/borg/archiver/create_cmd.py +++ b/src/borg/archiver/create_cmd.py @@ -214,6 +214,7 @@ def create_inner(archive, cache, fso): self.noxattrs = args.noxattrs self.exclude_nodump = args.exclude_nodump dry_run = args.dry_run + self.start_backup = time.time_ns() t0 = archive_ts_now() t0_monotonic = time.monotonic() logger.info('Creating archive at "%s"' % args.location.processed) diff --git a/src/borg/cache.py b/src/borg/cache.py index 7bbd81517e..b20bc01d6d 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -3,6 +3,7 @@ import shutil import stat from collections import namedtuple +from datetime import datetime, timezone from time import perf_counter from .logger import create_logger @@ -11,7 +12,7 @@ files_cache_logger = create_logger("borg.debug.files_cache") -from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM +from 
.constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM, TIME_DIFFERS2_NS from .checksums import xxh64 from .hashindex import ChunkIndex, ChunkIndexEntry from .helpers import Error @@ -343,6 +344,7 @@ def __new__( cache_mode=FILES_CACHE_MODE_DISABLED, iec=False, archive_name=None, + start_backup=None, ): return AdHocWithFilesCache( manifest=manifest, @@ -352,6 +354,7 @@ iec=iec, cache_mode=cache_mode, archive_name=archive_name, + start_backup=start_backup, ) @@ -367,7 +370,7 @@ class FilesCacheMixin: FILES_CACHE_NAME = "files" - def __init__(self, cache_mode, archive_name=None): + def __init__(self, cache_mode, archive_name=None, start_backup=None): self.archive_name = archive_name # ideally a SERIES name assert not ("c" in cache_mode and "m" in cache_mode) assert "d" in cache_mode or "c" in cache_mode or "m" in cache_mode @@ -375,6 +378,7 @@ def __init__(self, cache_mode, archive_name=None): self._files = None self._newest_cmtime = 0 self._newest_path_hashes = set() + self.start_backup = start_backup @property def files(self): @@ -490,32 +494,43 @@ def _read_files_cache(self): def _write_files_cache(self, files): """write files cache to cache directory""" - if self._newest_cmtime is None: - # was never set because no files were modified/added - self._newest_cmtime = 2**63 - 1 # nanoseconds, good until y2262 + max_time_ns = 2**63 - 1 # nanoseconds, good until y2262 + # self._newest_cmtime might be None if it was never set because no files were modified/added. 
+ newest_cmtime = self._newest_cmtime if self._newest_cmtime is not None else max_time_ns + start_backup_time = self.start_backup - TIME_DIFFERS2_NS if self.start_backup is not None else max_time_ns + # we don't want to persist files cache entries of potentially problematic files: + discard_after = min(newest_cmtime, start_backup_time) ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2)) files_cache_logger.debug("FILES-CACHE-SAVE: starting...") # TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek(). with IntegrityCheckedFile(path=os.path.join(self.path, self.files_cache_name()), write=True) as fd: - entry_count = 0 + entries = 0 + age_discarded = 0 + race_discarded = 0 for path_hash, item in files.items(): - # Only keep files seen in this backup that are older than newest cmtime seen in this backup - - # this is to avoid issues with filesystem snapshots and cmtime granularity. - # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet. entry = FileCacheEntry(*msgpack.unpackb(item)) - if ( - entry.age == 0 - and max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < self._newest_cmtime - or entry.age > 0 - and entry.age < ttl - ): + if entry.age == 0: # current entries + if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after: + # Only keep files seen in this backup that are old enough not to suffer race conditions relating + # to filesystem snapshots and ctime/mtime granularity or being modified while we read them. + keep = True + else: + keep = False + race_discarded += 1 + else: # old entries + if entry.age < ttl: + # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet. 
+ keep = True + else: + keep = False + age_discarded += 1 + if keep: msgpack.pack((path_hash, entry), fd) - entry_count += 1 - files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl) - files_cache_logger.debug( - "FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime - ) - files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count) + entries += 1 + files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]") + t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat() + files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}") + files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.") return fd.integrity_data def file_known_and_unchanged(self, hashed_path, path_hash, st): @@ -710,12 +725,13 @@ def __init__( cache_mode=FILES_CACHE_MODE_DISABLED, iec=False, archive_name=None, + start_backup=None, ): """ :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison """ - FilesCacheMixin.__init__(self, cache_mode, archive_name) + FilesCacheMixin.__init__(self, cache_mode, archive_name, start_backup) ChunksMixin.__init__(self) assert isinstance(manifest, Manifest) self.manifest = manifest diff --git a/src/borg/constants.py b/src/borg/constants.py index ea429eccf5..8e949a8be5 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -117,6 +117,9 @@ # we can't go much higher here (like e.g. to 2s) without causing false positives. 
TIME_DIFFERS_NS = 20000000 # 20ms [ns] +# similar to above, but for bigger granularity / clock differences +TIME_DIFFERS2_NS = 3000000000 # [ns] + # return codes returned by borg command EXIT_SUCCESS = 0 # everything done, no problems EXIT_WARNING = 1 # reached normal end of operation, but there were issues (generic warning)