diff --git a/curateipsum/backup.py b/curateipsum/backup.py
index 3363368..8029208 100644
--- a/curateipsum/backup.py
+++ b/curateipsum/backup.py
@@ -1,24 +1,30 @@
 """ Module with backup functions. """
-
+import errno
 import logging
 import os
 import shutil
+import signal
 from datetime import datetime, timedelta
-from typing import Optional, Iterable
+from typing import Optional, Iterable, Union

 from curateipsum import fs

 BACKUP_ENT_FMT = "%Y%m%d_%H%M%S"
 LOCK_FILE = ".backups_lock"
 DELTA_DIR = ".backup_delta"
+BACKUP_MARKER = ".backup_finished"

 _lg = logging.getLogger(__name__)


-def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
-    """ Check if entity_path is a single backup dir. """
-    if not backup_entry.is_dir():
+def _is_backup(backup_entry: Union[os.DirEntry, fs.PseudoDirEntry]) -> bool:
+    """Guess if backup_entry is a real backup."""
+    # if there is no marker file in the backup dir, it's not a backup
+    if not os.path.exists(os.path.join(backup_entry.path, BACKUP_MARKER)):
+        return False
+    # if there is only a marker file in the backup dir, it's not a backup
+    if os.listdir(backup_entry.path) == [BACKUP_MARKER]:
         return False
     try:
         datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
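
A quick illustration of the new `_is_backup` contract (my own sketch, not
part of the patch; it assumes `fs.PseudoDirEntry` exposes `.path` and
`.name` the way the rest of this diff uses them, and pokes a module-private
helper purely for demonstration):

    import os
    import tempfile

    from curateipsum import backup, fs

    with tempfile.TemporaryDirectory() as tmp:
        ent = fs.PseudoDirEntry(os.path.join(tmp, "20211019_030000"))
        os.mkdir(ent.path)
        assert not backup._is_backup(ent)   # no marker file yet
        open(os.path.join(ent.path, backup.BACKUP_MARKER), "a").close()
        assert not backup._is_backup(ent)   # marker alone is not enough
        open(os.path.join(ent.path, "some_data"), "a").close()
        assert backup._is_backup(ent)       # marker + payload + valid name
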
@@ -27,74 +33,140 @@ def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
         return False


-def _iterate_backups(backup_dir: str) -> Iterable[os.DirEntry]:
-    b_iter = os.scandir(backup_dir)
+def _iterate_backups(backups_dir: str) -> Iterable[os.DirEntry]:
+    """Iterate over backups in backups_dir."""
+    b_iter = os.scandir(backups_dir)
     b_ent: os.DirEntry
     for b_ent in b_iter:
-        if not _is_backup_entity(b_ent):
-            continue
-        if not os.listdir(b_ent.path):
-            _lg.info("Removing empty backup entity: %s", b_ent.name)
-            os.rmdir(b_ent.path)
+        if not _is_backup(b_ent):
             continue
         yield b_ent
     b_iter.close()


-def _get_latest_backup(backup_dir: str) -> Optional[os.DirEntry]:
-    """ Returns path to latest backup created in backup_dir or None. """
-    all_backups = sorted(_iterate_backups(backup_dir), key=lambda e: e.name)
+def _get_latest_backup(backups_dir: str) -> Optional[os.DirEntry]:
+    """Returns path to latest backup created in backups_dir or None."""
+    all_backups = sorted(_iterate_backups(backups_dir), key=lambda e: e.name)
     if all_backups:
         return all_backups[-1]
     return None


-def _date_from_backup(backup: os.DirEntry) -> datetime:
-    return datetime.strptime(backup.name, BACKUP_ENT_FMT)
+def _date_from_backup(backup_entry: os.DirEntry) -> datetime:
+    """Returns datetime object from backup name."""
+    return datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)


-def set_backups_lock(backup_dir: str, force: bool = False) -> bool:
-    """ Return false if previous backup is still running. """
-    lock_file_path = os.path.join(backup_dir, LOCK_FILE)
-    if os.path.exists(lock_file_path):
-        if not force:
+def _pid_exists(pid: int) -> bool:
+    """Check whether pid exists in the current process table."""
+    if pid == 0:
+        # According to "man 2 kill" PID 0 has a special meaning:
+        # it refers to <<every process in the process group of the
+        # calling process>> so we don't want to go any further.
+        # If we get here it means this UNIX platform *does* have
+        # a process with id 0.
+        return True
+    try:
+        os.kill(pid, 0)
+    except OSError as err:
+        if err.errno == errno.ESRCH:
+            # ESRCH == No such process
             return False
-        os.unlink(lock_file_path)
+        elif err.errno == errno.EPERM:
+            # EPERM clearly means there's a process to deny access to
+            return True
+        else:
+            # According to "man 2 kill" possible error values are
+            # (EINVAL, EPERM, ESRCH) therefore we should never get
+            # here. If we do let's be explicit in considering this
+            # an error.
+            raise err
+    else:
+        return True
-    open(lock_file_path, "a").close()
+
+
+def set_backups_lock(backups_dir: str,
+                     force: bool = False) -> bool:
+    """
+    Set lock file to prevent multiple backups running at the same time.
+    Lock file contains PID of the process that created it.
+    Return False if a previous backup is still running and the force flag
+    is not set.
+    """
+    lock_file_path = os.path.join(backups_dir, LOCK_FILE)
+
+    if not os.path.exists(lock_file_path):
+        with open(lock_file_path, "a") as f:
+            f.write(str(os.getpid()))
+        return True
+
+    with open(lock_file_path, "r") as f:
+        pid = int(f.read())
+
+    if _pid_exists(pid):
+        if not force:
+            _lg.warning(
+                "Previous backup is still in progress (PID: %d), exiting", pid
+            )
+            return False
+
+        _lg.warning(
+            "Previous backup is still in progress (PID: %d), "
+            "but force flag is set, continuing", pid
+        )
+        os.kill(pid, signal.SIGKILL)
+
+    os.unlink(lock_file_path)
+    # re-create the lock under our own PID, otherwise taking over a stale
+    # lock would leave the backups dir unlocked
+    with open(lock_file_path, "a") as f:
+        f.write(str(os.getpid()))
     return True


-def release_backups_lock(backup_dir: str):
-    lock_file_path = os.path.join(backup_dir, LOCK_FILE)
+def release_backups_lock(backups_dir: str):
+    """Remove lock file."""
+    lock_file_path = os.path.join(backups_dir, LOCK_FILE)
     if os.path.exists(lock_file_path):
         os.unlink(lock_file_path)


-def cleanup_old_backups(
-    backup_dir: str,
-    dry_run: bool = False,
-    keep_all: int = 7,
-    keep_daily: int = 30,
-    keep_weekly: int = 52,
-    keep_monthly: int = 12,
-    keep_yearly: int = 5,
-):
+def set_backup_marker(backup_entry: Union[os.DirEntry, fs.PseudoDirEntry]):
+    """Create finished backup marker file in backup's directory."""
+    marker_path = os.path.join(backup_entry.path, BACKUP_MARKER)
+    if not os.path.exists(marker_path):
+        open(marker_path, "a").close()
+
+
+def cleanup_old_backups(backups_dir: str,
+                        dry_run: bool = False,
+                        keep_all: int = 7,
+                        keep_daily: int = 30,
+                        keep_weekly: int = 52,
+                        keep_monthly: int = 12,
+                        keep_yearly: int = 5):
     """
     Delete old backups. Never deletes the only backup.

-    :param backup_dir: full path to backup directory.
-    :param dry_run: don't do anything actually.
-    :param keep_all: the number of days that all backups must be kept.
-    :param keep_daily: the number of days that all daily backups must be kept.
-    :param keep_weekly: the number of weeks of which one weekly backup must be kept.
-    :param keep_monthly: the number of months (1 month = 30 days) of which
-        one monthly backup must be kept.
-    :param keep_yearly: the number of years of which one yearly backup must be kept.
-    :return:
+    For keep_* params the threshold is inclusive, e.g.:
+    keep_weekly=1 being run on Thursday will keep one backup from this week
+    and one from the previous, even if the previous week's backup was
+    created on Monday.
+    keep_monthly=3 being run on any day of April will keep one backup from
+    each of January, February and March.
+
+    :param backups_dir: full path to backups directory.
+    :param dry_run: only log what would be removed, don't delete anything.
+    :param keep_all:
+        up to this many days in the past, all backups must be kept.
+    :param keep_daily:
+        up to this many days in the past, one daily backup must be kept.
+    :param keep_weekly:
+        up to this many weeks in the past, one weekly backup must be kept.
+    :param keep_monthly:
+        up to this many months in the past, one monthly backup must be kept.
+        1 month is considered to be 30 days.
+    :param keep_yearly:
+        up to this many years in the past, one yearly backup must be kept.
+        1 year is considered to be 365 days.
     """
-    all_backups = sorted(_iterate_backups(backup_dir),
+    all_backups = sorted(_iterate_backups(backups_dir),
                          key=lambda e: e.name, reverse=True)
     if dry_run:
         _lg.info("Dry-run, no backups will be actually removed")
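
The keep_weekly example in the docstring implies calendar-week grouping
rather than a rolling 7-day window: on Thursday 2021-11-11 a backup from
Monday 2021-11-01 is ten days old, yet keep_weekly=1 still covers it
because it falls in the previous ISO week. A small check of that reading
(my own sketch, not code from this module):

    from datetime import datetime

    now = datetime(2021, 11, 11)            # Thursday, ISO week 45
    prev_monday = datetime(2021, 11, 1)     # Monday,   ISO week 44
    assert (now - prev_monday).days == 10   # outside a rolling 7-day window
    assert now.isocalendar()[1] - prev_monday.isocalendar()[1] == 1
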
""" - all_backups = sorted(_iterate_backups(backup_dir), + all_backups = sorted(_iterate_backups(backups_dir), key=lambda e: e.name, reverse=True) if dry_run: _lg.info("Dry-run, no backups will be actually removed") @@ -177,12 +249,23 @@ def cleanup_old_backups( to_remove[backup] = True for backup, do_delete in to_remove.items(): - if not dry_run and do_delete: - _lg.info("Removing old backup %s", backup.name) - shutil.rmtree(backup.path) + if do_delete: + if dry_run: + _lg.info("Would remove old backup %s", backup.name) + else: + _lg.info("Removing old backup %s", backup.name) + shutil.rmtree(backup.path) -def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions, msg: str): +def process_backed_entry(backup_dir: str, + entry_relpath: str, + action: fs.Actions, + msg: str): + """ + Additional processing of backed up DirEntry (file/dir/symlink). + Actions: + - if DirEntry was not deleted, hardlink it to DELTA_DIR. + """ _lg.debug("%s %s %s", action, entry_relpath, msg) if action not in (fs.Actions.ERROR, fs.Actions.DELETE): fs.nest_hardlink(src_dir=backup_dir, src_relpath=entry_relpath, @@ -190,17 +273,26 @@ def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions def initiate_backup(sources, - backup_dir: str, + backups_dir: str, dry_run: bool = False, external_rsync: bool = False, external_hardlink: bool = False): - """ Main backup function """ + """ + Main backup function. + Creates a new backup directory, copies data from the latest backup, + and then syncs data from sources. + :param sources: list of directories to backup (relative paths ok) + :param backups_dir: directory where all backups are stored + :param dry_run: if True, no actual changes will be made + :param external_rsync: if True, use external rsync instead of python + :param external_hardlink: if True, use external hardlink instead of python + """ start_time_fmt = datetime.now().strftime(BACKUP_ENT_FMT) - cur_backup = fs.PseudoDirEntry(os.path.join(backup_dir, start_time_fmt)) + cur_backup = fs.PseudoDirEntry(os.path.join(backups_dir, start_time_fmt)) _lg.debug("Current backup dir: %s", cur_backup.path) - latest_backup = _get_latest_backup(backup_dir) + latest_backup = _get_latest_backup(backups_dir) if latest_backup is None: _lg.info("Creating empty directory for current backup: %s", @@ -215,13 +307,14 @@ def initiate_backup(sources, dst_dir=cur_backup.path, use_external=external_hardlink) if not hl_res: - _lg.error("Something went wrong during copying data from latest backup," - " removing created %s", cur_backup.name) + _lg.error("Something went wrong during copying data from latest" + " backup, removing created %s", cur_backup.name) shutil.rmtree(cur_backup.path, ignore_errors=True) return # clean up delta dir from copied backup - shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), ignore_errors=True) + shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), + ignore_errors=True) rsync_func = fs.rsync_ext if external_rsync else fs.rsync @@ -230,9 +323,13 @@ def initiate_backup(sources, src_abs = os.path.abspath(src) src_name = os.path.basename(src_abs) dst_abs = os.path.join(cur_backup.path, src_name) - _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name) + _lg.info("Backing up directory %s to backup %s", + src_abs, cur_backup.name) try: - for entry_relpath, action, msg in rsync_func(src_abs, dst_abs, dry_run=dry_run): + for entry_relpath, action, msg in rsync_func( + src_abs, dst_abs, dry_run=dry_run + ): + # TODO maybe should be run if first 
diff --git a/curateipsum/cli.py b/curateipsum/cli.py
index b9eef26..0b273a3 100755
--- a/curateipsum/cli.py
+++ b/curateipsum/cli.py
@@ -6,6 +6,7 @@ import os.path
 import shutil
 import sys
 import time
+from datetime import timedelta

 from curateipsum import backup
 from curateipsum._version import version
@@ -20,7 +21,8 @@ def main():
     console_handler.setFormatter(formatter)

     parser = argparse.ArgumentParser(
-        prog="cura-te-ipsum", description="cura-te-ipsum, my personal backup software.",
+        prog="cura-te-ipsum",
+        description="cura-te-ipsum, my personal backup software.",
     )
     parser.add_argument("-V", "--version",
                         action="version",
@@ -30,8 +32,8 @@ def main():
                         default=False,
                         help="print verbose information")
     parser.add_argument("-b",
-                        dest="backup_dir",
-                        metavar="BACKUP_DIR",
+                        dest="backups_dir",
+                        metavar="BACKUPS_DIR",
                         type=str,
                         required=True,
                         help="directory, where all backups will be stored")
@@ -78,9 +80,10 @@ def main():
                   cp_program)
         return 1

-    backup_dir_abs = os.path.abspath(args.backup_dir)
-    if not os.path.isdir(backup_dir_abs):
-        _lg.error("Backup directory %s does not exist, exiting", args.backup_dir)
+    backups_dir_abs = os.path.abspath(args.backups_dir)
+    if not os.path.isdir(backups_dir_abs):
+        _lg.error("Backup directory %s does not exist, exiting",
+                  args.backups_dir)
         return 1

     for src_dir in args.sources:
@@ -90,23 +93,23 @@ def main():

     start_time = time.time()

-    if not backup.set_backups_lock(backup_dir_abs, args.force):
-        _lg.warning("Previous backup is still in process, exiting")
+    if not backup.set_backups_lock(backups_dir_abs, args.force):
         return 1

-    backup.cleanup_old_backups(backup_dir=backup_dir_abs, dry_run=args.dry_run)
+    # TODO add cleaning up of unfinished backups
+    backup.cleanup_old_backups(backups_dir=backups_dir_abs,
+                               dry_run=args.dry_run)
     backup.initiate_backup(
         sources=args.sources,
-        backup_dir=backup_dir_abs,
+        backups_dir=backups_dir_abs,
         dry_run=args.dry_run,
         external_rsync=args.external_rsync,
         external_hardlink=args.external_hardlink,
     )
+    backup.release_backups_lock(backups_dir_abs)

-    backup.release_backups_lock(backup_dir_abs)
     end_time = time.time()
-    spent_time = end_time - start_time
-    _lg.info("Finished, time spent: %.3fs", spent_time)
+    _lg.info("Finished, time spent: %s",
+             timedelta(seconds=end_time - start_time))

     return 0
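
Note the seconds= keyword in the elapsed-time log above: timedelta's first
positional argument is days, so passing raw seconds positionally would be
read as days. A quick demonstration (standalone, standard library only):

    from datetime import timedelta

    elapsed = 12.3                       # seconds, as end_time - start_time
    print(timedelta(elapsed))            # "12 days, 7:12:00" - wrong reading
    print(timedelta(seconds=elapsed))    # "0:00:12.300000"   - intended
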
diff --git a/curateipsum/fs.py b/curateipsum/fs.py
index 8554cbd..cb4d87d 100644
--- a/curateipsum/fs.py
+++ b/curateipsum/fs.py
@@ -90,8 +90,11 @@ def _parse_rsync_output(line: str) -> Tuple[str, Actions, str]:
     return relpath, action, ""


-def rsync_ext(src, dst, dry_run=False):
-    """Call external rsync command"""
+def rsync_ext(src, dst, dry_run=False) -> Iterable[Tuple[str, Actions, str]]:
+    """
+    Call external rsync command for syncing files from src to dst.
+    Yield (path, action, error message) tuples.
+    """
     rsync_args = ["rsync"]
     if dry_run:
         rsync_args.append("--dry-run")
@@ -139,7 +142,9 @@ def rsync_ext(src, dst, dry_run=False):


 def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
-    """Recursively yield DirEntry file objects for given directory."""
+    """
+    Recursively yield DirEntry objects (dir/file/symlink) for given directory.
+    """
     entry: os.DirEntry
     with os.scandir(path) as scan_it:
         for entry in scan_it:
@@ -154,7 +159,7 @@ def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:


 def rm_direntry(entry: Union[os.DirEntry, PseudoDirEntry]):
-    """ Recursively delete DirEntry (dir, file or symlink). """
+    """ Recursively delete DirEntry (dir/file/symlink). """
     if entry.is_file(follow_symlinks=False) or entry.is_symlink():
         os.unlink(entry.path)
     elif entry.is_dir(follow_symlinks=False):
@@ -194,7 +199,7 @@ def copy_file(src, dst):


 def copy_direntry(entry: Union[os.DirEntry, PseudoDirEntry], dst_path):
-    """ Non-recursive DirEntry (file, dir or symlink) copy. """
+    """ Non-recursive DirEntry (file/dir/symlink) copy. """
     src_stat = entry.stat(follow_symlinks=False)
     if entry.is_dir():
         os.mkdir(dst_path)
@@ -232,13 +237,14 @@ def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
         copy_direntry(src_entry, dst_entry.path)


-def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
+def rsync(src_dir,
+          dst_dir,
+          dry_run=False) -> Iterable[Tuple[str, Actions, str]]:
     """
-    Do sync
-    :param src_dir: source dir
-    :param dst_dir: dest dir, create if not exists
-    :param dry_run: not used
-    :return: nothing
+    Sync files/dirs/symlinks from src_dir to dst_dir.
+    Yield (path, action, error message) tuples.
+    Entries in dst_dir will be removed if not present in src_dir.
+    Analog of 'rsync --delete -irltpog'.
     """

     _lg.debug("Rsync: %s -> %s", src_dir, dst_dir)
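
Since rsync() and rsync_ext() now advertise the same generator contract, a
caller can be written against either. A minimal consumer sketch (paths
hypothetical; only the Actions members already used elsewhere in this diff
are assumed to exist):

    from curateipsum import fs

    for relpath, action, msg in fs.rsync("/data/src", "/data/dst"):
        if action is fs.Actions.ERROR:
            print(f"failed on {relpath}: {msg}")
        else:
            print(f"{action} {relpath}")
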
""" _lg.debug("Rsync: %s -> %s", src_dir, dst_dir) @@ -282,10 +288,11 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]: del src_files_map[rel_path] src_entry: os.DirEntry - # rewrite dst if it has different than src type + # rewrite dst if it has different type from src if src_entry.is_file(follow_symlinks=False): if not dst_entry.is_file(follow_symlinks=False): - _lg.debug("Rsync, rewriting (src is a file, dst is not a file): %s", + _lg.debug("Rsync, rewriting" + " (src is a file, dst is not a file): %s", rel_path) try: update_direntry(src_entry, dst_entry) @@ -296,7 +303,8 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]: if src_entry.is_dir(follow_symlinks=False): if not dst_entry.is_dir(follow_symlinks=False): - _lg.debug("Rsync, rewriting (src is a dir, dst is not a dir): %s", + _lg.debug("Rsync, rewriting" + " (src is a dir, dst is not a dir): %s", rel_path) try: update_direntry(src_entry, dst_entry) @@ -307,7 +315,8 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]: if src_entry.is_symlink(): if not dst_entry.is_symlink(): - _lg.debug("Rsync, rewriting (src is a symlink, dst is not a symlink): %s", + _lg.debug("Rsync, rewriting" + " (src is a symlink, dst is not a symlink): %s", rel_path) try: update_direntry(src_entry, dst_entry) @@ -329,13 +338,14 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]: src_stat = src_entry.stat(follow_symlinks=False) dst_stat = dst_entry.stat(follow_symlinks=False) - # rewrite dst file/symlink which have different with src size or mtime + # rewrite dst file/symlink which have different size or mtime than src if src_entry.is_file(follow_symlinks=False): same_size = src_stat.st_size == dst_stat.st_size same_mtime = src_stat.st_mtime == dst_stat.st_mtime if not (same_size and same_mtime): reason = "size" if not same_size else "time" - _lg.debug("Rsync, rewriting (different %s): %s", reason, rel_path) + _lg.debug("Rsync, rewriting (different %s): %s", + reason, rel_path) try: update_direntry(src_entry, dst_entry) yield rel_path, Actions.REWRITE, "" @@ -346,7 +356,8 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]: # rewrite dst symlink if it points somewhere else than src if src_entry.is_symlink(): if os.readlink(src_entry.path) != os.readlink(dst_entry.path): - _lg.debug("Rsync, rewriting (different symlink target): %s", rel_path) + _lg.debug("Rsync, rewriting (different symlink target): %s", + rel_path) try: update_direntry(src_entry, dst_entry) yield rel_path, Actions.REWRITE, "" @@ -360,12 +371,13 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]: os.chmod(dst_entry.path, dst_stat.st_mode) yield rel_path, Actions.UPDATE_PERM, "" - if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid: + if (src_stat.st_uid != dst_stat.st_uid + or src_stat.st_gid != dst_stat.st_gid): _lg.debug("Rsync, updating owners: %s", rel_path) os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid) yield rel_path, Actions.UPDATE_OWNER, "" - # process remained source entries + # process remained source entries (new files/dirs/symlinks) for rel_path, src_entry in src_files_map.items(): dst_path = os.path.join(dst_root_abs, rel_path) _lg.debug("Rsync, creating: %s", rel_path) @@ -429,7 +441,7 @@ def _recursive_hardlink(src: str, dst: str) -> bool: Both src and dst directories should exist. :param src: absolute path to source directory. :param dst: absolute path to target directory. - :return: None + :return: True if success, False otherwise. 
""" with os.scandir(src) as it: ent: os.DirEntry @@ -467,7 +479,7 @@ def hardlink_dir(src_dir, dst_dir, use_external: bool = False) -> bool: :param src_dir: path to source directory :param dst_dir: path to target directory :param use_external: whether to use external cp -al command - :return: success or not + :return: True if success, False otherwise. """ _lg.debug("Recursive hardlinking: %s -> %s", src_dir, dst_dir) src_abs = os.path.abspath(src_dir) @@ -480,15 +492,18 @@ def hardlink_dir(src_dir, dst_dir, use_external: bool = False) -> bool: _lg.debug("Hardlink, creating directory: %s", dst_abs) os.mkdir(dst_abs) - hardlink_func = _recursive_hardlink_ext if use_external else _recursive_hardlink + hardlink_func = (_recursive_hardlink_ext if use_external + else _recursive_hardlink) return hardlink_func(src_abs, dst_abs) def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str): """ - Hardlink entity from (src_dir + src_relpath) to dst_dir preserving dir structure. + Hardlink entity from (src_dir + src_relpath) to dst_dir preserving dir + structure of src_relpath. """ - _lg.debug("Nested hardlinking: %s%s%s -> %s", src_dir, os.path.sep, src_relpath, dst_dir) + _lg.debug("Nested hardlinking: %s%s%s -> %s", + src_dir, os.path.sep, src_relpath, dst_dir) src_dir_abs = os.path.abspath(src_dir) src_full_path = os.path.join(src_dir_abs, src_relpath) dst_dir_abs = os.path.abspath(dst_dir) diff --git a/tests/test_backups.py b/tests/test_backups.py index 01a207e..c1795b3 100644 --- a/tests/test_backups.py +++ b/tests/test_backups.py @@ -18,6 +18,7 @@ class TestBackupCleanup(TestCase): def _add_backup(self, backup_name: str) -> fs.PseudoDirEntry: backup = fs.PseudoDirEntry(os.path.join(self.backup_dir.name, backup_name)) os.mkdir(backup.path) + bk.set_backup_marker(backup) fd, path = tempfile.mkstemp(prefix="backup_file_", dir=backup.path) with open(fd, "w") as f: @@ -38,7 +39,7 @@ class TestBackupCleanup(TestCase): def _run_cleanup(self, **kwargs): """ Run cleanup_old_backups with null parameters. 
""" cleanup_kwargs = { - "backup_dir": self.backup_dir.name, + "backups_dir": self.backup_dir.name, "dry_run": False, "keep_all": None, "keep_daily": None, @@ -58,6 +59,7 @@ class TestBackupCleanup(TestCase): def test_only_one_backup(self, mock_datetime): """ Test the only backup will not be removed in any case """ mock_datetime.now.return_value = datetime(2021, 10, 20) + # very old backup only_backup = self._add_backup("20010101_0000") self._run_cleanup(keep_all=1) self._check_backups([only_backup]) @@ -67,8 +69,8 @@ class TestBackupCleanup(TestCase): """ Test at least one backup should be left """ mock_datetime.now.return_value = datetime(2021, 10, 20) backups = [ - self._add_backup("20211103_0300"), - self._add_backup("20201216_0100"), + self._add_backup("20211103_0300"), # this one is the latest and should be kept + self._add_backup("20201216_0100"), # the rest should be removed self._add_backup("20200716_0100"), self._add_backup("20181116_0100"), ] @@ -81,9 +83,9 @@ class TestBackupCleanup(TestCase): """ Test threshold for keeping all backups """ mock_datetime.now.return_value = datetime(2021, 10, 20) backups = [ - self._add_backup("20211019_0300"), - self._add_backup("20211017_0100"), - self._add_backup("20211016_2300"), + self._add_backup("20211019_0300"), # keep + self._add_backup("20211017_0100"), # keep + self._add_backup("20211016_2300"), # remove, older than 3 days ] expected_backups = backups[:2] self._run_cleanup(keep_all=3) @@ -94,12 +96,12 @@ class TestBackupCleanup(TestCase): """ Test threshold for keeping daily backups """ mock_datetime.now.return_value = datetime(2021, 10, 20) backups = [ - self._add_backup("20211019_0300"), - self._add_backup("20211017_2100"), - self._add_backup("20211017_0100"), - self._add_backup("20211017_0030"), - self._add_backup("20211016_2300"), - self._add_backup("20211016_0100"), + self._add_backup("20211019_0300"), # keep, first daily backup at 2021-10-19 + self._add_backup("20211017_2100"), # remove, not the first daily backup + self._add_backup("20211017_0100"), # remove, not the first daily backup + self._add_backup("20211017_0030"), # keep, first daily backup at 2021-10-17 + self._add_backup("20211016_2300"), # remove, older than 3 days + self._add_backup("20211016_0100"), # remove, older than 3 days ] expected_backups = [backups[0], backups[3]] self._run_cleanup(keep_daily=3) @@ -110,17 +112,17 @@ class TestBackupCleanup(TestCase): """ Test threshold for keeping all and daily backups """ mock_datetime.now.return_value = datetime(2021, 10, 20) backups = [ - self._add_backup("20211019_0300"), - self._add_backup("20211017_0200"), - self._add_backup("20211017_0100"), - self._add_backup("20211016_2300"), - self._add_backup("20211016_2200"), - self._add_backup("20211015_2200"), - self._add_backup("20211015_1500"), - self._add_backup("20211015_0200"), - self._add_backup("20211014_2200"), - self._add_backup("20211014_2000"), - self._add_backup("20211014_1232"), + self._add_backup("20211019_0300"), # keep, newer than 3 days + self._add_backup("20211017_0200"), # keep, newer than 3 days + self._add_backup("20211017_0100"), # keep, newer than 3 days + self._add_backup("20211016_2300"), # remove, not the first daily backup + self._add_backup("20211016_2200"), # keep, the first daily backup at 2021-10-16 + self._add_backup("20211015_2200"), # remove, not the first daily backup + self._add_backup("20211015_1500"), # remove, not the first daily backup + self._add_backup("20211015_0200"), # keep, the first daily backup at 2021-10-15 + 
self._add_backup("20211014_2200"), # remove, older than 5 days + self._add_backup("20211014_2000"), # remove, older than 5 days + self._add_backup("20211014_1232"), # remove, older than 5 days ] expected_backups = backups[0:3] + [backups[4]] + [backups[7]] self._run_cleanup(keep_all=3, keep_daily=5) @@ -131,40 +133,57 @@ class TestBackupCleanup(TestCase): """ Test threshold for keeping weekly backups """ mock_datetime.now.return_value = datetime(2021, 11, 11) backups = [ - self._add_backup("20211111_0300"), - self._add_backup("20211110_0300"), - self._add_backup("20211108_0100"), - self._add_backup("20211107_2300"), - self._add_backup("20211107_0100"), - self._add_backup("20211031_0100"), - self._add_backup("20211025_0100"), - self._add_backup("20211024_0100"), - self._add_backup("20211023_0100"), - self._add_backup("20211022_0100"), - self._add_backup("20211008_0100"), - self._add_backup("20211007_0100"), - self._add_backup("20211004_0100"), - self._add_backup("20211003_0100"), + self._add_backup("20211111_0300"), # remove, not the first weekly backup (Thursday) + self._add_backup("20211110_0300"), # remove, not the first weekly backup (Wednesday) + self._add_backup("20211108_0100"), # keep, first weekly backup at 2021-11-08 (Monday) + self._add_backup("20211107_2300"), # remove, not the first weekly backup (Sunday) + self._add_backup("20211107_0100"), # keep, first weekly backup at 2021-11-07 (Sunday) + self._add_backup("20211031_0100"), # remove, not the first weekly backup (Sunday) + self._add_backup("20211025_0100"), # keep, first weekly backup at 2021-10-25 (Monday) + self._add_backup("20211024_0100"), # remove, not the first weekly backup (Sunday) + self._add_backup("20211023_0100"), # remove, not the first weekly backup (Saturday) + self._add_backup("20211022_0100"), # keep, first weekly backup at 2021-10-22 (Friday) + self._add_backup("20211008_0100"), # remove, not the first weekly backup (Friday) + self._add_backup("20211007_0100"), # remove, not the first weekly backup (Thursday) + self._add_backup("20211004_0100"), # keep, first weekly backup at 2021-10-04 (Monday) + self._add_backup("20211003_0100"), # remove, older than 5 weeks + self._add_backup("20211002_0100"), # remove, older than 5 weeks + ] expected_backups = [backups[2], backups[4], backups[6], backups[9], backups[12]] self._run_cleanup(keep_weekly=5) self._check_backups(expected_backups) + @mock.patch(f"{bk.__name__}.datetime", wraps=datetime) + def test_keep_weekly_threshold_inclusive(self, mock_datetime): + """ Test threshold for keeping weekly backups """ + mock_datetime.now.return_value = datetime(2021, 11, 11) + backups = [ + self._add_backup("20211111_0300"), # remove, not the first weekly backup (Thursday) + self._add_backup("20211110_0300"), # keep, first weekly backup (Wednesday) + self._add_backup("20211107_0100"), # remove, not the first weekly backup (Sunday) + self._add_backup("20211102_0100"), # keep, first weekly backup (Tuesday) + ] + expected_backups = [backups[1], backups[3]] + self._run_cleanup(keep_weekly=5) + self._check_backups(expected_backups) + @mock.patch(f"{bk.__name__}.datetime", wraps=datetime) def test_keep_monthly_threshold_only(self, mock_datetime): """ Test threshold for keeping monthly backups """ mock_datetime.now.return_value = datetime(2021, 11, 11) backups = [ - self._add_backup("20211103_0300"), - self._add_backup("20211019_0300"), - self._add_backup("20211017_2100"), - self._add_backup("20211017_0100"), - self._add_backup("20210916_2300"), - self._add_backup("20210916_0100"), - 
self._add_backup("20210816_0100"), - self._add_backup("20210810_0000"), - self._add_backup("20210716_0100"), + self._add_backup("20211103_0300"), # keep, first monthly backup at 2021-11 + self._add_backup("20211019_0300"), # remove, not the first monthly backup + self._add_backup("20211017_2100"), # remove, not the first monthly backup + self._add_backup("20211017_0100"), # keep, first monthly backup at 2021-10 + self._add_backup("20210916_2300"), # remove, not the first monthly backup + self._add_backup("20210916_0100"), # keep, first monthly backup at 2021-09 + self._add_backup("20210816_0100"), # remove, not the first monthly backup + self._add_backup("20210810_0000"), # keep, first monthly backup at 2021-08 + self._add_backup("20210716_0100"), # remove, older than 3 months + self._add_backup("20210715_0100"), # remove, older than 3 months ] expected_backups = [backups[0], backups[3], backups[5], backups[7]] self._run_cleanup(keep_monthly=3) @@ -175,15 +194,19 @@ class TestBackupCleanup(TestCase): """ Test threshold for keeping yearly backups """ mock_datetime.now.return_value = datetime(2021, 11, 11) backups = [ - self._add_backup("20211103_0300"), - self._add_backup("20210810_0000"), - self._add_backup("20210716_0100"), - self._add_backup("20201216_0100"), - self._add_backup("20200716_0100"), - self._add_backup("20190316_0100"), - self._add_backup("20181116_0100"), + self._add_backup("20211103_0300"), # remove, not the first yearly backup in 2021 + self._add_backup("20210810_0000"), # remove, not the first yearly backup in 2021 + self._add_backup("20210716_0100"), # keep, first yearly backup in 2021 + self._add_backup("20201216_0100"), # remove, not the first yearly backup in 2020 + self._add_backup("20200716_0100"), # keep, first yearly backup in 2020 + self._add_backup("20191216_0100"), # remove, not the first yearly backup in 2019 + self._add_backup("20190316_0100"), # keep, first yearly backup in 2019 + self._add_backup("20181216_0100"), # remove, not the first yearly backup in 2018 + self._add_backup("20181116_0100"), # keep, first yearly backup in 2018 + self._add_backup("20171116_0100"), # remove, older than 3 years + self._add_backup("20171115_0100"), # remove, older than 3 years ] - expected_backups = [backups[2], backups[4], backups[5], backups[6]] + expected_backups = [backups[2], backups[4], backups[6], backups[8]] self._run_cleanup(keep_yearly=3) self._check_backups(expected_backups) @@ -200,3 +223,7 @@ class TestBackupCleanup(TestCase): ] self._run_cleanup(keep_all=2, dry_run=True) self._check_backups(backups) + + +# TODO add tests for iterating over backups (marker, dirname) +# TODO add tests for backups dir lockfile diff --git a/tests/test_fs.py b/tests/test_fs.py index 5d02da2..9417cf9 100644 --- a/tests/test_fs.py +++ b/tests/test_fs.py @@ -142,7 +142,8 @@ class TestHardlinkDir(CommonFSTestCase): nfile_relpath = self.relpath(src_nfile_path) fs.hardlink_dir(self.src_dir, self.dst_dir) - self.check_directory_stats(src_ndir_path, os.path.join(self.dst_dir, ndir_relpath)) + self.check_directory_stats(src_ndir_path, + os.path.join(self.dst_dir, ndir_relpath)) # check file in nested directory src_fstat = os.lstat(src_nfile_path)