Add backups cleanup

parent d521ef3c89
commit c64955362a

main.py: 12 lines changed
main.py

@@ -6,7 +6,7 @@ import os.path
 import shutil
 import sys
 
-from spqr.curateipsum.backup import initiate_backup
+import spqr.curateipsum.backup as backup
 
 _lg = logging.getLogger("spqr.curateipsum")
 SUPPORTED_PLATFORMS = ("linux", "darwin")
@@ -56,7 +56,8 @@ def main():
     _lg.info("Starting %s: %s", parser.prog, args)
 
     if sys.platform not in SUPPORTED_PLATFORMS:
-        _lg.error(f"Not supported platform: {sys.platform}. Supported platforms: {SUPPORTED_PLATFORMS}")
+        _lg.error("Unsupported platform: %s. Supported platforms: %s",
+                  sys.platform, SUPPORTED_PLATFORMS)
         return 1
 
     if args.external_rsync and not shutil.which("rsync"):
@@ -65,7 +66,8 @@ def main():
 
     cp_program = "gcp" if sys.platform == "darwin" else "cp"
     if args.external_hardlink and not shutil.which(cp_program):
-        _lg.error(f"{cp_program} should be installed to use --external-hardlink option.")
+        _lg.error("%s should be installed to use the --external-hardlink option.",
+                  cp_program)
         return 1
 
     backup_dir_abs = os.path.abspath(args.backup_dir)
@@ -78,7 +80,9 @@ def main():
         _lg.error("Source directory %s does not exist", src_dir)
         return 1
 
-    initiate_backup(
+    backup.cleanup_old_backups(backup_dir=backup_dir_abs, dry_run=args.dry_run)
+
+    backup.initiate_backup(
         sources=args.sources,
         backup_dir=backup_dir_abs,
         dry_run=args.dry_run,
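Reviewer note on the logging changes above: an f-string is formatted eagerly, even
when the record ends up filtered out, while %-style arguments are interpolated only
if the message is actually emitted. A minimal standalone sketch (the logger name is
illustrative):

    import logging

    logging.basicConfig(level=logging.ERROR)
    _lg = logging.getLogger("demo")

    platform = "win32"
    _lg.error("Unsupported platform: %s", platform)  # interpolated on emit
    _lg.debug("expensive: %s", platform * 10_000)    # DEBUG filtered out, never interpolated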
spqr/curateipsum/backup.py

@@ -6,44 +6,166 @@ import logging
 import os
 import shutil
 import time
-from datetime import datetime
-from typing import Optional
+from datetime import datetime, timedelta
+from typing import Optional, Iterable
 
 import spqr.curateipsum.fs as fs
 
-BACKUP_ENT_FMT = "%y%m%d_%H%M"
+BACKUP_ENT_FMT = "%Y%m%d_%H%M"
 DELTA_DIR = "_delta"
 _lg = logging.getLogger(__name__)
 
 
-def _is_backup_entity(entity_path: str) -> bool:
-    """ Check if entity_path is a single backup dir. """
-    if not os.path.isdir(entity_path):
+def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
+    """ Check if backup_entry is a single backup dir. """
+    if not backup_entry.is_dir():
         return False
     try:
-        datetime.strptime(os.path.basename(entity_path), BACKUP_ENT_FMT)
+        datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
         return True
     except ValueError:
         return False
 
 
-def _get_latest_backup(backup_dir: str) -> Optional[str]:
-    """ Returns path to latest backup created in backup_dir or None. """
-    backups = sorted(os.listdir(backup_dir), reverse=True)
-    for b_ent in backups:
-        b_ent_abs = os.path.join(backup_dir, b_ent)
-        if not _is_backup_entity(b_ent_abs):
-            continue
-        if not os.listdir(b_ent_abs):
-            _lg.info("Removing empty backup entity: %s", os.path.basename(b_ent_abs))
-            os.rmdir(b_ent_abs)
-            continue
-        return b_ent_abs
+def _iterate_backups(backup_dir: str) -> Iterable[os.DirEntry]:
+    """ Yield backup entries in backup_dir, removing empty ones on the way. """
+    b_iter = os.scandir(backup_dir)
+
+    b_ent: os.DirEntry
+    for b_ent in b_iter:
+        if not _is_backup_entity(b_ent):
+            continue
+        if not os.listdir(b_ent.path):
+            _lg.info("Removing empty backup entity: %s", b_ent.name)
+            os.rmdir(b_ent.path)
+            continue
+        yield b_ent
+
+    b_iter.close()
+
+
+def _get_latest_backup(backup_dir: str) -> Optional[os.DirEntry]:
+    """ Returns the latest backup entry created in backup_dir, or None. """
+    all_backups = sorted(_iterate_backups(backup_dir), key=lambda e: e.name)
+    if all_backups:
+        return all_backups[-1]
     return None
+
+
+def _date_from_backup(backup: os.DirEntry) -> datetime:
+    return datetime.strptime(backup.name, BACKUP_ENT_FMT)
+
+
+def cleanup_old_backups(
+        backup_dir: str,
+        dry_run: bool = False,
+        keep_all: Optional[int] = 7,
+        keep_daily: Optional[int] = 30,
+        keep_weekly: Optional[int] = 52,
+        keep_monthly: Optional[int] = 12,
+        keep_yearly: Optional[int] = 5,
+        min_free_space: int = 0
+):
+    """
+    Delete old backups. Never deletes the only backup.
+    :param backup_dir: full path to the backup directory.
+    :param dry_run: only log what would be removed, without deleting anything.
+    :param keep_all: the number of days for which every backup must be kept.
+    :param keep_daily: the number of days for which one daily backup must be kept.
+    :param keep_weekly: the number of weeks of which one weekly backup must be kept.
+    :param keep_monthly: the number of months (1 month = 30 days) of which
+        one monthly backup must be kept.
+    :param keep_yearly: the number of years of which one yearly backup must be kept.
+    :param min_free_space: currently unused.
+    """
+    all_backups = sorted(_iterate_backups(backup_dir),
+                         key=lambda e: e.name, reverse=True)
+    if dry_run:
+        _lg.info("Dry-run, no backups will actually be removed")
+    if not all_backups:
+        _lg.debug("No backups, exiting")
+        return
+    elif len(all_backups) == 1:
+        _lg.debug("Only one backup (%s) exists, will not remove it",
+                  all_backups[0].name)
+        return
+
+    now = datetime.now()
+    thresholds = {k: now.strftime(BACKUP_ENT_FMT)
+                  for k in ("all", "daily", "weekly", "monthly", "yearly")}
+    if keep_all is not None:
+        thresholds["all"] = ((now - timedelta(days=keep_all))
+                             .replace(hour=0, minute=0, second=0)
+                             .strftime(BACKUP_ENT_FMT))
+    if keep_daily is not None:
+        thresholds["daily"] = ((now - timedelta(days=keep_daily))
+                               .replace(hour=0, minute=0, second=0)
+                               .strftime(BACKUP_ENT_FMT))
+    if keep_weekly is not None:
+        thresholds["weekly"] = (
+            (now - timedelta(weeks=keep_weekly, days=now.weekday()))
+            .strftime(BACKUP_ENT_FMT)
+        )
+    if keep_monthly is not None:
+        thresholds["monthly"] = ((now - timedelta(days=30 * keep_monthly))
+                                 .replace(day=1, hour=0, minute=0, second=0)
+                                 .strftime(BACKUP_ENT_FMT))
+    if keep_yearly is not None:
+        thresholds["yearly"] = (
+            (now - timedelta(days=365 * keep_yearly))
+            .replace(month=1, day=1, hour=0, minute=0, second=0)
+            .strftime(BACKUP_ENT_FMT)
+        )
+
+    prev_backup = all_backups[0]
+    to_remove = {b: False for b in all_backups}
+
+    for backup in all_backups[1:]:
+        # skip all backups made after threshold
+        if backup.name > thresholds["all"]:
+            prev_backup = backup
+            continue
+
+        # leave only one backup per day for backups made after threshold
+        if backup.name > thresholds["daily"]:
+            if (_date_from_backup(prev_backup).date()
+                    == _date_from_backup(backup).date()):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        # leave only one backup per week for backups made after threshold
+        if backup.name > thresholds["weekly"]:
+            if (_date_from_backup(prev_backup).isocalendar()[1]
+                    == _date_from_backup(backup).isocalendar()[1]):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        # leave only one backup per month for backups made after threshold
+        if backup.name > thresholds["monthly"]:
+            if (_date_from_backup(prev_backup).date().replace(day=1)
+                    == _date_from_backup(backup).date().replace(day=1)):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        # leave only one backup per year for backups made after threshold
+        if backup.name > thresholds["yearly"]:
+            if (_date_from_backup(prev_backup).date().replace(month=1, day=1)
+                    == _date_from_backup(backup).date().replace(month=1, day=1)):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        to_remove[backup] = True
+
+    for backup, do_delete in to_remove.items():
+        if not do_delete:
+            continue
+        _lg.info("Removing old backup %s", backup.name)
+        if not dry_run:
+            shutil.rmtree(backup.path)
 
 
 def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions):
     _lg.debug("%s %s", action, entry_relpath)
     if action is not fs.Actions.delete:
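Reviewer note: cleanup_old_backups implements a grandfather-father-son style
rotation. It sweeps the backups from newest to oldest and compares each entry name
against precomputed threshold strings; this works because the zero-padded %Y-based
names sort lexicographically in chronological order, which is also why
BACKUP_ENT_FMT switched from %y to %Y above. A minimal usage sketch with the
defaults spelled out (the backup path is hypothetical):

    import spqr.curateipsum.backup as backup

    backup.cleanup_old_backups(
        backup_dir="/mnt/backups",  # hypothetical location
        dry_run=True,               # only log what would be removed
        keep_all=7,                 # keep everything for a week
        keep_daily=30,              # then one backup per day for a month
        keep_weekly=52,             # then one per week for a year
        keep_monthly=12,            # then one per month for a year
        keep_yearly=5,              # then one per year for five years
    )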
@@ -60,33 +182,37 @@ def initiate_backup(sources,
 
     start_time = time.time()
     start_time_fmt = datetime.fromtimestamp(start_time).strftime(BACKUP_ENT_FMT)
-    cur_backup = os.path.join(backup_dir, start_time_fmt)
-    cur_backup_name = os.path.basename(cur_backup)
-    _lg.debug("Current backup dir: %s", cur_backup)
+    cur_backup = fs.PseudoDirEntry(os.path.join(backup_dir, start_time_fmt))
+    _lg.debug("Current backup dir: %s", cur_backup.path)
 
     latest_backup = _get_latest_backup(backup_dir)
-    if cur_backup == latest_backup:
-        _lg.warning("Latest backup %s was created less than minute ago, exiting",
-                    os.path.basename(latest_backup))
-        return
-
     if latest_backup is None:
-        _lg.info("Creating empty directory for current backup: %s", cur_backup_name)
-        os.mkdir(cur_backup)
+        _lg.info("Creating empty directory for current backup: %s",
+                 cur_backup.name)
+        os.mkdir(cur_backup.path)
     else:
+        # TODO: check that the last backup is finalized
+        if cur_backup.name == latest_backup.name:
+            _lg.warning("Latest backup %s was created less than a minute ago, exiting",
+                        latest_backup.name)
+            return
+
         _lg.info("Copying data from latest backup %s to current backup %s",
-                 os.path.basename(latest_backup), cur_backup_name)
-        hl_res = fs.hardlink_dir(src_dir=latest_backup, dst_dir=cur_backup,
+                 latest_backup.name, cur_backup.name)
+
+        hl_res = fs.hardlink_dir(src_dir=latest_backup.path,
+                                 dst_dir=cur_backup.path,
                                  use_external=external_hardlink)
         if not hl_res:
             _lg.error("Something went wrong during copying data from latest backup,"
-                      " removing created %s", cur_backup_name)
-            shutil.rmtree(cur_backup, ignore_errors=True)
+                      " removing created %s", cur_backup.name)
+            shutil.rmtree(cur_backup.path, ignore_errors=True)
             return
 
         # clean up delta dir from copied backup
-        shutil.rmtree(os.path.join(cur_backup, DELTA_DIR), ignore_errors=True)
+        shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), ignore_errors=True)
 
     rsync_func = fs.rsync_ext if external_rsync else fs.rsync
 
@@ -94,12 +220,12 @@ def initiate_backup(sources,
     for src in sources:
         src_abs = os.path.abspath(src)
         src_name = os.path.basename(src_abs)
-        dst_abs = os.path.join(cur_backup, src_name)
-        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup_name)
+        dst_abs = os.path.join(cur_backup.path, src_name)
+        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
         for entry_relpath, action in rsync_func(src_abs, dst_abs, dry_run=dry_run):
             if latest_backup is not None:
                 process_backed_entry(
-                    backup_dir=cur_backup,
+                    backup_dir=cur_backup.path,
                     entry_relpath=os.path.join(src_name, entry_relpath),
                     action=action
                 )
@@ -107,15 +233,15 @@ def initiate_backup(sources,
 
     # do not create backup on dry-run
     if dry_run:
-        _lg.info("Dry-run, removing created backup: %s", cur_backup_name)
-        shutil.rmtree(cur_backup, ignore_errors=True)
+        _lg.info("Dry-run, removing created backup: %s", cur_backup.name)
+        shutil.rmtree(cur_backup.path, ignore_errors=True)
     # do not create backup if no change from previous one
     elif latest_backup is not None and not backup_changed:
         _lg.info("Newly created backup %s is the same as previous one %s, removing",
-                 cur_backup_name, os.path.basename(latest_backup))
-        shutil.rmtree(cur_backup, ignore_errors=True)
+                 cur_backup.name, latest_backup.name)
+        shutil.rmtree(cur_backup.path, ignore_errors=True)
     else:
-        _lg.info("Backup created: %s", cur_backup_name)
+        _lg.info("Backup created: %s", cur_backup.name)
 
     end_time = time.time()
     spend_time = end_time - start_time
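Reviewer note: the rewritten same-minute guard compares entry names instead of
comparing a joined path against a DirEntry, which works because the names carry
only minute resolution, so two runs started within the same minute collide on the
same name. A quick check:

    from datetime import datetime

    BACKUP_ENT_FMT = "%Y%m%d_%H%M"
    a = datetime(2021, 10, 20, 3, 0, 5).strftime(BACKUP_ENT_FMT)
    b = datetime(2021, 10, 20, 3, 0, 40).strftime(BACKUP_ENT_FMT)
    assert a == b == "20211020_0300"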
spqr/curateipsum/fs.py

@@ -22,6 +22,27 @@ class Actions(enum.Enum):
     update_owner = enum.auto()
     create = enum.auto()
 
+
+class PseudoDirEntry:
+    """ Minimal os.DirEntry-like wrapper for an arbitrary path. """
+
+    def __init__(self, path):
+        self.path = os.path.realpath(path)
+        self.name = os.path.basename(self.path)
+        self._is_dir = None
+        self._stat = None
+
+    def __str__(self):
+        return self.name
+
+    def is_dir(self) -> bool:
+        if self._is_dir is None:
+            self._is_dir = os.path.isdir(self.path)
+        return self._is_dir
+
+    def stat(self):
+        if self._stat is None:
+            self._stat = os.lstat(self.path)
+        return self._stat
+
+
 # *deleting will_be_deleted
 # >f.st.... .gitignore
 # >f+++++++ LICENSE
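Reviewer note: PseudoDirEntry exists because os.DirEntry cannot be instantiated
directly; it lets a path that os.scandir never produced (such as the not-yet-created
current backup directory) flow through code expecting the DirEntry interface, with
is_dir() and stat() lazily computed and cached. A small sketch (the path is
illustrative):

    import spqr.curateipsum.fs as fs

    entry = fs.PseudoDirEntry("/tmp")
    print(entry.name, entry.path)  # basename and realpath of the wrapped path
    print(entry.is_dir())          # cached os.path.isdir() result
    print(entry.stat().st_size)    # cached os.lstat() result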
@@ -79,7 +100,9 @@ def rsync_ext(src, dst, dry_run=False):
     rsync_args.append(str(dst))
 
     _lg.info("Executing external command: %s", " ".join(rsync_args))
-    process = subprocess.Popen(rsync_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    process = subprocess.Popen(rsync_args,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.STDOUT)
     with process.stdout:
         prev_line = None
         for line in iter(process.stdout.readline, b""):
@@ -246,10 +269,12 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
     dst_root_abs = os.path.abspath(dst_dir)
 
     if not os.path.isdir(src_root_abs):
-        raise RuntimeError(f"Error during reading source directory: {src_root_abs}")
+        raise RuntimeError("Error during reading source directory: %s"
+                           % src_root_abs)
     if os.path.exists(dst_root_abs):
         if not os.path.isdir(dst_root_abs):
-            raise RuntimeError("Destination path is not a directory: %s" % dst_root_abs)
+            raise RuntimeError("Destination path is not a directory: %s"
+                               % dst_root_abs)
     else:
         os.mkdir(dst_root_abs)
 
@@ -278,19 +303,22 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
         # rewrite dst if it has a different type than src
         if src_entry.is_file(follow_symlinks=False):
             if not dst_entry.is_file(follow_symlinks=False):
-                _lg.debug("Rewriting (src is a file, dst is not a file): %s", rel_path)
+                _lg.debug("Rewriting (src is a file, dst is not a file): %s",
+                          rel_path)
                 update_direntry(src_entry, dst_entry)
                 yield rel_path, Actions.rewrite
                 continue
         if src_entry.is_dir(follow_symlinks=False):
             if not dst_entry.is_dir(follow_symlinks=False):
-                _lg.debug("Rewriting (src is a dir, dst is not a dir): %s", rel_path)
+                _lg.debug("Rewriting (src is a dir, dst is not a dir): %s",
+                          rel_path)
                 update_direntry(src_entry, dst_entry)
                 yield rel_path, Actions.rewrite
                 continue
         if src_entry.is_symlink():
             if not dst_entry.is_symlink():
-                _lg.debug("Rewriting (src is a symlink, dst is not a symlink): %s", rel_path)
+                _lg.debug("Rewriting (src is a symlink, dst is not a symlink): %s",
+                          rel_path)
                 update_direntry(src_entry, dst_entry)
                 yield rel_path, Actions.rewrite
                 continue
@@ -379,7 +407,9 @@ def _recursive_hardlink_ext(src: str, dst: str) -> bool:
     src_content = glob.glob(f"{src}/*")
     cmd = [cp, "--archive", "--verbose", "--link", *src_content, dst]
     _lg.info("Executing external command: %s", " ".join(cmd))
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    process = subprocess.Popen(cmd,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.STDOUT)
     with process.stdout:
         for line in iter(process.stdout.readline, b""):
             _lg.debug("%s: %s", cp, line.decode("utf-8").strip())
@@ -458,10 +488,11 @@ def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str):
 
     # check source entity and destination directory
     if not os.path.exists(src_full_path):
-        raise RuntimeError(f"Error reading source entity: {src_full_path}")
+        raise RuntimeError("Error reading source entity: %s" % src_full_path)
     if os.path.exists(dst_dir_abs):
         if not os.path.isdir(dst_dir_abs):
-            raise RuntimeError("Destination path is not a directory: %s" % dst_dir_abs)
+            raise RuntimeError("Destination path is not a directory: %s"
+                               % dst_dir_abs)
     else:
         os.mkdir(dst_dir_abs)
tests/test_backups.py (new file, 203 lines)

@@ -0,0 +1,203 @@
+import os
+import random
+import string
+import tempfile
+from unittest import TestCase, mock
+from datetime import datetime
+
+import spqr.curateipsum.backup as bk
+import spqr.curateipsum.fs as fs
+
+
+class TestBackupCleanup(TestCase):
+    def setUp(self) -> None:
+        self.backup_dir = tempfile.TemporaryDirectory(prefix="backup_")
+
+    def tearDown(self) -> None:
+        self.backup_dir.cleanup()
+
+    def _add_backup(self, backup_name: str) -> fs.PseudoDirEntry:
+        backup = fs.PseudoDirEntry(os.path.join(self.backup_dir.name, backup_name))
+        os.mkdir(backup.path)
+
+        fd, path = tempfile.mkstemp(prefix="backup_file_", dir=backup.path)
+        with open(fd, "w") as f:
+            f.write(''.join(random.choices(string.printable, k=128)))
+        return backup
+
+    @staticmethod
+    def _check_backup_not_empty(backup: fs.PseudoDirEntry) -> bool:
+        return bool(os.listdir(backup.path))
+
+    def _check_backups(self, expected_backups):
+        backups_list = os.listdir(self.backup_dir.name)
+        self.assertEqual(sorted(b.name for b in expected_backups),
+                         sorted(backups_list))
+        for b in expected_backups:
+            self.assertTrue(self._check_backup_not_empty(b))
+
+    def _run_cleanup(self, **kwargs):
+        """ Run cleanup_old_backups with all keep-parameters nulled out. """
+        cleanup_kwargs = {
+            "backup_dir": self.backup_dir.name,
+            "dry_run": False,
+            "keep_all": None,
+            "keep_daily": None,
+            "keep_weekly": None,
+            "keep_monthly": None,
+            "keep_yearly": None,
+        }
+        cleanup_kwargs.update(**kwargs)
+        bk.cleanup_old_backups(**cleanup_kwargs)
+
+    def test_no_backups(self):
+        """ Test behaviour with no available backups. """
+        bk.cleanup_old_backups(self.backup_dir.name)
+        self.assertFalse(os.listdir(self.backup_dir.name))
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_only_one_backup(self, mock_datetime):
+        """ Test that the only backup is never removed. """
+        mock_datetime.now.return_value = datetime(2021, 10, 20)
+        only_backup = self._add_backup("20010101_0000")
+        self._run_cleanup(keep_all=1)
+        self._check_backups([only_backup])
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_at_least_one_should_be_left(self, mock_datetime):
+        """ Test that at least one backup is always left. """
+        mock_datetime.now.return_value = datetime(2021, 10, 20)
+        backups = [
+            self._add_backup("20211103_0300"),
+            self._add_backup("20201216_0100"),
+            self._add_backup("20200716_0100"),
+            self._add_backup("20181116_0100"),
+        ]
+        expected_backups = [backups[0]]
+        self._run_cleanup()
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_keep_all_threshold_only(self, mock_datetime):
+        """ Test the threshold for keeping all backups. """
+        mock_datetime.now.return_value = datetime(2021, 10, 20)
+        backups = [
+            self._add_backup("20211019_0300"),
+            self._add_backup("20211017_0100"),
+            self._add_backup("20211016_2300"),
+        ]
+        expected_backups = backups[:2]
+        self._run_cleanup(keep_all=3)
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_keep_daily_threshold_only(self, mock_datetime):
+        """ Test the threshold for keeping daily backups. """
+        mock_datetime.now.return_value = datetime(2021, 10, 20)
+        backups = [
+            self._add_backup("20211019_0300"),
+            self._add_backup("20211017_2100"),
+            self._add_backup("20211017_0100"),
+            self._add_backup("20211017_0030"),
+            self._add_backup("20211016_2300"),
+            self._add_backup("20211016_0100"),
+        ]
+        expected_backups = [backups[0], backups[3]]
+        self._run_cleanup(keep_daily=3)
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_keep_all_and_daily_thresholds(self, mock_datetime):
+        """ Test combined thresholds for keeping all and daily backups. """
+        mock_datetime.now.return_value = datetime(2021, 10, 20)
+        backups = [
+            self._add_backup("20211019_0300"),
+            self._add_backup("20211017_0200"),
+            self._add_backup("20211017_0100"),
+            self._add_backup("20211016_2300"),
+            self._add_backup("20211016_2200"),
+            self._add_backup("20211015_2200"),
+            self._add_backup("20211015_1500"),
+            self._add_backup("20211015_0200"),
+            self._add_backup("20211014_2200"),
+            self._add_backup("20211014_2000"),
+            self._add_backup("20211014_1232"),
+        ]
+        expected_backups = backups[0:3] + [backups[4]] + [backups[7]]
+        self._run_cleanup(keep_all=3, keep_daily=5)
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_keep_weekly_threshold_only(self, mock_datetime):
+        """ Test the threshold for keeping weekly backups. """
+        mock_datetime.now.return_value = datetime(2021, 11, 11)
+        backups = [
+            self._add_backup("20211111_0300"),
+            self._add_backup("20211110_0300"),
+            self._add_backup("20211108_0100"),
+            self._add_backup("20211107_2300"),
+            self._add_backup("20211107_0100"),
+            self._add_backup("20211031_0100"),
+            self._add_backup("20211025_0100"),
+            self._add_backup("20211024_0100"),
+            self._add_backup("20211023_0100"),
+            self._add_backup("20211022_0100"),
+            self._add_backup("20211008_0100"),
+            self._add_backup("20211007_0100"),
+            self._add_backup("20211004_0100"),
+            self._add_backup("20211003_0100"),
+        ]
+        expected_backups = [backups[2], backups[4], backups[6],
+                            backups[9], backups[12]]
+        self._run_cleanup(keep_weekly=5)
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_keep_monthly_threshold_only(self, mock_datetime):
+        """ Test the threshold for keeping monthly backups. """
+        mock_datetime.now.return_value = datetime(2021, 11, 11)
+        backups = [
+            self._add_backup("20211103_0300"),
+            self._add_backup("20211019_0300"),
+            self._add_backup("20211017_2100"),
+            self._add_backup("20211017_0100"),
+            self._add_backup("20210916_2300"),
+            self._add_backup("20210916_0100"),
+            self._add_backup("20210816_0100"),
+            self._add_backup("20210810_0000"),
+            self._add_backup("20210716_0100"),
+        ]
+        expected_backups = [backups[0], backups[3], backups[5], backups[7]]
+        self._run_cleanup(keep_monthly=3)
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_keep_yearly_threshold_only(self, mock_datetime):
+        """ Test the threshold for keeping yearly backups. """
+        mock_datetime.now.return_value = datetime(2021, 11, 11)
+        backups = [
+            self._add_backup("20211103_0300"),
+            self._add_backup("20210810_0000"),
+            self._add_backup("20210716_0100"),
+            self._add_backup("20201216_0100"),
+            self._add_backup("20200716_0100"),
+            self._add_backup("20190316_0100"),
+            self._add_backup("20181116_0100"),
+        ]
+        expected_backups = [backups[2], backups[4], backups[5], backups[6]]
+        self._run_cleanup(keep_yearly=3)
+        self._check_backups(expected_backups)
+
+    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
+    def test_dry_run(self, mock_datetime):
+        """ Test that a dry run does not remove anything. """
+        mock_datetime.now.return_value = datetime(2021, 11, 11)
+        backups = [
+            self._add_backup("20211103_0300"),
+            self._add_backup("20210810_0000"),
+            self._add_backup("20210716_0100"),
+            self._add_backup("20200716_0100"),
+            self._add_backup("20181116_0100"),
+        ]
+        self._run_cleanup(keep_all=2, dry_run=True)
+        self._check_backups(backups)
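Reviewer note: the tests pin "now" by patching the datetime name imported into the
backup module with wraps=datetime, so now() can be fixed per test while strptime
and the other attributes still delegate to the real class. A condensed sketch of
the pattern:

    from datetime import datetime
    from unittest import mock

    import spqr.curateipsum.backup as bk

    with mock.patch(f"{bk.__name__}.datetime", wraps=datetime) as md:
        md.now.return_value = datetime(2021, 11, 11)
        assert bk.datetime.now() == datetime(2021, 11, 11)            # pinned
        assert bk.datetime.strptime("20211111_0000", "%Y%m%d_%H%M")   # still delegates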