Add backups cleanup
@@ -6,44 +6,166 @@ import logging
 import os
 import shutil
 import time
-from datetime import datetime
-from typing import Optional
+from datetime import datetime, timedelta
+from typing import Optional, Iterable

 import spqr.curateipsum.fs as fs

-BACKUP_ENT_FMT = "%y%m%d_%H%M"
+BACKUP_ENT_FMT = "%Y%m%d_%H%M"
 DELTA_DIR = "_delta"

 _lg = logging.getLogger(__name__)


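The format-string change above swaps two-digit years for four-digit ones, presumably so backup directory names stay unambiguous and keep sorting chronologically when compared as plain strings, which the new cleanup code relies on. A minimal illustration with a made-up timestamp:

    from datetime import datetime

    ts = datetime(2024, 1, 31, 2, 30)    # illustrative backup start time
    print(ts.strftime("%y%m%d_%H%M"))    # 240131_0230   (old format, two-digit year)
    print(ts.strftime("%Y%m%d_%H%M"))    # 20240131_0230 (new format, four-digit year)
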
-def _is_backup_entity(entity_path: str) -> bool:
+def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
     """ Check if entity_path is a single backup dir. """
-    if not os.path.isdir(entity_path):
+    if not backup_entry.is_dir():
         return False
     try:
-        datetime.strptime(os.path.basename(entity_path), BACKUP_ENT_FMT)
+        datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
         return True
     except ValueError:
         return False


-def _get_latest_backup(backup_dir: str) -> Optional[str]:
+def _iterate_backups(backup_dir: str) -> Iterable[os.DirEntry]:
+    b_iter = os.scandir(backup_dir)
+
+    b_ent: os.DirEntry
+    for b_ent in b_iter:
+        if not _is_backup_entity(b_ent):
+            continue
+        if not os.listdir(b_ent.path):
+            _lg.info("Removing empty backup entity: %s", b_ent.name)
+            os.rmdir(b_ent.path)
+            continue
+        yield b_ent
+
+    b_iter.close()
+
+
+def _get_latest_backup(backup_dir: str) -> Optional[os.DirEntry]:
     """ Returns path to latest backup created in backup_dir or None. """
-    backups = sorted(os.listdir(backup_dir), reverse=True)
-
-    for b_ent in backups:
-        b_ent_abs = os.path.join(backup_dir, b_ent)
-        if not _is_backup_entity(b_ent_abs):
-            continue
-        if not os.listdir(b_ent_abs):
-            _lg.info("Removing empty backup entity: %s", os.path.basename(b_ent_abs))
-            os.rmdir(b_ent_abs)
-            continue
-        return b_ent_abs
-
+    all_backups = sorted(_iterate_backups(backup_dir), key=lambda e: e.name)
+    if all_backups:
+        return all_backups[-1]
     return None


+def _date_from_backup(backup: os.DirEntry) -> datetime:
+    return datetime.strptime(backup.name, BACKUP_ENT_FMT)
+
+
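With these helpers, anything in backup_dir whose name does not parse under BACKUP_ENT_FMT is simply skipped, and empty timestamped directories are pruned as a side effect of iteration. A self-contained sketch of that filtering; the module path in the import is assumed from the diff context, and the directory names are made up:

    import os
    import tempfile

    from spqr.curateipsum.backup import _get_latest_backup  # module name assumed

    with tempfile.TemporaryDirectory() as backup_dir:
        os.mkdir(os.path.join(backup_dir, "20240615_1400"))              # valid name, empty: gets pruned
        os.mkdir(os.path.join(backup_dir, "_delta"))                     # not a timestamp: ignored
        os.makedirs(os.path.join(backup_dir, "20240614_0900", "data"))   # valid and non-empty: kept
        latest = _get_latest_backup(backup_dir)
        print(latest.name if latest else None)                           # 20240614_0900
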
+def cleanup_old_backups(
+        backup_dir: str,
+        dry_run: bool = False,
+        keep_all: int = 7,
+        keep_daily: int = 30,
+        keep_weekly: int = 52,
+        keep_monthly: int = 12,
+        keep_yearly: int = 5,
+        min_free_space: int = 0
+):
+    """
+    Delete old backups. Never deletes the only backup.
+    :param backup_dir: full path to backup directory.
+    :param dry_run: don't actually delete anything.
+    :param keep_all: the number of days during which every backup is kept.
+    :param keep_daily: the number of days during which daily backups are kept.
+    :param keep_weekly: the number of weeks for which one backup per week is kept.
+    :param keep_monthly: the number of months (1 month = 30 days) for which
+        one backup per month is kept.
+    :param keep_yearly: the number of years for which one backup per year is kept.
+    :param min_free_space: not used right now.
+    :return:
+    """
+    all_backups = sorted(_iterate_backups(backup_dir),
+                         key=lambda e: e.name, reverse=True)
+    if dry_run:
+        _lg.info("Dry-run, no backups will actually be removed")
+    if not all_backups:
+        _lg.debug("No backups, exiting")
+        return
+    elif len(all_backups) == 1:
+        _lg.debug("Only one backup (%s) exists, will not remove it",
+                  all_backups[0].name)
+        return
+
+    now = datetime.now()
+    thresholds = {k: now.strftime(BACKUP_ENT_FMT)
+                  for k in ("all", "daily", "weekly", "monthly", "yearly")}
+    if keep_all is not None:
+        thresholds["all"] = ((now - timedelta(days=keep_all))
+                             .replace(hour=0, minute=0, second=0)
+                             .strftime(BACKUP_ENT_FMT))
+    if keep_daily is not None:
+        thresholds["daily"] = ((now - timedelta(days=keep_daily))
+                               .replace(hour=0, minute=0, second=0)
+                               .strftime(BACKUP_ENT_FMT))
+    if keep_weekly is not None:
+        thresholds["weekly"] = (
+            (now - timedelta(weeks=keep_weekly, days=now.weekday()))
+            .strftime(BACKUP_ENT_FMT)
+        )
+    if keep_monthly is not None:
+        thresholds["monthly"] = ((now - timedelta(days=30*keep_monthly))
+                                 .replace(day=1, hour=0, minute=0, second=0)
+                                 .strftime(BACKUP_ENT_FMT))
+    if keep_yearly is not None:
+        thresholds["yearly"] = (
+            (now - timedelta(days=365*keep_yearly))
+            .replace(month=1, day=1, hour=0, minute=0, second=0)
+            .strftime(BACKUP_ENT_FMT)
+        )
+
+    prev_backup = all_backups[0]
+    to_remove = {b: False for b in all_backups}
+
+    for backup in all_backups[1:]:
+        # skip all backups made after threshold
+        if backup.name > thresholds["all"]:
+            prev_backup = backup
+            continue
+
+        # leave only one backup per day for backups made after threshold
+        if backup.name > thresholds["daily"]:
+            if (_date_from_backup(prev_backup).date()
+                    == _date_from_backup(backup).date()):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        # leave only one backup per week for backups made after threshold
+        if backup.name > thresholds["weekly"]:
+            if (_date_from_backup(prev_backup).isocalendar()[1]
+                    == _date_from_backup(backup).isocalendar()[1]):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        # leave only one backup per month for backups made after threshold
+        if backup.name > thresholds["monthly"]:
+            if (_date_from_backup(prev_backup).date().replace(day=1)
+                    == _date_from_backup(backup).date().replace(day=1)):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        # leave only one backup per year for backups made after threshold
+        if backup.name > thresholds["yearly"]:
+            if (_date_from_backup(prev_backup).date().replace(month=1, day=1)
+                    == _date_from_backup(backup).date().replace(month=1, day=1)):
+                to_remove[prev_backup] = True
+            prev_backup = backup
+            continue
+
+        to_remove[backup] = True
+
+    for backup, do_delete in to_remove.items():
+        # only log and delete backups actually marked for removal
+        if not do_delete:
+            continue
+        _lg.info("Removing old backup %s", backup.name)
+        if not dry_run:
+            shutil.rmtree(backup.path)
+
+
 def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions):
     _lg.debug("%s %s", action, entry_relpath)
     if action is not fs.Actions.delete:
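The retention pass works purely on directory names: because BACKUP_ENT_FMT is a fixed-width %Y%m%d_%H%M timestamp, comparing names as strings is the same as comparing creation times, and each threshold is rendered through the same format before the comparison. A rough sketch of how two of the thresholds fall out for a fixed "now"; the date and the keep_* values are illustrative only:

    from datetime import datetime, timedelta

    FMT = "%Y%m%d_%H%M"
    now = datetime(2024, 6, 15, 14, 0)
    keep_all, keep_daily = 7, 30

    thr_all = (now - timedelta(days=keep_all)).replace(hour=0, minute=0).strftime(FMT)
    thr_daily = (now - timedelta(days=keep_daily)).replace(hour=0, minute=0).strftime(FMT)

    print(thr_all)                      # 20240608_0000: every backup newer than this is kept
    print(thr_daily)                    # 20240516_0000: one backup per day survives in between
    print("20240610_0230" > thr_all)    # True: string order matches chronological order
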
@@ -60,33 +182,37 @@ def initiate_backup(sources,

     start_time = time.time()
     start_time_fmt = datetime.fromtimestamp(start_time).strftime(BACKUP_ENT_FMT)
-    cur_backup = os.path.join(backup_dir, start_time_fmt)
-    cur_backup_name = os.path.basename(cur_backup)
-    _lg.debug("Current backup dir: %s", cur_backup)
+    cur_backup = fs.PseudoDirEntry(os.path.join(backup_dir, start_time_fmt))
+    _lg.debug("Current backup dir: %s", cur_backup.path)

     latest_backup = _get_latest_backup(backup_dir)
-    if cur_backup == latest_backup:
-        _lg.warning("Latest backup %s was created less than minute ago, exiting",
-                    os.path.basename(latest_backup))
-        return

     if latest_backup is None:
-        _lg.info("Creating empty directory for current backup: %s", cur_backup_name)
-        os.mkdir(cur_backup)
-    else:
-        _lg.info("Copying data from latest backup %s to current backup %s",
-                 os.path.basename(latest_backup), cur_backup_name)
+        _lg.info("Creating empty directory for current backup: %s",
+                 cur_backup.name)
+        os.mkdir(cur_backup.path)
-        hl_res = fs.hardlink_dir(src_dir=latest_backup, dst_dir=cur_backup,
+    else:
+        # TODO check last backup is finalized
+        if cur_backup.name == latest_backup.name:
+            _lg.warning("Latest backup %s was created less than minute ago, exiting",
+                        latest_backup.name)
+            return
+
+        _lg.info("Copying data from latest backup %s to current backup %s",
+                 latest_backup.name, cur_backup.name)
+
+        hl_res = fs.hardlink_dir(src_dir=latest_backup.path,
+                                 dst_dir=cur_backup.path,
                                  use_external=external_hardlink)
         if not hl_res:
             _lg.error("Something went wrong during copying data from latest backup,"
-                      " removing created %s", cur_backup_name)
-            shutil.rmtree(cur_backup, ignore_errors=True)
+                      " removing created %s", cur_backup.name)
+            shutil.rmtree(cur_backup.path, ignore_errors=True)
             return

         # clean up delta dir from copied backup
-        shutil.rmtree(os.path.join(cur_backup, DELTA_DIR), ignore_errors=True)
+        shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), ignore_errors=True)

     rsync_func = fs.rsync_ext if external_rsync else fs.rsync

@@ -94,12 +220,12 @@ def initiate_backup(sources,
     for src in sources:
         src_abs = os.path.abspath(src)
         src_name = os.path.basename(src_abs)
-        dst_abs = os.path.join(cur_backup, src_name)
-        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup_name)
+        dst_abs = os.path.join(cur_backup.path, src_name)
+        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
         for entry_relpath, action in rsync_func(src_abs, dst_abs, dry_run=dry_run):
             if latest_backup is not None:
                 process_backed_entry(
-                    backup_dir=cur_backup,
+                    backup_dir=cur_backup.path,
                     entry_relpath=os.path.join(src_name, entry_relpath),
                     action=action
                 )
@@ -107,15 +233,15 @@ def initiate_backup(sources,

     # do not create backup on dry-run
     if dry_run:
-        _lg.info("Dry-run, removing created backup: %s", cur_backup_name)
-        shutil.rmtree(cur_backup, ignore_errors=True)
+        _lg.info("Dry-run, removing created backup: %s", cur_backup.name)
+        shutil.rmtree(cur_backup.path, ignore_errors=True)
     # do not create backup if no change from previous one
     elif latest_backup is not None and not backup_changed:
         _lg.info("Newly created backup %s is the same as previous one %s, removing",
-                 cur_backup_name, os.path.basename(latest_backup))
-        shutil.rmtree(cur_backup, ignore_errors=True)
+                 cur_backup.name, latest_backup.name)
+        shutil.rmtree(cur_backup.path, ignore_errors=True)
     else:
-        _lg.info("Backup created: %s", cur_backup_name)
+        _lg.info("Backup created: %s", cur_backup.name)

     end_time = time.time()
     spend_time = end_time - start_time

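With initiate_backup now discarding a freshly made backup that does not differ from the previous one, cleanup_old_backups is the natural companion step after a successful run. A sketch of a stand-alone dry-run invocation; the module path in the import is assumed from the diff context and the backup directory is made up:

    import logging

    from spqr.curateipsum.backup import cleanup_old_backups  # module name assumed

    logging.basicConfig(level=logging.INFO)
    # dry_run=True only reports which backups would be removed
    cleanup_old_backups("/var/backups/myhost", dry_run=True, keep_all=7, keep_daily=30)
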
@@ -22,6 +22,27 @@ class Actions(enum.Enum):
     update_owner = enum.auto()
     create = enum.auto()


+class PseudoDirEntry:
+    def __init__(self, path):
+        self.path = os.path.realpath(path)
+        self.name = os.path.basename(self.path)
+        self._is_dir = None
+        self._stat = None
+
+    def __str__(self):
+        return self.name
+
+    def is_dir(self) -> bool:
+        if self._is_dir is None:
+            self._is_dir = os.path.isdir(self.path)
+        return self._is_dir
+
+    def stat(self):
+        if self._stat is None:
+            self._stat = os.lstat(self.path)
+        return self._stat
+
+
 # *deleting will_be_deleted
 # >f.st.... .gitignore
 # >f+++++++ LICENSE
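PseudoDirEntry gives the freshly created backup directory the same .name / .path / .is_dir() / .stat() surface as the os.DirEntry objects yielded by os.scandir, so initiate_backup can treat the current backup and the latest existing one uniformly. A small usage sketch; the path is made up:

    from spqr.curateipsum.fs import PseudoDirEntry

    entry = PseudoDirEntry("/var/backups/myhost/20240615_1400")  # hypothetical backup dir
    print(entry.name)      # "20240615_1400"
    print(entry.path)      # resolved absolute path
    print(entry.is_dir())  # True only if the directory actually exists
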
@@ -79,7 +100,9 @@ def rsync_ext(src, dst, dry_run=False):
     rsync_args.append(str(dst))

     _lg.info("Executing external command: %s", " ".join(rsync_args))
-    process = subprocess.Popen(rsync_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    process = subprocess.Popen(rsync_args,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.STDOUT)
     with process.stdout:
         prev_line = None
         for line in iter(process.stdout.readline, b""):
@@ -246,10 +269,12 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
     dst_root_abs = os.path.abspath(dst_dir)

     if not os.path.isdir(src_root_abs):
-        raise RuntimeError(f"Error during reading source directory: {src_root_abs}")
+        raise RuntimeError("Error during reading source directory: %s"
+                           % src_root_abs)
     if os.path.exists(dst_root_abs):
         if not os.path.isdir(dst_root_abs):
-            raise RuntimeError("Destination path is not a directory: %s" % dst_root_abs)
+            raise RuntimeError("Destination path is not a directory: %s"
+                               % dst_root_abs)
     else:
         os.mkdir(dst_root_abs)

@@ -278,19 +303,22 @@ def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
         # rewrite dst if it has different than src type
         if src_entry.is_file(follow_symlinks=False):
             if not dst_entry.is_file(follow_symlinks=False):
-                _lg.debug("Rewriting (src is a file, dst is not a file): %s", rel_path)
+                _lg.debug("Rewriting (src is a file, dst is not a file): %s",
+                          rel_path)
                 update_direntry(src_entry, dst_entry)
                 yield rel_path, Actions.rewrite
                 continue
         if src_entry.is_dir(follow_symlinks=False):
             if not dst_entry.is_dir(follow_symlinks=False):
-                _lg.debug("Rewriting (src is a dir, dst is not a dir): %s", rel_path)
+                _lg.debug("Rewriting (src is a dir, dst is not a dir): %s",
+                          rel_path)
                 update_direntry(src_entry, dst_entry)
                 yield rel_path, Actions.rewrite
                 continue
         if src_entry.is_symlink():
             if not dst_entry.is_symlink():
-                _lg.debug("Rewriting (src is a symlink, dst is not a symlink): %s", rel_path)
+                _lg.debug("Rewriting (src is a symlink, dst is not a symlink): %s",
+                          rel_path)
                 update_direntry(src_entry, dst_entry)
                 yield rel_path, Actions.rewrite
                 continue
@@ -379,7 +407,9 @@ def _recursive_hardlink_ext(src: str, dst: str) -> bool:
     src_content = glob.glob(f"{src}/*")
     cmd = [cp, "--archive", "--verbose", "--link", *src_content, dst]
     _lg.info("Executing external command: %s", " ".join(cmd))
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    process = subprocess.Popen(cmd,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.STDOUT)
     with process.stdout:
         for line in iter(process.stdout.readline, b""):
             _lg.debug("%s: %s", cp, line.decode("utf-8").strip())
@@ -458,10 +488,11 @@ def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str):

     # check source entity and destination directory
     if not os.path.exists(src_full_path):
-        raise RuntimeError(f"Error reading source entity: {src_full_path}")
+        raise RuntimeError("Error reading source entity: %s" % src_full_path)
     if os.path.exists(dst_dir_abs):
         if not os.path.isdir(dst_dir_abs):
-            raise RuntimeError("Destination path is not a directory: %s" % dst_dir_abs)
+            raise RuntimeError("Destination path is not a directory: %s"
+                               % dst_dir_abs)
     else:
         os.mkdir(dst_dir_abs)