cura-te-ipsum/spqr/curateipsum/backup.py

256 lines
9.3 KiB
Python
Raw Normal View History

"""
Module with backup functions.
"""
import logging
import os
2021-06-21 05:27:18 +00:00
import shutil
2021-11-11 23:53:21 +00:00
from datetime import datetime, timedelta
from typing import Optional, Iterable
2021-06-21 07:20:45 +00:00
import spqr.curateipsum.fs as fs
2021-11-12 08:30:11 +00:00
BACKUP_ENT_FMT = "%Y%m%d_%H%M%S"
LOCK_FILE = ".backups_lock"
DELTA_DIR = ".backup_delta"
_lg = logging.getLogger(__name__)
2021-11-11 23:53:21 +00:00
def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
    """ Check whether backup_entry is a single backup directory.

    A backup entity is a directory whose name parses with BACKUP_ENT_FMT.
    """
    if not backup_entry.is_dir():
        return False
    try:
        datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
    except ValueError:
        # Name does not match the timestamp format -> not a backup dir.
        return False
    return True
2021-11-11 23:53:21 +00:00
def _iterate_backups(backup_dir: str) -> Iterable[os.DirEntry]:
    """ Yield backup entries found directly under backup_dir.

    Side effect: empty backup directories are removed on the fly and are
    not yielded.

    :param backup_dir: full path to the backup root directory.
    :return: generator of os.DirEntry objects for non-empty backup dirs.
    """
    # Fix: the original closed the scandir handle only after full
    # exhaustion; an abandoned generator leaked the directory handle.
    # The with-statement closes it whenever the generator is finalized.
    with os.scandir(backup_dir) as b_iter:
        b_ent: os.DirEntry
        for b_ent in b_iter:
            if not _is_backup_entity(b_ent):
                continue
            if not os.listdir(b_ent.path):
                _lg.info("Removing empty backup entity: %s", b_ent.name)
                os.rmdir(b_ent.path)
                continue
            yield b_ent
2021-11-11 23:53:21 +00:00
def _get_latest_backup(backup_dir: str) -> Optional[os.DirEntry]:
    """ Returns path to latest backup created in backup_dir or None. """
    # Names sort chronologically (BACKUP_ENT_FMT), so the max name is
    # the newest backup; default covers the no-backups case.
    return max(_iterate_backups(backup_dir),
               key=lambda ent: ent.name,
               default=None)
2021-11-11 23:53:21 +00:00
def _date_from_backup(backup: os.DirEntry) -> datetime:
    """ Parse a backup entry's directory name into its timestamp. """
    return datetime.strptime(backup.name, BACKUP_ENT_FMT)
2021-11-12 08:30:11 +00:00
def set_backups_lock(backup_dir: str, force: bool = False) -> bool:
    """ Try to acquire the backup lock file.

    :param backup_dir: full path to the backup root directory.
    :param force: remove a pre-existing (stale) lock instead of failing.
    :return: False if a previous backup is still running (lock exists
        and force is not set), True once the lock is taken.
    """
    lock_file_path = os.path.join(backup_dir, LOCK_FILE)
    if force:
        # Drop a stale lock; it may already be gone.
        try:
            os.unlink(lock_file_path)
        except FileNotFoundError:
            pass
    # Fix: the original exists()-then-open() pair was racy (TOCTOU);
    # O_CREAT|O_EXCL creates the lock file atomically.
    try:
        fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    except FileExistsError:
        return False
    os.close(fd)
    return True
def release_backups_lock(backup_dir: str) -> None:
    """ Remove the backup lock file; a missing lock is not an error.

    :param backup_dir: full path to the backup root directory.
    """
    lock_file_path = os.path.join(backup_dir, LOCK_FILE)
    # Fix: exists()-then-unlink() was racy (TOCTOU); EAFP removal is
    # atomic and tolerates the file already being gone.
    try:
        os.unlink(lock_file_path)
    except FileNotFoundError:
        pass
2021-11-11 23:53:21 +00:00
def cleanup_old_backups(
    backup_dir: str,
    dry_run: bool = False,
    keep_all: int = 7,
    keep_daily: int = 30,
    keep_weekly: int = 52,
    keep_monthly: int = 12,
    keep_yearly: int = 5,
    min_free_space: int = 0
) -> None:
    """
    Delete old backups. Never deletes the only backup.
    :param backup_dir: full path to backup directory.
    :param dry_run: don't do anything actually.
    :param keep_all: the number of days that all backups must be kept.
    :param keep_daily: the number of days that all daily backups must be kept.
    :param keep_weekly: the number of weeks of which one weekly backup must be kept.
    :param keep_monthly: the number of months (1 month = 30 days) of which
    one monthly backup must be kept.
    :param keep_yearly: the number of years of which one yearly backup must be kept.
    :param min_free_space: not used right now
    :return:
    """
    # Newest first: the retention scan below walks from most recent to oldest.
    all_backups = sorted(_iterate_backups(backup_dir),
                         key=lambda e: e.name, reverse=True)
    if dry_run:
        _lg.info("Dry-run, no backups will be actually removed")
    if not all_backups:
        _lg.debug("No backups, exiting")
        return
    elif len(all_backups) == 1:
        # Safety guard: the sole existing backup is never deleted.
        _lg.debug("Only one backup (%s) exists, will not remove it",
                  all_backups[0].name)
        return
    now = datetime.now()
    # Thresholds are compared as formatted names: with BACKUP_ENT_FMT,
    # lexicographic order equals chronological order. Each tier defaults
    # to "now", so an explicit keep_* of None disables that tier (no
    # backup name can be greater than the current timestamp).
    thresholds = {k: now.strftime(BACKUP_ENT_FMT)
                  for k in ("all", "daily", "weekly", "monthly", "yearly")}
    if keep_all is not None:
        # Midnight keep_all days ago: everything newer is kept untouched.
        thresholds["all"] = ((now - timedelta(days=keep_all))
                             .replace(hour=0, minute=0, second=0)
                             .strftime(BACKUP_ENT_FMT))
    if keep_daily is not None:
        thresholds["daily"] = ((now - timedelta(days=keep_daily))
                               .replace(hour=0, minute=0, second=0)
                               .strftime(BACKUP_ENT_FMT))
    if keep_weekly is not None:
        # days=now.weekday() aligns the threshold to the start of a week.
        thresholds["weekly"] = (
            (now - timedelta(weeks=keep_weekly, days=now.weekday()))
            .strftime(BACKUP_ENT_FMT)
        )
    if keep_monthly is not None:
        # Approximation: 1 month = 30 days (see docstring); snapped to
        # the 1st of the month.
        thresholds["monthly"] = ((now - timedelta(days=30*keep_monthly))
                                 .replace(day=1, hour=0, minute=0, second=0)
                                 .strftime(BACKUP_ENT_FMT))
    if keep_yearly is not None:
        # Approximation: 1 year = 365 days; snapped to Jan 1st.
        thresholds["yearly"] = (
            (now - timedelta(days=365*keep_yearly))
            .replace(month=1, day=1, hour=0, minute=0, second=0)
            .strftime(BACKUP_ENT_FMT)
        )
    # Walk newest -> oldest; prev_backup is the more recent neighbour of
    # the backup being examined. Within each tier, when two adjacent
    # backups fall into the same period (day/week/month/year), the newer
    # one is marked for removal, so the oldest backup of each period wins.
    prev_backup = all_backups[0]
    to_remove = {b: False for b in all_backups}
    for backup in all_backups[1:]:
        # skip all backups made after threshold
        if backup.name > thresholds["all"]:
            prev_backup = backup
            continue
        # leave only one backup per day for backups made after threshold
        if backup.name > thresholds["daily"]:
            if (_date_from_backup(prev_backup).date()
                    == _date_from_backup(backup).date()):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # leave only one backup per week for backups made after threshold
        if backup.name > thresholds["weekly"]:
            # isocalendar()[1] is the ISO week number.
            if (_date_from_backup(prev_backup).isocalendar()[1]
                    == _date_from_backup(backup).isocalendar()[1]):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # leave only one backup per month for backups made after threshold
        if backup.name > thresholds["monthly"]:
            if (_date_from_backup(prev_backup).date().replace(day=1)
                    == _date_from_backup(backup).date().replace(day=1)):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # leave only one backup per year for backups made after threshold
        if backup.name > thresholds["yearly"]:
            if (_date_from_backup(prev_backup).date().replace(month=1, day=1)
                    == _date_from_backup(backup).date().replace(month=1, day=1)):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # Older than every retention tier: always removed.
        to_remove[backup] = True
    # Deletion happens in a separate pass so the scan above saw the full
    # backup list; on dry_run nothing is removed.
    for backup, do_delete in to_remove.items():
        if not dry_run and do_delete:
            _lg.info("Removing old backup %s", backup.name)
            shutil.rmtree(backup.path)
2021-11-08 21:43:18 +00:00
def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions):
    """ Record one changed entry of the backup in its delta directory.

    Deleted entries are only logged; anything else is hardlinked from
    the backup into its DELTA_DIR subtree.
    """
    _lg.debug("%s %s", action, entry_relpath)
    if action is fs.Actions.delete:
        return
    delta_dir = os.path.join(backup_dir, DELTA_DIR)
    fs.nest_hardlink(src_dir=backup_dir,
                     src_relpath=entry_relpath,
                     dst_dir=delta_dir)
2021-06-23 17:11:42 +00:00
def initiate_backup(sources: Iterable[str],
                    backup_dir: str,
                    dry_run: bool = False,
                    external_rsync: bool = False,
                    external_hardlink: bool = False) -> None:
    """ Main backup function.

    Creates a new timestamp-named backup directory under backup_dir,
    seeds it with hardlinks to the latest existing backup (if any), then
    rsyncs every source directory into it, recording each changed entry
    via process_backed_entry. The new backup is removed again on dry-run
    or when nothing changed compared to the previous backup.

    :param sources: paths of source directories to back up.
    :param backup_dir: full path to the backup root directory.
    :param dry_run: passed through to the rsync function; the created
        backup directory is removed at the end.
    :param external_rsync: use fs.rsync_ext instead of fs.rsync.
    :param external_hardlink: forwarded to fs.hardlink_dir.
    """
    # The backup's start time doubles as its directory name.
    start_time_fmt = datetime.now().strftime(BACKUP_ENT_FMT)
    cur_backup = fs.PseudoDirEntry(os.path.join(backup_dir, start_time_fmt))
    _lg.debug("Current backup dir: %s", cur_backup.path)
    latest_backup = _get_latest_backup(backup_dir)
    if latest_backup is None:
        # First backup ever: start from an empty directory.
        _lg.info("Creating empty directory for current backup: %s",
                 cur_backup.name)
        os.mkdir(cur_backup.path)
    else:
        # Seed the new backup with hardlinks to the previous one, so
        # rsync below only has to touch changed entries.
        _lg.info("Copying data from latest backup %s to current backup %s",
                 latest_backup.name, cur_backup.name)
        hl_res = fs.hardlink_dir(src_dir=latest_backup.path,
                                 dst_dir=cur_backup.path,
                                 use_external=external_hardlink)
        if not hl_res:
            # Hardlink copy failed; abort and leave no partial backup.
            _lg.error("Something went wrong during copying data from latest backup,"
                      " removing created %s", cur_backup.name)
            shutil.rmtree(cur_backup.path, ignore_errors=True)
            return
        # clean up delta dir from copied backup
        # NOTE(review): placed inside the else-branch — the delta dir can
        # only have been inherited from the copied backup; confirm against
        # upstream history (indentation was lost in this view).
        shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), ignore_errors=True)
    rsync_func = fs.rsync_ext if external_rsync else fs.rsync
    backup_changed = False
    for src in sources:
        src_abs = os.path.abspath(src)
        src_name = os.path.basename(src_abs)
        dst_abs = os.path.join(cur_backup.path, src_name)
        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
        # rsync_func yields one (relpath, action) pair per changed entry.
        for entry_relpath, action in rsync_func(src_abs, dst_abs, dry_run=dry_run):
            if latest_backup is not None:
                # Track the change in the new backup's delta directory.
                process_backed_entry(
                    backup_dir=cur_backup.path,
                    entry_relpath=os.path.join(src_name, entry_relpath),
                    action=action
                )
            backup_changed = True
    # do not create backup on dry-run
    if dry_run:
        _lg.info("Dry-run, removing created backup: %s", cur_backup.name)
        shutil.rmtree(cur_backup.path, ignore_errors=True)
    # do not create backup if no change from previous one
    elif latest_backup is not None and not backup_changed:
        _lg.info("Newly created backup %s is the same as previous one %s, removing",
                 cur_backup.name, latest_backup.name)
        shutil.rmtree(cur_backup.path, ignore_errors=True)
    else:
        _lg.info("Backup created: %s", cur_backup.name)