Add setup.py
This commit is contained in:
0
curateipsum/__init__.py
Normal file
0
curateipsum/__init__.py
Normal file
255
curateipsum/backup.py
Normal file
255
curateipsum/backup.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
Module with backup functions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Iterable
|
||||
|
||||
import curateipsum.fs as fs
|
||||
|
||||
BACKUP_ENT_FMT = "%Y%m%d_%H%M%S"
|
||||
LOCK_FILE = ".backups_lock"
|
||||
DELTA_DIR = ".backup_delta"
|
||||
_lg = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
    """Tell whether *backup_entry* is a single backup directory.

    A backup entity is a directory whose name parses with BACKUP_ENT_FMT.
    """
    if not backup_entry.is_dir():
        return False
    try:
        datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
    except ValueError:
        # name is not a timestamp -> not one of ours
        return False
    return True
|
||||
|
||||
|
||||
def _iterate_backups(backup_dir: str) -> Iterable[os.DirEntry]:
    """Yield non-empty backup entities found in *backup_dir*.

    Empty entities (leftovers of interrupted runs) are deleted on the fly
    instead of being yielded.
    """
    with os.scandir(backup_dir) as scan_it:
        entry: os.DirEntry
        for entry in scan_it:
            if not _is_backup_entity(entry):
                continue
            if not os.listdir(entry.path):
                _lg.info("Removing empty backup entity: %s", entry.name)
                os.rmdir(entry.path)
                continue
            yield entry
|
||||
|
||||
|
||||
def _get_latest_backup(backup_dir: str) -> Optional[os.DirEntry]:
    """ Returns path to latest backup created in backup_dir or None. """
    # Backup names are timestamps, so the lexicographically greatest name
    # is the most recent backup; no need to sort the whole list.
    return max(_iterate_backups(backup_dir),
               key=lambda ent: ent.name,
               default=None)
|
||||
|
||||
|
||||
def _date_from_backup(backup: os.DirEntry) -> datetime:
    """Parse the creation timestamp encoded in a backup directory name."""
    parsed = datetime.strptime(backup.name, BACKUP_ENT_FMT)
    return parsed
|
||||
|
||||
|
||||
def set_backups_lock(backup_dir: str, force: bool = False) -> bool:
    """Try to acquire the backup-run lock for *backup_dir*.

    :param backup_dir: directory holding the backups and the lock file.
    :param force: remove a pre-existing lock (previous run presumed dead).
    :return: False if a previous backup is still running, True when the
        lock was acquired.
    """
    lock_file_path = os.path.join(backup_dir, LOCK_FILE)
    if os.path.exists(lock_file_path):
        if not force:
            return False
        os.unlink(lock_file_path)
    try:
        # BUGFIX: create the lock atomically with O_CREAT|O_EXCL. The old
        # exists()-then-open sequence had a race where two concurrent runs
        # could both "acquire" the lock.
        os.close(os.open(lock_file_path,
                         os.O_CREAT | os.O_EXCL | os.O_WRONLY))
    except FileExistsError:
        return False
    return True
|
||||
|
||||
|
||||
def release_backups_lock(backup_dir: str):
    """Remove the lock file created by set_backups_lock, if present."""
    lock_path = os.path.join(backup_dir, LOCK_FILE)
    if os.path.exists(lock_path):
        os.unlink(lock_path)
|
||||
|
||||
|
||||
def cleanup_old_backups(
        backup_dir: str,
        dry_run: bool = False,
        keep_all: int = 7,
        keep_daily: int = 30,
        keep_weekly: int = 52,
        keep_monthly: int = 12,
        keep_yearly: int = 5,
        min_free_space: int = 0
):
    """
    Delete old backups. Never deletes the only backup.

    Retention is tiered, newest to oldest: everything younger than the
    ``keep_all`` horizon is kept; then at most one backup per day, per ISO
    week, per month and per year survives within the corresponding
    ``keep_*`` horizon; backups older than the yearly horizon are removed.

    :param backup_dir: full path to backup directory.
    :param dry_run: don't do anything actually.
    :param keep_all: the number of days that all backups must be kept.
    :param keep_daily: the number of days that all daily backups must be kept.
    :param keep_weekly: the number of weeks of which one weekly backup must be kept.
    :param keep_monthly: the number of months (1 month = 30 days) of which
        one monthly backup must be kept.
    :param keep_yearly: the number of years of which one yearly backup must be kept.
    :param min_free_space: not used right now
    :return:
    """
    # Newest first: names are timestamps, so lexicographic order is
    # chronological order.
    all_backups = sorted(_iterate_backups(backup_dir),
                         key=lambda e: e.name, reverse=True)
    if dry_run:
        _lg.info("Dry-run, no backups will be actually removed")
    if not all_backups:
        _lg.debug("No backups, exiting")
        return
    elif len(all_backups) == 1:
        _lg.debug("Only one backup (%s) exists, will not remove it",
                  all_backups[0].name)
        return

    now = datetime.now()
    # Default every threshold to "now": a tier whose keep_* is None then
    # matches nothing itself and defers to the next coarser tier.
    thresholds = {k: now.strftime(BACKUP_ENT_FMT)
                  for k in ("all", "daily", "weekly", "monthly", "yearly")}
    if keep_all is not None:
        # Snap to midnight so a whole day is kept or dropped together.
        thresholds["all"] = ((now - timedelta(days=keep_all))
                             .replace(hour=0, minute=0, second=0)
                             .strftime(BACKUP_ENT_FMT))
    if keep_daily is not None:
        thresholds["daily"] = ((now - timedelta(days=keep_daily))
                               .replace(hour=0, minute=0, second=0)
                               .strftime(BACKUP_ENT_FMT))
    if keep_weekly is not None:
        # Subtracting now.weekday() snaps to the start of the ISO week.
        thresholds["weekly"] = (
            (now - timedelta(weeks=keep_weekly, days=now.weekday()))
            .strftime(BACKUP_ENT_FMT)
        )
    if keep_monthly is not None:
        thresholds["monthly"] = ((now - timedelta(days=30*keep_monthly))
                                 .replace(day=1, hour=0, minute=0, second=0)
                                 .strftime(BACKUP_ENT_FMT))
    if keep_yearly is not None:
        thresholds["yearly"] = (
            (now - timedelta(days=365*keep_yearly))
            .replace(month=1, day=1, hour=0, minute=0, second=0)
            .strftime(BACKUP_ENT_FMT)
        )

    # prev_backup is the newer neighbour of the backup being examined; when
    # two backups land in the same calendar bucket the NEWER one is marked
    # for removal, so the oldest backup of each bucket survives.
    prev_backup = all_backups[0]
    to_remove = {b: False for b in all_backups}

    for backup in all_backups[1:]:
        # skip all backups made after threshold
        if backup.name > thresholds["all"]:
            prev_backup = backup
            continue

        # leave only one backup per day for backups made after threshold
        if backup.name > thresholds["daily"]:
            if (_date_from_backup(prev_backup).date()
                    == _date_from_backup(backup).date()):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue

        # leave only one backup per week for backups made after threshold
        if backup.name > thresholds["weekly"]:
            if (_date_from_backup(prev_backup).isocalendar()[1]
                    == _date_from_backup(backup).isocalendar()[1]):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue

        # leave only one backup per month for backups made after threshold
        if backup.name > thresholds["monthly"]:
            if (_date_from_backup(prev_backup).date().replace(day=1)
                    == _date_from_backup(backup).date().replace(day=1)):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue

        # leave only one backup per year for backups made after threshold
        if backup.name > thresholds["yearly"]:
            if (_date_from_backup(prev_backup).date().replace(month=1, day=1)
                    == _date_from_backup(backup).date().replace(month=1, day=1)):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue

        # older than the yearly horizon: always remove
        to_remove[backup] = True

    # NOTE(review): on dry_run nothing is logged per backup below, so a dry
    # run never reports WHICH backups would be removed — confirm intent.
    for backup, do_delete in to_remove.items():
        if not dry_run and do_delete:
            _lg.info("Removing old backup %s", backup.name)
            shutil.rmtree(backup.path)
|
||||
|
||||
|
||||
def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions):
    """Mirror one changed entry into the backup's delta directory.

    Deleted entries have nothing to mirror, so they are only logged.
    """
    _lg.debug("%s %s", action, entry_relpath)
    if action is fs.Actions.delete:
        return
    delta_dir = os.path.join(backup_dir, DELTA_DIR)
    fs.nest_hardlink(src_dir=backup_dir,
                     src_relpath=entry_relpath,
                     dst_dir=delta_dir)
|
||||
|
||||
|
||||
def initiate_backup(sources,
                    backup_dir: str,
                    dry_run: bool = False,
                    external_rsync: bool = False,
                    external_hardlink: bool = False):
    """Main backup function.

    Creates a new timestamped backup entity in *backup_dir*: the previous
    backup (if any) is hardlink-copied into it, then every source directory
    is synced on top, and every detected change is mirrored into the
    backup's delta directory.

    :param sources: iterable of source directory paths to back up.
    :param dry_run: remove the created backup at the end instead of keeping it.
    :param external_rsync: use the external rsync binary instead of fs.rsync.
    :param external_hardlink: use external `cp -al` for the hardlink copy.
    """

    # The backup's directory name IS its creation timestamp.
    start_time_fmt = datetime.now().strftime(BACKUP_ENT_FMT)
    cur_backup = fs.PseudoDirEntry(os.path.join(backup_dir, start_time_fmt))
    _lg.debug("Current backup dir: %s", cur_backup.path)

    latest_backup = _get_latest_backup(backup_dir)

    if latest_backup is None:
        # First backup ever: start from an empty directory.
        _lg.info("Creating empty directory for current backup: %s",
                 cur_backup.name)
        os.mkdir(cur_backup.path)

    else:
        # Start from a hardlink copy of the previous backup so unchanged
        # files consume no extra space.
        _lg.info("Copying data from latest backup %s to current backup %s",
                 latest_backup.name, cur_backup.name)

        hl_res = fs.hardlink_dir(src_dir=latest_backup.path,
                                 dst_dir=cur_backup.path,
                                 use_external=external_hardlink)
        if not hl_res:
            _lg.error("Something went wrong during copying data from latest backup,"
                      " removing created %s", cur_backup.name)
            shutil.rmtree(cur_backup.path, ignore_errors=True)
            return

    # clean up delta dir from copied backup
    shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), ignore_errors=True)

    rsync_func = fs.rsync_ext if external_rsync else fs.rsync

    backup_changed = False
    for src in sources:
        src_abs = os.path.abspath(src)
        src_name = os.path.basename(src_abs)
        dst_abs = os.path.join(cur_backup.path, src_name)
        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
        # rsync_func yields one (relpath, action) per change it applied.
        for entry_relpath, action in rsync_func(src_abs, dst_abs, dry_run=dry_run):
            if latest_backup is not None:
                # Mirror the changed entry into the delta dir for this backup.
                process_backed_entry(
                    backup_dir=cur_backup.path,
                    entry_relpath=os.path.join(src_name, entry_relpath),
                    action=action
                )
            backup_changed = True

    # do not create backup on dry-run
    if dry_run:
        _lg.info("Dry-run, removing created backup: %s", cur_backup.name)
        shutil.rmtree(cur_backup.path, ignore_errors=True)
    # do not create backup if no change from previous one
    elif latest_backup is not None and not backup_changed:
        _lg.info("Newly created backup %s is the same as previous one %s, removing",
                 cur_backup.name, latest_backup.name)
        shutil.rmtree(cur_backup.path, ignore_errors=True)
    else:
        _lg.info("Backup created: %s", cur_backup.name)
|
||||
504
curateipsum/fs.py
Normal file
504
curateipsum/fs.py
Normal file
@@ -0,0 +1,504 @@
|
||||
"""
|
||||
Module with filesystem-related functions.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Iterable, Tuple
|
||||
|
||||
_lg = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Actions(enum.Enum):
    """Kinds of change a sync run can report for a single fs entry."""
    nothing = enum.auto()       # no change detected
    delete = enum.auto()        # entry removed from destination
    rewrite = enum.auto()       # entry fully replaced in destination
    update_time = enum.auto()   # only the modification time changed
    update_perm = enum.auto()   # only the permission bits changed
    update_owner = enum.auto()  # only uid/gid changed
    create = enum.auto()        # new entry created in destination
|
||||
|
||||
|
||||
class PseudoDirEntry:
    """Minimal stand-in for os.DirEntry for paths not produced by scandir.

    Mirrors the DirEntry attributes used in this project (``path``,
    ``name``, ``is_dir()``, ``stat()``); lookups are cached after first use.
    """

    def __init__(self, path):
        self.path = os.path.realpath(path)
        self.name = os.path.basename(self.path)
        self._is_dir = None  # lazy cache for is_dir()
        self._stat = None    # lazy cache for stat()

    def __str__(self):
        return self.name

    def is_dir(self) -> bool:
        cached = self._is_dir
        if cached is None:
            cached = self._is_dir = os.path.isdir(self.path)
        return cached

    def stat(self):
        cached = self._stat
        if cached is None:
            cached = self._stat = os.lstat(self.path)
        return cached
|
||||
|
||||
|
||||
def _parse_rsync_output(line: str) -> Tuple[str, Actions]:
    """Translate one `rsync --itemize-changes` line into (relpath, action).

    Raises RuntimeError for a line matching no known change pattern.
    """
    change_string, relpath = line.split(' ', maxsplit=1)
    if change_string == "*deleting":
        return relpath, Actions.delete

    # Itemize format: update type, entity type, then per-attribute flags.
    update_type = change_string[0]
    entity_type = change_string[1]
    change_type = change_string[2:]
    created = "+" in change_type

    if update_type == "c" and entity_type in {"d", "L"} and created:
        return relpath, Actions.create
    if update_type == ">" and entity_type == "f" and created:
        return relpath, Actions.create
    if entity_type == "f" and ("s" in change_type or "t" in change_type):
        return relpath, Actions.rewrite
    if entity_type == "d" and "t" in change_type:
        return relpath, Actions.update_time
    if "p" in change_type:
        return relpath, Actions.update_perm
    if "o" in change_type or "g" in change_type:
        return relpath, Actions.update_owner

    raise RuntimeError("Not parsed string: %s" % line)
|
||||
|
||||
|
||||
def rsync_ext(src, dst, dry_run=False):
    """Run the external rsync binary and yield one (relpath, action) tuple
    per itemized change.

    :param src: source directory (a trailing slash is appended so rsync
        syncs the *content* of src into dst).
    :param dst: destination directory.
    :param dry_run: pass --dry-run to rsync (changes reported, not applied).
    """
    rsync_args = ["rsync"]
    if dry_run:
        rsync_args.append("--dry-run")
    rsync_args.append("--archive")
    # rsync_args.append("--compress")
    # rsync_args.append("--inplace")
    rsync_args.append("--whole-file")
    rsync_args.append("--human-readable")
    rsync_args.append("--delete-during")
    rsync_args.append("--itemize-changes")
    rsync_args.append(f"{src}/")
    rsync_args.append(str(dst))

    _lg.info("Executing external command: %s", " ".join(rsync_args))
    process = subprocess.Popen(rsync_args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    # BUGFIX: the old implementation buffered one line behind and (a) after
    # a UnicodeDecodeError it never advanced the buffer, silently dropping
    # every following line, and (b) crashed with AttributeError on None
    # when rsync produced no output at all. Lines are now processed
    # directly, one by one.
    with process.stdout:
        for raw_line in iter(process.stdout.readline, b""):
            _lg.debug("Rsync current line: %s", raw_line)
            try:
                line = raw_line.decode("utf-8").strip()
            # some issues with cyrillic in filenames
            except UnicodeDecodeError:
                _lg.error("Can't process rsync line: %s", raw_line)
                continue
            if not line:
                continue
            _lg.debug("Rsync itemize line: %s", line)
            yield _parse_rsync_output(line)

    exitcode = process.wait()
    if exitcode:
        _lg.error("rsync exited with non-zero code: %s", exitcode)
|
||||
|
||||
|
||||
def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
    """Recursively yield os.DirEntry objects for given directory.

    :param path: directory to walk.
    :param dir_first: when True a directory is yielded before its children
        (top-down), otherwise after them (bottom-up, useful for deletion).
    """
    # BUGFIX: the original body contained a second, stray docstring string
    # statement after the annotation; removed as dead code.
    entry: os.DirEntry
    with os.scandir(path) as scan_it:
        for entry in scan_it:
            if entry.is_dir(follow_symlinks=False):
                if dir_first:
                    yield entry
                yield from scantree(entry.path, dir_first)
                if not dir_first:
                    yield entry
            else:
                yield entry
|
||||
|
||||
|
||||
def rm_direntry(entry: os.DirEntry):
    """ Recursively delete DirEntry (dir, file or symlink). """
    # Files and symlinks (even symlinks to dirs) are simple unlinks.
    if entry.is_file(follow_symlinks=False) or entry.is_symlink():
        os.unlink(entry.path)
    elif entry.is_dir(follow_symlinks=False):
        # Empty the directory first, then remove it.
        with os.scandir(entry.path) as children:
            child: os.DirEntry
            for child in children:
                rm_direntry(child)
        os.rmdir(entry.path)
|
||||
|
||||
|
||||
# os.O_BINARY exists only on Windows; elsewhere fall back to 0 (no-op flag).
try:
    O_BINARY = os.O_BINARY  # Windows only
except AttributeError:
    O_BINARY = 0
# Raw-fd open flags used by copy_file().
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
# Chunk size for copy_file()'s read/write loop.
BUFFER_SIZE = 128 * 1024
|
||||
|
||||
|
||||
def copy_file(src, dst):
    """ Copy file content from src to dst. Faster than shutil.copy.

    The destination is created (or truncated) with the source's mode bits;
    ownership and timestamps are NOT copied here.
    """
    # BUGFIX: in the original, a failing os.open left `fin`/`fout` unbound
    # and the cleanup relied on bare `except:` to hide the NameError. Open
    # the descriptors stepwise and close only what was actually opened.
    fin = os.open(src, READ_FLAGS)
    fout = -1
    try:
        stat = os.fstat(fin)
        fout = os.open(dst, WRITE_FLAGS, stat.st_mode)
        for chunk in iter(lambda: os.read(fin, BUFFER_SIZE), b""):
            os.write(fout, chunk)
    finally:
        for fd in (fout, fin):
            if fd >= 0:
                try:
                    os.close(fd)
                except OSError:
                    # best-effort close, same as the original behavior
                    pass
|
||||
|
||||
|
||||
def copy_entity(src_path: str, dst_path: str):
    """ Non-recursive fs entity (file, dir or symlink) copy.

    Creates dst_path as the same kind of entity as src_path (a directory's
    content is NOT copied) and replicates ownership, permissions and
    timestamps from src_path.
    """
    src_stat = os.lstat(src_path)
    is_symlink = os.path.islink(src_path)

    # BUGFIX: the symlink check must come first — os.path.isdir() follows
    # symlinks, so a symlink pointing at a directory used to be materialized
    # as a real directory instead of a symlink.
    if is_symlink:
        link_target = os.readlink(src_path)
        os.symlink(link_target, dst_path)

    elif os.path.isdir(src_path):
        os.mkdir(dst_path)

    else:
        copy_file(src_path, dst_path)

    if is_symlink:
        # change symlink attributes only if supported by OS
        if os.chown in os.supports_follow_symlinks:
            os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
                     follow_symlinks=False)
        if os.chmod in os.supports_follow_symlinks:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        if os.utime in os.supports_follow_symlinks:
            os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    else:
        os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
        os.chmod(dst_path, src_stat.st_mode)
        os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
|
||||
|
||||
|
||||
def copy_direntry(entry: os.DirEntry, dst_path):
    """ Non-recursive DirEntry (file, dir or symlink) copy.

    Creates dst_path as the same kind of entity as *entry* (a directory's
    content is NOT copied) and replicates ownership, permissions and
    timestamps from the entry.
    """
    # BUGFIX: the symlink check must come first — DirEntry.is_dir() follows
    # symlinks by default, so a symlink pointing at a directory used to be
    # materialized as a real directory instead of a symlink.
    if entry.is_symlink():
        link_target = os.readlink(entry.path)
        os.symlink(link_target, dst_path)

    elif entry.is_dir():
        os.mkdir(dst_path)

    else:
        copy_file(entry.path, dst_path)

    src_stat = entry.stat(follow_symlinks=False)
    if entry.is_symlink():
        # change symlink attributes only if supported by OS
        if os.chown in os.supports_follow_symlinks:
            os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
                     follow_symlinks=False)
        if os.chmod in os.supports_follow_symlinks:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        if os.utime in os.supports_follow_symlinks:
            os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    else:
        os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
        os.chmod(dst_path, src_stat.st_mode)
        os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
|
||||
|
||||
|
||||
def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
    """Replace the dst entry with a fresh non-recursive copy of src.

    If dst is a directory its whole subtree is removed first; the content
    of a src directory is NOT copied, only the directory entity itself.
    """
    rm_direntry(dst_entry)
    copy_direntry(src_entry, dst_entry.path)
|
||||
|
||||
|
||||
def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
    """
    Pure-python one-way sync of src_dir content into dst_dir.

    Yields one (rel_path, Actions) tuple per change applied to dst.

    :param src_dir: source dir
    :param dst_dir: dest dir, create if not exists
    :param dry_run: not used (changes are always applied)
    :return: generator of (rel_path, action) tuples
    """

    _lg.debug("Rsync: %s -> %s", src_dir, dst_dir)
    src_root_abs = os.path.abspath(src_dir)
    dst_root_abs = os.path.abspath(dst_dir)

    if not os.path.isdir(src_root_abs):
        raise RuntimeError("Error during reading source directory: %s"
                           % src_root_abs)
    if os.path.exists(dst_root_abs):
        if not os.path.isdir(dst_root_abs):
            raise RuntimeError("Destination path is not a directory: %s"
                               % dst_root_abs)
    else:
        os.mkdir(dst_root_abs)

    # Create source map {rel_path: dir_entry}
    src_files_map = {
        ent.path[len(src_root_abs) + 1:]: ent for ent in scantree(src_root_abs)
    }

    # process dst tree: delete extraneous entries, refresh changed ones
    for dst_entry in scantree(dst_root_abs, dir_first=False):
        rel_path = dst_entry.path[len(dst_root_abs) + 1:]

        src_entry = src_files_map.get(rel_path)

        # remove dst entries not existing in source
        if src_entry is None:
            _lg.debug("Rsync, deleting: %s", rel_path)
            rm_direntry(dst_entry)
            yield rel_path, Actions.delete
            continue

        # mark src entry as taken for processing
        del src_files_map[rel_path]

        src_entry: os.DirEntry
        # rewrite dst if it has different than src type
        if src_entry.is_file(follow_symlinks=False):
            if not dst_entry.is_file(follow_symlinks=False):
                _lg.debug("Rsync, rewriting (src is a file, dst is not a file): %s",
                          rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue
        if src_entry.is_dir(follow_symlinks=False):
            if not dst_entry.is_dir(follow_symlinks=False):
                _lg.debug("Rsync, rewriting (src is a dir, dst is not a dir): %s",
                          rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue
        if src_entry.is_symlink():
            if not dst_entry.is_symlink():
                _lg.debug("Rsync, rewriting (src is a symlink, dst is not a symlink): %s",
                          rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue

        # rewrite dst if it is hard link to src (bad for backups)
        if src_entry.inode() == dst_entry.inode():
            # BUGFIX: the old log message said "different inodes" although
            # this branch fires when inodes are the SAME (dst is a hardlink).
            _lg.debug("Rsync, rewriting (same inode, dst is a hardlink): %s",
                      rel_path)
            update_direntry(src_entry, dst_entry)
            yield rel_path, Actions.rewrite
            continue

        src_stat = src_entry.stat(follow_symlinks=False)
        dst_stat = dst_entry.stat(follow_symlinks=False)

        # rewrite dst file/symlink which have different with src size or mtime
        if src_entry.is_file(follow_symlinks=False):
            same_size = src_stat.st_size == dst_stat.st_size
            same_mtime = src_stat.st_mtime == dst_stat.st_mtime
            if not (same_size and same_mtime):
                reason = "size" if not same_size else "time"
                _lg.debug("Rsync, rewriting (different %s): %s", reason, rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue

        # rewrite dst symlink if it points somewhere else than src
        if src_entry.is_symlink():
            if os.readlink(src_entry.path) != os.readlink(dst_entry.path):
                _lg.debug("Rsync, rewriting (different symlink target): %s", rel_path)
                update_direntry(src_entry, dst_entry)
                # BUGFIX: this branch used to rewrite silently without a
                # yield, so callers never learned about the change.
                yield rel_path, Actions.rewrite
                continue

        # update permissions and ownership in place
        if src_stat.st_mode != dst_stat.st_mode:
            _lg.debug("Rsync, updating permissions: %s", rel_path)
            yield rel_path, Actions.update_perm
            # BUGFIX: used to chmod with dst_stat.st_mode — a no-op; the
            # SOURCE mode is what must be applied to the destination.
            os.chmod(dst_entry.path, src_stat.st_mode)

        if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid:
            _lg.debug("Rsync, updating owners: %s", rel_path)
            yield rel_path, Actions.update_owner
            os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid)

    # process remained source entries: everything left is new to dst
    for rel_path, src_entry in src_files_map.items():
        dst_path = os.path.join(dst_root_abs, rel_path)
        _lg.debug("Rsync, creating: %s", rel_path)
        copy_direntry(src_entry, dst_path)
        yield rel_path, Actions.create

    # restore dir mtimes in dst, updated by updating files
    for src_entry in scantree(src_root_abs, dir_first=True):
        if not src_entry.is_dir():
            continue
        rel_path = src_entry.path[len(src_root_abs) + 1:]
        dst_path = os.path.join(dst_root_abs, rel_path)
        src_stat = src_entry.stat(follow_symlinks=False)
        dst_stat = os.lstat(dst_path)
        if src_stat.st_mtime != dst_stat.st_mtime:
            _lg.debug("Rsync, restoring directory mtime: %s", dst_path)
            os.utime(dst_path,
                     (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)

    # restore dst_root dir mtime
    src_root_stat = os.lstat(src_root_abs)
    dst_root_stat = os.lstat(dst_root_abs)
    if src_root_stat.st_mtime != dst_root_stat.st_mtime:
        _lg.debug("Rsync, restoring root directory mtime: %s", src_root_abs)
        os.utime(dst_root_abs,
                 (src_root_stat.st_atime, src_root_stat.st_mtime),
                 follow_symlinks=False)
|
||||
|
||||
|
||||
def _recursive_hardlink_ext(src: str, dst: str) -> bool:
    """
    Make hardlink copy of a directory using `cp --link`. Both src and dst
    should exist.
    :param src: absolute path to source directory.
    :param dst: absolute path to target directory.
    :return: success or not
    """
    # macOS ships BSD cp; GNU cp (gcp from coreutils) is needed for --link.
    cp = "gcp" if sys.platform == "darwin" else "cp"

    src_content = glob.glob(f"{src}/*")
    cmd = [cp, "--archive", "--verbose", "--link", *src_content, dst]
    _lg.info("Executing external command: %s", " ".join(cmd))
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    with process.stdout:
        for raw in iter(process.stdout.readline, b""):
            _lg.debug("%s: %s", cp, raw.decode("utf-8").strip())
    return process.wait() == 0
|
||||
|
||||
|
||||
def _recursive_hardlink(src: str, dst: str) -> bool:
    """
    Hardlink directory content recursively using python only.
    Both src and dst directories should exist.
    :param src: absolute path to source directory.
    :param dst: absolute path to target directory.
    :return: always True (failures raise)
    """
    with os.scandir(src) as entries:
        entry: os.DirEntry
        for entry in entries:
            target = os.path.join(dst, entry.name)

            if entry.is_dir(follow_symlinks=False):
                _lg.debug("Hardlink, copying directory: %s -> %s",
                          entry.path, target)
                os.mkdir(target)

                # process directory children first ...
                _recursive_hardlink(entry.path, target)

                # ... then restore the directory's own metainfo, which the
                # children operations may have touched
                meta = entry.stat(follow_symlinks=False)
                os.chown(target, meta.st_uid, meta.st_gid)
                os.chmod(target, meta.st_mode)
                os.utime(target, (meta.st_atime, meta.st_mtime))

            elif entry.is_file(follow_symlinks=False) or entry.is_symlink():
                _lg.debug("Hardlink, creating link for file: %s -> %s",
                          entry.path, target)
                os.link(entry.path, target, follow_symlinks=False)

            else:
                # something that is not a file, symlink or directory
                raise NotImplementedError(entry.path)

    return True
|
||||
|
||||
|
||||
def hardlink_dir(src_dir, dst_dir, use_external: bool = False) -> bool:
    """
    Make hardlink copy of a directory with all its content.
    :param src_dir: path to source directory
    :param dst_dir: path to target directory (must not exist yet)
    :param use_external: whether to use the external `cp --link` command
    :return: success or not
    """
    _lg.debug("Recursive hardlinking: %s -> %s", src_dir, dst_dir)
    src_abs = os.path.abspath(src_dir)
    dst_abs = os.path.abspath(dst_dir)

    if not os.path.isdir(src_abs):
        raise RuntimeError(f"Error reading source directory: {src_dir}")
    if os.path.exists(dst_abs):
        raise RuntimeError(f"Destination already exists: {dst_dir}")

    _lg.debug("Hardlink, creating directory: %s", dst_abs)
    os.mkdir(dst_abs)

    if use_external:
        return _recursive_hardlink_ext(src_abs, dst_abs)
    return _recursive_hardlink(src_abs, dst_abs)
|
||||
|
||||
|
||||
def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str):
    """
    Hardlink entity from (src_dir + src_relpath) to dst_dir preserving dir structure.

    Missing intermediate path components are recreated in dst_dir with
    copy_entity; components that already exist are left untouched.
    :param src_dir: base directory the relative path is resolved against.
    :param src_relpath: path of the entity, relative to src_dir.
    :param dst_dir: base directory to recreate the structure in (created
        if missing).
    """
    _lg.debug("Nested hardlinking: %s/%s -> %s", src_dir, src_relpath, dst_dir)
    src_dir_abs = os.path.abspath(src_dir)
    src_full_path = os.path.join(src_dir_abs, src_relpath)
    dst_dir_abs = os.path.abspath(dst_dir)
    dst_full_path = os.path.join(dst_dir_abs, src_relpath)

    # check source entity and destination directory
    if not os.path.exists(src_full_path):
        raise RuntimeError("Error reading source entity: %s" % src_full_path)
    if os.path.exists(dst_dir_abs):
        if not os.path.isdir(dst_dir_abs):
            raise RuntimeError("Destination path is not a directory: %s"
                               % dst_dir_abs)
    else:
        os.mkdir(dst_dir_abs)

    # if destination entity exists, check it points to source entity
    if os.path.exists(dst_full_path):
        src_stat = os.lstat(src_full_path)
        # same inode means dst already hardlinks src -> nothing to do
        if os.path.samestat(src_stat, os.lstat(dst_full_path)):
            return
        # remove otherwise
        os.unlink(dst_full_path)

    # walk the relative path one component at a time, materializing each
    # level missing from dst
    src_cur_path = src_dir_abs
    dst_cur_path = dst_dir_abs
    for rel_part in src_relpath.split(sep=os.path.sep):
        src_cur_path = os.path.join(src_cur_path, rel_part)
        dst_cur_path = os.path.join(dst_cur_path, rel_part)
        if os.path.exists(dst_cur_path):
            continue
        # NOTE(review): the final component is created with copy_entity (a
        # full data copy for regular files), not os.link, although the
        # samestat shortcut above implies a hardlink was expected — confirm
        # whether a copy here is intended.
        copy_entity(src_cur_path, dst_cur_path)
|
||||
110
curateipsum/main.py
Executable file
110
curateipsum/main.py
Executable file
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os.path
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
|
||||
import curateipsum.backup as backup
|
||||
|
||||
_lg = logging.getLogger("spqr.curateipsum")
|
||||
SUPPORTED_PLATFORMS = ("linux", "darwin")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, validate environment, run a backup.

    :return: process exit code — 0 on success, 1 on any validation or
        locking failure.
    """
    formatter = logging.Formatter("{asctime}|{levelname}|{message}", style="{")
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)

    parser = argparse.ArgumentParser(
        prog="cura-te-ipsum", description="cura-te-ipsum, my personal backup software.",
    )
    parser.add_argument("-V", "--version", action="version", version="%(prog)s 0.1")
    parser.add_argument("-v", "--verbose",
                        action="store_true",
                        default=False,
                        help="print verbose information")
    parser.add_argument("-b",
                        dest="backup_dir",
                        metavar="BACKUP_DIR",
                        type=str,
                        required=True,
                        help="directory, where all backups will be stored")
    parser.add_argument("-n", "--dry-run",
                        action="store_true",
                        default=False,
                        # BUGFIX: help text was broken English ("Do not do
                        # create backup")
                        help="Do not actually create backup")
    parser.add_argument("-f", "--force",
                        action="store_true",
                        default=False,
                        help="Force run when previous backup is still in process")
    parser.add_argument("--external-rsync",
                        action="store_true",
                        default=False,
                        help="Use external rsync for copying")
    parser.add_argument("--external-hardlink",
                        action="store_true",
                        default=False,
                        help="Use cp command for creating hardlink copies")
    parser.add_argument("sources",
                        nargs="+",
                        metavar="SOURCE",
                        type=str,
                        help="backup source (file/dir/smth else)")
    args = parser.parse_args()

    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel, handlers=[console_handler])

    _lg.info("Starting %s: %s", parser.prog, args)

    if sys.platform not in SUPPORTED_PLATFORMS:
        _lg.error("Not supported platform: %s. Supported platforms: %s",
                  sys.platform, SUPPORTED_PLATFORMS)
        return 1

    if args.external_rsync and not shutil.which("rsync"):
        _lg.error("rsync should be installed to use --external-rsync option.")
        return 1

    # macOS ships BSD cp; the hardlink copy needs GNU cp (gcp).
    cp_program = "gcp" if sys.platform == "darwin" else "cp"
    if args.external_hardlink and not shutil.which(cp_program):
        _lg.error("%s should be installed to use --external-hardlink option.",
                  cp_program)
        return 1

    backup_dir_abs = os.path.abspath(args.backup_dir)
    if not os.path.isdir(backup_dir_abs):
        _lg.error("Backup directory %s does not exist, exiting", args.backup_dir)
        return 1

    for src_dir in args.sources:
        if not os.path.isdir(src_dir):
            _lg.error("Source directory %s does not exist", src_dir)
            return 1

    start_time = time.time()

    if not backup.set_backups_lock(backup_dir_abs, args.force):
        _lg.warning("Previous backup is still in process, exiting")
        return 1

    # BUGFIX: release the lock even when cleanup/backup raises — otherwise
    # any crash leaves a stale lock that blocks every following run until
    # the user passes --force.
    try:
        backup.cleanup_old_backups(backup_dir=backup_dir_abs, dry_run=args.dry_run)
        backup.initiate_backup(
            sources=args.sources,
            backup_dir=backup_dir_abs,
            dry_run=args.dry_run,
            external_rsync=args.external_rsync,
            external_hardlink=args.external_hardlink,
        )
    finally:
        backup.release_backups_lock(backup_dir_abs)

    spent_time = time.time() - start_time
    _lg.info("Finished, time spent: %.3fs", spent_time)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user