Add setup.py

This commit is contained in:
2021-11-13 07:39:23 +03:00
parent 990faf9a43
commit 84546243cb
9 changed files with 40 additions and 18 deletions

0
curateipsum/__init__.py Normal file
View File

255
curateipsum/backup.py Normal file
View File

@@ -0,0 +1,255 @@
"""
Module with backup functions.
"""
import logging
import os
import shutil
from datetime import datetime, timedelta
from typing import Optional, Iterable
import curateipsum.fs as fs
# Name format of a single backup directory, e.g. "20211113_073923".
BACKUP_ENT_FMT = "%Y%m%d_%H%M%S"
# File created in the backup dir while a backup is in progress (see set_backups_lock).
LOCK_FILE = ".backups_lock"
# Per-backup subdirectory collecting entries changed relative to the previous backup.
DELTA_DIR = ".backup_delta"
_lg = logging.getLogger(__name__)  # module-level logger
def _is_backup_entity(backup_entry: os.DirEntry) -> bool:
    """Return True if backup_entry is a directory named like a backup timestamp."""
    if not backup_entry.is_dir():
        return False
    try:
        # name must parse as a BACKUP_ENT_FMT timestamp
        datetime.strptime(backup_entry.name, BACKUP_ENT_FMT)
    except ValueError:
        return False
    return True
def _iterate_backups(backup_dir: str) -> Iterable[os.DirEntry]:
    """Yield DirEntry objects for every non-empty backup in backup_dir.

    Empty backup directories encountered along the way are deleted
    instead of being yielded.

    :param backup_dir: full path to the directory holding all backups.
    """
    # "with" guarantees the scandir iterator is closed even when the
    # caller abandons this generator early (the original trailing
    # close() was skipped in that case).
    with os.scandir(backup_dir) as b_iter:
        b_ent: os.DirEntry
        for b_ent in b_iter:
            if not _is_backup_entity(b_ent):
                continue
            if not os.listdir(b_ent.path):
                _lg.info("Removing empty backup entity: %s", b_ent.name)
                os.rmdir(b_ent.path)
                continue
            yield b_ent
def _get_latest_backup(backup_dir: str) -> Optional[os.DirEntry]:
    """ Returns entry of the latest backup created in backup_dir or None. """
    # Backup names are BACKUP_ENT_FMT timestamps, so the lexicographic
    # maximum is the chronologically newest; max() is a single O(n) pass
    # instead of a full sort.
    return max(_iterate_backups(backup_dir),
               key=lambda e: e.name,
               default=None)
def _date_from_backup(backup: os.DirEntry) -> datetime:
    """ Parse the backup's creation time from its directory name. """
    return datetime.strptime(backup.name, BACKUP_ENT_FMT)
def set_backups_lock(backup_dir: str, force: bool = False) -> bool:
    """Try to acquire the backup lock file in backup_dir.

    :param backup_dir: directory holding the lock file.
    :param force: remove an existing (presumably stale) lock first.
    :return: False if a previous backup still holds the lock, True otherwise.
    """
    lock_file_path = os.path.join(backup_dir, LOCK_FILE)
    if force:
        # drop a stale lock; tolerate it disappearing concurrently
        try:
            os.unlink(lock_file_path)
        except FileNotFoundError:
            pass
    try:
        # O_CREAT|O_EXCL makes existence check + creation one atomic step,
        # closing the check-then-create race of the original version.
        fd = os.open(lock_file_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL)
    except FileExistsError:
        return False
    os.close(fd)
    return True
def release_backups_lock(backup_dir: str):
    """Remove the backup lock file; a no-op if it does not exist."""
    # EAFP: unlink directly instead of exists()+unlink(), which could race
    # with another process removing the file in between.
    try:
        os.unlink(os.path.join(backup_dir, LOCK_FILE))
    except FileNotFoundError:
        pass
def cleanup_old_backups(
        backup_dir: str,
        dry_run: bool = False,
        keep_all: int = 7,
        keep_daily: int = 30,
        keep_weekly: int = 52,
        keep_monthly: int = 12,
        keep_yearly: int = 5,
        min_free_space: int = 0
):
    """
    Delete old backups. Never deletes the only backup.

    :param backup_dir: full path to backup directory.
    :param dry_run: only report what would be removed, delete nothing.
    :param keep_all: the number of days that all backups must be kept.
    :param keep_daily: the number of days that all daily backups must be kept.
    :param keep_weekly: the number of weeks of which one weekly backup must be kept.
    :param keep_monthly: the number of months (1 month = 30 days) of which
        one monthly backup must be kept.
    :param keep_yearly: the number of years of which one yearly backup must be kept.
    :param min_free_space: not used right now
    :return: None
    """
    # newest first: the retention loop below walks from newest to oldest
    all_backups = sorted(_iterate_backups(backup_dir),
                         key=lambda e: e.name, reverse=True)
    if dry_run:
        _lg.info("Dry-run, no backups will be actually removed")
    if not all_backups:
        _lg.debug("No backups, exiting")
        return
    if len(all_backups) == 1:
        _lg.debug("Only one backup (%s) exists, will not remove it",
                  all_backups[0].name)
        return
    now = datetime.now()
    # Backup names are BACKUP_ENT_FMT timestamps, so comparing names
    # lexicographically compares them chronologically.
    thresholds = {k: now.strftime(BACKUP_ENT_FMT)
                  for k in ("all", "daily", "weekly", "monthly", "yearly")}
    if keep_all is not None:
        thresholds["all"] = ((now - timedelta(days=keep_all))
                             .replace(hour=0, minute=0, second=0)
                             .strftime(BACKUP_ENT_FMT))
    if keep_daily is not None:
        thresholds["daily"] = ((now - timedelta(days=keep_daily))
                               .replace(hour=0, minute=0, second=0)
                               .strftime(BACKUP_ENT_FMT))
    if keep_weekly is not None:
        # start of the week keep_weekly weeks ago; time-of-day reset to
        # midnight for consistency with the other thresholds (fixed: the
        # original kept the current time-of-day here)
        thresholds["weekly"] = (
            (now - timedelta(weeks=keep_weekly, days=now.weekday()))
            .replace(hour=0, minute=0, second=0)
            .strftime(BACKUP_ENT_FMT)
        )
    if keep_monthly is not None:
        thresholds["monthly"] = ((now - timedelta(days=30*keep_monthly))
                                 .replace(day=1, hour=0, minute=0, second=0)
                                 .strftime(BACKUP_ENT_FMT))
    if keep_yearly is not None:
        thresholds["yearly"] = (
            (now - timedelta(days=365*keep_yearly))
            .replace(month=1, day=1, hour=0, minute=0, second=0)
            .strftime(BACKUP_ENT_FMT)
        )
    prev_backup = all_backups[0]
    to_remove = {b: False for b in all_backups}
    for backup in all_backups[1:]:
        # skip all backups made after threshold
        if backup.name > thresholds["all"]:
            prev_backup = backup
            continue
        # leave only one backup per day for backups made after threshold
        if backup.name > thresholds["daily"]:
            if (_date_from_backup(prev_backup).date()
                    == _date_from_backup(backup).date()):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # leave only one backup per week for backups made after threshold
        if backup.name > thresholds["weekly"]:
            if (_date_from_backup(prev_backup).isocalendar()[1]
                    == _date_from_backup(backup).isocalendar()[1]):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # leave only one backup per month for backups made after threshold
        if backup.name > thresholds["monthly"]:
            if (_date_from_backup(prev_backup).date().replace(day=1)
                    == _date_from_backup(backup).date().replace(day=1)):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # leave only one backup per year for backups made after threshold
        if backup.name > thresholds["yearly"]:
            if (_date_from_backup(prev_backup).date().replace(month=1, day=1)
                    == _date_from_backup(backup).date().replace(month=1, day=1)):
                to_remove[prev_backup] = True
            prev_backup = backup
            continue
        # older than every threshold
        to_remove[backup] = True
    for backup, do_delete in to_remove.items():
        if not do_delete:
            continue
        # fixed: log planned removals on dry-run too (previously dry-run
        # logged nothing, making it useless for previewing retention)
        _lg.info("Removing old backup %s", backup.name)
        if not dry_run:
            shutil.rmtree(backup.path)
def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions):
    """Record a changed entry in the backup's delta directory (deletions are skipped)."""
    _lg.debug("%s %s", action, entry_relpath)
    if action is fs.Actions.delete:
        return
    fs.nest_hardlink(src_dir=backup_dir, src_relpath=entry_relpath,
                     dst_dir=os.path.join(backup_dir, DELTA_DIR))
def initiate_backup(sources,
                    backup_dir: str,
                    dry_run: bool = False,
                    external_rsync: bool = False,
                    external_hardlink: bool = False):
    """ Main backup function.

    Creates a new timestamped backup directory in backup_dir: hardlink-copies
    the latest backup into it (if one exists), rsyncs every source directory
    into it, and records each changed entry via process_backed_entry. The new
    backup is removed again on dry-run, and also when an existing previous
    backup turned out identical.

    :param sources: iterable of source directories to back up.
    :param backup_dir: directory where all backups are stored.
    :param dry_run: create the backup and remove it at the end.
    :param external_rsync: use external rsync binary instead of fs.rsync.
    :param external_hardlink: use external cp for the hardlink copy.
    """
    start_time_fmt = datetime.now().strftime(BACKUP_ENT_FMT)
    cur_backup = fs.PseudoDirEntry(os.path.join(backup_dir, start_time_fmt))
    _lg.debug("Current backup dir: %s", cur_backup.path)
    latest_backup = _get_latest_backup(backup_dir)
    if latest_backup is None:
        _lg.info("Creating empty directory for current backup: %s",
                 cur_backup.name)
        os.mkdir(cur_backup.path)
    else:
        _lg.info("Copying data from latest backup %s to current backup %s",
                 latest_backup.name, cur_backup.name)
        hl_res = fs.hardlink_dir(src_dir=latest_backup.path,
                                 dst_dir=cur_backup.path,
                                 use_external=external_hardlink)
        if not hl_res:
            # hardlink copy failed: drop the partial backup dir and give up
            _lg.error("Something went wrong during copying data from latest backup,"
                      " removing created %s", cur_backup.name)
            shutil.rmtree(cur_backup.path, ignore_errors=True)
            return
        # clean up delta dir from copied backup
        shutil.rmtree(os.path.join(cur_backup.path, DELTA_DIR), ignore_errors=True)
    rsync_func = fs.rsync_ext if external_rsync else fs.rsync
    backup_changed = False
    for src in sources:
        src_abs = os.path.abspath(src)
        src_name = os.path.basename(src_abs)
        # each source is synced into a same-named subdir of the backup
        dst_abs = os.path.join(cur_backup.path, src_name)
        _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
        for entry_relpath, action in rsync_func(src_abs, dst_abs, dry_run=dry_run):
            if latest_backup is not None:
                # record the change in the delta dir of the new backup
                process_backed_entry(
                    backup_dir=cur_backup.path,
                    entry_relpath=os.path.join(src_name, entry_relpath),
                    action=action
                )
            backup_changed = True
    # do not create backup on dry-run
    if dry_run:
        _lg.info("Dry-run, removing created backup: %s", cur_backup.name)
        shutil.rmtree(cur_backup.path, ignore_errors=True)
    # do not create backup if no change from previous one
    elif latest_backup is not None and not backup_changed:
        _lg.info("Newly created backup %s is the same as previous one %s, removing",
                 cur_backup.name, latest_backup.name)
        shutil.rmtree(cur_backup.path, ignore_errors=True)
    else:
        _lg.info("Backup created: %s", cur_backup.name)

504
curateipsum/fs.py Normal file
View File

@@ -0,0 +1,504 @@
"""
Module with filesystem-related functions.
"""
import enum
import glob
import logging
import os
import subprocess
import sys
from typing import Iterable, Tuple
_lg = logging.getLogger(__name__)  # module-level logger


class Actions(enum.Enum):
    """Change kinds reported by the sync functions (rsync / rsync_ext)."""
    nothing = enum.auto()       # no change
    delete = enum.auto()        # entry removed from destination
    rewrite = enum.auto()       # entry replaced (content or type changed)
    update_time = enum.auto()   # modification time updated
    update_perm = enum.auto()   # permissions updated
    update_owner = enum.auto()  # owner/group updated
    create = enum.auto()        # new entry created in destination
class PseudoDirEntry:
    """Minimal os.DirEntry-like wrapper around an arbitrary path.

    Resolves the path with realpath once and lazily caches is_dir()
    and stat() results, mirroring DirEntry's caching behavior.
    """

    def __init__(self, path):
        self.path = os.path.realpath(path)
        self.name = os.path.basename(self.path)
        self._is_dir = None   # lazy cache for is_dir()
        self._stat = None     # lazy cache for stat()

    def __str__(self):
        return self.name

    def is_dir(self) -> bool:
        cached = self._is_dir
        if cached is None:
            cached = os.path.isdir(self.path)
            self._is_dir = cached
        return cached

    def stat(self):
        cached = self._stat
        if cached is None:
            cached = os.lstat(self.path)
            self._stat = cached
        return cached
def _parse_rsync_output(line: str) -> Tuple[str, Actions]:
    """Translate one rsync --itemize-changes line into (relpath, action).

    :raises RuntimeError: if the change string matches no known rule.
    """
    change_string, relpath = line.split(' ', maxsplit=1)
    if change_string == "*deleting":
        return relpath, Actions.delete
    update_type = change_string[0]
    entity_type = change_string[1]
    change_type = change_string[2:]
    # rules are ordered: the first one that matches decides the action
    if update_type == "c" and entity_type in {"d", "L"} and "+" in change_type:
        return relpath, Actions.create
    if update_type == ">" and entity_type == "f" and "+" in change_type:
        return relpath, Actions.create
    if entity_type == "f" and ("s" in change_type or "t" in change_type):
        return relpath, Actions.rewrite
    if entity_type == "d" and "t" in change_type:
        return relpath, Actions.update_time
    if "p" in change_type:
        return relpath, Actions.update_perm
    if "o" in change_type or "g" in change_type:
        return relpath, Actions.update_owner
    raise RuntimeError("Not parsed string: %s" % line)
def rsync_ext(src, dst, dry_run=False):
    """Run the external rsync binary and yield (relpath, action) tuples.

    Lines are processed with a one-line lag (the current line is buffered
    and the previous one parsed), with the leftover line parsed after the
    loop.

    :param src: source directory (trailing slash is added for rsync).
    :param dst: destination directory.
    :param dry_run: pass --dry-run to rsync.
    """
    rsync_args = ["rsync"]
    if dry_run:
        rsync_args.append("--dry-run")
    rsync_args.append("--archive")
    # rsync_args.append("--compress")
    # rsync_args.append("--inplace")
    rsync_args.append("--whole-file")
    rsync_args.append("--human-readable")
    rsync_args.append("--delete-during")
    rsync_args.append("--itemize-changes")
    rsync_args.append(f"{src}/")
    rsync_args.append(str(dst))
    _lg.info("Executing external command: %s", " ".join(rsync_args))
    process = subprocess.Popen(rsync_args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    with process.stdout:
        prev_line = None
        for line in iter(process.stdout.readline, b""):
            _lg.debug("Rsync current line: %s", line)
            if prev_line is None:
                prev_line = line
                continue
            try:
                itemized = prev_line.decode("utf-8").strip()
            # some issues with cyrillic in filenames
            except UnicodeDecodeError:
                _lg.error("Can't process rsync line: %s", prev_line)
                # fixed: advance the buffer, otherwise the same undecodable
                # line was retried forever and all later lines were dropped
                prev_line = line
                continue
            _lg.debug("Rsync itemize line: %s", itemized)
            yield _parse_rsync_output(itemized)
            prev_line = line
        # fixed: guard against empty rsync output (prev_line still None
        # previously raised AttributeError on .decode)
        if prev_line is not None:
            try:
                itemized = prev_line.decode("utf-8").strip()
                _lg.debug("Rsync itemize line: %s", itemized)
                yield _parse_rsync_output(itemized)
            # some issues with cyrillic in filenames
            except UnicodeDecodeError:
                _lg.error("Can't process rsync line: %s", prev_line)
    process.wait()
def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
    """Recursively yield DirEntry objects for the given directory.

    :param path: directory to walk.
    :param dir_first: if True, yield a directory before its children;
        otherwise after them.
    """
    # fixed: the original had a stray duplicate docstring string sitting
    # as dead code after an annotation-only statement
    with os.scandir(path) as scan_it:
        entry: os.DirEntry
        for entry in scan_it:
            if entry.is_dir(follow_symlinks=False):
                if dir_first:
                    yield entry
                yield from scantree(entry.path, dir_first)
                if not dir_first:
                    yield entry
            else:
                yield entry
def rm_direntry(entry: os.DirEntry):
    """Recursively delete a DirEntry (directory, file or symlink)."""
    if entry.is_file(follow_symlinks=False) or entry.is_symlink():
        os.unlink(entry.path)
    elif entry.is_dir(follow_symlinks=False):
        # remove children first, then the directory itself
        with os.scandir(entry.path) as children:
            for child in children:
                rm_direntry(child)
        os.rmdir(entry.path)
try:
    O_BINARY = os.O_BINARY  # Windows only
except AttributeError:
    O_BINARY = 0
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
BUFFER_SIZE = 128 * 1024  # copy chunk size in bytes


def copy_file(src, dst):
    """Copy file content from src to dst. Faster than shutil.copy.

    The destination is created with the source file's mode (subject to
    the process umask, as with any os.open).

    :param src: path to source file.
    :param dst: path to destination file (created/truncated).
    """
    fin = fout = None
    try:
        fin = os.open(src, READ_FLAGS)
        stat = os.fstat(fin)
        fout = os.open(dst, WRITE_FLAGS, stat.st_mode)
        for chunk in iter(lambda: os.read(fin, BUFFER_SIZE), b""):
            os.write(fout, chunk)
    finally:
        # fixed: the original used bare "except: pass", which silently
        # swallowed every close() error (and NameError for never-opened fds)
        if fout is not None:
            os.close(fout)
        if fin is not None:
            os.close(fin)
def copy_entity(src_path: str, dst_path: str):
    """Non-recursive fs entity (file, dir or symlink) copy.

    The symlink check comes first (fixed): os.path.isdir() follows
    symlinks, so a symlink pointing at a directory was previously
    materialized as a real directory instead of a link.

    :param src_path: path to the source entity.
    :param dst_path: path the copy is created at.
    """
    src_stat = os.lstat(src_path)
    is_symlink = os.path.islink(src_path)
    if is_symlink:
        link_target = os.readlink(src_path)
        os.symlink(link_target, dst_path)
    elif os.path.isdir(src_path):
        os.mkdir(dst_path)
    else:
        copy_file(src_path, dst_path)
    if is_symlink:
        # change symlink attributes only if supported by OS
        if os.chown in os.supports_follow_symlinks:
            os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
                     follow_symlinks=False)
        if os.chmod in os.supports_follow_symlinks:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        if os.utime in os.supports_follow_symlinks:
            os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    else:
        os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
        os.chmod(dst_path, src_stat.st_mode)
        os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
def copy_direntry(entry: os.DirEntry, dst_path):
    """Non-recursive DirEntry (file, dir or symlink) copy.

    The symlink check comes first (fixed): DirEntry.is_dir() follows
    symlinks by default, so a symlink pointing at a directory was
    previously materialized as a real directory instead of a link.

    :param entry: source DirEntry.
    :param dst_path: path the copy is created at.
    """
    if entry.is_symlink():
        link_target = os.readlink(entry.path)
        os.symlink(link_target, dst_path)
    elif entry.is_dir():
        os.mkdir(dst_path)
    else:
        copy_file(entry.path, dst_path)
    src_stat = entry.stat(follow_symlinks=False)
    if entry.is_symlink():
        # change symlink attributes only if supported by OS
        if os.chown in os.supports_follow_symlinks:
            os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
                     follow_symlinks=False)
        if os.chmod in os.supports_follow_symlinks:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        if os.utime in os.supports_follow_symlinks:
            os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    else:
        os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
        os.chmod(dst_path, src_stat.st_mode)
        os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
    """
    Replace dst entry with a fresh non-recursive copy of src entry.
    Any existing dst directory content is removed; the content of a src
    directory is NOT copied into the new dst dir.
    """
    rm_direntry(dst_entry)
    copy_direntry(src_entry, dst_entry.path)
def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
    """
    Synchronize dst_dir with src_dir (pure-python rsync replacement).

    :param src_dir: source dir
    :param dst_dir: dest dir, created if it does not exist
    :param dry_run: not used
    :return: generator of (relative path, Actions) for each changed entry
    """
    _lg.debug("Rsync: %s -> %s", src_dir, dst_dir)
    src_root_abs = os.path.abspath(src_dir)
    dst_root_abs = os.path.abspath(dst_dir)
    if not os.path.isdir(src_root_abs):
        raise RuntimeError("Error during reading source directory: %s"
                           % src_root_abs)
    if os.path.exists(dst_root_abs):
        if not os.path.isdir(dst_root_abs):
            raise RuntimeError("Destination path is not a directory: %s"
                               % dst_root_abs)
    else:
        os.mkdir(dst_root_abs)
    # Create source map {rel_path: dir_entry}
    src_files_map = {
        ent.path[len(src_root_abs) + 1:]: ent for ent in scantree(src_root_abs)
    }
    # process dst tree
    for dst_entry in scantree(dst_root_abs, dir_first=False):
        rel_path = dst_entry.path[len(dst_root_abs) + 1:]
        src_entry = src_files_map.get(rel_path)
        # remove dst entries not existing in source
        if src_entry is None:
            _lg.debug("Rsync, deleting: %s", rel_path)
            rm_direntry(dst_entry)
            yield rel_path, Actions.delete
            continue
        # mark src entry as taken for processing
        del src_files_map[rel_path]
        src_entry: os.DirEntry
        # rewrite dst if it has different than src type
        if src_entry.is_file(follow_symlinks=False):
            if not dst_entry.is_file(follow_symlinks=False):
                _lg.debug("Rsync, rewriting (src is a file, dst is not a file): %s",
                          rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue
        if src_entry.is_dir(follow_symlinks=False):
            if not dst_entry.is_dir(follow_symlinks=False):
                _lg.debug("Rsync, rewriting (src is a dir, dst is not a dir): %s",
                          rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue
        if src_entry.is_symlink():
            if not dst_entry.is_symlink():
                _lg.debug("Rsync, rewriting (src is a symlink, dst is not a symlink): %s",
                          rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue
        # rewrite dst if it is hard link to src (bad for backups)
        if src_entry.inode() == dst_entry.inode():
            # fixed: the log said "different inodes", but this branch
            # triggers when inodes are the SAME (a hard link to the source)
            _lg.debug("Rsync, rewriting (same inode): %s", rel_path)
            update_direntry(src_entry, dst_entry)
            yield rel_path, Actions.rewrite
            continue
        src_stat = src_entry.stat(follow_symlinks=False)
        dst_stat = dst_entry.stat(follow_symlinks=False)
        # rewrite dst file which has different size or mtime than src
        if src_entry.is_file(follow_symlinks=False):
            same_size = src_stat.st_size == dst_stat.st_size
            same_mtime = src_stat.st_mtime == dst_stat.st_mtime
            if not (same_size and same_mtime):
                reason = "size" if not same_size else "time"
                _lg.debug("Rsync, rewriting (different %s): %s", reason, rel_path)
                update_direntry(src_entry, dst_entry)
                yield rel_path, Actions.rewrite
                continue
        # rewrite dst symlink if it points somewhere else than src
        if src_entry.is_symlink():
            if os.readlink(src_entry.path) != os.readlink(dst_entry.path):
                _lg.debug("Rsync, rewriting (different symlink target): %s", rel_path)
                update_direntry(src_entry, dst_entry)
                # fixed: this branch applied the change without yielding it,
                # so callers never saw the backup as changed
                yield rel_path, Actions.rewrite
                continue
        # update permissions and ownership
        if src_stat.st_mode != dst_stat.st_mode:
            _lg.debug("Rsync, updating permissions: %s", rel_path)
            yield rel_path, Actions.update_perm
            # fixed: the original chmod-ed dst to its OWN old mode (a no-op);
            # the source mode must be applied
            os.chmod(dst_entry.path, src_stat.st_mode)
        if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid:
            _lg.debug("Rsync, updating owners: %s", rel_path)
            yield rel_path, Actions.update_owner
            os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid)
    # process remained source entries
    for rel_path, src_entry in src_files_map.items():
        dst_path = os.path.join(dst_root_abs, rel_path)
        _lg.debug("Rsync, creating: %s", rel_path)
        copy_direntry(src_entry, dst_path)
        yield rel_path, Actions.create
    # restore dir mtimes in dst, updated by updating files
    for src_entry in scantree(src_root_abs, dir_first=True):
        # follow_symlinks=False for consistency with scantree's own dir test
        if not src_entry.is_dir(follow_symlinks=False):
            continue
        rel_path = src_entry.path[len(src_root_abs) + 1:]
        dst_path = os.path.join(dst_root_abs, rel_path)
        src_stat = src_entry.stat(follow_symlinks=False)
        dst_stat = os.lstat(dst_path)
        if src_stat.st_mtime != dst_stat.st_mtime:
            _lg.debug("Rsync, restoring directory mtime: %s", dst_path)
            os.utime(dst_path,
                     (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    # restore dst_root dir mtime
    src_root_stat = os.lstat(src_root_abs)
    dst_root_stat = os.lstat(dst_root_abs)
    if src_root_stat.st_mtime != dst_root_stat.st_mtime:
        _lg.debug("Rsync, restoring root directory mtime: %s", src_root_abs)
        os.utime(dst_root_abs,
                 (src_root_stat.st_atime, src_root_stat.st_mtime),
                 follow_symlinks=False)
def _recursive_hardlink_ext(src: str, dst: str) -> bool:
    """
    Make hardlink for a directory using cp -al. Both src and dst should exist.
    :param src: absolute path to source directory.
    :param dst: absolute path to target directory.
    :return: success or not
    """
    if sys.platform == "darwin":
        cp = "gcp"
    else:
        cp = "cp"
    # fixed: glob("{src}/*") skips dotfiles, silently dropping hidden
    # entries from the backup; os.listdir returns them all
    src_content = [os.path.join(src, name) for name in os.listdir(src)]
    if not src_content:
        # nothing to copy; cp would fail with "missing file operand"
        return True
    cmd = [cp, "--archive", "--verbose", "--link", *src_content, dst]
    _lg.info("Executing external command: %s", " ".join(cmd))
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    with process.stdout:
        for line in iter(process.stdout.readline, b""):
            _lg.debug("%s: %s", cp, line.decode("utf-8").strip())
    exitcode = process.wait()
    return not bool(exitcode)
def _recursive_hardlink(src: str, dst: str) -> bool:
    """
    Hardlink-copy directory content recursively using python only.
    Both src and dst directories should exist.
    :param src: absolute path to source directory.
    :param dst: absolute path to target directory.
    :return: True (raises on unsupported entry kinds)
    """
    with os.scandir(src) as it:
        entry: os.DirEntry
        for entry in it:
            dst_path = os.path.join(dst, entry.name)
            if entry.is_dir(follow_symlinks=False):
                _lg.debug("Hardlink, copying directory: %s -> %s",
                          entry.path, dst_path)
                os.mkdir(dst_path)
                # process directory children
                _recursive_hardlink(entry.path, dst_path)
                # save directory's metainfo
                meta = entry.stat(follow_symlinks=False)
                os.chown(dst_path, meta.st_uid, meta.st_gid)
                os.chmod(dst_path, meta.st_mode)
                os.utime(dst_path, (meta.st_atime, meta.st_mtime))
            elif entry.is_file(follow_symlinks=False) or entry.is_symlink():
                _lg.debug("Hardlink, creating link for file: %s -> %s",
                          entry.path, dst_path)
                os.link(entry.path, dst_path, follow_symlinks=False)
            else:
                # something that is not a file, symlink or directory
                raise NotImplementedError(entry.path)
    return True
def hardlink_dir(src_dir, dst_dir, use_external: bool = False) -> bool:
    """
    Create dst_dir as a hardlink copy of src_dir with all its content.
    :param src_dir: path to source directory
    :param dst_dir: path to target directory (must not exist yet)
    :param use_external: whether to use external cp -al command
    :return: success or not
    """
    _lg.debug("Recursive hardlinking: %s -> %s", src_dir, dst_dir)
    src_abs = os.path.abspath(src_dir)
    dst_abs = os.path.abspath(dst_dir)
    if not os.path.isdir(src_abs):
        raise RuntimeError(f"Error reading source directory: {src_dir}")
    if os.path.exists(dst_abs):
        raise RuntimeError(f"Destination already exists: {dst_dir}")
    _lg.debug("Hardlink, creating directory: %s", dst_abs)
    os.mkdir(dst_abs)
    if use_external:
        return _recursive_hardlink_ext(src_abs, dst_abs)
    return _recursive_hardlink(src_abs, dst_abs)
def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str):
    """
    Hardlink entity from (src_dir + src_relpath) to dst_dir preserving dir structure.

    Every missing level of src_relpath is re-created in dst_dir with
    copy_entity (non-recursively), so only the path down to the entity is
    mirrored, not whole sibling trees.

    :param src_dir: root directory of the source entity.
    :param src_relpath: path of the entity relative to src_dir.
    :param dst_dir: destination root directory, created if missing.
    :raises RuntimeError: if the source entity does not exist, or dst_dir
        exists but is not a directory.
    """
    _lg.debug("Nested hardlinking: %s/%s -> %s", src_dir, src_relpath, dst_dir)
    src_dir_abs = os.path.abspath(src_dir)
    src_full_path = os.path.join(src_dir_abs, src_relpath)
    dst_dir_abs = os.path.abspath(dst_dir)
    dst_full_path = os.path.join(dst_dir_abs, src_relpath)
    # check source entity and destination directory
    if not os.path.exists(src_full_path):
        raise RuntimeError("Error reading source entity: %s" % src_full_path)
    if os.path.exists(dst_dir_abs):
        if not os.path.isdir(dst_dir_abs):
            raise RuntimeError("Destination path is not a directory: %s"
                               % dst_dir_abs)
    else:
        os.mkdir(dst_dir_abs)
    # if destination entity exists, check it points to source entity
    if os.path.exists(dst_full_path):
        src_stat = os.lstat(src_full_path)
        if os.path.samestat(src_stat, os.lstat(dst_full_path)):
            # same inode already in place, nothing to do
            return
        # remove otherwise
        # NOTE(review): os.unlink fails if dst_full_path is a directory —
        # presumably only files/symlinks are expected here; confirm callers.
        os.unlink(dst_full_path)
    # walk down src_relpath, copying each path level that dst is missing
    src_cur_path = src_dir_abs
    dst_cur_path = dst_dir_abs
    for rel_part in src_relpath.split(sep=os.path.sep):
        src_cur_path = os.path.join(src_cur_path, rel_part)
        dst_cur_path = os.path.join(dst_cur_path, rel_part)
        if os.path.exists(dst_cur_path):
            continue
        copy_entity(src_cur_path, dst_cur_path)

110
curateipsum/main.py Executable file
View File

@@ -0,0 +1,110 @@
#!/usr/bin/env python
import argparse
import logging
import os.path
import shutil
import sys
import time
import curateipsum.backup as backup
_lg = logging.getLogger("spqr.curateipsum")  # application logger
SUPPORTED_PLATFORMS = ("linux", "darwin")  # main() refuses to run on others
def main():
    """CLI entry point: parse args, validate the environment, run one backup.

    :return: process exit status (0 on success, 1 on error).
    """
    formatter = logging.Formatter("{asctime}|{levelname}|{message}", style="{")
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    parser = argparse.ArgumentParser(
        prog="cura-te-ipsum", description="cura-te-ipsum, my personal backup software.",
    )
    parser.add_argument("-V", "--version", action="version", version="%(prog)s 0.1")
    parser.add_argument("-v", "--verbose",
                        action="store_true",
                        default=False,
                        help="print verbose information")
    parser.add_argument("-b",
                        dest="backup_dir",
                        metavar="BACKUP_DIR",
                        type=str,
                        required=True,
                        help="directory, where all backups will be stored")
    parser.add_argument("-n", "--dry-run",
                        action="store_true",
                        default=False,
                        # fixed help typo: "Do not do create backup"
                        help="Do not actually create backup")
    parser.add_argument("-f", "--force",
                        action="store_true",
                        default=False,
                        help="Force run when previous backup is still in process")
    parser.add_argument("--external-rsync",
                        action="store_true",
                        default=False,
                        help="Use external rsync for copying")
    parser.add_argument("--external-hardlink",
                        action="store_true",
                        default=False,
                        help="Use cp command for creating hardlink copies")
    parser.add_argument("sources",
                        nargs="+",
                        metavar="SOURCE",
                        type=str,
                        help="backup source (file/dir/smth else)")
    args = parser.parse_args()
    loglevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loglevel, handlers=[console_handler])
    _lg.info("Starting %s: %s", parser.prog, args)
    # environment sanity checks before touching anything on disk
    if sys.platform not in SUPPORTED_PLATFORMS:
        _lg.error("Not supported platform: %s. Supported platforms: %s",
                  sys.platform, SUPPORTED_PLATFORMS)
        return 1
    if args.external_rsync and not shutil.which("rsync"):
        _lg.error("rsync should be installed to use --external-rsync option.")
        return 1
    cp_program = "gcp" if sys.platform == "darwin" else "cp"
    if args.external_hardlink and not shutil.which(cp_program):
        _lg.error("%s should be installed to use --external-hardlink option.",
                  cp_program)
        return 1
    backup_dir_abs = os.path.abspath(args.backup_dir)
    if not os.path.isdir(backup_dir_abs):
        _lg.error("Backup directory %s does not exist, exiting", args.backup_dir)
        return 1
    for src_dir in args.sources:
        if not os.path.isdir(src_dir):
            _lg.error("Source directory %s does not exist", src_dir)
            return 1
    start_time = time.time()
    if not backup.set_backups_lock(backup_dir_abs, args.force):
        _lg.warning("Previous backup is still in process, exiting")
        return 1
    try:
        backup.cleanup_old_backups(backup_dir=backup_dir_abs, dry_run=args.dry_run)
        backup.initiate_backup(
            sources=args.sources,
            backup_dir=backup_dir_abs,
            dry_run=args.dry_run,
            external_rsync=args.external_rsync,
            external_hardlink=args.external_hardlink,
        )
    finally:
        # fixed: release the lock even when the backup raises; previously
        # an error left a stale lock, forcing later runs to use --force
        backup.release_backups_lock(backup_dir_abs)
    end_time = time.time()
    spent_time = end_time - start_time
    _lg.info("Finished, time spent: %.3fs", spent_time)
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())