Add rsync support

This commit is contained in:
2021-06-19 15:28:42 +03:00
parent 17826c6978
commit 862c20ed24
3 changed files with 340 additions and 57 deletions

View File

@@ -69,4 +69,11 @@ def initiate_backup(sources, backup_dir: pathlib.Path):
latest_backup.name,
cur_backup.name,
)
hardlink_dir(latest_backup, cur_backup)
# for src in sources:
# src_abs = pathlib.Path(os.path.abspath(src))
# dst_abs = pathlib.Path(os.path.join(cur_backup, src_abs.name))
# _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
# rsync(src_abs, cur_backup)

View File

@@ -2,6 +2,7 @@
Module with filesystem-related functions.
"""
import enum
import logging
import os
import subprocess
@@ -28,15 +29,83 @@ def rsync_ext(src, dst, dry_run=False):
return res
def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
    """Recursively yield DirEntry objects for given directory.

    Symlinks are never followed: a symlink pointing at a directory is
    yielded as a plain entry and is not descended into.

    :param path: directory whose tree is walked.
    :param dir_first: when true, a directory entry is yielded before its
        contents (top-down); when false, it is yielded after them
        (bottom-up), which is the order needed to remove a tree safely.
    :return: generator of ``os.DirEntry`` objects for every entry below
        ``path`` (``path`` itself is not yielded).
    """
    # The context manager closes the scandir iterator even when the
    # consumer stops iterating early or an exception propagates.
    with os.scandir(path) as scan_it:
        for entry in scan_it:
            if not entry.is_dir(follow_symlinks=False):
                yield entry
                continue
            if dir_first:
                yield entry
            yield from scantree(entry.path, dir_first)
            if not dir_first:
                yield entry
def rm_direntry(entry: os.DirEntry):
    """Recursively delete DirEntry (dir, file, symlink or special file).

    A real directory is emptied depth-first and then removed.  Anything
    else is unlinked: regular files, symlinks (the link itself, never its
    target, even when it points at a directory) and special files such as
    FIFOs, sockets or device nodes.

    :param entry: entry to remove, as produced by ``os.scandir``.
    :raises OSError: if an underlying unlink/rmdir/scandir call fails.
    """
    if not entry.is_dir(follow_symlinks=False):
        # Not testing for "is a file or symlink" here on purpose: special
        # files match neither and would otherwise be silently left behind.
        os.unlink(entry.path)
        return

    with os.scandir(entry.path) as it:
        for child_entry in it:
            rm_direntry(child_entry)
    os.rmdir(entry.path)
# os.O_BINARY exists on Windows only; elsewhere the flag is a no-op.
O_BINARY = getattr(os, "O_BINARY", 0)
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
# Size of a single read request issued by copyfile, in bytes.
BUFFER_SIZE = 128 * 1024


def copyfile(src, dst):
    """Copy the contents of file ``src`` into file ``dst``.

    ``dst`` is created if missing (using the permission bits of ``src``,
    subject to the process umask) and truncated if it already exists.
    Only data is copied; ownership and timestamps are left untouched.

    :param src: path of the file to read.
    :param dst: path of the file to create or overwrite.
    :raises OSError: if either file cannot be opened, read or written.
    """
    fin = os.open(src, READ_FLAGS)
    try:
        src_stat = os.fstat(fin)
        fout = os.open(dst, WRITE_FLAGS, src_stat.st_mode)
        try:
            for chunk in iter(lambda: os.read(fin, BUFFER_SIZE), b""):
                # os.write may accept fewer bytes than requested, so keep
                # writing until the whole chunk has been flushed.
                pending = memoryview(chunk)
                while pending:
                    pending = pending[os.write(fout, pending):]
        finally:
            os.close(fout)
    finally:
        # Descriptors are released even when the copy fails midway.
        os.close(fin)
def copy_direntry(entry: os.DirEntry, dst_path):
    """Create ``dst_path`` as a non-recursive copy of ``entry``.

    A symlink is re-created with the same target, a directory is created
    empty (its contents are not copied), anything else is copied with
    ``copyfile``.  Ownership, permission bits and access/modification
    times of the source entry itself are then applied to ``dst_path``.

    :param entry: source entry, as produced by ``os.scandir``.
    :param dst_path: path to create; it must not exist yet for
        directories and symlinks.
    :raises OSError: if any of the underlying filesystem calls fails.
    """
    # Test for a symlink first: DirEntry.is_dir() follows symlinks by
    # default, so a link pointing at a directory would otherwise be copied
    # as an empty directory.
    is_link = entry.is_symlink()
    if is_link:
        link_target = os.readlink(entry.path)
        os.symlink(link_target, dst_path)
    elif entry.is_dir(follow_symlinks=False):
        os.mkdir(dst_path)
    else:
        copyfile(entry.path, dst_path)

    src_stat = entry.stat(follow_symlinks=False)
    os.chown(dst_path, src_stat.st_uid, src_stat.st_gid, follow_symlinks=False)

    if not is_link:
        # dst_path is not a symlink here, so the default (following) chmod
        # is equivalent and works on every platform.
        os.chmod(dst_path, src_stat.st_mode)
    elif os.chmod in os.supports_follow_symlinks:
        # Changing the mode of a symlink itself is not available everywhere
        # (e.g. Linux); skip it there instead of failing the whole copy.
        try:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        except NotImplementedError:
            pass

    # Nanosecond timestamps avoid the rounding error of float seconds, so
    # the copy ends up with exactly the same mtime as the source.
    os.utime(
        dst_path,
        ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns),
        follow_symlinks=False,
    )
def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
    """Replace the destination entry with a fresh copy of the source entry.

    The existing destination is removed with ``rm_direntry`` and then
    re-created at the same path with ``copy_direntry``.

    :param src_entry: entry to copy from.
    :param dst_entry: entry to be replaced.
    """
    target_path = dst_entry.path
    rm_direntry(dst_entry)
    copy_direntry(src_entry, target_path)
class Actions(enum.Enum):
    """Kinds of action that can be applied to a destination entry.

    NOTE(review): the member names presumably mirror the operations that
    rsync logs (delete / rewrite / update permissions / update owners /
    create); the enum is not referenced by the code visible here.
    """

    # Values are consecutive integers starting at 1.
    nothing = 1
    delete = 2
    rewrite = 3
    update_perm = 4
    update_owner = 5
    create = 6
def rsync(src_dir, dst_dir=None):
    """
    Make ``dst_dir`` mirror the contents of ``src_dir``.

    The destination tree is walked bottom-up and every entry is compared
    with the source entry at the same relative path:

    * entries missing from the source are deleted;
    * entries whose type differs (file / directory / symlink) are rewritten;
    * destination files that are hard links to the source are rewritten;
    * files with a different size or mtime are rewritten;
    * symlinks pointing somewhere else are rewritten;
    * otherwise only permissions and ownership are brought in line.

    Source entries that have no counterpart in the destination are created
    afterwards, parents before children.

    :param src_dir: directory to copy from; must exist.
    :param dst_dir: directory to synchronise; created when missing.
        NOTE(review): the ``None`` default is not handled and fails inside
        ``os.path.abspath`` - callers are expected to pass a path.
    :raises RuntimeError: if ``src_dir`` is not a directory or ``dst_dir``
        exists but is not a directory.
    """
    _lg.info("Rsync: %s -> %s", src_dir, dst_dir)
    src_root_abs = os.path.abspath(src_dir)
    dst_root_abs = os.path.abspath(dst_dir)

    if not os.path.isdir(src_root_abs):
        raise RuntimeError(f"Error during reading source directory: {src_root_abs}")
    if os.path.exists(dst_root_abs):
        if not os.path.isdir(dst_root_abs):
            raise RuntimeError(f"Destination path is not a directory: {dst_root_abs}")
    else:
        os.mkdir(dst_root_abs)

    # {rel_path: dir_entry} map; relpath (rather than slicing by the root
    # length) stays correct when the root is the filesystem root.
    src_files_map = {
        os.path.relpath(ent.path, src_root_abs): ent
        for ent in scantree(src_root_abs)
    }

    # process dst tree; children come before their directory so that
    # directories are already empty when they have to be removed
    for dst_entry in scantree(dst_root_abs, dir_first=False):
        rel_path = os.path.relpath(dst_entry.path, dst_root_abs)

        # popping marks the src entry as taken for processing
        src_entry = src_files_map.pop(rel_path, None)

        # remove dst entries not existing in source
        if src_entry is None:
            _lg.debug("deleting %s", rel_path)
            rm_direntry(dst_entry)
            continue

        # rewrite dst if it has different than src type
        if src_entry.is_file(follow_symlinks=False):
            if not dst_entry.is_file(follow_symlinks=False):
                _lg.info("rewriting %s", rel_path)
                update_direntry(src_entry, dst_entry)
                continue
        if src_entry.is_dir(follow_symlinks=False):
            if not dst_entry.is_dir(follow_symlinks=False):
                _lg.info("rewriting %s", rel_path)
                update_direntry(src_entry, dst_entry)
                continue
        if src_entry.is_symlink():
            if not dst_entry.is_symlink():
                _lg.info("rewriting %s", rel_path)
                update_direntry(src_entry, dst_entry)
                continue

        src_stat = src_entry.stat(follow_symlinks=False)
        dst_stat = dst_entry.stat(follow_symlinks=False)

        # rewrite dst if it is hard link to src (bad for backups).
        # Inode numbers are only unique per device, so the device has to
        # match as well; directories cannot be hard links and rewriting one
        # would throw away its already synchronised contents.
        if (
            not src_entry.is_dir(follow_symlinks=False)
            and src_stat.st_dev == dst_stat.st_dev
            and src_entry.inode() == dst_entry.inode()
        ):
            _lg.info("rewriting %s", rel_path)
            update_direntry(src_entry, dst_entry)
            continue

        # rewrite dst file which has different with src size or mtime
        if src_entry.is_file(follow_symlinks=False):
            same_size = src_stat.st_size == dst_stat.st_size
            same_mtime = src_stat.st_mtime == dst_stat.st_mtime
            if not (same_size and same_mtime):
                _lg.info("rewriting %s", rel_path)
                update_direntry(src_entry, dst_entry)
                continue

        # rewrite dst symlink if it points somewhere else than src
        if src_entry.is_symlink():
            if os.readlink(src_entry.path) != os.readlink(dst_entry.path):
                _lg.info("rewriting %s", rel_path)
                update_direntry(src_entry, dst_entry)
                continue

        # update permissions and ownership
        if src_stat.st_mode != dst_stat.st_mode:
            _lg.info("updating permissions %s", rel_path)
            # apply the *source* mode; re-applying the destination's own
            # mode would leave the permissions unchanged
            os.chmod(dst_entry.path, src_stat.st_mode)

        if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid:
            _lg.info("updating owners %s", rel_path)
            # do not follow symlinks: the entry itself is what was compared
            os.chown(
                dst_entry.path,
                src_stat.st_uid,
                src_stat.st_gid,
                follow_symlinks=False,
            )

    # whatever is left in the map does not exist in dst yet; the map keeps
    # scantree's top-down order, so directories are created before children
    for rel_path, src_entry in src_files_map.items():
        dst_path = os.path.join(dst_root_abs, rel_path)
        _lg.info("creating %s", rel_path)
        copy_direntry(src_entry, dst_path)
def _hardlink_dir_ext(src, dst):