Add rsync support
This commit is contained in:
parent
17826c6978
commit
862c20ed24
@ -69,4 +69,11 @@ def initiate_backup(sources, backup_dir: pathlib.Path):
|
||||
latest_backup.name,
|
||||
cur_backup.name,
|
||||
)
|
||||
|
||||
hardlink_dir(latest_backup, cur_backup)
|
||||
|
||||
# for src in sources:
|
||||
# src_abs = pathlib.Path(os.path.abspath(src))
|
||||
# dst_abs = pathlib.Path(os.path.join(cur_backup, src_abs.name))
|
||||
# _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name)
|
||||
# rsync(src_abs, cur_backup)
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
Module with filesystem-related functions.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
@ -28,15 +29,83 @@ def rsync_ext(src, dst, dry_run=False):
|
||||
return res
|
||||
|
||||
|
||||
def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
    """Recursively yield DirEntry objects for given directory.

    Symlinks are never followed: a symlink that points to a directory is
    yielded as a plain entry and is not descended into.

    :param path: directory to walk.
    :param dir_first: when true, a directory entry is yielded before its
        contents (pre-order); when false, it is yielded after them
        (post-order), which lets a caller remove or replace a directory
        once everything inside it has been handled.
    :return: iterator over every entry found below ``path``; ``path``
        itself is not yielded.
    """
    # The context manager closes the scandir handle even when the consumer
    # stops iterating early or an exception propagates.
    with os.scandir(path) as scan_it:
        for entry in scan_it:
            if not entry.is_dir(follow_symlinks=False):
                yield entry
                continue

            if dir_first:
                yield entry
            yield from scantree(entry.path, dir_first)
            if not dir_first:
                yield entry
|
||||
|
||||
|
||||
def rm_direntry(entry: os.DirEntry):
    """Recursively delete a DirEntry of any type.

    A real directory is emptied depth-first and then removed. Everything
    else (regular file, symlink, FIFO, socket, device node) is unlinked.
    Symlinks are never followed, so a symlink pointing at a directory is
    removed itself and its target is left untouched.

    :param entry: entry to delete, as produced by ``os.scandir``.
    :raises OSError: if any underlying unlink/rmdir/scandir call fails.
    """
    if not entry.is_dir(follow_symlinks=False):
        # Covers special files as well; skipping them would make the
        # parent's os.rmdir() fail with "directory not empty".
        os.unlink(entry.path)
        return

    # Snapshot the children and close the directory handle before deleting,
    # so the directory is not modified while it is still being iterated.
    with os.scandir(entry.path) as it:
        children = list(it)
    for child_entry in children:
        rm_direntry(child_entry)
    os.rmdir(entry.path)
|
||||
|
||||
|
||||
# os.O_BINARY exists on Windows only; elsewhere it contributes nothing.
O_BINARY = getattr(os, "O_BINARY", 0)
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
BUFFER_SIZE = 128 * 1024


def copyfile(src, dst):
    """Copy the contents of file ``src`` to ``dst`` using raw descriptors.

    ``dst`` is created if missing and truncated if present. When it is
    created, the source's ``st_mode`` is passed as the creation mode.

    :param src: path of the file to read.
    :param dst: path of the file to write.
    :raises OSError: if opening, reading or writing fails; both
        descriptors are closed before the error propagates.
    """
    fin = os.open(src, READ_FLAGS)
    try:
        src_mode = os.fstat(fin).st_mode
        fout = os.open(dst, WRITE_FLAGS, src_mode)
        try:
            while True:
                chunk = os.read(fin, BUFFER_SIZE)
                if not chunk:
                    break
                # os.write may accept fewer bytes than offered; keep
                # writing until the whole chunk has been flushed.
                view = memoryview(chunk)
                offset = 0
                while offset < len(view):
                    offset += os.write(fout, view[offset:])
        finally:
            os.close(fout)
    finally:
        os.close(fin)
|
||||
|
||||
|
||||
def copy_direntry(entry: os.DirEntry, dst_path):
    """Create ``dst_path`` as a non-recursive copy of ``entry``.

    * symlink   -> a new symlink with the same target string (the target is
      not resolved, so broken links and links to directories stay links);
    * directory -> an empty directory (contents are NOT copied);
    * otherwise -> a copy of the file contents made with ``copyfile``.

    Afterwards ownership, permission bits and access/modification times of
    the source entry itself are applied to ``dst_path``.

    :param entry: source entry, as produced by ``os.scandir``.
    :param dst_path: path to create; it must not already exist when
        ``entry`` is a directory or a symlink.
    :raises OSError: if creating the destination or applying metadata fails.
    """
    # Test for a symlink first: DirEntry.is_dir() follows links by default
    # and would turn a link to a directory into a real, empty directory.
    is_link = entry.is_symlink()
    if is_link:
        os.symlink(os.readlink(entry.path), dst_path)
    elif entry.is_dir(follow_symlinks=False):
        os.mkdir(dst_path)
    else:
        copyfile(entry.path, dst_path)

    src_stat = entry.stat(follow_symlinks=False)

    def apply_metadata(func, *args, **kwargs):
        # For a non-link destination, following symlinks is a no-op, so the
        # portable plain call is used. For a link, act on the link itself
        # only where the platform can do so (e.g. os.chmod cannot on Linux);
        # otherwise skip rather than touch the link's target.
        if not is_link:
            func(dst_path, *args, **kwargs)
        elif func in os.supports_follow_symlinks:
            func(dst_path, *args, follow_symlinks=False, **kwargs)

    # chown goes before chmod because changing ownership may clear
    # set-uid/set-gid bits.
    apply_metadata(os.chown, src_stat.st_uid, src_stat.st_gid)
    apply_metadata(os.chmod, src_stat.st_mode)
    # Nanosecond values avoid the rounding of the float st_atime/st_mtime.
    apply_metadata(os.utime, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns))
|
||||
|
||||
|
||||
def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
    """Replace the destination entry with a fresh copy of the source entry.

    Whatever currently exists at the destination is removed with
    ``rm_direntry`` and ``src_entry`` is then copied to that same path with
    ``copy_direntry``.

    :param src_entry: entry to copy from.
    :param dst_entry: existing entry to be replaced.
    """
    target_path = dst_entry.path
    rm_direntry(dst_entry)
    copy_direntry(src_entry, target_path)
|
||||
|
||||
|
||||
class Actions(enum.Enum):
    """Named actions for a sync step.

    NOTE(review): the enum is not referenced by the code visible here;
    the member names suggest per-entry rsync outcomes -- confirm before
    relying on that.
    """

    # Values are spelled out; they are the same 1..6 that enum.auto() assigns.
    nothing = 1
    delete = 2
    rewrite = 3
    update_perm = 4
    update_owner = 5
    create = 6
|
||||
|
||||
|
||||
def rsync(src_dir, dst_dir=None):
|
||||
@ -48,37 +117,93 @@ def rsync(src_dir, dst_dir=None):
|
||||
"""
|
||||
|
||||
_lg.info(f"Rsync: {src_dir} -> {dst_dir}")
|
||||
src_abs = os.path.abspath(src_dir)
|
||||
dst_abs = os.path.abspath(dst_dir)
|
||||
src_root_abs = os.path.abspath(src_dir)
|
||||
dst_root_abs = os.path.abspath(dst_dir)
|
||||
|
||||
if not os.path.isdir(src_abs):
|
||||
raise RuntimeError(f"Error during reading source directory: {src_abs}")
|
||||
if os.path.exists(dst_abs):
|
||||
if not os.path.isdir(dst_abs):
|
||||
raise RuntimeError(f"Destination path is not a directory: {dst_abs}")
|
||||
if not os.path.isdir(src_root_abs):
|
||||
raise RuntimeError(f"Error during reading source directory: {src_root_abs}")
|
||||
if os.path.exists(dst_root_abs):
|
||||
if not os.path.isdir(dst_root_abs):
|
||||
raise RuntimeError(f"Destination path is not a directory: {dst_root_abs}")
|
||||
else:
|
||||
os.mkdir(dst_abs)
|
||||
os.mkdir(dst_root_abs)
|
||||
|
||||
for src_entry in scantree(src_abs):
|
||||
rel_path = src_entry.path[len(src_abs)+1:]
|
||||
dst_path = os.path.join(dst_abs, rel_path)
|
||||
src_stat = src_entry.stat(follow_symlinks=False)
|
||||
# {rel_path: dir_entry} map
|
||||
src_files_map = {
|
||||
ent.path[len(src_root_abs) + 1 :]: ent for ent in scantree(src_root_abs)
|
||||
}
|
||||
|
||||
dst_stat = os.lstat(dst_path)
|
||||
# process dst tree
|
||||
for dst_entry in scantree(dst_root_abs, dir_first=False):
|
||||
rel_path = dst_entry.path[len(dst_root_abs) + 1 :]
|
||||
|
||||
src_entry = src_files_map.get(rel_path)
|
||||
|
||||
# remove dst entries not existing in source
|
||||
if src_entry is None:
|
||||
_lg.debug("deleting %s", rel_path)
|
||||
rm_direntry(dst_entry)
|
||||
continue
|
||||
|
||||
# mark src entry as taken for processing
|
||||
del src_files_map[rel_path]
|
||||
|
||||
src_entry: os.DirEntry
|
||||
# rewrite dst if it has different than src type
|
||||
if src_entry.is_file(follow_symlinks=False):
|
||||
if not dst_entry.is_file(follow_symlinks=False):
|
||||
_lg.info("rewriting %s", rel_path)
|
||||
update_direntry(src_entry, dst_entry)
|
||||
continue
|
||||
if src_entry.is_dir(follow_symlinks=False):
|
||||
pass
|
||||
if not dst_entry.is_dir(follow_symlinks=False):
|
||||
_lg.info("rewriting %s", rel_path)
|
||||
update_direntry(src_entry, dst_entry)
|
||||
continue
|
||||
if src_entry.is_symlink():
|
||||
if not dst_entry.is_symlink():
|
||||
_lg.info("rewriting %s", rel_path)
|
||||
update_direntry(src_entry, dst_entry)
|
||||
continue
|
||||
|
||||
do_update = False
|
||||
# check file size
|
||||
if src_stat.st_size != dst_stat.st_size:
|
||||
do_update = True
|
||||
# check modification time (mtime)
|
||||
if src_stat.st_mtime > dst_stat.st_mtime:
|
||||
do_update = True
|
||||
# rewrite dst if it is hard link to src (bad for backups)
|
||||
if src_entry.inode() == dst_entry.inode():
|
||||
_lg.info("rewriting %s", rel_path)
|
||||
update_direntry(src_entry, dst_entry)
|
||||
continue
|
||||
|
||||
if do_update:
|
||||
_lg.info("Updating %s", src_entry)
|
||||
src_stat = src_entry.stat(follow_symlinks=False)
|
||||
dst_stat = dst_entry.stat(follow_symlinks=False)
|
||||
|
||||
# rewrite dst file/symlink which have different with src size or mtime
|
||||
if src_entry.is_file(follow_symlinks=False):
|
||||
same_size = src_stat.st_size == dst_stat.st_size
|
||||
same_mtime = src_stat.st_mtime == dst_stat.st_mtime
|
||||
if not (same_size and same_mtime):
|
||||
_lg.info("rewriting %s", rel_path)
|
||||
update_direntry(src_entry, dst_entry)
|
||||
continue
|
||||
|
||||
# rewrite dst symlink if it points somewhere else than src
|
||||
if src_entry.is_symlink():
|
||||
if os.readlink(src_entry.path) != os.readlink(dst_entry.path):
|
||||
_lg.info("rewriting %s", rel_path)
|
||||
update_direntry(src_entry, dst_entry)
|
||||
continue
|
||||
|
||||
# update permissions and ownership
|
||||
if src_stat.st_mode != dst_stat.st_mode:
|
||||
_lg.info("updating permissions %s", rel_path)
|
||||
os.chmod(dst_entry.path, dst_stat.st_mode)
|
||||
|
||||
if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid:
|
||||
_lg.info("updating owners %s", rel_path)
|
||||
os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid)
|
||||
|
||||
for rel_path, src_entry in src_files_map.items():
|
||||
dst_path = os.path.join(dst_root_abs, rel_path)
|
||||
_lg.info("creating %s", rel_path)
|
||||
copy_direntry(src_entry, dst_path)
|
||||
|
||||
|
||||
def _hardlink_dir_ext(src, dst):
|
||||
|
||||
205
tests/test_fs.py
205
tests/test_fs.py
@ -8,31 +8,58 @@ import unittest
|
||||
from spqr.curateipsum import fs
|
||||
|
||||
|
||||
class TestHardlinkDir(unittest.TestCase):
|
||||
class CommonFSTestCase(unittest.TestCase):
    """Base class for filesystem tests.

    Every test gets two fresh temporary directories, exposed as
    ``self.src_dir`` and ``self.dst_dir``, which are removed afterwards.
    """

    def setUp(self):
        self.tmp_dir_src = tempfile.TemporaryDirectory(prefix="source_")
        self.tmp_dir_dst = tempfile.TemporaryDirectory(prefix="dest_")
        self.src_dir = self.tmp_dir_src.name
        self.dst_dir = self.tmp_dir_dst.name

    def tearDown(self):
        # Remove the destination even if removing the source raises.
        try:
            self.tmp_dir_src.cleanup()
        finally:
            self.tmp_dir_dst.cleanup()

    @staticmethod
    def create_file(parent_dir, prefix=None):
        """Create a uniquely named file holding ``string.printable``.

        :param parent_dir: directory in which the file is created.
        :param prefix: optional file-name prefix.
        :return: absolute path of the new file.
        """
        fd, path = tempfile.mkstemp(prefix=prefix, dir=parent_dir)
        # open() takes ownership of the descriptor and closes it on exit.
        with open(fd, "w") as f:
            f.write(string.printable)
        return path

    @staticmethod
    def create_dir(parent_dir, prefix=None):
        """Create a uniquely named directory and return its path.

        :param parent_dir: directory in which the new directory is created.
        :param prefix: optional directory-name prefix.
        """
        return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)

    def relpath(self, full_path):
        """Return ``full_path`` relative to ``src_dir`` or ``dst_dir``.

        :param full_path: path located at or below one of the two roots.
        :return: the part of the path after the root ("" for the root itself).
        :raises RuntimeError: if the path is under neither root.
        """
        for root in (self.src_dir, self.dst_dir):
            # Require a separator after the root so that a sibling such as
            # "<root>_other" is not mistaken for a path inside it.
            if full_path == root or full_path.startswith(root + os.sep):
                return full_path[len(root) + 1 :]
        raise RuntimeError(f"Path {full_path} is not src_dir nor dst_dir")
|
||||
|
||||
|
||||
class TestHardlinkDir(CommonFSTestCase):
|
||||
def setUp(self):
|
||||
self.tmp_dir = tempfile.TemporaryDirectory(prefix="source_")
|
||||
self.src_dir = self.tmp_dir.name
|
||||
self.dst_dir = self.src_dir + ".copy"
|
||||
|
||||
def _create_common_file(self):
|
||||
cf_relpath = "common_file"
|
||||
cf_path = os.path.join(self.src_dir, cf_relpath)
|
||||
with open(cf_path, "w") as f:
|
||||
f.write(string.printable)
|
||||
return cf_relpath
|
||||
|
||||
def test_common_file(self):
|
||||
cf_relpath = self._create_common_file()
|
||||
cf_path = self.create_file(self.src_dir)
|
||||
cf_relpath = self.relpath(cf_path)
|
||||
|
||||
fs.hardlink_dir(self.src_dir, self.dst_dir)
|
||||
|
||||
src_stat = os.lstat(os.path.join(self.dst_dir, cf_relpath))
|
||||
src_stat = os.lstat(cf_path)
|
||||
dst_stat = os.lstat(os.path.join(self.src_dir, cf_relpath))
|
||||
self.assertTrue(os.path.samestat(src_stat, dst_stat))
|
||||
self.assertEqual(src_stat.st_nlink, 2)
|
||||
assert os.path.samestat(src_stat, dst_stat)
|
||||
assert src_stat.st_nlink == 2
|
||||
|
||||
def test_relative_symlink_to_common_file(self):
|
||||
cf_relpath = self._create_common_file()
|
||||
cf_relpath = self.relpath(self.create_file(self.src_dir))
|
||||
sl2cf_relpath = "symlink_to_common_file"
|
||||
os.chdir(self.src_dir)
|
||||
os.symlink(cf_relpath, sl2cf_relpath)
|
||||
@ -41,17 +68,16 @@ class TestHardlinkDir(unittest.TestCase):
|
||||
|
||||
# check link
|
||||
dst_sl2cf_path = os.path.join(self.dst_dir, sl2cf_relpath)
|
||||
self.assertEqual(os.readlink(dst_sl2cf_path), cf_relpath)
|
||||
assert os.readlink(dst_sl2cf_path) == cf_relpath
|
||||
|
||||
# check stats
|
||||
src_stat = os.lstat(os.path.join(self.dst_dir, sl2cf_relpath))
|
||||
dst_stat = os.lstat(dst_sl2cf_path)
|
||||
self.assertTrue(os.path.samestat(src_stat, dst_stat))
|
||||
self.assertEqual(src_stat.st_nlink, 2)
|
||||
assert os.path.samestat(src_stat, dst_stat)
|
||||
assert src_stat.st_nlink == 2
|
||||
|
||||
def test_absolute_symlink_to_common_file(self):
|
||||
cf_relpath = self._create_common_file()
|
||||
cf_path = os.path.join(self.src_dir, cf_relpath)
|
||||
cf_path = self.create_file(self.src_dir)
|
||||
sl2cf_relpath = "symlink_to_common_file"
|
||||
sl2cf_path = os.path.join(self.src_dir, sl2cf_relpath)
|
||||
os.symlink(cf_path, sl2cf_path)
|
||||
@ -60,17 +86,16 @@ class TestHardlinkDir(unittest.TestCase):
|
||||
|
||||
# check link
|
||||
dst_sl2cf_path = os.path.join(self.dst_dir, sl2cf_relpath)
|
||||
self.assertEqual(os.readlink(dst_sl2cf_path), cf_path)
|
||||
assert os.readlink(dst_sl2cf_path) == cf_path
|
||||
|
||||
# check stats
|
||||
src_stat = os.lstat(os.path.join(self.dst_dir, sl2cf_relpath))
|
||||
dst_stat = os.lstat(dst_sl2cf_path)
|
||||
self.assertTrue(os.path.samestat(src_stat, dst_stat))
|
||||
self.assertEqual(src_stat.st_nlink, 2)
|
||||
assert os.path.samestat(src_stat, dst_stat)
|
||||
assert src_stat.st_nlink == 2
|
||||
|
||||
def test_hardlink_to_common_file(self):
|
||||
cf_relpath = self._create_common_file()
|
||||
cf_path = os.path.join(self.src_dir, cf_relpath)
|
||||
cf_path = self.create_file(self.src_dir)
|
||||
hl2cf_relpath = "hardlink_to_common_file"
|
||||
hl2cf_path = os.path.join(self.src_dir, hl2cf_relpath)
|
||||
os.link(cf_path, hl2cf_path)
|
||||
@ -81,10 +106,136 @@ class TestHardlinkDir(unittest.TestCase):
|
||||
src_hl_stat = os.lstat(hl2cf_path)
|
||||
dst_hl_stat = os.lstat(os.path.join(self.dst_dir, hl2cf_relpath))
|
||||
|
||||
self.assertTrue(os.path.samestat(src_cf_stat, dst_hl_stat))
|
||||
self.assertTrue(os.path.samestat(src_hl_stat, dst_hl_stat))
|
||||
self.assertEqual(src_cf_stat.st_nlink, 4)
|
||||
assert os.path.samestat(src_cf_stat, dst_hl_stat)
|
||||
assert os.path.samestat(src_hl_stat, dst_hl_stat)
|
||||
assert src_cf_stat.st_nlink == 4
|
||||
|
||||
def tearDown(self):
|
||||
self.tmp_dir.cleanup()
|
||||
shutil.rmtree(self.dst_dir, ignore_errors=True)
|
||||
|
||||
|
||||
# TODO not finished
|
||||
class TestRsync(CommonFSTestCase):
    """Tests for ``fs.rsync`` syncing ``src_dir`` into ``dst_dir``.

    Covered here: removal of destination entries missing from the source,
    replacement of entries whose type differs, un-sharing of a hard link
    between source and destination, and rewriting a file of another size.
    """

    def _sync(self):
        """Run the sync under test from the source to the destination root."""
        fs.rsync(self.src_dir, self.dst_dir)

    def _in_dst(self, src_path):
        """Return the destination-side counterpart of a source path."""
        return os.path.join(self.dst_dir, self.relpath(src_path))

    def _in_src(self, dst_path):
        """Return the source-side counterpart of a destination path."""
        return os.path.join(self.src_dir, self.relpath(dst_path))

    @staticmethod
    def check_identical_file(file1, file2):
        """Assert owner, group, mode, mtime and size match (via lstat)."""
        first = os.lstat(file1)
        second = os.lstat(file2)

        for field in ("st_uid", "st_gid", "st_mode", "st_mtime", "st_size"):
            assert getattr(first, field) == getattr(second, field)

    # --- destination entries that do not exist in the source ---------------

    def test_dst_has_excess_file(self):
        extra_file = self.create_file(self.dst_dir)

        self._sync()
        assert not os.path.lexists(extra_file)

    def test_dst_has_excess_symlink(self):
        extra_link = os.path.join(self.dst_dir, "broken_symlink")
        os.symlink("broken_symlink", extra_link)

        self._sync()
        assert not os.path.lexists(extra_link)

    def test_dst_has_excess_empty_dir(self):
        extra_dir = self.create_dir(self.dst_dir)

        self._sync()
        assert not os.path.lexists(extra_dir)

    def test_dst_has_excess_nonempty_dir(self):
        extra_dir = self.create_dir(self.dst_dir)
        self.create_file(extra_dir)

        self._sync()
        assert not os.path.lexists(extra_dir)

    def test_dst_has_excess_nonempty_recursive_dir(self):
        extra_dir = self.create_dir(self.dst_dir)
        inner_dir = self.create_dir(extra_dir)
        self.create_file(inner_dir)

        self._sync()
        assert not os.path.lexists(extra_dir)

    # --- same relative path, different entry type ---------------------------

    def test_different_types_src_file_dst_dir(self):
        source_file = self.create_file(self.src_dir)
        target = self._in_dst(source_file)
        os.mkdir(target)

        self._sync()
        assert os.path.lexists(target)
        assert os.path.isfile(target)

    def test_different_types_src_file_dst_symlink(self):
        source_file = self.create_file(self.src_dir)
        target = self._in_dst(source_file)
        os.symlink("broken_link", target)

        self._sync()
        assert os.path.lexists(target)
        assert os.path.isfile(target)

    def test_different_types_src_symlink_dst_file(self):
        target = self.create_file(self.dst_dir)
        source_link = self._in_src(target)
        os.symlink("broken_link", source_link)

        self._sync()
        assert os.path.lexists(target)
        assert os.path.islink(target)

    def test_different_types_src_symlink_dst_dir(self):
        target = self.create_dir(self.dst_dir)
        source_link = self._in_src(target)
        os.symlink("broken_link", source_link)

        self._sync()
        assert os.path.lexists(target)
        assert os.path.islink(target)

    def test_different_types_src_dir_dst_file(self):
        source_dir = self.create_dir(self.src_dir)
        target = self._in_dst(source_dir)
        with open(target, "w") as f:
            f.write(string.printable)

        self._sync()
        assert os.path.lexists(target)
        assert os.path.isdir(target)

    def test_different_types_src_dir_dst_symlink(self):
        source_dir = self.create_dir(self.src_dir)
        target = self._in_dst(source_dir)
        os.symlink("broken_link", target)

        self._sync()
        assert os.path.lexists(target)
        assert os.path.isdir(target)

    # --- same type, differing identity or content ---------------------------

    def test_src_dst_same_inode(self):
        source_file = self.create_file(self.src_dir)
        target = self._in_dst(source_file)
        # The destination starts out as a hard link to the source file.
        os.link(source_file, target)

        self._sync()
        assert os.path.lexists(target)
        source_stat = os.lstat(source_file)
        target_stat = os.lstat(target)
        assert source_stat.st_nlink == 1
        assert target_stat.st_nlink == 1
        assert source_stat.st_ino != target_stat.st_ino

    def test_src_dst_diff_size(self):
        source_file = self.create_file(self.src_dir)
        target = self._in_dst(source_file)
        # The destination holds twice as much data as the source.
        with open(target, "w") as df:
            df.write(string.printable * 2)

        self._sync()
        assert os.path.lexists(target)
        self.check_identical_file(source_file, target)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user