Add processing of changed files

This commit is contained in:
Maks Snegov 2021-11-09 00:43:18 +03:00
parent 1ed1032f42
commit 7395268e97
4 changed files with 231 additions and 79 deletions

View File

@ -3,7 +3,6 @@
import argparse import argparse
import logging import logging
import os.path import os.path
import pathlib
import shutil import shutil
import sys import sys
@ -29,7 +28,7 @@ def main():
parser.add_argument("-b", parser.add_argument("-b",
dest="backup_dir", dest="backup_dir",
metavar="BACKUP_DIR", metavar="BACKUP_DIR",
type=pathlib.Path, type=str,
required=True, required=True,
help="directory, where all backups will be stored") help="directory, where all backups will be stored")
parser.add_argument("-n", "--dry-run", parser.add_argument("-n", "--dry-run",
@ -47,7 +46,7 @@ def main():
parser.add_argument("sources", parser.add_argument("sources",
nargs="+", nargs="+",
metavar="SOURCE", metavar="SOURCE",
type=pathlib.Path, type=str,
help="backup source (file/dir/smth else)") help="backup source (file/dir/smth else)")
args = parser.parse_args() args = parser.parse_args()
@ -69,7 +68,7 @@ def main():
_lg.error(f"{cp_program} should be installed to use --external-hardlink option.") _lg.error(f"{cp_program} should be installed to use --external-hardlink option.")
return 1 return 1
backup_dir_abs = pathlib.Path(os.path.abspath(args.backup_dir)) backup_dir_abs = os.path.abspath(args.backup_dir)
if not os.path.isdir(backup_dir_abs): if not os.path.isdir(backup_dir_abs):
_lg.error("Backup directory %s does not exist, exiting", args.backup_dir) _lg.error("Backup directory %s does not exist, exiting", args.backup_dir)
return 1 return 1

View File

@ -4,7 +4,6 @@ Module with backup functions.
import logging import logging
import os import os
import pathlib
import shutil import shutil
import time import time
from datetime import datetime from datetime import datetime
@ -13,31 +12,31 @@ from typing import Optional
import spqr.curateipsum.fs as fs import spqr.curateipsum.fs as fs
BACKUP_ENT_FMT = "%y%m%d_%H%M" BACKUP_ENT_FMT = "%y%m%d_%H%M"
DELTA_DIR = "_delta"
_lg = logging.getLogger(__name__) _lg = logging.getLogger(__name__)
def _is_backup_entity(entity_path: pathlib.Path) -> bool: def _is_backup_entity(entity_path: str) -> bool:
""" Check if entity_path is a single backup dir. """ """ Check if entity_path is a single backup dir. """
if not os.path.isdir(entity_path): if not os.path.isdir(entity_path):
return False return False
try: try:
datetime.strptime(entity_path.name, BACKUP_ENT_FMT) datetime.strptime(os.path.basename(entity_path), BACKUP_ENT_FMT)
return True return True
except ValueError: except ValueError:
return False return False
def _get_latest_backup(backup_dir: pathlib.Path) -> Optional[pathlib.Path]: def _get_latest_backup(backup_dir: str) -> Optional[str]:
""" Returns path to latest backup created in backup_dir or None. """ """ Returns path to latest backup created in backup_dir or None. """
backups = sorted(os.listdir(backup_dir), reverse=True) backups = sorted(os.listdir(backup_dir), reverse=True)
for b_ent in backups: for b_ent in backups:
b_ent_abs = backup_dir / b_ent b_ent_abs = os.path.join(backup_dir, b_ent)
if not _is_backup_entity(b_ent_abs): if not _is_backup_entity(b_ent_abs):
continue continue
if not os.listdir(b_ent_abs): if not os.listdir(b_ent_abs):
_lg.info("Removing empty backup entity: %s", b_ent_abs.name) _lg.info("Removing empty backup entity: %s", os.path.basename(b_ent_abs))
_lg.debug("Removing directory %s", b_ent_abs)
os.rmdir(b_ent_abs) os.rmdir(b_ent_abs)
continue continue
return b_ent_abs return b_ent_abs
@ -45,8 +44,15 @@ def _get_latest_backup(backup_dir: pathlib.Path) -> Optional[pathlib.Path]:
return None return None
def process_backed_entry(backup_dir: str, entry_relpath: str, action: fs.Actions):
    """
    Register one synced entry in the delta directory of a backup.

    The action is logged at debug level; every entry except a deleted one
    is then nest-hardlinked from backup_dir into backup_dir/DELTA_DIR.

    :param backup_dir: backup directory that holds the entry
    :param entry_relpath: entry path relative to backup_dir
    :param action: action that was reported for the entry
    """
    _lg.debug("%s %s", action, entry_relpath)

    # a deleted entry no longer exists in the backup, nothing to link
    if action is fs.Actions.delete:
        return

    delta_root = os.path.join(backup_dir, DELTA_DIR)
    fs.nest_hardlink(src_dir=backup_dir,
                     src_relpath=entry_relpath,
                     dst_dir=delta_root)
def initiate_backup(sources, def initiate_backup(sources,
backup_dir: pathlib.Path, backup_dir: str,
dry_run: bool = False, dry_run: bool = False,
external_rsync: bool = False, external_rsync: bool = False,
external_hardlink: bool = False): external_hardlink: bool = False):
@ -54,43 +60,53 @@ def initiate_backup(sources,
start_time = time.time() start_time = time.time()
start_time_fmt = datetime.fromtimestamp(start_time).strftime(BACKUP_ENT_FMT) start_time_fmt = datetime.fromtimestamp(start_time).strftime(BACKUP_ENT_FMT)
cur_backup = backup_dir / start_time_fmt cur_backup = os.path.join(backup_dir, start_time_fmt)
cur_backup_name = os.path.basename(cur_backup)
_lg.debug("Current backup dir: %s", cur_backup) _lg.debug("Current backup dir: %s", cur_backup)
latest_backup = _get_latest_backup(backup_dir) latest_backup = _get_latest_backup(backup_dir)
if cur_backup == latest_backup: if cur_backup == latest_backup:
_lg.warning("Latest backup %s was created less than minute ago, exiting", _lg.warning("Latest backup %s was created less than minute ago, exiting",
latest_backup.name) os.path.basename(latest_backup))
return return
if latest_backup is None: if latest_backup is None:
_lg.info("Creating empty directory for current backup: %s", cur_backup.name) _lg.info("Creating empty directory for current backup: %s", cur_backup_name)
os.mkdir(cur_backup) os.mkdir(cur_backup)
else: else:
_lg.info("Copying data from latest backup %s to current backup %s", _lg.info("Copying data from latest backup %s to current backup %s",
latest_backup.name, cur_backup.name) os.path.basename(latest_backup), cur_backup_name)
hl_res = fs.hardlink_dir(src_dir=latest_backup, dst_dir=cur_backup, hl_res = fs.hardlink_dir(src_dir=latest_backup, dst_dir=cur_backup,
use_external=external_hardlink) use_external=external_hardlink)
if not hl_res: if not hl_res:
_lg.error("Something went wrong during copying data from latest backup," _lg.error("Something went wrong during copying data from latest backup,"
" removing created %s", cur_backup.name) " removing created %s", cur_backup_name)
shutil.rmtree(cur_backup, ignore_errors=True) shutil.rmtree(cur_backup, ignore_errors=True)
return return
# clean up delta dir from copied backup
shutil.rmtree(os.path.join(cur_backup, DELTA_DIR), ignore_errors=True)
rsync_func = fs.rsync_ext if external_rsync else fs.rsync rsync_func = fs.rsync_ext if external_rsync else fs.rsync
for src in sources: for src in sources:
src_abs = pathlib.Path(os.path.abspath(src)) src_abs = os.path.abspath(src)
dst_abs = cur_backup / src_abs.name src_name = os.path.basename(src_abs)
_lg.info("Backing up directory %s to %s backup", src_abs, cur_backup.name) dst_abs = os.path.join(cur_backup, src_name)
rsync_func(src_abs, dst_abs, dry_run=dry_run) _lg.info("Backing up directory %s to %s backup", src_abs, cur_backup_name)
for entry_relpath, action in rsync_func(src_abs, dst_abs, dry_run=dry_run):
process_backed_entry(
backup_dir=cur_backup,
entry_relpath=os.path.join(src_name, entry_relpath),
action=action
)
if dry_run: if dry_run:
_lg.info("Dry-run, removing created backup: %s", cur_backup.name) _lg.info("Dry-run, removing created backup: %s", cur_backup_name)
shutil.rmtree(cur_backup, ignore_errors=True) shutil.rmtree(cur_backup, ignore_errors=True)
else: else:
_lg.info("Backup created: %s", cur_backup.name) _lg.info("Backup created: %s", cur_backup_name)
end_time = time.time() end_time = time.time()
spend_time = end_time - start_time spend_time = end_time - start_time

View File

@ -8,11 +8,20 @@ import logging
import os import os
import subprocess import subprocess
import sys import sys
from typing import Iterable from typing import Iterable, Tuple
_lg = logging.getLogger(__name__) _lg = logging.getLogger(__name__)
class Actions(enum.Enum):
    """ Kinds of changes reported for a single entry during sync. """
    # NOTE(review): not produced by any code visible here - presumably "no change"
    nothing = enum.auto()
    # entry was removed from destination (rsync "*deleting" line)
    delete = enum.auto()
    # destination entry was replaced with the source one
    rewrite = enum.auto()
    # directory timestamp change (itemize flag "t" on a "d" entity)
    update_time = enum.auto()
    # permission bits differ (itemize flag "p")
    update_perm = enum.auto()
    # owner or group differ (itemize flags "o" / "g")
    update_owner = enum.auto()
    # entry did not exist in destination and was created
    create = enum.auto()
# *deleting will_be_deleted # *deleting will_be_deleted
# >f.st.... .gitignore # >f.st.... .gitignore
# >f+++++++ LICENSE # >f+++++++ LICENSE
@ -26,6 +35,34 @@ _lg = logging.getLogger(__name__)
# cd+++++++ java-alg/ # cd+++++++ java-alg/
def _parse_rsync_output(line: str) -> Tuple[str, Actions]:
    """
    Parse one itemized-changes line printed by rsync.

    A line looks like "<change string> <path>", e.g. ">f.st.... .gitignore",
    "cd+++++++ java-alg/" or "*deleting will_be_deleted".

    :param line: decoded output line without surrounding whitespace
    :return: (path relative to the sync root, detected action)
    :raises RuntimeError: if the line can not be mapped to an action
    """
    change_string, separator, relpath = line.partition(" ")
    if not separator:
        # no "<change string> <path>" structure at all
        raise RuntimeError("Not parsed string: %s" % line)

    if change_string == "*deleting":
        # rsync pads "*deleting" to the width of the itemize column, so the
        # path may be preceded by extra spaces (see rsync(1)); drop them.
        # Caveat: a deleted name that really starts with a space loses it.
        return relpath.lstrip(" "), Actions.delete

    # Slices never raise: a too short change string yields empty fields,
    # matches no rule below and ends in the RuntimeError.
    update_type = change_string[:1]
    entity_type = change_string[1:2]
    change_type = change_string[2:]

    if entity_type == "L":
        # symlinks are itemized as "name -> target", keep the name only
        # (a link name containing " -> " itself can not be told apart)
        relpath = relpath.partition(" -> ")[0]

    action = None
    if update_type == "c" and entity_type in {"d", "L"} and "+" in change_type:
        action = Actions.create
    elif update_type == ">" and entity_type == "f" and "+" in change_type:
        action = Actions.create
    elif entity_type == "f" and ("s" in change_type or "t" in change_type):
        action = Actions.rewrite
    elif entity_type == "d" and "t" in change_type:
        action = Actions.update_time
    elif "p" in change_type:
        action = Actions.update_perm
    elif "o" in change_type or "g" in change_type:
        action = Actions.update_owner

    if action is None:
        raise RuntimeError("Not parsed string: %s" % line)
    return relpath, action
def rsync_ext(src, dst, dry_run=False): def rsync_ext(src, dst, dry_run=False):
"""Call external rsync command""" """Call external rsync command"""
rsync_args = ["rsync"] rsync_args = ["rsync"]
@ -42,8 +79,34 @@ def rsync_ext(src, dst, dry_run=False):
rsync_args.append(str(dst)) rsync_args.append(str(dst))
_lg.info("Executing external command: %s", " ".join(rsync_args)) _lg.info("Executing external command: %s", " ".join(rsync_args))
res = subprocess.run(rsync_args) process = subprocess.Popen(rsync_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return res with process.stdout:
prev_line = None
for line in iter(process.stdout.readline, b""):
_lg.debug("Rsync current line: %s", line)
if prev_line is None:
prev_line = line
continue
try:
prev_line = prev_line.decode("utf-8").strip()
# some issues with cyrillic in filenames
except UnicodeDecodeError:
_lg.error("Can't process rsync line: %s", prev_line)
continue
_lg.debug("Rsync itemize line: %s", prev_line)
yield _parse_rsync_output(prev_line)
prev_line = line
try:
prev_line = prev_line.decode("utf-8").strip()
_lg.debug("Rsync itemize line: %s", prev_line)
yield _parse_rsync_output(prev_line)
# some issues with cyrillic in filenames
except UnicodeDecodeError:
_lg.error("Can't process rsync line: %s", prev_line)
process.wait()
def scantree(path, dir_first=True) -> Iterable[os.DirEntry]: def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
@ -99,6 +162,37 @@ def copy_file(src, dst):
except: pass except: pass
def copy_entity(src_path: str, dst_path: str):
    """
    Non-recursive fs entity (file, dir or symlink) copy.

    A symlink is re-created with the same target, a directory is created
    empty, anything else is copied with copy_file. Owner, mode and
    access/modification times of the source are then applied to the copy.

    :param src_path: path of the entity to copy
    :param dst_path: path of the copy, must not exist yet
    """
    src_stat = os.lstat(src_path)
    is_symlink = os.path.islink(src_path)

    # The symlink check has to come first: os.path.isdir() follows symlinks,
    # so a link pointing to a directory would be copied as a real directory.
    if is_symlink:
        link_target = os.readlink(src_path)
        os.symlink(link_target, dst_path)
    elif os.path.isdir(src_path):
        os.mkdir(dst_path)
    else:
        copy_file(src_path, dst_path)

    if is_symlink:
        # change symlink attributes only if supported by OS
        if os.chown in os.supports_follow_symlinks:
            os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
                     follow_symlinks=False)
        if os.chmod in os.supports_follow_symlinks:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        if os.utime in os.supports_follow_symlinks:
            os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    else:
        os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
        os.chmod(dst_path, src_stat.st_mode)
        os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
def copy_direntry(entry: os.DirEntry, dst_path): def copy_direntry(entry: os.DirEntry, dst_path):
""" Non-recursive DirEntry (file, dir or symlink) copy. """ """ Non-recursive DirEntry (file, dir or symlink) copy. """
if entry.is_dir(): if entry.is_dir():
@ -115,11 +209,13 @@ def copy_direntry(entry: os.DirEntry, dst_path):
if entry.is_symlink(): if entry.is_symlink():
# change symlink attributes only if supported by OS # change symlink attributes only if supported by OS
if os.chown in os.supports_follow_symlinks: if os.chown in os.supports_follow_symlinks:
os.chown(dst_path, src_stat.st_uid, src_stat.st_gid, follow_symlinks=False) os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
follow_symlinks=False)
if os.chmod in os.supports_follow_symlinks: if os.chmod in os.supports_follow_symlinks:
os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False) os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
if os.utime in os.supports_follow_symlinks: if os.utime in os.supports_follow_symlinks:
os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime), follow_symlinks=False) os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
follow_symlinks=False)
else: else:
os.chown(dst_path, src_stat.st_uid, src_stat.st_gid) os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
os.chmod(dst_path, src_stat.st_mode) os.chmod(dst_path, src_stat.st_mode)
@ -136,24 +232,16 @@ def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
copy_direntry(src_entry, dst_entry.path) copy_direntry(src_entry, dst_entry.path)
class Actions(enum.Enum): def rsync(src_dir, dst_dir, dry_run=False) -> Iterable[tuple]:
nothing = enum.auto()
delete = enum.auto()
rewrite = enum.auto()
update_perm = enum.auto()
update_owner = enum.auto()
create = enum.auto()
def rsync(src_dir, dst_dir, dry_run=False):
""" """
Do sync Do sync
:param src_dir: source dir :param src_dir: source dir
:param dst_dir: dest dir, create if not exists :param dst_dir: dest dir, create if not exists
:param dry_run: not used
:return: nothing :return: nothing
""" """
_lg.info(f"Rsync: {src_dir} -> {dst_dir}") _lg.debug("Rsync: %s -> %s", src_dir, dst_dir)
src_root_abs = os.path.abspath(src_dir) src_root_abs = os.path.abspath(src_dir)
dst_root_abs = os.path.abspath(dst_dir) dst_root_abs = os.path.abspath(dst_dir)
@ -161,11 +249,11 @@ def rsync(src_dir, dst_dir, dry_run=False):
raise RuntimeError(f"Error during reading source directory: {src_root_abs}") raise RuntimeError(f"Error during reading source directory: {src_root_abs}")
if os.path.exists(dst_root_abs): if os.path.exists(dst_root_abs):
if not os.path.isdir(dst_root_abs): if not os.path.isdir(dst_root_abs):
raise RuntimeError(f"Destination path is not a directory: {dst_root_abs}") raise RuntimeError("Destination path is not a directory: %s" % dst_root_abs)
else: else:
os.mkdir(dst_root_abs) os.mkdir(dst_root_abs)
# {rel_path: dir_entry} map # Create source map {rel_path: dir_entry}
src_files_map = { src_files_map = {
ent.path[len(src_root_abs) + 1:]: ent for ent in scantree(src_root_abs) ent.path[len(src_root_abs) + 1:]: ent for ent in scantree(src_root_abs)
} }
@ -178,8 +266,9 @@ def rsync(src_dir, dst_dir, dry_run=False):
# remove dst entries not existing in source # remove dst entries not existing in source
if src_entry is None: if src_entry is None:
_lg.info("deleting %s", rel_path) _lg.debug("Deleting: %s", rel_path)
rm_direntry(dst_entry) rm_direntry(dst_entry)
yield rel_path, Actions.delete
continue continue
# mark src entry as taken for processing # mark src entry as taken for processing
@ -189,24 +278,28 @@ def rsync(src_dir, dst_dir, dry_run=False):
# rewrite dst if it has different than src type # rewrite dst if it has different than src type
if src_entry.is_file(follow_symlinks=False): if src_entry.is_file(follow_symlinks=False):
if not dst_entry.is_file(follow_symlinks=False): if not dst_entry.is_file(follow_symlinks=False):
_lg.info("rewriting %s", rel_path) _lg.debug("Rewriting (src is a file, dst is not a file): %s", rel_path)
update_direntry(src_entry, dst_entry) update_direntry(src_entry, dst_entry)
yield rel_path, Actions.rewrite
continue continue
if src_entry.is_dir(follow_symlinks=False): if src_entry.is_dir(follow_symlinks=False):
if not dst_entry.is_dir(follow_symlinks=False): if not dst_entry.is_dir(follow_symlinks=False):
_lg.info("rewriting %s", rel_path) _lg.debug("Rewriting (src is a dir, dst is not a dir): %s", rel_path)
update_direntry(src_entry, dst_entry) update_direntry(src_entry, dst_entry)
yield rel_path, Actions.rewrite
continue continue
if src_entry.is_symlink(): if src_entry.is_symlink():
if not dst_entry.is_symlink(): if not dst_entry.is_symlink():
_lg.info("rewriting %s", rel_path) _lg.debug("Rewriting (src is a symlink, dst is not a symlink): %s", rel_path)
update_direntry(src_entry, dst_entry) update_direntry(src_entry, dst_entry)
yield rel_path, Actions.rewrite
continue continue
# rewrite dst if it is hard link to src (bad for backups) # rewrite dst if it is hard link to src (bad for backups)
if src_entry.inode() == dst_entry.inode(): if src_entry.inode() == dst_entry.inode():
_lg.info("rewriting %s", rel_path) _lg.debug("Rewriting (different inodes): %s", rel_path)
update_direntry(src_entry, dst_entry) update_direntry(src_entry, dst_entry)
yield rel_path, Actions.rewrite
continue continue
src_stat = src_entry.stat(follow_symlinks=False) src_stat = src_entry.stat(follow_symlinks=False)
@ -217,31 +310,36 @@ def rsync(src_dir, dst_dir, dry_run=False):
same_size = src_stat.st_size == dst_stat.st_size same_size = src_stat.st_size == dst_stat.st_size
same_mtime = src_stat.st_mtime == dst_stat.st_mtime same_mtime = src_stat.st_mtime == dst_stat.st_mtime
if not (same_size and same_mtime): if not (same_size and same_mtime):
_lg.info("rewriting %s", rel_path) reason = "size" if not same_size else "time"
_lg.debug("Rewriting (different %s): %s", reason, rel_path)
update_direntry(src_entry, dst_entry) update_direntry(src_entry, dst_entry)
yield rel_path, Actions.rewrite
continue continue
# rewrite dst symlink if it points somewhere else than src # rewrite dst symlink if it points somewhere else than src
if src_entry.is_symlink(): if src_entry.is_symlink():
if os.readlink(src_entry.path) != os.readlink(dst_entry.path): if os.readlink(src_entry.path) != os.readlink(dst_entry.path):
_lg.info("rewriting %s", rel_path) _lg.debug("Rewriting (different symlink target): %s", rel_path)
update_direntry(src_entry, dst_entry) update_direntry(src_entry, dst_entry)
continue continue
# update permissions and ownership # update permissions and ownership
if src_stat.st_mode != dst_stat.st_mode: if src_stat.st_mode != dst_stat.st_mode:
_lg.info("updating permissions %s", rel_path) _lg.debug("Updating permissions: %s", rel_path)
yield rel_path, Actions.update_perm
os.chmod(dst_entry.path, dst_stat.st_mode) os.chmod(dst_entry.path, dst_stat.st_mode)
if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid: if src_stat.st_uid != dst_stat.st_uid or src_stat.st_gid != dst_stat.st_gid:
_lg.info("updating owners %s", rel_path) _lg.debug("Updating owners: %s", rel_path)
yield rel_path, Actions.update_owner
os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid) os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid)
# process remained source entries # process remained source entries
for rel_path, src_entry in src_files_map.items(): for rel_path, src_entry in src_files_map.items():
dst_path = os.path.join(dst_root_abs, rel_path) dst_path = os.path.join(dst_root_abs, rel_path)
_lg.info("creating %s", rel_path) _lg.debug("Creating: %s", rel_path)
copy_direntry(src_entry, dst_path) copy_direntry(src_entry, dst_path)
yield rel_path, Actions.create
# restore dir mtimes in dst, updated by updating files # restore dir mtimes in dst, updated by updating files
for src_entry in scantree(src_root_abs, dir_first=True): for src_entry in scantree(src_root_abs, dir_first=True):
@ -250,15 +348,21 @@ def rsync(src_dir, dst_dir, dry_run=False):
rel_path = src_entry.path[len(src_root_abs) + 1:] rel_path = src_entry.path[len(src_root_abs) + 1:]
dst_path = os.path.join(dst_root_abs, rel_path) dst_path = os.path.join(dst_root_abs, rel_path)
src_stat = src_entry.stat(follow_symlinks=False) src_stat = src_entry.stat(follow_symlinks=False)
os.utime(dst_path, dst_stat = os.lstat(dst_path)
(src_stat.st_atime, src_stat.st_mtime), if src_stat.st_mtime != dst_stat.st_mtime:
follow_symlinks=False) _lg.debug("Restoring directory mtime: %s", dst_path)
os.utime(dst_path,
(src_stat.st_atime, src_stat.st_mtime),
follow_symlinks=False)
# restore dst_root dir mtime # restore dst_root dir mtime
src_root_stat = os.lstat(src_root_abs) src_root_stat = os.lstat(src_root_abs)
os.utime(dst_root_abs, dst_root_stat = os.lstat(dst_root_abs)
(src_root_stat.st_atime, src_root_stat.st_mtime), if src_root_stat.st_mtime != dst_root_stat.st_mtime:
follow_symlinks=False) _lg.debug("Restoring root directory mtime: %s", src_root_abs)
os.utime(dst_root_abs,
(src_root_stat.st_atime, src_root_stat.st_mtime),
follow_symlinks=False)
def _recursive_hardlink_ext(src: str, dst: str) -> bool: def _recursive_hardlink_ext(src: str, dst: str) -> bool:
@ -278,7 +382,7 @@ def _recursive_hardlink_ext(src: str, dst: str) -> bool:
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
with process.stdout: with process.stdout:
for line in iter(process.stdout.readline, b""): for line in iter(process.stdout.readline, b""):
logging.debug("%s: %s", cp, line.decode("utf-8").strip()) _lg.debug("%s: %s", cp, line.decode("utf-8").strip())
exitcode = process.wait() exitcode = process.wait()
return not bool(exitcode) return not bool(exitcode)
@ -296,7 +400,7 @@ def _recursive_hardlink(src: str, dst: str) -> bool:
for ent in it: for ent in it:
ent_dst_path = os.path.join(dst, ent.name) ent_dst_path = os.path.join(dst, ent.name)
if ent.is_dir(follow_symlinks=False): if ent.is_dir(follow_symlinks=False):
_lg.debug(f"Copying directory: {ent.path} -> {ent_dst_path}") _lg.debug("Copying directory: %s -> %s", ent.path, ent_dst_path)
os.mkdir(ent_dst_path) os.mkdir(ent_dst_path)
# process directory children # process directory children
@ -310,7 +414,7 @@ def _recursive_hardlink(src: str, dst: str) -> bool:
continue continue
if ent.is_file(follow_symlinks=False) or ent.is_symlink(): if ent.is_file(follow_symlinks=False) or ent.is_symlink():
_lg.debug(f"Hardlink file: {ent.path} -> {ent_dst_path}") _lg.debug("Hardlink file: %s -> %s", ent.path, ent_dst_path)
os.link(ent.path, ent_dst_path, follow_symlinks=False) os.link(ent.path, ent_dst_path, follow_symlinks=False)
continue continue
# something that is not a file, symlink or directory # something that is not a file, symlink or directory
@ -327,20 +431,53 @@ def hardlink_dir(src_dir, dst_dir, use_external: bool = False) -> bool:
:param use_external: whether to use external cp -al command :param use_external: whether to use external cp -al command
:return: success or not :return: success or not
""" """
_lg.info(f"Recursive hardlinking: {src_dir} -> {dst_dir}") _lg.debug("Recursive hardlinking: %s -> %s", src_dir, dst_dir)
src_abs = os.path.abspath(src_dir) src_abs = os.path.abspath(src_dir)
dst_abs = os.path.abspath(dst_dir) dst_abs = os.path.abspath(dst_dir)
if not os.path.isdir(src_abs): if not os.path.isdir(src_abs):
_lg.error(f"Error reading source directory: {src_dir}")
raise RuntimeError(f"Error reading source directory: {src_dir}") raise RuntimeError(f"Error reading source directory: {src_dir}")
if os.path.exists(dst_abs): if os.path.exists(dst_abs):
_lg.error(f"Destination already exists: {dst_dir}")
raise RuntimeError(f"Destination already exists: {dst_dir}") raise RuntimeError(f"Destination already exists: {dst_dir}")
_lg.debug("Creating directory: %s", dst_abs)
_lg.debug(f"Creating directory: {dst_abs}")
os.mkdir(dst_abs) os.mkdir(dst_abs)
hardlink_func = _recursive_hardlink_ext if use_external else _recursive_hardlink hardlink_func = _recursive_hardlink_ext if use_external else _recursive_hardlink
return hardlink_func(src_abs, dst_abs) return hardlink_func(src_abs, dst_abs)
def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str):
    """
    Hardlink entity from (src_dir + src_relpath) to dst_dir preserving dir structure.

    Directories along src_relpath that are missing under dst_dir are
    re-created with copy_entity; a file or symlink is hardlinked, so the
    destination shares its inode with the source.

    :param src_dir: root directory of the source entity
    :param src_relpath: entity path relative to src_dir
    :param dst_dir: destination root, created if it does not exist
    :raises RuntimeError: if the source entity does not exist or dst_dir
        exists and is not a directory
    """
    _lg.debug("Nested hardlinking: %s/%s -> %s", src_dir, src_relpath, dst_dir)
    src_dir_abs = os.path.abspath(src_dir)
    src_full_path = os.path.join(src_dir_abs, src_relpath)
    dst_dir_abs = os.path.abspath(dst_dir)
    dst_full_path = os.path.join(dst_dir_abs, src_relpath)

    # check source entity and destination directory
    # (lexists: a dangling symlink is still an entity that can be linked)
    if not os.path.lexists(src_full_path):
        raise RuntimeError(f"Error reading source entity: {src_full_path}")
    if os.path.exists(dst_dir_abs):
        if not os.path.isdir(dst_dir_abs):
            raise RuntimeError("Destination path is not a directory: %s" % dst_dir_abs)
    else:
        os.mkdir(dst_dir_abs)

    # if destination entity exists, check it points to source entity
    if os.path.lexists(dst_full_path):
        src_stat = os.lstat(src_full_path)
        if os.path.samestat(src_stat, os.lstat(dst_full_path)):
            return
        src_is_dir = os.path.isdir(src_full_path) and not os.path.islink(src_full_path)
        dst_is_dir = os.path.isdir(dst_full_path) and not os.path.islink(dst_full_path)
        if src_is_dir and dst_is_dir:
            # directories can not be hardlinked and this one is already in
            # place (e.g. created earlier as a parent of another entry)
            return
        # remove otherwise
        os.unlink(dst_full_path)

    src_cur_path = src_dir_abs
    dst_cur_path = dst_dir_abs
    for rel_part in src_relpath.split(sep=os.path.sep):
        if not rel_part:
            # empty component, produced e.g. by a trailing separator
            continue
        src_cur_path = os.path.join(src_cur_path, rel_part)
        dst_cur_path = os.path.join(dst_cur_path, rel_part)
        if os.path.lexists(dst_cur_path):
            continue
        if os.path.isdir(src_cur_path) and not os.path.islink(src_cur_path):
            # directories are re-created with the attributes of the source
            copy_entity(src_cur_path, dst_cur_path)
        else:
            # files and symlinks are hardlinked, not copied
            os.link(src_cur_path, dst_cur_path, follow_symlinks=False)

View File

@ -170,27 +170,27 @@ class TestRsync(CommonFSTestCase):
def test_dst_has_excess_file(self): def test_dst_has_excess_file(self):
dst_fpath = self.create_file(self.dst_dir) dst_fpath = self.create_file(self.dst_dir)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert not os.path.lexists(dst_fpath) assert not os.path.lexists(dst_fpath)
def test_dst_has_excess_symlink(self): def test_dst_has_excess_symlink(self):
dst_lpath = os.path.join(self.dst_dir, 'nonexisting_file') dst_lpath = os.path.join(self.dst_dir, 'nonexisting_file')
os.symlink('broken_symlink', dst_lpath) os.symlink('broken_symlink', dst_lpath)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert not os.path.lexists(dst_lpath) assert not os.path.lexists(dst_lpath)
def test_dst_has_excess_empty_dir(self): def test_dst_has_excess_empty_dir(self):
dst_dpath = self.create_dir(self.dst_dir) dst_dpath = self.create_dir(self.dst_dir)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert not os.path.lexists(dst_dpath) assert not os.path.lexists(dst_dpath)
def test_dst_has_excess_nonempty_dir(self): def test_dst_has_excess_nonempty_dir(self):
dst_dpath = self.create_dir(self.dst_dir) dst_dpath = self.create_dir(self.dst_dir)
self.create_file(dst_dpath) self.create_file(dst_dpath)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert not os.path.lexists(dst_dpath) assert not os.path.lexists(dst_dpath)
def test_dst_has_excess_nonempty_recursive_dir(self): def test_dst_has_excess_nonempty_recursive_dir(self):
@ -198,7 +198,7 @@ class TestRsync(CommonFSTestCase):
nested_dpath = self.create_dir(dst_dpath) nested_dpath = self.create_dir(dst_dpath)
self.create_file(nested_dpath) self.create_file(nested_dpath)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert not os.path.lexists(dst_dpath) assert not os.path.lexists(dst_dpath)
def test_different_types_src_file_dst_dir(self): def test_different_types_src_file_dst_dir(self):
@ -206,7 +206,7 @@ class TestRsync(CommonFSTestCase):
dst_path = os.path.join(self.dst_dir, self.relpath(src_fpath)) dst_path = os.path.join(self.dst_dir, self.relpath(src_fpath))
os.mkdir(dst_path) os.mkdir(dst_path)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_path) assert os.path.lexists(dst_path)
assert os.path.isfile(dst_path) assert os.path.isfile(dst_path)
@ -215,7 +215,7 @@ class TestRsync(CommonFSTestCase):
dst_path = os.path.join(self.dst_dir, self.relpath(src_fpath)) dst_path = os.path.join(self.dst_dir, self.relpath(src_fpath))
os.symlink('broken_link', dst_path) os.symlink('broken_link', dst_path)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_path) assert os.path.lexists(dst_path)
assert os.path.isfile(dst_path) assert os.path.isfile(dst_path)
@ -224,7 +224,7 @@ class TestRsync(CommonFSTestCase):
src_lpath = os.path.join(self.src_dir, self.relpath(dst_path)) src_lpath = os.path.join(self.src_dir, self.relpath(dst_path))
os.symlink('broken_link', src_lpath) os.symlink('broken_link', src_lpath)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_path) assert os.path.lexists(dst_path)
assert os.path.islink(dst_path) assert os.path.islink(dst_path)
@ -233,7 +233,7 @@ class TestRsync(CommonFSTestCase):
src_lpath = os.path.join(self.src_dir, self.relpath(dst_path)) src_lpath = os.path.join(self.src_dir, self.relpath(dst_path))
os.symlink('broken_link', src_lpath) os.symlink('broken_link', src_lpath)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_path) assert os.path.lexists(dst_path)
assert os.path.islink(dst_path) assert os.path.islink(dst_path)
@ -243,7 +243,7 @@ class TestRsync(CommonFSTestCase):
with open(dst_path, "w") as f: with open(dst_path, "w") as f:
f.write(string.printable) f.write(string.printable)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_path) assert os.path.lexists(dst_path)
assert os.path.isdir(dst_path) assert os.path.isdir(dst_path)
@ -252,7 +252,7 @@ class TestRsync(CommonFSTestCase):
dst_path = os.path.join(self.dst_dir, self.relpath(src_dpath)) dst_path = os.path.join(self.dst_dir, self.relpath(src_dpath))
os.symlink('broken_link', dst_path) os.symlink('broken_link', dst_path)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_path) assert os.path.lexists(dst_path)
assert os.path.isdir(dst_path) assert os.path.isdir(dst_path)
@ -261,7 +261,7 @@ class TestRsync(CommonFSTestCase):
dst_fpath = os.path.join(self.dst_dir, self.relpath(src_fpath)) dst_fpath = os.path.join(self.dst_dir, self.relpath(src_fpath))
os.link(src_fpath, dst_fpath) os.link(src_fpath, dst_fpath)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_fpath) assert os.path.lexists(dst_fpath)
src_stat = os.lstat(src_fpath) src_stat = os.lstat(src_fpath)
dst_stat = os.lstat(dst_fpath) dst_stat = os.lstat(dst_fpath)
@ -275,7 +275,7 @@ class TestRsync(CommonFSTestCase):
with open(dst_fpath, "w") as df: with open(dst_fpath, "w") as df:
df.write(string.printable * 2) df.write(string.printable * 2)
fs.rsync(self.src_dir, self.dst_dir) all(fs.rsync(self.src_dir, self.dst_dir))
assert os.path.lexists(dst_fpath) assert os.path.lexists(dst_fpath)
self.check_identical_file(src_fpath, dst_fpath) self.check_identical_file(src_fpath, dst_fpath)