Files
cura-te-ipsum/curateipsum/fs.py

596 lines
22 KiB
Python
Raw Normal View History

"""
Module with filesystem-related functions.
"""
2021-06-19 15:28:42 +03:00
import enum
2021-06-20 19:16:05 +03:00
import glob
2019-01-29 11:22:08 +03:00
import logging
import os
import subprocess
2021-06-20 19:16:05 +03:00
import sys
from typing import Iterable, Tuple, Union
2019-01-29 11:22:08 +03:00
2021-05-28 00:40:54 +03:00
_lg = logging.getLogger(__name__)
2019-01-29 11:22:08 +03:00
2021-11-15 22:36:09 +03:00
class BackupCreationError(Exception):
    """ Raised when a backup can't be created or synced. """
2021-11-09 00:43:18 +03:00
class Actions(enum.Enum):
    """ Kinds of per-entry results reported by the sync functions. """
    # Values are spelled out; they equal what enum.auto() assigns (1..8).
    NOTHING = 1
    DELETE = 2
    REWRITE = 3
    UPDATE_TIME = 4
    UPDATE_PERM = 5
    UPDATE_OWNER = 6
    CREATE = 7
    ERROR = 8
2021-11-09 00:43:18 +03:00
2021-11-12 02:53:21 +03:00
class PseudoDirEntry:
    """
    Duck-typed os.DirEntry for paths that don't exist yet or when you need
    DirEntry-like interface for arbitrary paths.

    Problem: os.DirEntry is created by os.scandir() and cannot be manually
    constructed. But we need DirEntry-compatible objects for:
    - Paths that will exist soon (new backup directories)
    - Constructed paths (marker files)
    - Uniform interface in functions accepting both real and future entries

    Why not just use strings? Functions like rm_direntry(), copy_direntry()
    accept Union[os.DirEntry, PseudoDirEntry] and call .is_dir(), .stat()
    methods. Using this class avoids branching on type throughout the
    codebase.

    Why not pathlib.Path? We heavily use os.scandir() which returns DirEntry
    objects with cached stat info. PseudoDirEntry maintains API consistency
    with minimal overhead.

    Example usage:
        # Create entry for future backup directory
        cur_backup = PseudoDirEntry("/backups/20260204_120000")
        os.mkdir(cur_backup.path)
        set_backup_marker(cur_backup)  # accepts DirEntry-like object

    Caches stat results like real DirEntry to avoid repeated syscalls.
    Cached answers are never refreshed: a value obtained before the path
    was created or removed stays as it was first computed.
    """

    def __init__(self, path):
        # Use abspath, not realpath - realpath resolves symlinks
        self.path = os.path.abspath(path)
        self.name = os.path.basename(self.path)
        self._is_dir = None
        self._is_file = None
        self._is_symlink = None
        # Cache both stat and lstat separately
        self._stat_follow = None
        self._stat_nofollow = None

    def __str__(self):
        return self.name

    def __repr__(self):
        return f"{type(self).__name__}({self.path!r})"

    def __fspath__(self):
        """ Return the path, so the entry is os.PathLike like os.DirEntry. """
        return self.path

    def inode(self) -> int:
        """ Return inode number of the entry itself (symlinks not followed). """
        return self.stat(follow_symlinks=False).st_ino

    def is_dir(self, follow_symlinks: bool = True) -> bool:
        """ Tell whether the path is a directory (cached when following). """
        if follow_symlinks:
            if self._is_dir is None:
                self._is_dir = os.path.isdir(self.path)
            return self._is_dir
        # When not following symlinks, must return False if path is symlink
        return os.path.isdir(self.path) and not os.path.islink(self.path)

    def is_file(self, follow_symlinks: bool = True) -> bool:
        """ Tell whether the path is a regular file (cached when following). """
        if follow_symlinks:
            if self._is_file is None:
                self._is_file = os.path.isfile(self.path)
            return self._is_file
        # When not following symlinks, must return False if path is symlink
        return os.path.isfile(self.path) and not os.path.islink(self.path)

    def is_symlink(self) -> bool:
        """ Tell whether the path is a symbolic link (cached). """
        if self._is_symlink is None:
            self._is_symlink = os.path.islink(self.path)
        return self._is_symlink

    def stat(self, follow_symlinks: bool = True):
        """
        Return os.stat() (following) or os.lstat() (not following) result.
        Each variant is cached after the first successful call; errors from
        the underlying call (e.g. missing path) are propagated.
        """
        if follow_symlinks:
            if self._stat_follow is None:
                self._stat_follow = os.stat(self.path)
            return self._stat_follow
        if self._stat_nofollow is None:
            self._stat_nofollow = os.lstat(self.path)
        return self._stat_nofollow
2021-11-12 02:53:21 +03:00
2021-06-23 20:11:42 +03:00
2021-11-15 22:36:09 +03:00
def _parse_rsync_output(line: str) -> Tuple[str, Actions, str]:
    """
    Parse one line of `rsync --itemize-changes` output.

    :param line: decoded output line: change string, a space, then path.
    :return: (relative path, action, "") tuple. The last item is the error
        message slot of the common result tuple and is always empty here.
    :raises ValueError: if the line has no space separator.
    :raises RuntimeError: if the change string matches no known pattern.
    """
    change_string, relpath = line.split(' ', maxsplit=1)
    if change_string == "*deleting":
        # rsync pads the itemize column to a fixed width (11 chars), so
        # "*deleting" is followed by padding spaces that end up in front of
        # the path after the split. Drop exactly that padding, so names that
        # really start with spaces survive.
        itemize_width = 11
        padding = " " * (itemize_width - len(change_string))
        if relpath.startswith(padding):
            relpath = relpath[len(padding):]
        return relpath, Actions.DELETE, ""

    update_type = change_string[0]
    entity_type = change_string[1]
    change_type = change_string[2:]

    action = None
    if update_type == "c" and entity_type in {"d", "L"} and "+" in change_type:
        action = Actions.CREATE
    elif update_type == ">" and entity_type == "f" and "+" in change_type:
        action = Actions.CREATE
    elif entity_type == "f" and ("s" in change_type or "t" in change_type):
        action = Actions.REWRITE
    elif entity_type == "d" and "t" in change_type:
        action = Actions.UPDATE_TIME
    elif "p" in change_type:
        action = Actions.UPDATE_PERM
    elif "o" in change_type or "g" in change_type:
        action = Actions.UPDATE_OWNER

    if action is None:
        raise RuntimeError("Not parsed string: %s" % line)
    return relpath, action, ""
2021-11-09 00:43:18 +03:00
def rsync_ext(src, dst, dry_run=False) -> Iterable[Tuple[str, Actions, str]]:
    """
    Call external rsync command for syncing files from src to dst.
    Yield (path, action, error message) tuples.

    :param src: source directory; its content (not the directory itself)
        is synced, a trailing slash is appended for rsync.
    :param dst: destination directory.
    :param dry_run: pass --dry-run to rsync.

    Output lines that are not valid UTF-8 are logged and skipped, blank
    lines are ignored. stderr is merged into stdout, so rsync's own error
    messages are passed to the parser as well.
    """
    rsync_args = ["rsync"]
    if dry_run:
        rsync_args.append("--dry-run")
    rsync_args.extend([
        "--archive",
        "--whole-file",
        "--human-readable",
        "--delete-during",
        "--itemize-changes",
        f"{src}/",
        str(dst),
    ])

    _lg.info("Executing external command: %s", " ".join(rsync_args))
    process = subprocess.Popen(rsync_args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    try:
        with process.stdout:
            for raw_line in iter(process.stdout.readline, b""):
                _lg.debug("Rsync current line: %s", raw_line)
                try:
                    line = raw_line.decode("utf-8").strip()
                except UnicodeDecodeError:
                    # some issues with cyrillic in filenames
                    _lg.error("Can't process rsync line: %s", raw_line)
                    continue
                if not line:
                    continue
                _lg.debug("Rsync itemize line: %s", line)
                yield _parse_rsync_output(line)
    finally:
        # reap the child even if the consumer stops iterating early
        process.wait()
2019-01-29 11:22:08 +03:00
2021-06-19 15:28:42 +03:00
def scantree(path, dir_first=True) -> Iterable[os.DirEntry]:
    """
    Walk the directory tree under path, yielding os.DirEntry for each item.

    Symlinks are yielded as plain entries and never descended into.

    :param path: directory to scan.
    :param dir_first: if True a directory is yielded before its content,
        otherwise after it.
    """
    with os.scandir(path) as listing:
        for item in listing:
            # anything that is not a real directory is a leaf
            if not item.is_dir(follow_symlinks=False):
                yield item
                continue

            if dir_first:
                yield item
            yield from scantree(item.path, dir_first)
            if not dir_first:
                yield item
def rm_direntry(entry: Union[os.DirEntry, PseudoDirEntry]):
    """
    Recursively delete DirEntry (dir/file/symlink/special file).

    A symlink is unlinked itself, its target is left alone. Entries that are
    neither file, symlink nor directory (FIFO, socket, device node) are
    unlinked too. A path that doesn't exist at all is silently ignored.
    """
    if entry.is_dir(follow_symlinks=False):
        with os.scandir(entry.path) as it:
            child_entry: os.DirEntry
            for child_entry in it:
                rm_direntry(child_entry)
        os.rmdir(entry.path)
    elif (entry.is_file(follow_symlinks=False)
            or entry.is_symlink()
            # special files: not file/symlink/dir, but present on disk
            or os.path.lexists(entry.path)):
        os.unlink(entry.path)
# os.O_BINARY exists on Windows only; elsewhere it adds no flag bits
O_BINARY = getattr(os, "O_BINARY", 0)
# flags for os.open() used when copying files
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
# size of one read chunk, in bytes
BUFFER_SIZE = 128 * 1024
2021-10-22 17:30:45 +03:00
def copy_file(src, dst):
    """
    Copy file content from src to dst using raw file descriptors.

    dst is created if missing (with the mode of src, subject to umask) and
    truncated otherwise. Only content is copied; ownership and timestamps
    are not touched here.

    :raises OSError: if opening, reading or writing fails.
    """
    fin = None
    fout = None
    try:
        fin = os.open(src, READ_FLAGS)
        fstat = os.fstat(fin)
        fout = os.open(dst, WRITE_FLAGS, fstat.st_mode)
        for chunk in iter(lambda: os.read(fin, BUFFER_SIZE), b""):
            # os.write() may write less than requested; keep writing until
            # the whole chunk is on its way, otherwise data is silently lost
            pending = memoryview(chunk)
            while pending:
                written = os.write(fout, pending)
                pending = pending[written:]
    finally:
        # closing is best-effort: an error here must not mask the original
        if fout is not None:
            try:
                os.close(fout)
            except OSError:
                pass
        if fin is not None:
            try:
                os.close(fin)
            except OSError:
                pass
2021-06-19 15:28:42 +03:00
2021-10-23 00:22:04 +03:00
def copy_direntry(entry: Union[os.DirEntry, PseudoDirEntry], dst_path):
    """
    Non-recursive DirEntry (file/dir/symlink) copy.

    A directory is created empty, a symlink is recreated with the same
    target (even if it points to a directory), anything else is copied as
    a file. Owner, mode and atime/mtime of the entry itself are then applied
    to dst_path; failure to change ownership (not root) is only logged.

    :param entry: source entry.
    :param dst_path: path to create; must not exist for dirs and symlinks.
    :raises OSError: if the entry can't be created or its mode/time set.
    """
    src_stat = entry.stat(follow_symlinks=False)
    # Check symlink first: is_dir() follows symlinks by default, so a link
    # to a directory would otherwise be copied as a real directory.
    is_link = entry.is_symlink()
    if is_link:
        os.symlink(os.readlink(entry.path), dst_path)
    elif entry.is_dir(follow_symlinks=False):
        os.mkdir(dst_path)
    else:
        copy_file(entry.path, dst_path)

    if is_link:
        # change symlink attributes only if supported by OS
        if os.chown in os.supports_follow_symlinks:
            try:
                os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
                         follow_symlinks=False)
            except PermissionError:
                _lg.debug("Cannot change ownership (not root): %s", dst_path)
        if os.chmod in os.supports_follow_symlinks:
            os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
        if os.utime in os.supports_follow_symlinks:
            os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)
    else:
        try:
            os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
        except PermissionError:
            _lg.debug("Cannot change ownership (not root): %s", dst_path)
        os.chmod(dst_path, src_stat.st_mode)
        os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
2021-06-19 15:28:42 +03:00
def update_direntry(src_entry: os.DirEntry, dst_entry: os.DirEntry):
    """
    Replace dst DirEntry (file/dir/symlink) with a fresh copy of src.

    Whatever is at dst is removed first (a directory with all its content),
    then src is copied non-recursively: for a src directory only the
    directory itself is created, not its content.
    """
    target_path = dst_entry.path
    rm_direntry(dst_entry)
    copy_direntry(src_entry, target_path)
def _rsync_rewrite(src_entry, dst_entry, rel_path) -> Tuple[str, Actions, str]:
    """ Replace dst entry with src one, return the resulting rsync tuple. """
    try:
        update_direntry(src_entry, dst_entry)
    except OSError as exc:
        return rel_path, Actions.ERROR, str(exc)
    return rel_path, Actions.REWRITE, ""


def _rsync_nofollow_kwargs(func, is_link: bool):
    """
    Build kwargs making os.chmod/os.chown act on the entry itself.
    Return None if the entry is a symlink and func can't avoid following it.
    """
    if not is_link:
        return {}
    if func in os.supports_follow_symlinks:
        return {"follow_symlinks": False}
    return None


def rsync(src_dir,
          dst_dir,
          dry_run=False) -> Iterable[Tuple[str, Actions, str]]:
    """
    Sync files/dirs/symlinks from src_dir to dst_dir.
    Yield (path, action, error message) tuples.
    Entries in dst_dir will be removed if not present in src_dir.
    Analog of 'rsync --delete -irltpog'.

    Per-entry failures are reported as Actions.ERROR tuples and the sync
    goes on; failure to delete a stale dst entry aborts the sync.

    NOTE(review): dry_run is accepted but never used in this function, the
    destination is modified regardless - confirm and implement or drop.

    :raises BackupCreationError: if src_dir is not a directory, dst_dir
        exists and is not a directory, or a stale dst entry can't be removed.
    """
    _lg.debug("Rsync: %s -> %s", src_dir, dst_dir)
    src_root_abs = os.path.abspath(src_dir)
    dst_root_abs = os.path.abspath(dst_dir)

    if not os.path.isdir(src_root_abs):
        raise BackupCreationError(
            "Error during reading source directory: %s" % src_root_abs
        )
    if os.path.exists(dst_root_abs):
        if not os.path.isdir(dst_root_abs):
            raise BackupCreationError(
                "Destination path is not a directory: %s" % dst_root_abs
            )
    else:
        os.mkdir(dst_root_abs)

    # Create source map {rel_path: dir_entry}
    src_files_map = {
        ent.path[len(src_root_abs) + 1:]: ent for ent in scantree(src_root_abs)
    }

    # process dst tree (children before their directory, so deletions work)
    for dst_entry in scantree(dst_root_abs, dir_first=False):
        rel_path = dst_entry.path[len(dst_root_abs) + 1:]
        # taking the entry out of the map marks it as processed
        src_entry = src_files_map.pop(rel_path, None)

        # remove dst entries not existing in source
        if src_entry is None:
            _lg.debug("Rsync, deleting: %s", rel_path)
            try:
                rm_direntry(dst_entry)
            except OSError as exc:
                raise BackupCreationError(exc) from exc
            yield rel_path, Actions.DELETE, ""
            continue

        # rewrite dst if it has different type from src
        reason = None
        if (src_entry.is_file(follow_symlinks=False)
                and not dst_entry.is_file(follow_symlinks=False)):
            reason = "src is a file, dst is not a file"
        elif (src_entry.is_dir(follow_symlinks=False)
                and not dst_entry.is_dir(follow_symlinks=False)):
            reason = "src is a dir, dst is not a dir"
        elif src_entry.is_symlink() and not dst_entry.is_symlink():
            reason = "src is a symlink, dst is not a symlink"
        # rewrite dst if it is hard link to src (bad for backups)
        elif src_entry.inode() == dst_entry.inode():
            reason = "dst is a hard link to src"

        if reason is None:
            src_stat = src_entry.stat(follow_symlinks=False)
            dst_stat = dst_entry.stat(follow_symlinks=False)
            if src_entry.is_file(follow_symlinks=False):
                # rewrite dst file which has different size or mtime
                if src_stat.st_size != dst_stat.st_size:
                    reason = "different size"
                elif src_stat.st_mtime != dst_stat.st_mtime:
                    reason = "different time"
            elif src_entry.is_symlink():
                # rewrite dst symlink if it points somewhere else than src
                if os.readlink(src_entry.path) != os.readlink(dst_entry.path):
                    reason = "different symlink target"

        if reason is not None:
            _lg.debug("Rsync, rewriting (%s): %s", reason, rel_path)
            yield _rsync_rewrite(src_entry, dst_entry, rel_path)
            continue

        # update permissions and ownership of the entry itself; for symlinks
        # only when the OS can do it without following the link
        dst_is_link = dst_entry.is_symlink()
        if src_stat.st_mode != dst_stat.st_mode:
            chmod_kwargs = _rsync_nofollow_kwargs(os.chmod, dst_is_link)
            if chmod_kwargs is not None:
                _lg.debug("Rsync, updating permissions: %s", rel_path)
                try:
                    os.chmod(dst_entry.path, src_stat.st_mode, **chmod_kwargs)
                except OSError as exc:
                    yield rel_path, Actions.ERROR, str(exc)
                else:
                    yield rel_path, Actions.UPDATE_PERM, ""

        if (src_stat.st_uid != dst_stat.st_uid
                or src_stat.st_gid != dst_stat.st_gid):
            chown_kwargs = _rsync_nofollow_kwargs(os.chown, dst_is_link)
            if chown_kwargs is not None:
                _lg.debug("Rsync, updating owners: %s", rel_path)
                try:
                    os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid,
                             **chown_kwargs)
                except PermissionError:
                    _lg.debug("Cannot change ownership (not root): %s",
                              rel_path)
                else:
                    yield rel_path, Actions.UPDATE_OWNER, ""

    # process remained source entries (new files/dirs/symlinks);
    # the map keeps scantree order, so parents come before children
    for rel_path, src_entry in src_files_map.items():
        dst_path = os.path.join(dst_root_abs, rel_path)
        _lg.debug("Rsync, creating: %s", rel_path)
        try:
            copy_direntry(src_entry, dst_path)
        except OSError as exc:
            yield rel_path, Actions.ERROR, str(exc)
        else:
            yield rel_path, Actions.CREATE, ""

    # restore dir mtimes in dst, updated by updating files
    for src_entry in scantree(src_root_abs, dir_first=True):
        if not src_entry.is_dir():
            continue
        rel_path = src_entry.path[len(src_root_abs) + 1:]
        dst_path = os.path.join(dst_root_abs, rel_path)
        src_stat = src_entry.stat(follow_symlinks=False)
        try:
            dst_stat = os.lstat(dst_path)
        except FileNotFoundError:
            # creation failed above and was already reported as ERROR
            continue
        if src_stat.st_mtime != dst_stat.st_mtime:
            _lg.debug("Rsync, restoring directory mtime: %s", dst_path)
            os.utime(dst_path,
                     (src_stat.st_atime, src_stat.st_mtime),
                     follow_symlinks=False)

    # restore dst_root dir mtime
    src_root_stat = os.lstat(src_root_abs)
    dst_root_stat = os.lstat(dst_root_abs)
    if src_root_stat.st_mtime != dst_root_stat.st_mtime:
        _lg.debug("Rsync, restoring root directory mtime: %s", dst_root_abs)
        os.utime(dst_root_abs,
                 (src_root_stat.st_atime, src_root_stat.st_mtime),
                 follow_symlinks=False)
2019-01-29 11:22:08 +03:00
2021-10-24 21:20:05 +03:00
def _recursive_hardlink_ext(src: str, dst: str) -> bool:
    """
    Make hardlink for a directory using cp -al. Both src and dst should exist.

    Every top-level entry of src (hidden ones included) is passed to cp, so
    the content of src ends up directly inside dst. Uses gcp on macOS.

    :param src: absolute path to source directory.
    :param dst: absolute path to target directory.
    :return: success or not
    """
    cp = "gcp" if sys.platform == "darwin" else "cp"

    # List entries directly instead of globbing "src/*": a glob skips
    # dotfiles and misreads special characters ([, ?, *) in src itself.
    with os.scandir(src) as it:
        src_content = sorted(ent.path for ent in it)
    if not src_content:
        # cp fails when given no source operands; nothing to link anyway
        _lg.debug("Hardlink, source directory is empty: %s", src)
        return True

    cmd = [cp, "--archive", "--verbose", "--link", *src_content, dst]
    _lg.info("Executing external command: %s", " ".join(cmd))
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    with process.stdout:
        for line in iter(process.stdout.readline, b""):
            # output is only logged, so undecodable bytes must not abort
            _lg.debug("%s: %s", cp,
                      line.decode("utf-8", errors="replace").strip())
    exitcode = process.wait()
    return not bool(exitcode)
2021-06-10 22:19:26 +03:00
2021-10-24 21:20:05 +03:00
def _recursive_hardlink(src: str, dst: str) -> bool:
    """
    Recreate the content of src inside dst using python only: directories
    are created anew, files and symlinks are hard-linked.
    Both src and dst directories should exist.

    :param src: absolute path to source directory.
    :param dst: absolute path to target directory.
    :return: True once everything is processed; problems surface as
        exceptions.
    :raises NotImplementedError: for an entry that is not a file, symlink
        or directory.
    """
    with os.scandir(src) as listing:
        item: os.DirEntry
        for item in listing:
            target = os.path.join(dst, item.name)

            if item.is_dir(follow_symlinks=False):
                _lg.debug("Hardlink, copying directory: %s -> %s",
                          item.path, target)
                os.mkdir(target)
                # children first: filling the directory changes its mtime
                _recursive_hardlink(item.path, target)

                # then carry over the directory's own metainfo
                meta = item.stat(follow_symlinks=False)
                try:
                    os.chown(target, meta.st_uid, meta.st_gid)
                except PermissionError:
                    _lg.debug("Cannot change ownership (not root): %s",
                              target)
                os.chmod(target, meta.st_mode)
                os.utime(target, (meta.st_atime, meta.st_mtime))
            elif item.is_file(follow_symlinks=False) or item.is_symlink():
                _lg.debug("Hardlink, creating link for file: %s -> %s",
                          item.path, target)
                os.link(item.path, target, follow_symlinks=False)
            else:
                # something that is not a file, symlink or directory
                raise NotImplementedError(item.path)

    return True
2021-06-10 22:19:26 +03:00
2021-06-21 10:20:45 +03:00
2021-10-24 21:20:05 +03:00
def hardlink_dir(src_dir, dst_dir, use_external: bool = False) -> bool:
    """
    Create dst_dir and fill it with hardlinks to the content of src_dir.

    :param src_dir: path to source directory
    :param dst_dir: path to target directory, must not exist yet
    :param use_external: whether to use external cp -al command
    :return: True if success, False otherwise.
    :raises RuntimeError: if src_dir is not a directory or dst_dir exists.
    """
    _lg.debug("Recursive hardlinking: %s -> %s", src_dir, dst_dir)
    src_abs = os.path.abspath(src_dir)
    dst_abs = os.path.abspath(dst_dir)

    if not os.path.isdir(src_abs):
        raise RuntimeError(f"Error reading source directory: {src_dir}")
    if os.path.exists(dst_abs):
        raise RuntimeError(f"Destination already exists: {dst_dir}")

    _lg.debug("Hardlink, creating directory: %s", dst_abs)
    os.mkdir(dst_abs)

    if use_external:
        return _recursive_hardlink_ext(src_abs, dst_abs)
    return _recursive_hardlink(src_abs, dst_abs)
2021-11-09 00:43:18 +03:00
def nest_hardlink(src_dir: str, src_relpath: str, dst_dir: str):
    """
    Hardlink entity from (src_dir + src_relpath) to dst_dir preserving dir
    structure of src_relpath.

    dst_dir is created if missing. If the destination entity already is the
    same inode as the source one, nothing is done; any other existing
    destination entity is removed first.

    NOTE(review): every missing path component, the last one included, is
    created with copy_direntry(), i.e. copied rather than hard-linked -
    confirm this matches the intent stated above.

    :raises RuntimeError: if the source entity doesn't exist or dst_dir
        exists and is not a directory.
    """
    _lg.debug("Nested hardlinking: %s%s%s -> %s",
              src_dir, os.path.sep, src_relpath, dst_dir)
    src_dir_abs = os.path.abspath(src_dir)
    src_full_path = os.path.join(src_dir_abs, src_relpath)
    dst_dir_abs = os.path.abspath(dst_dir)
    dst_full_path = os.path.join(dst_dir_abs, src_relpath)

    # check source entity and destination directory
    if not os.path.lexists(src_full_path):
        raise RuntimeError("Error reading source entity: %s" % src_full_path)
    if os.path.lexists(dst_dir_abs):
        if not os.path.isdir(dst_dir_abs):
            raise RuntimeError("Destination path is not a directory: %s"
                               % dst_dir_abs)
    else:
        os.mkdir(dst_dir_abs)

    # if destination entity exists, check it points to source entity
    dst_entry = PseudoDirEntry(dst_full_path)
    if os.path.lexists(dst_entry.path):
        src_stat = os.lstat(src_full_path)
        # Compare the entries themselves: following the dst symlink would
        # raise on a dangling link and never match a hard-linked symlink.
        if os.path.samestat(src_stat, dst_entry.stat(follow_symlinks=False)):
            return
        # remove otherwise
        rm_direntry(dst_entry)

    src_cur_path = src_dir_abs
    dst_cur_path = dst_dir_abs
    for rel_part in src_relpath.split(sep=os.path.sep):
        src_cur_path = os.path.join(src_cur_path, rel_part)
        dst_cur_path = os.path.join(dst_cur_path, rel_part)
        if os.path.exists(dst_cur_path):
            continue
        copy_direntry(PseudoDirEntry(src_cur_path), dst_cur_path)