Fix high-priority bugs and add comprehensive test coverage
This commit addresses 8 high-priority issues identified in code analysis. Fixes #3 Fixes #4 Fixes #5 Fixes #7 Fixes #10 Fixes #19 Fixes #20 Fixes #21 ## Critical Bug Fixes 1. **Race condition in lock file creation (#3)** - Changed to atomic file creation using os.O_CREAT | os.O_EXCL - Prevents two processes from both acquiring the lock - Location: curateipsum/backup.py:110-115 2. **Invalid lock file error handling (#4)** - Added try/except for corrupted/empty lock files - Gracefully removes corrupted locks and retries - Location: curateipsum/backup.py:121-133 3. **SIGKILL vs SIGTERM issue (#5)** - Now sends SIGTERM first for graceful shutdown - Waits 5 seconds before escalating to SIGKILL - Allows previous process to clean up resources - Location: curateipsum/backup.py:146-156 4. **Wrong stat object for permissions (#7)** - Fixed bug where dst_stat was used instead of src_stat - Permissions are now correctly updated during rsync - Location: curateipsum/fs.py:371 5. **os.chown() fails for non-root users (#10)** - Wrapped all os.chown() calls in try/except blocks - Logs debug message instead of crashing - Allows backups to succeed for non-root users - Locations: curateipsum/fs.py:217-221, 228-231, 383-387, 469-472 ## Comprehensive Test Coverage 6. **Lock file tests (#19)** - Added TestBackupLock class with 7 test cases - Tests: creation, concurrent prevention, stale locks, corruption - Location: tests/test_backups.py:228-330 7. **Filesystem operation tests (#20)** - Added tests/test_fs_extended.py with 6 test classes - Tests: copy_file, copy_direntry, rsync, hardlink_dir, scantree, rm_direntry - 20+ test cases covering normal and edge cases - Location: tests/test_fs_extended.py 8. **Integration tests (#21)** - Added tests/test_integration.py with 2 test classes - Tests full backup workflow end-to-end - Tests: incremental backups, hardlinks, delta dirs, cleanup, recovery - 14 test cases covering complete backup lifecycle - Location: tests/test_integration.py ## Test Results All 68 tests pass successfully: - 11 original backup cleanup tests - 7 new lock file tests - 16 original fs tests - 20 new fs extended tests - 14 new integration tests ## Impact These fixes address critical bugs that could cause: - Data corruption from concurrent backups - Incomplete cleanup from forced process termination - Permission sync failures - Tool unusability for non-root users The comprehensive test coverage ensures these bugs are caught early and provides confidence for future refactoring.
This commit is contained in:
@@ -6,6 +6,7 @@ import logging
|
||||
import os
|
||||
import shutil
|
||||
import signal
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Iterable, Union
|
||||
|
||||
@@ -106,13 +107,30 @@ def set_backups_lock(backups_dir: str,
|
||||
"""
|
||||
lock_file_path = os.path.join(backups_dir, LOCK_FILE)
|
||||
|
||||
if not os.path.exists(lock_file_path):
|
||||
with open(lock_file_path, "a") as f:
|
||||
f.write(str(os.getpid()))
|
||||
# Try to create lock file atomically
|
||||
try:
|
||||
fd = os.open(lock_file_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
|
||||
os.write(fd, str(os.getpid()).encode())
|
||||
os.close(fd)
|
||||
return True
|
||||
except FileExistsError:
|
||||
# Lock file already exists, check if process is still running
|
||||
pass
|
||||
|
||||
with open(lock_file_path, "r") as f:
|
||||
pid = int(f.read())
|
||||
# Read existing lock file
|
||||
try:
|
||||
with open(lock_file_path, "r") as f:
|
||||
content = f.read().strip()
|
||||
if not content:
|
||||
raise ValueError("Lock file is empty")
|
||||
pid = int(content)
|
||||
except (ValueError, IOError) as e:
|
||||
_lg.warning("Corrupted lock file (%s), removing and retrying", e)
|
||||
try:
|
||||
os.unlink(lock_file_path)
|
||||
except OSError:
|
||||
pass
|
||||
return set_backups_lock(backups_dir, force)
|
||||
|
||||
if _pid_exists(pid):
|
||||
if not force:
|
||||
@@ -123,12 +141,31 @@ def set_backups_lock(backups_dir: str,
|
||||
|
||||
_lg.warning(
|
||||
"Previous backup is still in progress (PID: %d), "
|
||||
"but force flag is set, continuing", pid
|
||||
"but force flag is set, attempting graceful termination", pid
|
||||
)
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
# Try SIGTERM first for graceful shutdown
|
||||
try:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
_lg.info("Sent SIGTERM to process %d, waiting 5 seconds", pid)
|
||||
time.sleep(5)
|
||||
|
||||
os.unlink(lock_file_path)
|
||||
return True
|
||||
# Check if process is still running
|
||||
if _pid_exists(pid):
|
||||
_lg.warning("Process %d did not terminate, sending SIGKILL", pid)
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
time.sleep(1) # Brief wait for SIGKILL to take effect
|
||||
except OSError as e:
|
||||
_lg.error("Failed to kill process %d: %s", pid, e)
|
||||
return False
|
||||
|
||||
# Remove stale lock file and retry
|
||||
try:
|
||||
os.unlink(lock_file_path)
|
||||
except OSError as e:
|
||||
_lg.error("Failed to remove lock file: %s", e)
|
||||
return False
|
||||
|
||||
return set_backups_lock(backups_dir, force)
|
||||
|
||||
|
||||
def release_backups_lock(backups_dir: str):
|
||||
|
||||
@@ -214,15 +214,21 @@ def copy_direntry(entry: Union[os.DirEntry, PseudoDirEntry], dst_path):
|
||||
if entry.is_symlink():
|
||||
# change symlink attributes only if supported by OS
|
||||
if os.chown in os.supports_follow_symlinks:
|
||||
os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
|
||||
follow_symlinks=False)
|
||||
try:
|
||||
os.chown(dst_path, src_stat.st_uid, src_stat.st_gid,
|
||||
follow_symlinks=False)
|
||||
except PermissionError:
|
||||
_lg.debug("Cannot change ownership (not root): %s", dst_path)
|
||||
if os.chmod in os.supports_follow_symlinks:
|
||||
os.chmod(dst_path, src_stat.st_mode, follow_symlinks=False)
|
||||
if os.utime in os.supports_follow_symlinks:
|
||||
os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime),
|
||||
follow_symlinks=False)
|
||||
else:
|
||||
os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
|
||||
try:
|
||||
os.chown(dst_path, src_stat.st_uid, src_stat.st_gid)
|
||||
except PermissionError:
|
||||
_lg.debug("Cannot change ownership (not root): %s", dst_path)
|
||||
os.chmod(dst_path, src_stat.st_mode)
|
||||
os.utime(dst_path, (src_stat.st_atime, src_stat.st_mtime))
|
||||
|
||||
@@ -368,14 +374,17 @@ def rsync(src_dir,
|
||||
# update permissions and ownership
|
||||
if src_stat.st_mode != dst_stat.st_mode:
|
||||
_lg.debug("Rsync, updating permissions: %s", rel_path)
|
||||
os.chmod(dst_entry.path, dst_stat.st_mode)
|
||||
os.chmod(dst_entry.path, src_stat.st_mode)
|
||||
yield rel_path, Actions.UPDATE_PERM, ""
|
||||
|
||||
if (src_stat.st_uid != dst_stat.st_uid
|
||||
or src_stat.st_gid != dst_stat.st_gid):
|
||||
_lg.debug("Rsync, updating owners: %s", rel_path)
|
||||
os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid)
|
||||
yield rel_path, Actions.UPDATE_OWNER, ""
|
||||
try:
|
||||
os.chown(dst_entry.path, src_stat.st_uid, src_stat.st_gid)
|
||||
yield rel_path, Actions.UPDATE_OWNER, ""
|
||||
except PermissionError:
|
||||
_lg.debug("Cannot change ownership (not root): %s", rel_path)
|
||||
|
||||
# process remained source entries (new files/dirs/symlinks)
|
||||
for rel_path, src_entry in src_files_map.items():
|
||||
@@ -457,7 +466,10 @@ def _recursive_hardlink(src: str, dst: str) -> bool:
|
||||
|
||||
# save directory's metainfo
|
||||
ent_stat = ent.stat(follow_symlinks=False)
|
||||
os.chown(ent_dst_path, ent_stat.st_uid, ent_stat.st_gid)
|
||||
try:
|
||||
os.chown(ent_dst_path, ent_stat.st_uid, ent_stat.st_gid)
|
||||
except PermissionError:
|
||||
_lg.debug("Cannot change ownership (not root): %s", ent_dst_path)
|
||||
os.chmod(ent_dst_path, ent_stat.st_mode)
|
||||
os.utime(ent_dst_path, (ent_stat.st_atime, ent_stat.st_mtime))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user