Files
cura-te-ipsum/tests/test_backups.py
Maks Snegov 06583f7e1a Add test for combined retention policy interaction
Add test_combined_retention_policies to verify all 5 retention policies
(keep_all, keep_daily, keep_weekly, keep_monthly, keep_yearly) work
correctly together. Test validates precedence rules where later retention
tiers can override earlier ones (e.g., monthly overrides weekly, yearly
overrides monthly) and documents threshold inclusive behavior.

Test uses 41 backups spanning 2017-2021 with expected 24 backups kept
based on policy interaction.
2026-02-04 20:58:06 -08:00

564 lines
24 KiB
Python

import os
import pytest
from unittest import mock
from datetime import datetime
from curateipsum import backup as bk
class TestBackupCleanup:
    """Tests for backup cleanup and retention policies.

    Every test patches ``datetime`` *inside the backup module*
    (``wraps=datetime`` keeps all other datetime behaviour intact) so that
    ``datetime.now()`` returns a fixed "current" moment and the retention
    thresholds are deterministic.

    The ``backup_dir``, ``add_backup``, ``run_cleanup`` and
    ``check_backups`` fixtures are defined in conftest (not visible here).
    Judging by usage: ``add_backup("YYYYMMDD_HHMM")`` creates one backup,
    ``run_cleanup(**policies)`` invokes the cleanup with the given
    retention settings, and ``check_backups(expected)`` asserts exactly
    those backups remain -- TODO confirm against conftest.
    """
    def test_no_backups(self, backup_dir, run_cleanup):
        """Test behaviour with no available backups"""
        backup_dir.mkdir()
        # Cleanup of an empty directory must be a no-op, not an error.
        bk.cleanup_old_backups(str(backup_dir))
        assert not os.listdir(str(backup_dir))
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_only_one_backup(self, mock_datetime, add_backup, run_cleanup,
                             check_backups):
        """Test the only backup will not be removed in any case"""
        mock_datetime.now.return_value = datetime(2021, 10, 20)
        # very old backup -- far beyond the keep_all=1 threshold, yet the
        # sole remaining backup must always survive cleanup
        only_backup = add_backup("20010101_0000")
        run_cleanup(keep_all=1)
        check_backups([only_backup])
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_at_least_one_should_be_left(self, mock_datetime, add_backup,
                                         run_cleanup, check_backups):
        """Test at least one backup should be left"""
        mock_datetime.now.return_value = datetime(2021, 10, 20)
        backups = [
            add_backup("20211103_0300"),  # latest, should be kept
            add_backup("20201216_0100"),  # rest should be removed
            add_backup("20200716_0100"),
            add_backup("20181116_0100"),
        ]
        expected_backups = [backups[0]]
        # No retention policy at all: only the newest backup survives.
        run_cleanup()
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_all_threshold_only(self, mock_datetime, add_backup,
                                     run_cleanup, check_backups):
        """Test threshold for keeping all backups"""
        mock_datetime.now.return_value = datetime(2021, 10, 20)
        backups = [
            add_backup("20211019_0300"),  # keep
            add_backup("20211017_0100"),  # keep
            add_backup("20211016_2300"),  # remove, older than 3 days
        ]
        expected_backups = backups[:2]
        run_cleanup(keep_all=3)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_daily_threshold_only(self, mock_datetime, add_backup,
                                       run_cleanup, check_backups):
        """Test threshold for keeping daily backups"""
        mock_datetime.now.return_value = datetime(2021, 10, 20)
        # Within the daily window, only the OLDEST backup of each calendar
        # day ("first daily") is kept.
        backups = [
            add_backup("20211019_0300"),  # keep, first daily at 2021-10-19
            add_backup("20211017_2100"),  # remove, not first daily
            add_backup("20211017_0100"),  # remove, not first daily
            add_backup("20211017_0030"),  # keep, first daily at 2021-10-17
            add_backup("20211016_2300"),  # remove, older than 3 days
            add_backup("20211016_0100"),  # remove, older than 3 days
        ]
        expected_backups = [backups[0], backups[3]]
        run_cleanup(keep_daily=3)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_all_and_daily_thresholds(self, mock_datetime, add_backup,
                                           run_cleanup, check_backups):
        """Test threshold for keeping all and daily backups"""
        mock_datetime.now.return_value = datetime(2021, 10, 20)
        backups = [
            add_backup("20211019_0300"),  # keep, newer than 3 days
            add_backup("20211017_0200"),  # keep, newer than 3 days
            add_backup("20211017_0100"),  # keep, newer than 3 days
            add_backup("20211016_2300"),  # remove, not first daily
            add_backup("20211016_2200"),  # keep, first daily at 2021-10-16
            add_backup("20211015_2200"),  # remove, not first daily
            add_backup("20211015_1500"),  # remove, not first daily
            add_backup("20211015_0200"),  # keep, first daily at 2021-10-15
            add_backup("20211014_2200"),  # remove, older than 5 days
            add_backup("20211014_2000"),  # remove, older than 5 days
            add_backup("20211014_1232"),  # remove, older than 5 days
        ]
        expected_backups = backups[0:3] + [backups[4]] + [backups[7]]
        run_cleanup(keep_all=3, keep_daily=5)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_weekly_threshold_only(self, mock_datetime, add_backup,
                                        run_cleanup, check_backups):
        """Test threshold for keeping weekly backups"""
        mock_datetime.now.return_value = datetime(2021, 11, 11)
        # One backup per week is kept: the oldest within each week.
        backups = [
            add_backup("20211111_0300"),  # remove, not first weekly (Thu)
            add_backup("20211110_0300"),  # remove, not first weekly (Wed)
            add_backup("20211108_0100"),  # keep, first weekly 2021-11-08 (Mon)
            add_backup("20211107_2300"),  # remove, not first weekly (Sun)
            add_backup("20211107_0100"),  # keep, first weekly 2021-11-07 (Sun)
            add_backup("20211031_0100"),  # remove, not first weekly (Sun)
            add_backup("20211025_0100"),  # keep, first weekly 2021-10-25 (Mon)
            add_backup("20211024_0100"),  # remove, not first weekly (Sun)
            add_backup("20211023_0100"),  # remove, not first weekly (Sat)
            add_backup("20211022_0100"),  # keep, first weekly 2021-10-22 (Fri)
            add_backup("20211008_0100"),  # remove, not first weekly (Fri)
            add_backup("20211007_0100"),  # remove, not first weekly (Thu)
            add_backup("20211004_0100"),  # keep, first weekly 2021-10-04 (Mon)
            add_backup("20211003_0100"),  # remove, older than 5 weeks
            add_backup("20211002_0100"),  # remove, older than 5 weeks
        ]
        expected_backups = [backups[2], backups[4], backups[6],
                            backups[9], backups[12]]
        run_cleanup(keep_weekly=5)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_weekly_threshold_inclusive(self, mock_datetime, add_backup,
                                             run_cleanup, check_backups):
        """Test threshold for keeping weekly backups"""
        # Documents inclusive threshold behaviour: a backup exactly on the
        # week boundary still counts toward the weekly tier.
        mock_datetime.now.return_value = datetime(2021, 11, 11)
        backups = [
            add_backup("20211111_0300"),  # remove, not first weekly (Thu)
            add_backup("20211110_0300"),  # keep, first weekly (Wed)
            add_backup("20211107_0100"),  # remove, not first weekly (Sun)
            add_backup("20211102_0100"),  # keep, first weekly (Tue)
        ]
        expected_backups = [backups[1], backups[3]]
        run_cleanup(keep_weekly=5)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_monthly_threshold_only(self, mock_datetime, add_backup,
                                         run_cleanup, check_backups):
        """Test threshold for keeping monthly backups"""
        mock_datetime.now.return_value = datetime(2021, 11, 11)
        backups = [
            add_backup("20211103_0300"),  # keep, first monthly at 2021-11
            add_backup("20211019_0300"),  # remove, not first monthly
            add_backup("20211017_2100"),  # remove, not first monthly
            add_backup("20211017_0100"),  # keep, first monthly at 2021-10
            add_backup("20210916_2300"),  # remove, not first monthly
            add_backup("20210916_0100"),  # keep, first monthly at 2021-09
            add_backup("20210816_0100"),  # remove, not first monthly
            add_backup("20210810_0000"),  # keep, first monthly at 2021-08
            add_backup("20210716_0100"),  # remove, older than 3 months
            add_backup("20210715_0100"),  # remove, older than 3 months
        ]
        expected_backups = [backups[0], backups[3], backups[5], backups[7]]
        run_cleanup(keep_monthly=3)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_keep_yearly_threshold_only(self, mock_datetime, add_backup,
                                        run_cleanup, check_backups):
        """Test threshold for keeping yearly backups"""
        mock_datetime.now.return_value = datetime(2021, 11, 11)
        backups = [
            add_backup("20211103_0300"),  # remove, not first yearly in 2021
            add_backup("20210810_0000"),  # remove, not first yearly in 2021
            add_backup("20210716_0100"),  # keep, first yearly in 2021
            add_backup("20201216_0100"),  # remove, not first yearly in 2020
            add_backup("20200716_0100"),  # keep, first yearly in 2020
            add_backup("20191216_0100"),  # remove, not first yearly in 2019
            add_backup("20190316_0100"),  # keep, first yearly in 2019
            add_backup("20181216_0100"),  # remove, not first yearly in 2018
            add_backup("20181116_0100"),  # keep, first yearly in 2018
            add_backup("20171116_0100"),  # remove, older than 3 years
            add_backup("20171115_0100"),  # remove, older than 3 years
        ]
        expected_backups = [backups[2], backups[4], backups[6], backups[8]]
        run_cleanup(keep_yearly=3)
        check_backups(expected_backups)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_dry_run(self, mock_datetime, add_backup, run_cleanup,
                     check_backups):
        """Test dry run does not remove anything"""
        mock_datetime.now.return_value = datetime(2021, 11, 11)
        backups = [
            add_backup("20211103_0300"),
            add_backup("20210810_0000"),
            add_backup("20210716_0100"),
            add_backup("20200716_0100"),
            add_backup("20181116_0100"),
        ]
        # With keep_all=2 most of these would be deleted; dry_run must
        # leave every backup in place.
        run_cleanup(keep_all=2, dry_run=True)
        check_backups(backups)
    @pytest.mark.xfail(reason="Bug #32: coarser retention tiers override finer "
                              "tiers, violating retention guarantees", strict=True)
    @mock.patch(f"{bk.__name__}.datetime", wraps=datetime)
    def test_combined_retention_policies(self, mock_datetime, add_backup,
                                         run_cleanup, check_backups):
        """Test all retention policies working together
        Tests EXPECTED behavior where retention policies honor their
        documented guarantees. Currently fails due to bug where coarser
        retention tiers (weekly/monthly/yearly) can override finer tiers
        (all/daily/weekly/monthly), violating retention guarantees.
        Bug affects multiple tier interactions (see issue #32):
        - Weekly overrides daily: Nov 4 removed by weekly
        - Monthly overrides weekly: Oct 11 removed by monthly
        - Yearly overrides monthly: May 15 removed by yearly
        Once fixed, this test will pass and validate that each tier protects
        its threshold range from coarser tiers.
        """
        # Current: 2021-11-11 (Thursday)
        # Policies: keep_all=2, keep_daily=7, keep_weekly=4,
        #           keep_monthly=6, keep_yearly=3
        # Thresholds: all=Nov 9, daily=Nov 4, weekly=Oct 11,
        #             monthly=May 1, yearly=Jan 1 2018
        mock_datetime.now.return_value = datetime(2021, 11, 11)
        # Use dict for readability
        b = {}
        # Keep-all range (all backups after Nov 9)
        b["nov_11"] = add_backup("20211111_0300")
        b["nov_10_23h"] = add_backup("20211110_2300")
        b["nov_10_12h"] = add_backup("20211110_1200")
        b["nov_10_01h"] = add_backup("20211110_0100")
        b["nov_09"] = add_backup("20211109_0300")
        # Daily range (one per day: Nov 4-8)
        b["nov_08_dupe"] = add_backup("20211108_2200")  # removed: not oldest
        b["nov_08"] = add_backup("20211108_0100")
        b["nov_07"] = add_backup("20211107_0100")
        b["nov_06_dupe"] = add_backup("20211106_1800")  # removed: not oldest
        b["nov_06"] = add_backup("20211106_0100")
        b["nov_05"] = add_backup("20211105_0100")
        b["nov_04"] = add_backup("20211104_0100")  # BUG: removed by weekly!
        # Weekly range (one per week: Oct 11-Nov 7)
        b["nov_03"] = add_backup("20211103_0300")  # removed: same week
        b["nov_01_dupe"] = add_backup("20211101_1500")  # removed: not oldest
        b["nov_01"] = add_backup("20211101_0100")
        b["oct_31"] = add_backup("20211031_0100")  # removed: same week
        b["oct_25_dupe"] = add_backup("20211025_2000")  # removed: not oldest
        b["oct_25"] = add_backup("20211025_0100")
        b["oct_18"] = add_backup("20211018_0100")
        b["oct_11"] = add_backup("20211011_0100")  # BUG: removed by monthly!
        # Monthly range (one per month: May-Oct)
        b["oct_04"] = add_backup("20211004_0100")  # removed: same month
        b["oct_01"] = add_backup("20211001_0100")
        b["sep_15_dupe"] = add_backup("20210915_2000")  # removed: not oldest
        b["sep_15"] = add_backup("20210915_0100")
        b["aug_15"] = add_backup("20210815_0100")
        b["jul_15_dupe"] = add_backup("20210715_1200")  # removed: not oldest
        b["jul_15"] = add_backup("20210715_0100")
        b["jun_15"] = add_backup("20210615_0100")
        b["may_15"] = add_backup("20210515_0100")  # BUG: removed by yearly!
        # Yearly range (one per year: 2018-2021)
        b["apr_15"] = add_backup("20210415_0100")  # removed: same year
        b["jan_15_2021"] = add_backup("20210115_0100")
        b["dec_15_2020"] = add_backup("20201215_0100")  # removed: same year
        b["jul_15_2020"] = add_backup("20200715_0100")  # removed: same year
        b["jan_15_2020"] = add_backup("20200115_0100")
        b["dec_15_2019_dupe"] = add_backup("20191215_2300")  # removed: not oldest
        b["dec_15_2019"] = add_backup("20191215_0100")  # removed: same year
        b["jul_15_2019"] = add_backup("20190715_0100")  # removed: same year
        b["jan_15_2019"] = add_backup("20190115_0100")
        b["nov_15_2018"] = add_backup("20181115_0100")  # removed: same year
        b["jan_15_2018"] = add_backup("20180115_0100")
        # Beyond all thresholds
        b["jan_15_2017"] = add_backup("20170115_0100")  # removed: too old
        # Expected: each tier protects its range
        expected = [
            # keep_all: all 5 backups
            b["nov_11"], b["nov_10_23h"], b["nov_10_12h"],
            b["nov_10_01h"], b["nov_09"],
            # daily: oldest per day (Nov 4-8) = 5 backups
            # BUG: nov_04 missing - removed by weekly (same week as nov_03)
            b["nov_08"], b["nov_07"], b["nov_06"], b["nov_05"],
            b["nov_04"],
            # weekly: oldest per week (Oct 11 - Nov 7) = 4 backups
            # BUG: oct_11 missing - removed by monthly (same month as oct_01)
            b["nov_01"], b["oct_25"], b["oct_18"], b["oct_11"],
            # monthly: oldest per month (May-Oct) = 6 backups
            # BUG: may_15 missing - removed by yearly (same year as jan_15_2021)
            b["oct_01"], b["sep_15"], b["aug_15"],
            b["jul_15"], b["jun_15"],
            b["may_15"],
            # yearly: oldest per year (2018-2021) = 4 backups
            b["jan_15_2021"], b["jan_15_2020"],
            b["jan_15_2019"], b["jan_15_2018"],
        ]
        run_cleanup(keep_all=2, keep_daily=7, keep_weekly=4,
                    keep_monthly=6, keep_yearly=3)
        check_backups(expected)
class TestBackupLock:
    """Test suite for backup lock file functionality.

    The lock is a file named ``bk.LOCK_FILE`` inside the backup directory
    whose content is the PID of the process holding the lock.
    """
    def test_lock_creation(self, backup_dir):
        """Test that lock file is created with current PID"""
        backup_dir.mkdir()
        result = bk.set_backups_lock(str(backup_dir))
        assert result
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        assert os.path.exists(lock_path)
        # Lock file content must be exactly our own PID.
        with open(lock_path, "r") as f:
            pid = int(f.read().strip())
        assert pid == os.getpid()
    def test_lock_prevents_concurrent_backup(self, backup_dir):
        """Test that second lock acquisition is blocked"""
        backup_dir.mkdir()
        # First lock should succeed
        result1 = bk.set_backups_lock(str(backup_dir))
        assert result1
        # Rewrite the lock with a PID that belongs to a live process so
        # the holder appears alive; a second non-forced acquisition must
        # then be refused.
        # NOTE(review): despite the intent of simulating "another
        # process", the PID written here is the current one -- which
        # still works, because the liveness check only needs the PID to
        # belong to a running process.
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        with open(lock_path, "w") as f:
            f.write(str(os.getpid()))
        result2 = bk.set_backups_lock(str(backup_dir), force=False)
        assert not result2
    def test_stale_lock_is_removed(self, backup_dir):
        """Test that lock from non-existent process is cleaned up"""
        backup_dir.mkdir()
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        # Create lock with non-existent PID
        # (999999 is above the default pid_max on typical Linux systems,
        # so no live process should own it -- TODO confirm on CI hosts)
        with open(lock_path, "w") as f:
            f.write("999999")
        # Lock should succeed by removing stale lock
        result = bk.set_backups_lock(str(backup_dir))
        assert result
        # Verify new lock has current PID
        with open(lock_path, "r") as f:
            pid = int(f.read().strip())
        assert pid == os.getpid()
    def test_corrupted_lock_is_handled(self, backup_dir):
        """Test that corrupted lock file is handled gracefully"""
        backup_dir.mkdir()
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        # Create corrupted lock file (non-numeric content)
        with open(lock_path, "w") as f:
            f.write("not a number")
        # Lock should succeed by removing corrupted lock
        result = bk.set_backups_lock(str(backup_dir))
        assert result
        # Verify new lock has current PID
        with open(lock_path, "r") as f:
            pid = int(f.read().strip())
        assert pid == os.getpid()
    def test_empty_lock_is_handled(self, backup_dir):
        """Test that empty lock file is handled gracefully"""
        backup_dir.mkdir()
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        # Create the empty lock file
        open(lock_path, "w").close()
        # Lock should succeed by removing empty lock
        result = bk.set_backups_lock(str(backup_dir))
        assert result
        # Verify new lock has current PID
        with open(lock_path, "r") as f:
            pid = int(f.read().strip())
        assert pid == os.getpid()
    def test_lock_release(self, backup_dir):
        """Test that lock file is properly released"""
        backup_dir.mkdir()
        bk.set_backups_lock(str(backup_dir))
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        assert os.path.exists(lock_path)
        # Release must delete the lock file.
        bk.release_backups_lock(str(backup_dir))
        assert not os.path.exists(lock_path)
    def test_release_nonexistent_lock(self, backup_dir):
        """Test that releasing non-existent lock doesn't raise error"""
        backup_dir.mkdir()
        # Should not raise any exception
        bk.release_backups_lock(str(backup_dir))
        lock_path = os.path.join(str(backup_dir), bk.LOCK_FILE)
        assert not os.path.exists(lock_path)
class TestBackupIteration:
    """Tests for internal backup iteration and validation functions.

    Exercises the private helpers of curateipsum.backup (_is_backup,
    _iterate_backups, _get_latest_backup) plus the public
    set_backup_marker.
    """

    @staticmethod
    def _single_entry(backup_dir):
        """Return the only os.DirEntry inside *backup_dir*.

        Several tests create exactly one directory entry and need a real
        os.DirEntry to feed into bk._is_backup; this helper replaces the
        repeated scandir/next/close boilerplate.
        """
        # The with-statement closes the scandir iterator deterministically,
        # even if next() raises on an unexpectedly empty directory.
        with os.scandir(str(backup_dir)) as entries:
            return next(entries)

    def test_is_backup_valid_backup(self, add_backup):
        """Test _is_backup recognizes valid backup directory"""
        backup = add_backup("20210101_120000")
        assert bk._is_backup(backup)

    def test_is_backup_missing_marker(self, backup_dir):
        """Test _is_backup rejects directory without marker"""
        backup_dir.mkdir()
        backup_path = backup_dir / "20210101_120000"
        backup_path.mkdir()
        # Content exists, but the backup marker file does not.
        (backup_path / "file.txt").write_text("content")
        assert not bk._is_backup(self._single_entry(backup_dir))

    def test_is_backup_only_marker_no_content(self, backup_dir):
        """Test _is_backup rejects directory with only marker file"""
        backup_dir.mkdir()
        backup_path = backup_dir / "20210101_120000"
        backup_path.mkdir()
        # Marker exists, but the backup contains no actual data.
        marker_name = f"{bk.BACKUP_MARKER}_20210101_120000"
        (backup_path / marker_name).touch()
        assert not bk._is_backup(self._single_entry(backup_dir))

    def test_is_backup_invalid_name_format(self, backup_dir):
        """Test _is_backup rejects invalid directory name"""
        backup_dir.mkdir()
        backup_path = backup_dir / "not-a-backup"
        backup_path.mkdir()
        # Marker and content are both present; only the name is invalid.
        marker_name = f"{bk.BACKUP_MARKER}_not-a-backup"
        (backup_path / marker_name).touch()
        (backup_path / "file.txt").write_text("content")
        assert not bk._is_backup(self._single_entry(backup_dir))

    def test_iterate_backups_empty_directory(self, backup_dir):
        """Test _iterate_backups on empty directory"""
        backup_dir.mkdir()
        assert list(bk._iterate_backups(str(backup_dir))) == []

    def test_iterate_backups_mixed_contents(self, backup_dir, add_backup):
        """Test _iterate_backups filters non-backup entries"""
        # Valid backups.
        backup1 = add_backup("20210101_120000")
        backup2 = add_backup("20210102_120000")
        # Entries that must be ignored: a stray file, an unrelated
        # directory, and the lock file itself.
        (backup_dir / "random_file.txt").write_text("not a backup")
        (backup_dir / "invalid_dir").mkdir()
        (backup_dir / bk.LOCK_FILE).touch()
        backups = sorted(bk._iterate_backups(str(backup_dir)),
                         key=lambda e: e.name)
        assert len(backups) == 2
        assert backups[0].name == backup1.name
        assert backups[1].name == backup2.name

    def test_iterate_backups_incomplete_backup(self, backup_dir):
        """Test _iterate_backups skips backup without marker"""
        backup_dir.mkdir()
        # A complete backup: data plus marker.
        complete = backup_dir / "20210101_120000"
        complete.mkdir()
        (complete / "file.txt").write_text("content")
        marker_name = f"{bk.BACKUP_MARKER}_20210101_120000"
        (complete / marker_name).touch()
        # An incomplete backup: data but no marker.
        incomplete = backup_dir / "20210102_120000"
        incomplete.mkdir()
        (incomplete / "file.txt").write_text("content")
        backups = list(bk._iterate_backups(str(backup_dir)))
        assert len(backups) == 1
        assert backups[0].name == "20210101_120000"

    def test_get_latest_backup_returns_most_recent(self, backup_dir,
                                                   add_backup):
        """Test _get_latest_backup returns most recent backup"""
        add_backup("20210101_120000")
        add_backup("20210102_120000")
        latest = add_backup("20210103_120000")
        result = bk._get_latest_backup(str(backup_dir))
        assert result is not None
        assert result.name == latest.name

    def test_get_latest_backup_empty_directory(self, backup_dir):
        """Test _get_latest_backup returns None for empty directory"""
        backup_dir.mkdir()
        assert bk._get_latest_backup(str(backup_dir)) is None

    def test_get_latest_backup_no_valid_backups(self, backup_dir):
        """Test _get_latest_backup returns None with no valid backups"""
        backup_dir.mkdir()
        # Incomplete backup: content, but no marker file.
        incomplete = backup_dir / "20210101_120000"
        incomplete.mkdir()
        (incomplete / "file.txt").write_text("content")
        assert bk._get_latest_backup(str(backup_dir)) is None

    def test_set_backup_marker_creates_marker(self, backup_dir):
        """Test set_backup_marker creates marker file"""
        backup_dir.mkdir()
        backup_path = backup_dir / "20210101_120000"
        backup_path.mkdir()
        backup_entry = bk.fs.PseudoDirEntry(str(backup_path))
        bk.set_backup_marker(backup_entry)
        marker_name = f"{bk.BACKUP_MARKER}_20210101_120000"
        assert (backup_path / marker_name).exists()

    def test_set_backup_marker_idempotent(self, backup_dir):
        """Test set_backup_marker is idempotent"""
        backup_dir.mkdir()
        backup_path = backup_dir / "20210101_120000"
        backup_path.mkdir()
        backup_entry = bk.fs.PseudoDirEntry(str(backup_path))
        bk.set_backup_marker(backup_entry)
        # A second call must not fail or change the outcome.
        bk.set_backup_marker(backup_entry)
        marker_name = f"{bk.BACKUP_MARKER}_20210101_120000"
        assert (backup_path / marker_name).exists()