Skip to content

Commit 061a196

Browse files
committed
Add a primitive backup rotation mechanism
We have daily backups that periodically consume all available disk space. Currently, I clean up those files once in a while using a Rust program I wrote back in the day, but I would like to make this part of the normal setup. We _could_ use that Rust program, but building and shipping the binary gets a bit annoying. For something as simple as this, Python seems to be a better fit. This updates the existing systemd unit to run the rotation script after each successful backup. For now, the script runs in dry-run mode. Outside of dry run, the script will keep the 30 most recent backups and delete the rest.
1 parent ded4aec commit 061a196

File tree

6 files changed

+272
-0
lines changed

6 files changed

+272
-0
lines changed

.github/workflows/ci.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,32 @@ jobs:
3333
run: |
3434
python -m pre_commit run --all-files --show-diff-on-failure
3535
36+
python:
37+
runs-on: ubuntu-latest
38+
39+
steps:
40+
- uses: actions/checkout@v4
41+
42+
- name: Setup Python
43+
uses: actions/setup-python@v4
44+
with:
45+
python-version: '3.12'
46+
47+
- name: Install pytest / Ruff
48+
run: |
49+
python -m pip install pytest ruff
50+
51+
- name: Run Python linter
52+
run: |
53+
ruff format --check
54+
ruff check
55+
56+
- name: Run Python tests
57+
run: |
58+
pushd roles/postgres/files
59+
PATH=$PWD:$PATH pytest -v .
60+
popd
61+
3662
ansible:
3763
runs-on: ubuntu-latest
3864
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__pycache__

roles/postgres/files/rotate.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#!/usr/bin/python
2+
3+
"""Rotate backup files in a given directory.
4+
5+
Implements a primitive FIFO backup rotation strategy by keeping N most recent backup files. The
6+
order is defined by sorting the file names lexicographically: the files that appear later in
7+
the sorted list are considered to be newer.
8+
"""
9+
10+
import argparse
11+
import dataclasses
12+
import pathlib
13+
import typing
14+
15+
16+
@dataclasses.dataclass(order=True)
class Backup:
    """A single backup file together with its size.

    The generated comparison methods order instances by ``path`` first and by ``size`` second,
    following the field declaration order.
    """

    # Location of the backup file.
    path: pathlib.Path
    # Size of the file in bytes.
    size: int
20+
21+
22+
class Args(typing.Protocol):
    """Structural type for the namespace returned by the command-line parser."""

    # How many of the most recent backup files to retain.
    keep: int
    # Directory containing the backup files.
    dir: pathlib.Path
    # Glob pattern restricting which files are considered.
    pattern: str
    # True when files should actually be deleted rather than only reported.
    no_dry_run: bool
27+
28+
29+
def non_negative_int(str_value: str) -> int:
    """Convert a command-line string to an ``int``, rejecting negative numbers.

    Meant to be passed to ``argparse`` as a ``type=`` callable.

    Raises:
        ValueError: if ``str_value`` is not an integer literal.
        argparse.ArgumentTypeError: if the parsed number is below zero.
    """
    value = int(str_value)
    if value >= 0:
        return value

    raise argparse.ArgumentTypeError(f"Value must be non-negative: {value} < 0")
35+
36+
37+
def parse_args() -> Args:
    """Parse the command-line arguments of the rotation script.

    Returns:
        The parsed namespace, typed as ``Args``.

    ``argparse`` reports missing or invalid arguments with a usage message and exits.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-n",
        "--keep",
        required=True,
        type=non_negative_int,
        help="Keep this many most recent backup files",
    )
    parser.add_argument(
        "-d",
        "--dir",
        # There is nothing to scan without a directory. Requiring it turns a later
        # AttributeError on ``None`` into a regular usage error.
        required=True,
        type=pathlib.Path,
        help="Path to the directory with backup files",
    )
    parser.add_argument(
        "-p",
        "--pattern",
        type=str,
        default="*",
        help="Only consider files that match this glob pattern",
    )
    parser.add_argument(
        "--no-dry-run",
        action="store_true",
        help="Actually remove the rotated files",
    )

    namespace = parser.parse_args()
    # argparse returns an untyped Namespace; the cast only informs type checkers.
    return typing.cast(Args, namespace)
64+
65+
66+
def rotate(
    dir: pathlib.Path,
    keep: int,
    pattern: str = "*",
) -> tuple[list[Backup], list[Backup]]:
    """Split the backup files found in ``dir`` into those to retain and those to delete.

    Args:
        dir: directory to scan.
        keep: number of entries to retain, counted from the top of the descending sort.
        pattern: glob pattern applied inside ``dir``; only regular files are considered.

    Returns:
        A ``(to_keep, to_remove)`` pair of lists. Nothing is deleted here.
    """
    found = [
        Backup(path=candidate, size=candidate.stat().st_size)
        for candidate in dir.glob(pattern)
        if candidate.is_file()
    ]
    # Descending order puts the entries regarded as newest at the front of the list.
    found.sort(reverse=True)

    return (found[:keep], found[keep:])
86+
87+
88+
def cleanup(to_keep: list[Backup], to_remove: list[Backup], *, dry_run: bool = True) -> None:
    """Delete old backup files and print disk space usage stats.

    Args:
        to_keep: backups that stay on disk; they only contribute to the report.
        to_remove: backups to delete.
        dry_run: when true (the default), nothing is deleted and only the report is printed.
    """
    used_space = sum(backup.size for backup in to_keep)
    freed_space = sum(backup.size for backup in to_remove)

    if dry_run:
        print("Dry run. No changes will be made.\n")
    else:
        for backup in to_remove:
            # A file that vanished since the directory scan is already gone, which is the
            # desired end state, so it must not abort the removal of the remaining files.
            backup.path.unlink(missing_ok=True)

    print(f"Used space: {len(to_keep)} files, {used_space} bytes")
    print(f"Freed space: {len(to_remove)} files, {freed_space} bytes")
102+
103+
104+
def main():
    """Script entry point: parse the command line, plan the rotation and carry it out."""
    args = parse_args()
    plan = rotate(args.dir, args.keep, args.pattern)

    # Deletion is opt-in: without --no-dry-run the plan is only reported.
    cleanup(*plan, dry_run=not args.no_dry_run)


if __name__ == "__main__":
    main()
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import pathlib
2+
import subprocess
3+
4+
import pytest
5+
6+
7+
# File names the `backups` fixture creates, listed in ascending (sorted) order so they can be
# compared directly against a sorted directory listing.
EXPECTED_FILES = [
    "2024-06-27.tar.gz",
    "2024-06-28.tar.gz",
    "2024-06-29.tar.gz",
    "2024-06-30.tar.gz",
    "2024-07-01.tar.gz",
    "2024-07-02.tar.gz",
    "2024-07-03.tar.gz",
    "2024-07-08.tar.gz",
    "2024-07-09.tar.gz",
]
18+
19+
20+
@pytest.fixture
def backups(tmp_path: pathlib.Path):
    """Fill a temporary directory with one file per name in ``EXPECTED_FILES`` and return it."""
    for name in EXPECTED_FILES:
        # Each file's content is simply its own name.
        tmp_path.joinpath(name).write_text(name)

    created = sorted(entry.name for entry in tmp_path.glob("*"))
    assert created == EXPECTED_FILES
    return tmp_path
27+
28+
29+
@pytest.fixture
def no_backups(tmp_path: pathlib.Path):
    """Return a temporary directory after checking that it is empty."""
    names = sorted(entry.name for entry in tmp_path.glob("*"))
    assert names == []
    return tmp_path
33+
34+
35+
def test_normal_invocation(backups: pathlib.Path):
    """With ``--no-dry-run --keep 3`` only the three last-sorting files remain."""
    command = ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(entry.name for entry in backups.glob("*"))
    assert remaining == [
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]
44+
45+
46+
def test_normal_invocation_is_idempotent(backups: pathlib.Path):
    """Running the same rotation three times leaves the same three files as a single run."""
    command = ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
    for _ in range(3):
        subprocess.check_call(command)

    remaining = sorted(entry.name for entry in backups.glob("*"))
    assert remaining == [
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]
61+
62+
63+
def test_dry_run_invocation(backups: pathlib.Path):
    """Without ``--no-dry-run`` the script must leave every file in place."""
    command = ["rotate.py", "--keep", "3", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(entry.name for entry in backups.glob("*"))
    assert remaining == EXPECTED_FILES
66+
67+
68+
def test_keep_zero(backups: pathlib.Path):
    """``--keep 0`` removes every file in the directory."""
    command = ["rotate.py", "--no-dry-run", "--keep", "0", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(entry.name for entry in backups.glob("*"))
    assert remaining == []
73+
74+
75+
def test_keep_negative(backups: pathlib.Path):
    """A negative ``--keep`` value makes the script exit with a non-zero status."""
    command = ["rotate.py", "--no-dry-run", "--keep", "-1", "--dir", backups]
    with pytest.raises(subprocess.CalledProcessError):
        subprocess.check_call(command)
80+
81+
82+
def test_keep_more_than_files(backups: pathlib.Path):
    """When ``--keep`` exceeds the number of files, nothing is removed."""
    command = ["rotate.py", "--no-dry-run", "--keep", "100", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(entry.name for entry in backups.glob("*"))
    assert remaining == EXPECTED_FILES
87+
88+
89+
def test_keep_pattern(backups: pathlib.Path):
    """Only files matching ``--pattern`` are rotated; non-matching files are left alone.

    Four files match ``2024-06*tar.gz``; with ``--keep 1`` only the last-sorting one of them
    survives, while all ``2024-07`` files stay untouched.
    """
    subprocess.check_call(
        [
            "rotate.py",
            "--no-dry-run",
            "--keep",
            "1",
            "--dir",
            backups,
            "--pattern",
            "2024-06*tar.gz",
        ]
    )
    assert sorted(item.name for item in backups.glob("*")) == [
        "2024-06-30.tar.gz",
        "2024-07-01.tar.gz",
        "2024-07-02.tar.gz",
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]
110+
111+
112+
def test_keep_pattern_does_not_match_anything(backups: pathlib.Path):
    """A ``--pattern`` that matches no file results in no file being removed."""
    subprocess.check_call(
        [
            "rotate.py",
            "--no-dry-run",
            "--keep",
            "1",
            "--dir",
            backups,
            "--pattern",
            "2024-08*tar.gz",
        ]
    )
    assert sorted(item.name for item in backups.glob("*")) == EXPECTED_FILES

roles/postgres/tasks/main.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,13 @@
6868
become: true
6969
become_user: postgres
7070

71+
- name: Install the script for backup rotation
72+
ansible.builtin.copy:
73+
src: 'rotate.py'
74+
dest: '/usr/local/bin/rotate.py'
75+
mode: 'u=rwx,g=rx,o=rx'
76+
become: true
77+
7178
- name: Add a service template that allows creating backups of postgresql databases
7279
ansible.builtin.template:
7380

roles/postgres/templates/[email protected]

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ Group = postgres
99
WorkingDirectory = {{ postgres_backups_dir }}
1010
ExecStartPre = +/usr/bin/chown postgres:postgres {{ postgres_backups_dir }}
1111
ExecStart = /usr/bin/bash -c "/usr/bin/pg_dump --compress=9 --no-owner --format=p --file=%i_$(TZ=UTC date +%%Y%%m%%d-%%H%%M%%S).sql.gz %i"
12+
# Restrict rotation to this instance's own dumps. Every instance writes "%i_<timestamp>.sql.gz"
# into the same directory and rotate.py orders files by name, so an unscoped run would rank
# backups by database name before timestamp and could drop all dumps of another database.
# NOTE(review): instance names that are prefixes of one another (e.g. "app" and "app_test")
# would still overlap under this pattern.
ExecStartPost = /usr/bin/python /usr/local/bin/rotate.py --keep 30 --dir {{ postgres_backups_dir }} --pattern "%i_*.sql.gz"

0 commit comments

Comments
 (0)