Added documentation

This commit is contained in:
2026-01-22 12:25:58 +01:00
parent 7098286b43
commit 97e66d5fae
4 changed files with 132 additions and 11 deletions

BIN
.usage.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 685 KiB

102
README.md
View File

@@ -1,3 +1,101 @@
# backup.py
<div align="center">
<h1>backup.py</h1>
<h6><i>Modular and lightweight backup utility to save, encrypt and verify your personal data.</i></h6>
<img src=".usage.gif" />
</div>
## Overview
`backup.py` supports two major options: `--backup`, to create a new backup and `--extract`
to extract an existing backup archive.
Work in progress
In order to create a backup file, you first need to create a *"sources file"* to specify
the absolute paths to backup. For instance:
```ini
# directories end with a slash...
photos=/home/marco/Pictures/
documents=/home/marco/Documents/
# while individual files do not
wireguard=/etc/wireguard/wg0.conf
```
Then, you can start the backup with the following command:
```sh
$ sudo ./backup.py --checksum --backup sources.ini $PWD "very_bad_pw"
Copying photos (1/3)
Copying documents (2/3)
Copying wireguard (3/3)
File name: '/home/marco/Projects/backup.py/backup-wood-20260122.tar.gz.enc'
Checksums file: '/home/marco/Projects/backup.py/backup-wood-20260122.sha256'
File size: 5533818904 bytes (5.15 GiB)
Elapsed time: 2 minutes, 12 seconds
```
The `--checksum` option (optional) is used to generate a checksums file containing the hash of each single file of the backup.
To extract an existing backup, you can instead issue the following command:
```sh
$ ./backup.py -c --extract backup-wood-20260122.tar.gz.enc "very_bad_pw" backup-wood-20260122.sha256
Backup extracted to: '/home/marco/Projects/backup.py/backup.py.tmp'
Elapsed time: 1 minute, 3 seconds
```
This will create a new directory named `backup.py.tmp` on your local path. Just like before,
the `-c` option is optional.
## Usage
As stated before, `backup.py` is built from scratch in modern Python (3.10+) without using
any external dependency except for `tar` and `gpg`.
The *sources file* follows an INI-like syntax structured using associative records between
labels and absolute paths. In other words:
```ini
<label>=<path>
```
where:
- `<label>` is a descriptive name of a backup entry;
- `<path>` is the absolute path to a directory or a file.
For example:
```ini
# File 'server_backup.ini'
# List directories and files to backup
#
nginx=/etc/nginx/
ssh=/etc/ssh/
www=/var/www/html/
# no slash here ----v
host_file=/etc/hosts
```
As you can see, individual files are specified by omitting the trailing slash at the end
of the absolute path. Comments, on the other hand, are inserted using the `#` token. Blank
lines are ignored.
Internally, `backup.py` orchestrates several UNIX utilities to create backups. In particular,
it follows the procedure listed below:
1. **Copy phase**: uses Python `shutil.copytree()` to copy files while preserving metadata and
symlinks (without following them) and by ignoring special files;
2. **Compression**: creates a gzip-compressed tar archive using GNU tar;
3. **Encryption**: encrypts the archive with GPG using AES-256 symmetric encryption;
4. **Checksum** (optional): computes SHA256 hashes for each file in the backup archive.
The backup process creates temporary files in `backup.py.tmp` and `backup.py.tar.gz`, which are
automatically cleaned up on completion or interruption (i.e., `C-c`).
## Old version
This implementation of `backup.py` is a port of an old backup script originally written in Bash
that I developed back in 2018. While this new version should be compatible with old backup archives,
it may start to diverge at a certain point in the future. If you experience incompatibilities and want
to revert to the original version, you can do so by visiting the
[latest stable commit](https://git.marcocetica.com/marco/backup.py/src/commit/786c30ef14abe2056dfa5cb250b766db73ca71aa).
## License
This software is released under GPLv3. You can obtain a copy of this license by visiting [this page](https://choosealicense.com/licenses/gpl-3.0/).

View File

@@ -48,10 +48,12 @@ class SignalHandler:
def __init__(self) -> None:
self.interrupted = False
self.output_path: Optional[Path] = None
self.checksum_file: Optional[Path] = None
def setup(self, output_path: Path) -> None:
def setup(self, output_path: Path, checksum_file: Optional[Path] = None) -> None:
"""Configure signal handler with cleanup paths"""
self.output_path = output_path
self.checksum_file = checksum_file
signal.signal(signal.SIGINT, self.handle_interrupt)
def handle_interrupt(self, _sig_num: int, _frame: Any) -> None:
@@ -76,6 +78,9 @@ class SignalHandler:
self.output_path / "backup.py.tar.gz"
]
if self.checksum_file:
temp_files.append(self.checksum_file)
Backup.cleanup_files(*temp_files)
print("DONE.", file=sys.stderr)
@@ -198,11 +203,11 @@ class Backup:
path = Path(path_str.strip())
if not path.exists():
return Err(f"Path does not exist: {path}.")
return Err(f"Path does not exist: '{path}'.")
sources.append(BackupSource(label.strip(), path))
except IOError as err:
return Err(f"Failed to read sources file: {err}.")
return Err(f"Failed to read sources file: '{err}'.")
if not sources:
return Err(f"No valid sources found in file.")
@@ -278,7 +283,7 @@ class Backup:
return Ok(None)
return Err(f"The following source element is neither a file nor a directory: {source}.")
return Err(f"The following source element is neither a file nor a directory: '{source}'.")
except (IOError, OSError, shutil.Error) as err:
return Err(f"Copy failed: {err}.")
@@ -315,7 +320,7 @@ class Backup:
hash_obj.update(byte_block)
return Ok(hash_obj.hexdigest())
except IOError as e:
return Err(f"Failed to read file {file_path}: {e}.")
return Err(f"Failed to read file '{file_path}': {e}.")
@staticmethod
def count_tar_entries(source_dir: Path) -> int:
@@ -502,7 +507,7 @@ class Backup:
print(f"File name: '{backup_archive}'")
if config.checksum:
print(f"Checksum file: '{checksum_file}'")
print(f"Checksums file: '{checksum_file}'")
print(f"File size: {file_size} bytes ({file_size_hr})")
print(f"Elapsed time: {self.prettify_timestamp(elapsed_time)}")
@@ -620,7 +625,7 @@ class Backup:
with open(checksum_file, 'r') as cf:
expected_hashes = set(line.strip() for line in cf if line.strip())
except IOError as err:
return Err(f"Failed to load checksum file: {err}.")
return Err(f"Failed to load checksums file: {err}.")
files = Backup.collect_files(extracted_dir)
progress = None
@@ -738,7 +743,13 @@ def main():
sources_file, output_path, encryption_pass = args.backup
sources_path = Path(sources_file)
output_dir = Path(output_path)
signal_handler.setup(output_dir)
# Determine checksum file if requested
date_str = datetime.now().strftime("%Y%m%d")
hostname = os.uname().nodename
checksum_file = output_dir / f"backup-{hostname}-{date_str}.sha256" if args.checksum else None
signal_handler.setup(output_dir, checksum_file)
# Create output directory if it doesn't exist
if not output_dir.exists():
@@ -792,7 +803,7 @@ def main():
sys.exit(1)
if not checksum_file.exists():
print(f"Checksum file '{checksum_file}' does not exist.", file=sys.stderr)
print(f"Checksums file '{checksum_file}' does not exist.", file=sys.stderr)
sys.exit(1)
extract_res = backup.extract_backup(archive_file, decryption_pass, checksum_file, args.verbose)

12
sources.ini Normal file
View File

@@ -0,0 +1,12 @@
# Backup.py sample sources file
# To backup new entries, add a mapping
# of the type '<LABEL>=<PATH>'
#
# directories end with a slash...
web_server=/var/www/
ssh=/etc/ssh/
# while individual files do not
wireguard=/etc/wireguard/wg0.conf
firewall=/etc/nftables.conf