| 47 |
rodolico |
1 |
#!/usr/bin/env perl
|
|
|
2 |
|
| 48 |
rodolico |
3 |
# Simplified BSD License (FreeBSD License)
|
|
|
4 |
#
|
|
|
5 |
# Copyright (c) 2025, Daily Data Inc.
|
|
|
6 |
# All rights reserved.
|
|
|
7 |
#
|
|
|
8 |
# Redistribution and use in source and binary forms, with or without
|
|
|
9 |
# modification, are permitted provided that the following conditions are met:
|
|
|
10 |
#
|
|
|
11 |
# 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
12 |
# list of conditions and the following disclaimer.
|
|
|
13 |
#
|
|
|
14 |
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
15 |
# this list of conditions and the following disclaimer in the documentation
|
|
|
16 |
# and/or other materials provided with the distribution.
|
|
|
17 |
#
|
|
|
18 |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
19 |
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
20 |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
21 |
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
|
22 |
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
23 |
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
24 |
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
25 |
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
|
26 |
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
27 |
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
28 |
|
| 47 |
rodolico |
29 |
# cleanSnaps - detect and remove old ZFS snapshots named like YYYY-MM-DD-<N><d|w|m>
|
| 60 |
rodolico |
30 |
# Usage: cleanSnaps [-f] [-v] [-t <shift>] [-V] [-h]
|
| 54 |
rodolico |
31 |
# -f actually destroy snapshots (use with care)
|
| 47 |
rodolico |
32 |
# -v verbose
|
| 48 |
rodolico |
33 |
# -t time shift, simulate running at a different time. Examples:
|
|
|
34 |
# -t 2m (run as if 2 months ago)
|
|
|
35 |
# -t -4w (run as if 4 weeks earlier)
|
|
|
36 |
# -t +3d (run as if 3 days in the future)
|
| 60 |
rodolico |
37 |
# -V show version and exit
|
| 48 |
rodolico |
38 |
# -h show this help and exit
|
| 60 |
rodolico |
39 |
#
|
|
|
40 |
# Author: R. W. Rodlico <rodo@dailydata.net>
|
|
|
41 |
# Created: December 2025
|
|
|
42 |
#
|
|
|
43 |
# Revision History:
|
|
|
44 |
# Version: 1.0 RWR 2025-12-18
|
|
|
45 |
# Initial release with time-shift simulation and snapshot retention logic
|
|
|
46 |
#
|
|
|
47 |
# Version: 1.1 RWR 2025-12-18
|
|
|
48 |
# Added --unmatched/-u flag to clean snapshots that don't match the retention pattern regex
|
|
|
49 |
# When --unmatched is used, ONLY unmatched snapshots are removed (normal retention logic is skipped)
|
| 47 |
rodolico |
50 |
|
|
|
51 |
use strict;
|
|
|
52 |
use warnings;
|
| 60 |
rodolico |
53 |
|
|
|
54 |
our $VERSION = '1.1';
|
|
|
55 |
|
| 54 |
rodolico |
56 |
use Getopt::Long qw(GetOptions);
|
| 47 |
rodolico |
57 |
use Time::Piece;
|
|
|
58 |
|
| 54 |
rodolico |
59 |
# Allow short option bundling (e.g. -fv) and accept long names
|
|
|
60 |
Getopt::Long::Configure('bundling');
|
|
|
61 |
|
| 47 |
rodolico |
62 |
my %opts;
|
| 54 |
rodolico |
63 |
GetOptions(\%opts,
|
|
|
64 |
'force|f', # force (actually perform destroys)
|
|
|
65 |
'verbose|v', # verbose
|
|
|
66 |
'help|h', # help
|
| 60 |
rodolico |
67 |
'version|V', # version
|
| 54 |
rodolico |
68 |
'timeshift|t=s', # time shift string (alias -t)
|
| 60 |
rodolico |
69 |
'unmatched|u', # clean snapshots that don't match the pattern
|
| 54 |
rodolico |
70 |
) or die "Error parsing command line options\n";
|
| 47 |
rodolico |
71 |
|
| 54 |
rodolico |
72 |
# Normalize options: default behaviour is dry-run unless --force is specified.
|
| 60 |
rodolico |
73 |
my $FORCE = (defined $opts{'force'} || defined $opts{'f'}) ? 1 : 0;
|
|
|
74 |
my $VERBOSE = (defined $opts{'verbose'} || defined $opts{'v'}) ? 1 : 0;
|
|
|
75 |
my $UNMATCHED = (defined $opts{'unmatched'} || defined $opts{'u'}) ? 1 : 0;
|
| 54 |
rodolico |
76 |
my $timeshift = defined $opts{'timeshift'} ? $opts{'timeshift'} : $opts{'t'};
|
| 47 |
rodolico |
77 |
|
|
|
78 |
my $ZFS_CMD = $ENV{ZFS_CMD} // 'zfs';
|
|
|
79 |
|
|
|
80 |
sub logmsg { print @_, "\n" if $VERBOSE }
|
|
|
81 |
|
| 60 |
rodolico |
82 |
# show version and exit
|
|
|
83 |
if ($opts{'version'} || $opts{'V'}) {
|
|
|
84 |
print "cleanSnaps version $VERSION\n";
|
|
|
85 |
exit 0;
|
|
|
86 |
}
|
|
|
87 |
|
| 48 |
rodolico |
88 |
# show help and exit
|
| 54 |
rodolico |
89 |
if ($opts{'help'} || $opts{'h'}) {
|
| 60 |
rodolico |
90 |
print "Usage: cleanSnaps [--force|-f] [--verbose|-v] [--timeshift|-t <shift>] [--unmatched|-u] [--version|-V] [--help|-h] [pool]\n";
|
| 54 |
rodolico |
91 |
print " --force, -f actually destroy snapshots (default: dry-run)\n";
|
|
|
92 |
print " --verbose, -v verbose logging\n";
|
| 55 |
rodolico |
93 |
print " --timeshift, -t time shift, simulate running at a previous time (examples: 2m, -4w, +3d)\n";
|
| 60 |
rodolico |
94 |
print " --unmatched, -u ONLY clean snapshots that don't match the retention pattern (skips normal retention)\n";
|
|
|
95 |
print " --version, -V show version and exit\n";
|
| 54 |
rodolico |
96 |
print " --help, -h show this help and exit\n";
|
| 48 |
rodolico |
97 |
exit 0;
|
|
|
98 |
}
|
|
|
99 |
|
|
|
100 |
# compute simulated "now" if the user requested a time shift
|
| 50 |
rodolico |
101 |
# allows us to keep or remove snapshots as if we were running at a different time
|
|
|
102 |
# giving retention decisions less stringent on, say, backup systems
|
| 47 |
rodolico |
103 |
my $now = time();
|
| 54 |
rodolico |
104 |
if (defined $timeshift && $timeshift =~ /^(?:([+-]?)(\d+)([smhdwy]))$/i) {
|
| 48 |
rodolico |
105 |
my ($sign, $num, $unit) = ($1, $2, lc $3);
|
|
|
106 |
# default behavior: no leading sign => treat as negative (simulate running in the past)
|
|
|
107 |
$sign = '-' unless defined $sign && $sign ne '';
|
|
|
108 |
my %unit_secs = (
|
|
|
109 |
s => 1,
|
|
|
110 |
m => 30 * 86400, # months approximated as 30 days to match snapshot 'm' semantics
|
|
|
111 |
h => 3600,
|
|
|
112 |
d => 86400,
|
|
|
113 |
w => 7 * 86400,
|
|
|
114 |
y => 365 * 86400,
|
|
|
115 |
);
|
|
|
116 |
if (!exists $unit_secs{$unit}) {
|
|
|
117 |
die "Invalid time unit '$unit' in -t option; use s,m,h,d,w,y\n";
|
|
|
118 |
}
|
|
|
119 |
my $shift_seconds = $num * $unit_secs{$unit};
|
|
|
120 |
$shift_seconds = -$shift_seconds if $sign eq '-';
|
|
|
121 |
$now += $shift_seconds;
|
| 54 |
rodolico |
122 |
my $ts_display = defined $timeshift ? $timeshift : $opts{t};
|
|
|
123 |
logmsg("Simulating current time with shift $ts_display; adjusted now -> " . scalar(localtime($now)) );
|
| 48 |
rodolico |
124 |
}
|
| 47 |
rodolico |
125 |
|
| 50 |
rodolico |
126 |
my @candidates; # this will hold the candidates for removal
|
|
|
127 |
|
|
|
128 |
# Optional command-line parameter: pool name to process (only snapshots from this pool will be considered)
|
|
|
129 |
my $pool = shift @ARGV;
|
|
|
130 |
if ($pool) {
|
|
|
131 |
logmsg("Filtering snapshots to pool: $pool");
|
|
|
132 |
}
|
|
|
133 |
|
|
|
134 |
# if a pool name was given, limit zfs list to that pool and do a recursive list
|
|
|
135 |
# recursion is done by default if no pool given
|
|
|
136 |
my $command = $ZFS_CMD . ' list -H -o name -t snapshot' . ($pool ? " -r $pool" : '');
|
|
|
137 |
|
|
|
138 |
open my $fh, '-|', $command or die "Failed to run $ZFS_CMD: $!";
|
| 47 |
rodolico |
139 |
while (my $snap = <$fh>) {
|
|
|
140 |
chomp $snap;
|
|
|
141 |
next unless defined $snap && $snap =~ /\S/;
|
|
|
142 |
|
|
|
143 |
unless ($snap =~ /@/) {
|
|
|
144 |
logmsg("skipping invalid snapshot name: $snap");
|
|
|
145 |
next;
|
|
|
146 |
}
|
|
|
147 |
my ($dataset, $snapname) = split /@/, $snap, 2;
|
|
|
148 |
|
| 50 |
rodolico |
149 |
# match an optional prefix, a date or date-time stamp, optional text, then TTL
|
|
|
150 |
# Examples matched:
|
|
|
151 |
# pool@snapname-2025-12-01-3d
|
|
|
152 |
# pool@prefix_2025-12-01T12:30:00_foo_3h
|
|
|
153 |
# pool@2025-12-01_foo_2w
|
|
|
154 |
# allow time separators of ':' or '.' and optional underscore or space before time
|
|
|
155 |
unless ($snapname =~ m/.*?(\d{4}-\d{2}-\d{2}(?:[T _]\d{2}[:\.]\d{2}(?:[:\.]\d{2})?)?)(?:.*?)(\d+)([smhdwy])$/) {
|
|
|
156 |
logmsg("snapshot does not match ...<date/time>...<N><s|m|h|d|w|y>: $snap");
|
| 60 |
rodolico |
157 |
# if --unmatched flag is set, mark this snapshot for removal
|
|
|
158 |
if ($UNMATCHED) {
|
|
|
159 |
push @candidates, $snap;
|
|
|
160 |
logmsg("marked for removal (unmatched pattern): $snap") if $VERBOSE;
|
|
|
161 |
}
|
| 47 |
rodolico |
162 |
next;
|
|
|
163 |
}
|
| 60 |
rodolico |
164 |
|
|
|
165 |
# If --unmatched flag is set, skip normal retention processing for matched snapshots
|
|
|
166 |
if ($UNMATCHED) {
|
|
|
167 |
logmsg("skipping retention check (matched pattern): $snap") if $VERBOSE;
|
|
|
168 |
next;
|
|
|
169 |
}
|
|
|
170 |
|
| 47 |
rodolico |
171 |
my ($snap_date, $num, $unit) = ($1, $2, $3);
|
|
|
172 |
|
| 50 |
rodolico |
173 |
# parse snapshot date/time using Time::Piece; accept several common formats
|
| 47 |
rodolico |
174 |
my $snap_epoch;
|
| 50 |
rodolico |
175 |
my @fmts = (
|
|
|
176 |
'%Y-%m-%d %H:%M:%S',
|
|
|
177 |
'%Y-%m-%dT%H:%M:%S',
|
|
|
178 |
'%Y-%m-%d %H:%M',
|
|
|
179 |
'%Y-%m-%d_%H.%M.%S',
|
|
|
180 |
'%Y-%m-%d_%H.%M',
|
|
|
181 |
'%Y-%m-%d',
|
|
|
182 |
);
|
|
|
183 |
# try to parse using each format until one works
|
|
|
184 |
my $parsed = 0;
|
|
|
185 |
for my $fmt (@fmts) {
|
|
|
186 |
eval {
|
|
|
187 |
my $tp = Time::Piece->strptime($snap_date, $fmt);
|
|
|
188 |
$snap_epoch = $tp->epoch;
|
|
|
189 |
$parsed = 1;
|
|
|
190 |
1;
|
|
|
191 |
};
|
|
|
192 |
last if $parsed;
|
|
|
193 |
}
|
|
|
194 |
# didn't parse successfully, show message and skip
|
|
|
195 |
unless ($parsed) {
|
|
|
196 |
logmsg("failed to parse date/time '$snap_date' for $snap");
|
| 60 |
rodolico |
197 |
# if --unmatched flag is set, mark this snapshot for removal (pattern matched but date parsing failed)
|
|
|
198 |
if ($UNMATCHED) {
|
|
|
199 |
push @candidates, $snap;
|
|
|
200 |
logmsg("marked for removal (date parse failed): $snap") if $VERBOSE;
|
|
|
201 |
}
|
| 47 |
rodolico |
202 |
next;
|
| 50 |
rodolico |
203 |
}
|
| 47 |
rodolico |
204 |
|
| 50 |
rodolico |
205 |
# compute retention in seconds according to unit (s,m,h,d,w,m,y)
|
|
|
206 |
my $retention_seconds;
|
|
|
207 |
if ($unit eq 's') { $retention_seconds = $num }
|
|
|
208 |
elsif ($unit eq 'm') { $retention_seconds = $num * 30 * 86400 } # months ~= 30 days
|
|
|
209 |
elsif ($unit eq 'h') { $retention_seconds = $num * 3600 }
|
|
|
210 |
elsif ($unit eq 'd') { $retention_seconds = $num * 86400 }
|
|
|
211 |
elsif ($unit eq 'w') { $retention_seconds = $num * 7 * 86400 }
|
|
|
212 |
elsif ($unit eq 'y') { $retention_seconds = $num * 365 * 86400 }
|
| 47 |
rodolico |
213 |
else { logmsg("unknown unit $unit for $snap"); next }
|
|
|
214 |
|
| 50 |
rodolico |
215 |
# determine if snapshot is older than retention period
|
| 47 |
rodolico |
216 |
my $age = $now - $snap_epoch;
|
| 50 |
rodolico |
217 |
if ($age > $retention_seconds) { # ready for removal
|
| 47 |
rodolico |
218 |
push @candidates, $snap;
|
| 50 |
rodolico |
219 |
} else { # keep
|
|
|
220 |
my $age_days = $age / 86400;
|
|
|
221 |
my $ret_days = $retention_seconds / 86400;
|
|
|
222 |
logmsg("keep: $snap (age " . sprintf("%.2f", $age_days) . "d <= " . sprintf("%.2f", $ret_days) . "d)");
|
| 47 |
rodolico |
223 |
}
|
|
|
224 |
}
|
|
|
225 |
close $fh;
|
|
|
226 |
|
|
|
227 |
if (!@candidates) {
|
|
|
228 |
print "No snapshots to remove.\n";
|
|
|
229 |
exit 0;
|
|
|
230 |
}
|
|
|
231 |
|
|
|
232 |
printf "Snapshots to remove (%d):\n", scalar @candidates;
|
|
|
233 |
for my $s (@candidates) { printf " %s\n", $s }
|
|
|
234 |
|
| 54 |
rodolico |
235 |
if (!$FORCE) {
|
| 47 |
rodolico |
236 |
print "\nDry-run: no snapshots were destroyed. Use -f to actually remove them.\n";
|
|
|
237 |
exit 0;
|
|
|
238 |
}
|
|
|
239 |
|
| 50 |
rodolico |
240 |
# actual removal. If any fail, report error but continue to remove rest
|
| 47 |
rodolico |
241 |
my $failed = 0;
|
|
|
242 |
for my $s (@candidates) {
|
|
|
243 |
printf "Destroying: %s\n", $s;
|
|
|
244 |
my $rc = system($ZFS_CMD, 'destroy', $s);
|
|
|
245 |
if ($rc != 0) {
|
|
|
246 |
printf STDERR "Failed to destroy %s\n", $s;
|
|
|
247 |
$failed = 1;
|
|
|
248 |
}
|
|
|
249 |
}
|
|
|
250 |
|
|
|
251 |
if ($failed) {
|
|
|
252 |
print STDERR "One or more destroys failed.\n";
|
|
|
253 |
exit 1;
|
|
|
254 |
}
|
|
|
255 |
|
|
|
256 |
print "Done.\n";
|