Subversion Repositories zfs_utils

Rev

Rev 55 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
47 rodolico 1
#!/usr/bin/env perl
2
 
48 rodolico 3
# Simplified BSD License (FreeBSD License)
4
#
5
# Copyright (c) 2025, Daily Data Inc.
6
# All rights reserved.
7
#
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are met:
10
#
11
# 1. Redistributions of source code must retain the above copyright notice, this
12
#    list of conditions and the following disclaimer.
13
#
14
# 2. Redistributions in binary form must reproduce the above copyright notice,
15
#    this list of conditions and the following disclaimer in the documentation
16
#    and/or other materials provided with the distribution.
17
#
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 
47 rodolico 29
# cleanSnaps - detect and remove old ZFS snapshots named like YYYY-MM-DD-<N><d|w|m>
60 rodolico 30
# Usage: cleanSnaps [-f] [-v] [-t <shift>] [-V] [-h]
54 rodolico 31
#   -f  actually destroy snapshots (use with care)
47 rodolico 32
#   -v  verbose
48 rodolico 33
#   -t  time shift, simulate running at a different time. Examples:
34
#       -t 2m   (run as if 2 months ago)
35
#       -t -4w  (run as if 4 weeks earlier)
36
#       -t +3d  (run as if 3 days in the future)
60 rodolico 37
#   -V  show version and exit
48 rodolico 38
#   -h  show this help and exit
60 rodolico 39
#
40
# Author: R. W. Rodlico <rodo@dailydata.net>
41
# Created: December 2025
42
#
43
# Revision History:
44
# Version: 1.0 RWR 2025-12-18
45
# Initial release with time-shift simulation and snapshot retention logic
46
#
47
# Version: 1.1 RWR 2025-12-18
48
# Added --unmatched/-u flag to clean snapshots that don't match the retention pattern regex
49
# When --unmatched is used, ONLY unmatched snapshots are removed (normal retention logic is skipped)
47 rodolico 50
 
51
use strict;
52
use warnings;
60 rodolico 53
 
54
our $VERSION = '1.1';
55
 
54 rodolico 56
use Getopt::Long qw(GetOptions);
47 rodolico 57
use Time::Piece;
58
 
54 rodolico 59
# Allow short option bundling (e.g. -fv) and accept long names
60
Getopt::Long::Configure('bundling');
61
 
47 rodolico 62
my %opts;
54 rodolico 63
GetOptions(\%opts,
64
    'force|f',        # force (actually perform destroys)
65
    'verbose|v',      # verbose
66
    'help|h',         # help
60 rodolico 67
    'version|V',      # version
54 rodolico 68
    'timeshift|t=s',  # time shift string (alias -t)
60 rodolico 69
    'unmatched|u',    # clean snapshots that don't match the pattern
54 rodolico 70
) or die "Error parsing command line options\n";
47 rodolico 71
 
54 rodolico 72
# Normalize options: default behaviour is dry-run unless --force is specified.
60 rodolico 73
my $FORCE     = (defined $opts{'force'} || defined $opts{'f'}) ? 1 : 0;
74
my $VERBOSE   = (defined $opts{'verbose'} || defined $opts{'v'}) ? 1 : 0;
75
my $UNMATCHED = (defined $opts{'unmatched'} || defined $opts{'u'}) ? 1 : 0;
54 rodolico 76
my $timeshift = defined $opts{'timeshift'} ? $opts{'timeshift'} : $opts{'t'};
47 rodolico 77
 
78
my $ZFS_CMD = $ENV{ZFS_CMD} // 'zfs';
79
 
80
sub logmsg { print @_, "\n" if $VERBOSE }
81
 
60 rodolico 82
# show version and exit
83
if ($opts{'version'} || $opts{'V'}) {
84
    print "cleanSnaps version $VERSION\n";
85
    exit 0;
86
}
87
 
48 rodolico 88
# show help and exit
54 rodolico 89
if ($opts{'help'} || $opts{'h'}) {
60 rodolico 90
    print "Usage: cleanSnaps [--force|-f] [--verbose|-v] [--timeshift|-t <shift>] [--unmatched|-u] [--version|-V] [--help|-h] [pool]\n";
54 rodolico 91
    print "  --force, -f         actually destroy snapshots (default: dry-run)\n";
92
    print "  --verbose, -v       verbose logging\n";
55 rodolico 93
    print "  --timeshift, -t     time shift, simulate running at a previous time (examples: 2m, -4w, +3d)\n";
60 rodolico 94
    print "  --unmatched, -u     ONLY clean snapshots that don't match the retention pattern (skips normal retention)\n";
95
    print "  --version, -V       show version and exit\n";
54 rodolico 96
    print "  --help, -h          show this help and exit\n";
48 rodolico 97
    exit 0;
98
}
99
 
100
# compute simulated "now" if the user requested a time shift
50 rodolico 101
# allows us to keep or remove snapshots as if we were running at a different time
102
# giving retention decisions less stringent on, say, backup systems
47 rodolico 103
my $now = time();
54 rodolico 104
if (defined $timeshift && $timeshift =~ /^(?:([+-]?)(\d+)([smhdwy]))$/i) {
48 rodolico 105
    my ($sign, $num, $unit) = ($1, $2, lc $3);
106
    # default behavior: no leading sign => treat as negative (simulate running in the past)
107
    $sign = '-' unless defined $sign && $sign ne '';
108
    my %unit_secs = (
109
        s => 1,
110
        m => 30 * 86400,   # months approximated as 30 days to match snapshot 'm' semantics
111
        h => 3600,
112
        d => 86400,
113
        w => 7 * 86400,
114
        y => 365 * 86400,
115
    );
116
    if (!exists $unit_secs{$unit}) {
117
        die "Invalid time unit '$unit' in -t option; use s,m,h,d,w,y\n";
118
    }
119
    my $shift_seconds = $num * $unit_secs{$unit};
120
    $shift_seconds = -$shift_seconds if $sign eq '-';
121
    $now += $shift_seconds;
54 rodolico 122
    my $ts_display = defined $timeshift ? $timeshift : $opts{t};
123
    logmsg("Simulating current time with shift $ts_display; adjusted now -> " . scalar(localtime($now)) );
48 rodolico 124
}
47 rodolico 125
 
50 rodolico 126
my @candidates; # this will hold the candidates for removal
127
 
128
# Optional command-line parameter: pool name to process (only snapshots from this pool will be considered)
129
my $pool = shift @ARGV;
130
if ($pool) {
131
    logmsg("Filtering snapshots to pool: $pool");
132
}
133
 
134
# if a pool name was given, limit zfs list to that pool and do a recursive list
135
# recursion is done by default if no pool given
136
my $command = $ZFS_CMD . ' list -H -o name -t snapshot' . ($pool ? " -r $pool" : '');
137
 
138
open my $fh, '-|', $command or die "Failed to run $ZFS_CMD: $!";
47 rodolico 139
while (my $snap = <$fh>) {
140
    chomp $snap;
141
    next unless defined $snap && $snap =~ /\S/;
142
 
143
    unless ($snap =~ /@/) {
144
        logmsg("skipping invalid snapshot name: $snap");
145
        next;
146
    }
147
    my ($dataset, $snapname) = split /@/, $snap, 2;
148
 
50 rodolico 149
    # match an optional prefix, a date or date-time stamp, optional text, then TTL
150
    # Examples matched:
151
    #   pool@snapname-2025-12-01-3d
152
    #   pool@prefix_2025-12-01T12:30:00_foo_3h
153
    #   pool@2025-12-01_foo_2w
154
    # allow time separators of ':' or '.' and optional underscore or space before time
155
    unless ($snapname =~ m/.*?(\d{4}-\d{2}-\d{2}(?:[T _]\d{2}[:\.]\d{2}(?:[:\.]\d{2})?)?)(?:.*?)(\d+)([smhdwy])$/) {
156
        logmsg("snapshot does not match ...<date/time>...<N><s|m|h|d|w|y>: $snap");
60 rodolico 157
        # if --unmatched flag is set, mark this snapshot for removal
158
        if ($UNMATCHED) {
159
            push @candidates, $snap;
160
            logmsg("marked for removal (unmatched pattern): $snap") if $VERBOSE;
161
        }
47 rodolico 162
        next;
163
    }
60 rodolico 164
 
165
    # If --unmatched flag is set, skip normal retention processing for matched snapshots
166
    if ($UNMATCHED) {
167
        logmsg("skipping retention check (matched pattern): $snap") if $VERBOSE;
168
        next;
169
    }
170
 
47 rodolico 171
    my ($snap_date, $num, $unit) = ($1, $2, $3);
172
 
50 rodolico 173
    # parse snapshot date/time using Time::Piece; accept several common formats
47 rodolico 174
    my $snap_epoch;
50 rodolico 175
    my @fmts = (
176
        '%Y-%m-%d %H:%M:%S',
177
        '%Y-%m-%dT%H:%M:%S',
178
        '%Y-%m-%d %H:%M',
179
        '%Y-%m-%d_%H.%M.%S',
180
        '%Y-%m-%d_%H.%M',
181
        '%Y-%m-%d',
182
    );
183
    # try to parse using each format until one works
184
    my $parsed = 0;
185
    for my $fmt (@fmts) {
186
        eval {
187
            my $tp = Time::Piece->strptime($snap_date, $fmt);
188
            $snap_epoch = $tp->epoch;
189
            $parsed = 1;
190
            1;
191
        };
192
        last if $parsed;
193
    }
194
    # didn't parse successfully, show message and skip
195
    unless ($parsed) {
196
        logmsg("failed to parse date/time '$snap_date' for $snap");
60 rodolico 197
        # if --unmatched flag is set, mark this snapshot for removal (pattern matched but date parsing failed)
198
        if ($UNMATCHED) {
199
            push @candidates, $snap;
200
            logmsg("marked for removal (date parse failed): $snap") if $VERBOSE;
201
        }
47 rodolico 202
        next;
50 rodolico 203
    }
47 rodolico 204
 
50 rodolico 205
    # compute retention in seconds according to unit (s,m,h,d,w,m,y)
206
    my $retention_seconds;
207
    if ($unit eq 's') { $retention_seconds = $num }
208
    elsif ($unit eq 'm') { $retention_seconds = $num * 30 * 86400 }   # months ~= 30 days
209
    elsif ($unit eq 'h') { $retention_seconds = $num * 3600 }
210
    elsif ($unit eq 'd') { $retention_seconds = $num * 86400 }
211
    elsif ($unit eq 'w') { $retention_seconds = $num * 7 * 86400 }
212
    elsif ($unit eq 'y') { $retention_seconds = $num * 365 * 86400 }
47 rodolico 213
    else { logmsg("unknown unit $unit for $snap"); next }
214
 
50 rodolico 215
   # determine if snapshot is older than retention period
47 rodolico 216
    my $age = $now - $snap_epoch;
50 rodolico 217
    if ($age > $retention_seconds) { # ready for removal
47 rodolico 218
        push @candidates, $snap;
50 rodolico 219
    } else { # keep
220
        my $age_days = $age / 86400;
221
        my $ret_days = $retention_seconds / 86400;
222
        logmsg("keep: $snap (age " . sprintf("%.2f", $age_days) . "d <= " . sprintf("%.2f", $ret_days) . "d)");
47 rodolico 223
    }
224
}
225
close $fh;
226
 
227
if (!@candidates) {
228
    print "No snapshots to remove.\n";
229
    exit 0;
230
}
231
 
232
printf "Snapshots to remove (%d):\n", scalar @candidates;
233
for my $s (@candidates) { printf "  %s\n", $s }
234
 
54 rodolico 235
if (!$FORCE) {
47 rodolico 236
    print "\nDry-run: no snapshots were destroyed. Use -f to actually remove them.\n";
237
    exit 0;
238
}
239
 
50 rodolico 240
# actual removal. If any fail, report error but continue to remove rest
47 rodolico 241
my $failed = 0;
242
for my $s (@candidates) {
243
    printf "Destroying: %s\n", $s;
244
    my $rc = system($ZFS_CMD, 'destroy', $s);
245
    if ($rc != 0) {
246
        printf STDERR "Failed to destroy %s\n", $s;
247
        $failed = 1;
248
    }
249
}
250
 
251
if ($failed) {
252
    print STDERR "One or more destroys failed.\n";
253
    exit 1;
254
}
255
 
256
print "Done.\n";