| 96 | rodolico | 1 | #! /usr/bin/env perl
 | 
        
           |  |  | 2 |   | 
        
           | 98 | rodolico | 3 | #    snapShot: Manage ZFS snapshots
 | 
        
           |  |  | 4 | #    see http://wiki.linuxservertech.com for additional information
 | 
        
           |  |  | 5 | #    Copyright (C) 2022  R. W. Rodolico
 | 
        
           |  |  | 6 | #
 | 
        
           | 107 | rodolico | 7 | #    version 1.0, 20220423 RWR
 | 
        
           | 98 | rodolico | 8 | #       Initial Release
 | 
        
           |  |  | 9 | #
 | 
        
           | 107 | rodolico | 10 | #    version 1.0.1 20220430 RWR
 | 
        
           |  |  | 11 | #       Removed some debugging, set so it will always log the actions to /tmp/snapShot
 | 
        
           | 98 | rodolico | 12 | #
 | 
        
           |  |  | 13 | #    This program is free software: you can redistribute it and/or modify
 | 
        
           |  |  | 14 | #    it under the terms of the GNU General Public License as published by
 | 
        
           |  |  | 15 | #    the Free Software Foundation, either version 3 of the License, or
 | 
        
           |  |  | 16 | #    (at your option) any later version.
 | 
        
           |  |  | 17 | #
 | 
        
           |  |  | 18 | #    This program is distributed in the hope that it will be useful,
 | 
        
           |  |  | 19 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
        
           |  |  | 20 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
        
           |  |  | 21 | #    GNU General Public License for more details.
 | 
        
           |  |  | 22 | #
 | 
        
           |  |  | 23 | #    You should have received a copy of the GNU General Public License
 | 
        
           |  |  | 24 | #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
        
           |  |  | 25 | #
 | 
        
           | 103 | rodolico | 26 | # Warning, this script requires non-standard Perl modules YAML::Tiny and Hash::Merge
 | 
        
           | 98 | rodolico | 27 | # Under Debian:  apt install libyaml-tiny-perl libhash-merge-simple-perl
 | 
        
           |  |  | 28 | # Under FreeBSD: cpan -i Hash::Merge::Simple YAML::Tiny
 | 
        
           |  |  | 29 |   | 
        
           |  |  | 30 |   | 
        
           | 96 | rodolico | 31 | use strict;
 | 
        
           |  |  | 32 | use warnings;
 | 
        
           |  |  | 33 |   | 
        
           | 107 | rodolico | 34 | use version; our $VERSION = version->declare( 'v1.0.1');
 | 
        
           | 96 | rodolico | 35 | use Data::Dumper;
 | 
        
           |  |  | 36 | use Time::Local;
 | 
        
           |  |  | 37 | use POSIX qw(strftime);
 | 
        
           |  |  | 38 | use YAML::Tiny; # apt-get libyaml-tiny-perl under debian, BSD Systems: cpan -i YAML::Tiny
 | 
        
           | 98 | rodolico | 39 | use Hash::Merge::Simple qw/ merge clone_merge /; # apt install libhash-merge-simple-perl or cpan -i Hash::Merge::Simple
 | 
        
           | 96 | rodolico | 40 |   | 
        
           |  |  | 41 |   | 
        
           |  |  | 42 | # globals
 | 
        
           |  |  | 43 | my $CONFIG_FILE_NAME = 'snapShot.yaml';
 | 
        
           |  |  | 44 |   | 
        
           |  |  | 45 | # This will be read in from snapShot.yaml
 | 
        
           |  |  | 46 | my $config;
 | 
        
           |  |  | 47 |   | 
        
           |  |  | 48 | #
 | 
        
           |  |  | 49 | # find where the script is actually located as cfg should be there
 | 
        
           |  |  | 50 | #
 | 
        
           |  |  | 51 | sub getScriptLocation {
 | 
        
           |  |  | 52 |    use strict;
 | 
        
           |  |  | 53 |    use File::Spec::Functions qw(rel2abs);
 | 
        
           |  |  | 54 |    use File::Basename;
 | 
        
           |  |  | 55 |    return dirname(rel2abs($0));
 | 
        
           |  |  | 56 | }
 | 
        
           |  |  | 57 |   | 
        
           |  |  | 58 | #
 | 
        
           |  |  | 59 | # Read the configuration file from current location 
 | 
        
           |  |  | 60 | # and merge it into $config (returns 1 if the file was found, 0 otherwise)
 | 
        
           |  |  | 61 | #
 | 
        
           |  |  | 62 | sub readConfig {
 | 
        
           |  |  | 63 |    my $scriptLocation = &getScriptLocation();
 | 
        
           |  |  | 64 |    if ( -e "$scriptLocation/$CONFIG_FILE_NAME" ) {
 | 
        
           |  |  | 65 |       my $yaml = YAML::Tiny->read( "$scriptLocation/$CONFIG_FILE_NAME" );
 | 
        
           |  |  | 66 |       # use clone_merge to merge conf file into $config
 | 
        
           |  |  | 67 |       # overwrites anything in $config if it exists in the config file
 | 
        
           |  |  | 68 |       $config = clone_merge( $config, $yaml->[0] );
 | 
        
           |  |  | 69 |       return 1;
 | 
        
           |  |  | 70 |    }
 | 
        
           |  |  | 71 |    return 0;
 | 
        
           |  |  | 72 | }
 | 
        
           |  |  | 73 |   | 
        
           |  |  | 74 |   | 
        
           |  |  | 75 | # parse one single line from the output of `zfs list [-t snapshot]`
 | 
        
           |  |  | 76 | sub parseListing {
 | 
        
           |  |  | 77 |    my ($line,$keys) = @_;
 | 
        
           |  |  | 78 |    chomp $line;
 | 
        
           |  |  | 79 |    my %values;
 | 
        
           |  |  | 80 |    @values{@$keys} = split( /\s+/, $line );
 | 
        
           |  |  | 81 |    return \%values;
 | 
        
           |  |  | 82 | }      
 | 
        
           |  |  | 83 |   | 
        
           |  |  | 84 |   | 
        
           |  |  | 85 | # this will parse the date out of the snapshots and put the values into
 | 
        
           |  |  | 86 | # the hash {'date'}
 | 
        
           |  |  | 87 | sub parseSnapshots {
 | 
        
           |  |  | 88 |    my ( $snapShots, $config) = @_;
 | 
        
           |  |  | 89 |    my $keys = $config->{'snapshot'}->{'parseFields'};
 | 
        
           |  |  | 90 |    foreach my $snapShot ( keys %$snapShots ) {
 | 
        
           |  |  | 91 |       my %temp;
 | 
        
           |  |  | 92 |       # run the regex, capture the output to an array, then populate the hash %temp
 | 
        
           |  |  | 93 |       # using the regex results as the values, and $keys as the keys
 | 
        
           |  |  | 94 |       @temp{@$keys} = ( $snapShot =~ m/$config->{'snapshot'}->{'parse'}/ );
 | 
        
           |  |  | 95 |       # while we're here, calculate the unix time (epoch). NOTE: month is 0 based
 | 
        
           |  |  | 96 |       $temp{'unix'} = timelocal( 0,$temp{'minute'},$temp{'hour'},$temp{'day'},$temp{'month'}-1,$temp{'year'} );
 | 
        
           |  |  | 97 |       # put this into our record
 | 
        
           |  |  | 98 |       $snapShots->{$snapShot}->{'date'} = \%temp;
 | 
        
           |  |  | 99 |    }
 | 
        
           |  |  | 100 | }
 | 
        
           |  |  | 101 |   | 
        
           |  |  | 102 | # run $command, then parse its output and return the results as a hashref
 | 
        
           | 103 | rodolico | 103 | # $command is one of zfs list or zfs list -t snapshot
 | 
        
           |  |  | 104 | # In other words, get all datasets/volumes or get all snapshots
 | 
        
           | 96 | rodolico | 105 | sub getListing {
 | 
        
           |  |  | 106 |    my ($configuration, $regex, $command )  = @_;
 | 
        
           |  |  | 107 |    my %dataSets;
 | 
        
           |  |  | 108 |   | 
        
           | 103 | rodolico | 109 |    # get all datasets/volumes or snapshots
 | 
        
           | 96 | rodolico | 110 |    my @zfsList = `$command`;
 | 
        
           |  |  | 111 |    foreach my $thisSet ( @zfsList ) {
 | 
        
           | 103 | rodolico | 112 |       # parse the line into its portions. The only one we use right now is name
 | 
        
           | 96 | rodolico | 113 |       my $temp = &parseListing( $thisSet, $configuration->{'listingKeys'} );
 | 
        
           | 103 | rodolico | 114 |       if (  $temp->{'name'} =~ m/^($regex)$/ ) { # it matches the regex we're using, so save it
 | 
        
           | 96 | rodolico | 115 |          $dataSets{$temp->{'name'}} = $temp;
 | 
        
           |  |  | 116 |       }
 | 
        
           |  |  | 117 |    }
 | 
        
           | 103 | rodolico | 118 |    return \%dataSets; # return all entries we are looking for
 | 
        
           | 96 | rodolico | 119 | }
 | 
        
           |  |  | 120 |   | 
        
           |  |  | 121 | # will convert something like 1 day to the number of seconds (86400) for math.
 | 
        
           |  |  | 122 | # month and year are approximations (30.5 day = a month, 365.2425 days is a year)
 | 
        
           | 101 | rodolico | 123 | # For month and year, use the int function to convert back to integer
 | 
        
           | 96 | rodolico | 124 | sub period2seconds {
 | 
        
           |  |  | 125 |    my ($count, $unit) = ( shift =~ m/\s*(\d+)\s*([a-z]+)\s*/i );
 | 
        
           |  |  | 126 |    $unit = lc $unit;
 | 
        
           | 97 | rodolico | 127 |    if ( $unit eq 'hour' ) {
 | 
        
           | 96 | rodolico | 128 |       $count *= 3600;
 | 
        
           |  |  | 129 |    } elsif ( $unit eq 'day' ) {
 | 
        
           |  |  | 130 |       $count *= 86400;
 | 
        
           |  |  | 131 |    } elsif ( $unit eq 'week' ) {
 | 
        
           |  |  | 132 |       $count *= 864000 * 7;
 | 
        
           |  |  | 133 |    } elsif ( $unit eq 'month' ) {
 | 
        
           | 101 | rodolico | 134 |       $count *= int( 864000 * 30.5 );
 | 
        
           | 96 | rodolico | 135 |    } elsif ( $unit eq 'year' ) {
 | 
        
           | 101 | rodolico | 136 |       $count *= int( 86400 * 365.2425 );
 | 
        
           | 96 | rodolico | 137 |    } else {
 | 
        
           |  |  | 138 |       die "Unknown units [$unit] in period2seconds\n";
 | 
        
           |  |  | 139 |    }
 | 
        
           |  |  | 140 |    return $count;
 | 
        
           |  |  | 141 | }
 | 
        
           |  |  | 142 |   | 
        
           |  |  | 143 | # Merges datasets, snapshots and some stuff from the configuration into the datasets
 | 
        
           | 103 | rodolico | 144 | # hash. After this, $config and $snapshots should no longer be necessary
 | 
        
           | 97 | rodolico | 145 | sub mergeData {
 | 
        
           | 96 | rodolico | 146 |    my ($datasets,$snapshots,$config) = @_;
 | 
        
           |  |  | 147 |    my $confKeys = $config->{'datasets'};
 | 
        
           |  |  | 148 |    foreach my $thisDataset ( keys %$datasets ) {
 | 
        
           | 103 | rodolico | 149 |       # go through each configuration entry and see if we match the current dataset
 | 
        
           | 96 | rodolico | 150 |       foreach my $conf (keys %$confKeys ) {
 | 
        
           | 103 | rodolico | 151 |          if ( $thisDataset =~ m/^$conf$/ ) { # found it, so store the configuration values into the dataset
 | 
        
           | 96 | rodolico | 152 |             $datasets->{$thisDataset}->{'recursive'} = $confKeys->{$conf}->{'recursive'};
 | 
        
           |  |  | 153 |             $datasets->{$thisDataset}->{'frequency'} = &period2seconds( $confKeys->{$conf}->{'frequency'} );
 | 
        
           |  |  | 154 |             $datasets->{$thisDataset}->{'retention'} = &period2seconds( $confKeys->{$conf}->{'retention'} );
 | 
        
           | 103 | rodolico | 155 |             last; # there is only one, so no need to process any more for this configuration key
 | 
        
           | 96 | rodolico | 156 |          } # if
 | 
        
           |  |  | 157 |       } # foreach
 | 
        
           | 103 | rodolico | 158 |       # do the same for the snapshots we found; bind them to the data set
 | 
        
           | 96 | rodolico | 159 |       foreach my $snapshot ( keys %$snapshots ) {
 | 
        
           |  |  | 160 |          if ( $snapshot =~ m/^$thisDataset@/ ) { # this is a match
 | 
        
           |  |  | 161 |             # copy the snapshot into the dataset
 | 
        
           |  |  | 162 |             $datasets->{$thisDataset}->{'snapshots'}->{$snapshot} = $snapshots->{$snapshot};
 | 
        
           | 103 | rodolico | 163 |             # track the latest snapshot (we use this to decide whether it is time to add a new one)
 | 
        
           | 96 | rodolico | 164 |             $datasets->{$thisDataset}->{'lastSnap'} = $snapshots->{$snapshot}->{'date'}->{'unix'}
 | 
        
           |  |  | 165 |                if ! defined( $datasets->{$thisDataset}->{'lastSnap'} ) || $datasets->{$thisDataset}->{'lastSnap'} < $snapshots->{$snapshot}->{'date'}->{'unix'};
 | 
        
           | 103 | rodolico | 166 |             # delete the snapshot, to free up memory
 | 
        
           | 96 | rodolico | 167 |             delete $snapshots->{$snapshot};
 | 
        
           |  |  | 168 |          } # if
 | 
        
           |  |  | 169 |       } # foreach
 | 
        
           |  |  | 170 |    } # foreach
 | 
        
           | 97 | rodolico | 171 | } # sub mergeData
 | 
        
           | 96 | rodolico | 172 |   | 
        
           | 103 | rodolico | 173 |   | 
        
           |  |  | 174 | # check to see if a particular snapshot is ready to be destroyed, ie right now is greater than the retention period
 | 
        
           |  |  | 175 | # if $recursive is true, add the '-r' to the command to do a recursive destroy
 | 
        
           | 96 | rodolico | 176 | sub checkRetention {
 | 
        
           |  |  | 177 |    my ( $retentionPeriod, $recursive, $snapshots, $now ) = @_;
 | 
        
           | 103 | rodolico | 178 |    my @toDelete; # an array of destroy commands
 | 
        
           | 96 | rodolico | 179 |    foreach my $thisSnapshot ( keys %$snapshots ) {
 | 
        
           |  |  | 180 |       # print "checking $thisSnapshot\n\tNow: $now\n\tDate: $snapshots->{$thisSnapshot}->{date}->{unix}\n\tRetention: $retentionPeriod\n\n";
 | 
        
           | 103 | rodolico | 181 |       if ( $now - $snapshots->{$thisSnapshot}->{'date'}->{'unix'} > $retentionPeriod ) { # it is too old
 | 
        
           |  |  | 182 |          push ( @toDelete, ( 'zfs destroy ' . ($recursive ? '-r ' : '') . $thisSnapshot ) ); # list it to be destroyed
 | 
        
           | 96 | rodolico | 183 |       }
 | 
        
           |  |  | 184 |    }
 | 
        
           | 103 | rodolico | 185 |    return @toDelete; # just return the list of destroy commands to be executed
 | 
        
           | 96 | rodolico | 186 | }   
 | 
        
           |  |  | 187 |   | 
        
           | 103 | rodolico | 188 |   | 
        
           |  |  | 189 | # just return the command to create a new snapshot. Very simple, but I wanted the code to be isolated in case something needed
 | 
        
           |  |  | 190 | # to change. Basically, zfs snapshot [-r] datasetname@template
 | 
        
           | 96 | rodolico | 191 | sub makeSnapshot {
 | 
        
           |  |  | 192 |    my ( $datasetName, $recursive, $snapshotName ) = @_;
 | 
        
           |  |  | 193 |    return 
 | 
        
           |  |  | 194 |       'zfs snapshot ' . 
 | 
        
           |  |  | 195 |       ($recursive ? '-r ' : '') . 
 | 
        
           |  |  | 196 |       $datasetName . $snapshotName;
 | 
        
           |  |  | 197 | }
 | 
        
           |  |  | 198 |   | 
        
           | 103 | rodolico | 199 | # this is the biggie; everything leads to here. We will take every dataset/volume we found, and decide whether some old snapshots
 | 
        
           |  |  | 200 | # need to be destroyed, and whether a new snapshot needs to be created.
 | 
        
           | 96 | rodolico | 201 | sub process {
 | 
        
           |  |  | 202 |    my ( $datasets, $now, $snapshotName, $slop ) = @_;
 | 
        
           | 103 | rodolico | 203 |    my @toDelete; # will hold all the destroy commands
 | 
        
           |  |  | 204 |    my @toAdd; # will hold all the create commands
 | 
        
           | 96 | rodolico | 205 |   | 
        
           | 103 | rodolico | 206 |    foreach my $thisDataset ( keys %$datasets ) { # Look at each dataset/volume in turn
 | 
        
           |  |  | 207 |       # if any snapshots need to be destroyed, add them to @toDelete
 | 
        
           | 96 | rodolico | 208 |       push( @toDelete, 
 | 
        
           |  |  | 209 |          &checkRetention( 
 | 
        
           |  |  | 210 |          $datasets->{$thisDataset}->{'retention'}, 
 | 
        
           |  |  | 211 |          $datasets->{$thisDataset}->{'recursive'}, 
 | 
        
           |  |  | 212 |          $datasets->{$thisDataset}->{'snapshots'}, 
 | 
        
           |  |  | 213 |          $now )
 | 
        
           |  |  | 214 |          );
 | 
        
           | 103 | rodolico | 215 |       # if it is time to add a new snapshot, add it to @toAdd
 | 
        
           | 102 | rodolico | 216 |       if ( $datasets->{$thisDataset}->{'lastSnap'} + $datasets->{$thisDataset}->{'frequency'} - $slop < $now ) {
 | 
        
           | 96 | rodolico | 217 |          push @toAdd, &makeSnapshot( $thisDataset, $datasets->{$thisDataset}->{'recursive'}, $snapshotName )
 | 
        
           |  |  | 218 |       }
 | 
        
           |  |  | 219 |    }
 | 
        
           | 103 | rodolico | 220 |    # return the actions, deletions first, adds second (executed in that order)
 | 
        
           | 96 | rodolico | 221 |    return ( @toDelete, @toAdd );
 | 
        
           |  |  | 222 | }   
 | 
        
           |  |  | 223 |   | 
        
           | 103 | rodolico | 224 | # Run 0 or more commands
 | 
        
           |  |  | 225 | # the first thing on the stack is a flag for testing
 | 
        
           |  |  | 226 | # everything after that is an ordered list of commands to be executed.
 | 
        
           |  |  | 227 | # If any command fails, all subsequent commands abort
 | 
        
           | 96 | rodolico | 228 | sub run {
 | 
        
           |  |  | 229 |    my $testing = shift;
 | 
        
           | 103 | rodolico | 230 |    return 0 unless @_; # bail if there are no commands to run
 | 
        
           | 107 | rodolico | 231 |    # dump the run to /tmp so the user can see the last one written
 | 
        
           |  |  | 232 |    open LOG, ">/tmp/snapShot" or die "could not write to /tmp/snapShot: $!\n";
 | 
        
           |  |  | 233 |    print LOG join( "\n", @_ ) . "\n";
 | 
        
           |  |  | 234 |    close LOG;
 | 
        
           |  |  | 235 |    unless ( $testing ) { # run the commands if we're not testing
 | 
        
           | 103 | rodolico | 236 |       my $out; # capture all output
 | 
        
           |  |  | 237 |       while ( my $command = shift ) { # for each command on the stack
 | 
        
           |  |  | 238 |          $out .= `$command` . "\n"; # add it to $out
 | 
        
           |  |  | 239 |          if ( $? ) { # we had an error, add debugging text, the end program
 | 
        
           | 96 | rodolico | 240 |             $out .= "Error executing command\n\t$command\n\t";
 | 
        
           |  |  | 241 |             if ($? == -1) {
 | 
        
           |  |  | 242 |                 $out .= "failed to execute $command: $!";
 | 
        
           |  |  | 243 |             } elsif ($? & 127) {
 | 
        
           |  |  | 244 |                 $out .= sprintf( "child died with signal %d, %s coredump", ($? & 127),  ($? & 128) ? 'with' : 'without' );
 | 
        
           |  |  | 245 |             } else {
 | 
        
           |  |  | 246 |                 $out .= sprintf( "child exited with value %d", $? >> 8 );
 | 
        
           |  |  | 247 |             }
 | 
        
           |  |  | 248 |             $out .= "\n";
 | 
        
           |  |  | 249 |             return $out;
 | 
        
           |  |  | 250 |          }
 | 
        
           |  |  | 251 |       }
 | 
        
           |  |  | 252 |    }
 | 
        
           | 103 | rodolico | 253 |    return 0; # we succeeded
 | 
        
           | 96 | rodolico | 254 | }
 | 
        
           |  |  | 255 |   | 
        
           | 99 | rodolico | 256 | &readConfig() or die "Could not read config file: $!\n";
 | 
        
           | 97 | rodolico | 257 |   | 
        
           | 103 | rodolico | 258 | # we're pre-calculating some things so we don't do it over and over for each entry
 | 
        
           | 96 | rodolico | 259 | # grab the time once
 | 
        
           |  |  | 260 | my $now = time;
 | 
        
           |  |  | 261 | # create the string to be used for all snapshots, using $now and the template provided
 | 
        
           |  |  | 262 | my $snapshotName = '@' . strftime($config->{'snapshot'}->{'template'},localtime $now);
 | 
        
           | 103 | rodolico | 263 | # Create the dataset regex by joing all of the regexes defined.
 | 
        
           | 96 | rodolico | 264 | $config->{'dataset_regex'} = '(' . join( ')|(', keys %{ $config->{'datasets'} }  ) . ')' unless $config->{'dataset_regex'};
 | 
        
           |  |  | 265 | #print $config{'dataset_regex'} . "\n";
 | 
        
           |  |  | 266 | $config->{'snapshot_regex'} = '(' . $config->{'dataset_regex'} . ')@' . $config->{'snapshot'}->{'parse'};
 | 
        
           |  |  | 267 | #print $config->{'snapshot_regex'} . "\n\n";
 | 
        
           |  |  | 268 |   | 
        
           |  |  | 269 | #die Dumper( $config ) . "\n";   
 | 
        
           |  |  | 270 | # first, find all datasets which match our keys
 | 
        
           |  |  | 271 | my $dataSets = &getListing( $config, $config->{'dataset_regex'}, 'zfs list'  );
 | 
        
           |  |  | 272 | # and, find all snapshots that match
 | 
        
           |  |  | 273 | my $snapshots = &getListing( $config, $config->{'snapshot_regex'}, 'zfs list -t snapshot'  );
 | 
        
           |  |  | 274 | # get the date/time of the snapshots and store them in the hash
 | 
        
           |  |  | 275 | &parseSnapshots($snapshots, $config );
 | 
        
           | 97 | rodolico | 276 | # mergeData the snapshots into the datasets for convenience
 | 
        
           |  |  | 277 | &mergeData( $dataSets, $snapshots, $config );
 | 
        
           | 96 | rodolico | 278 | # Now, let's do the actual processing
 | 
        
           |  |  | 279 | my @commands  = &process( $dataSets, $now, $snapshotName, &period2seconds( $config->{'slop'} ) );
 | 
        
           | 99 | rodolico | 280 | #print join ( "\n", @commands ) . "\n";
 | 
        
           | 96 | rodolico | 281 | my $errors;
 | 
        
           | 99 | rodolico | 282 | print "Error: $errors\n" if $errors = &run( $config->{'TESTING'}, @commands );
 | 
        
           | 96 | rodolico | 283 |   | 
        
           | 99 | rodolico | 284 | # print Dumper( $dataSets );
 | 
        
           | 96 | rodolico | 285 | #print Dumper( $snapshots );
 | 
        
           |  |  | 286 |   | 
        
           |  |  | 287 | #print join ("\n", sort keys( %$dataSets ) ) . "\n\n";
 | 
        
           |  |  | 288 | #print join( "\n", sort keys( %$snapshots ) ) . "\n";
 | 
        
           |  |  | 289 |   | 
        
           |  |  | 290 | 1;
 |