# NOTE: the following lines are Subversion web-viewer residue captured when
# this file was downloaded; commented out so the file remains valid Perl.
# Subversion Repositories zfs_utils
# Rev
# Rev 20 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

#! /usr/bin/env perl

# replicate
# Author: R. W. Rodolico
# very simple script to replicate a ZFS snapshot to another server.
# no fancy bells and whistles, does not create snapshots, and does
# not prune them. No major error checking either
#
# This is free software, and may be redistributed under the same terms
#
# Copyright (c) 2025, R. W. Rodolico
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice, 
# this list of conditions and the following disclaimer in the documentation 
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# version 1.0.0 20250614 RWR
# Added support for inconsistent child dataset snapshots
# If all child datasets do not have all the snapshots the parent has,
# then we break the job into multiple jobs, one for each dataset


use strict;
use warnings;

use Data::Dumper;
use Getopt::Long;
Getopt::Long::Configure ("bundling");

# define the version number
# see https://metacpan.org/pod/release/JPEACOCK/version-0.97/lib/version.pod
use version 0.77; our $VERSION = version->declare("v1.0.0");


# create our configuration, with some defaults
# these are overridden by command line flags (GetOptions writes directly
# into this hashref) or by positional arguments
my $config = {
   # the source, where we're coming from (dataset or server:dataset)
   'source' => '',
   # the target, where we want to replicate to (dataset or server:dataset)
   'target' => '',
   # regex used to select snapshots; its first capture group becomes the
   # snapshot sort key (compiled later, in getSnaps)
   'filter' => '(\d{4}.\d{2}.\d{2}.\d{2}.\d{2})',
   # if non-zero, just display the commands we'd use, don't run them
   'dryrun' => 0,
   # whether to do all child datasets also (default off; -r enables)
   'recurse' => 0,
   # verbosity level; -v may be repeated to increase it
   'verbose' => 0
   };

# Parses a dataset specification, optionally prefixed with a server name
# ("server:dataset"), into a hashref with 'server' and 'dataset' keys.
# A bare dataset (no colon) yields an empty 'server', meaning localhost.
sub parseDataSet {
   my $spec = shift;
   my ( $first, $rest ) = split( ':', $spec );
   # with no colon present, split() yields a single field and $rest is undef
   return $rest
      ? { 'server' => $first, 'dataset' => $rest  }
      : { 'server' => '',     'dataset' => $first };
}

# Appends each argument as its own line to /tmp/replicate.log.
# Dies if the log file cannot be opened or closed.
sub logit {
   # three-argument open with a lexical filehandle avoids the classic
   # bareword/two-arg open pitfalls (global handle clobbering, mode injection)
   open my $log, '>>', '/tmp/replicate.log'
      or die "Could not open replicate.log: $!\n";
   print {$log} join( "\n", @_ ) . "\n";
   # buffered write errors surface at close, so check it
   close $log or die "Could not close replicate.log: $!\n";
}

# Executes a shell command, merging stderr into the captured stdout.
# Returns (0, $output) on success; otherwise a non-zero status code paired
# with a human-readable description of how the child failed.
sub run {
   my $cmd = shift;
   my $captured = qx/$cmd 2>&1/;
   my $status = $?;
   # qx reports -1 when the command could not be started at all
   return ( -1, "failed to execute: $!" ) if $status == -1;
   # low seven bits carry the terminating signal, bit 8 the coredump flag
   if ( my $signal = $status & 127 ) {
      my $core = ( $status & 128 ) ? 'with' : 'without';
      return ( $status, sprintf( "child died with signal %d, %s coredump", $signal, $core ) );
   }
   # high byte is the exit value; non-zero means failure
   my $exit = $status >> 8;
   return ( $exit, sprintf( "child exited with value %d", $exit ) ) if $exit;
   return ( 0, $captured );
}

# Retrieves all ZFS snapshots (recursively) for the dataset described by
# $config ({server, dataset}), running the listing over ssh when 'server'
# is set. Only snapshots matching $pattern are kept; the pattern's first
# capture group becomes the snapshot's sort 'key' (non-digits stripped).
# Returns a hashref keyed by child-dataset suffix (root dataset removed),
# each entry holding a 'snaps' hash with per-snapshot 'key', 'refer' and
# 'used' metadata. Dies if the zfs/ssh command fails.
sub getSnaps {
   my ($config,$pattern) = @_;
   my %return;
   # actual command to run to get all snapshots, recursively, of the dataset
   my $command = 'zfs list -r -t snap ' . $config->{'dataset'};
   $command = "ssh $config->{server} '$command'" if $config->{'server'};
   my ($error, $output ) = &run( $command );
   die "Error running $command with output [$output]\nMisconfigured Dataset?\n" if $error;
   my @snaps = split( "\n", $output );
   chomp @snaps;
   for (my $i = 0; $i < @snaps; $i++ ) {
      # parse out the space delimited fields (NAME USED AVAIL REFER MOUNTPOINT)
      my ($fullname, $used, $avail, $refer, $mount) = split( /\s+/, $snaps[$i] );
      # break the name into dataset and snapname
      my ($dataset, $snap) = split( '@', $fullname );
      # remove the root dataset name; \Q...\E quotes regex metacharacters so
      # dataset names containing '.', '+', etc. are matched literally
      $dataset =~ s/^\Q$config->{'dataset'}\E//;
      # skip anything not matching our regex (this also skips the zfs header
      # line, whose name contains no '@' and thus leaves $snap undefined)
      next unless $pattern && $snap && $snap =~ m/$pattern/;
      # grab the matched key (first capture group of $pattern)
      $return{$dataset}{'snaps'}{$snap}{'key'} = $1;
      # and remove all non-numerics so keys compare numerically
      $return{$dataset}{'snaps'}{$snap}{'key'} =~ s/[^0-9]//g;
      # get the transfer size
      $return{$dataset}{'snaps'}{$snap}{'refer'} = $refer;
      # get the actual disk space used
      $return{$dataset}{'snaps'}{$snap}{'used'} = $used;
   }
   return \%return;
}

# Calculates the number of bytes that would be transferred for the next
# sync by running a 'zfs send -Pn' dry run, incremental (-I) from the
# target's last snapshot up to the source's last snapshot.
# Returns the size in bytes, -1 when the dry run failed, or 0 when the
# datasets are already in sync.
sub findSize {
   my $config = shift;
   my $src = $config->{'source'};
   my $tgt = $config->{'target'};
   # identical last snapshots on both sides means there is nothing to send
   return 0 if $src->{'lastSnap'} eq $tgt->{'lastSnap'};
   # -P gives parseable output, -n makes it a dry run, -I asks for the
   # incremental stream; -R (recursive) only when requested
   my $flags = ( $config->{'recurse'} ? 'R' : '' ) . 'Pn' . 'I';
   my $command = sprintf( 'zfs send -%s %s@%s %s@%s',
                          $flags,
                          $src->{'dataset'}, $tgt->{'lastSnap'},
                          $src->{'dataset'}, $src->{'lastSnap'} );
   # run the send estimate on the remote machine when the source is remote
   $command = "ssh $src->{server} '$command'" if $src->{'server'};
   print "Checking Size with\n$command\n" if $config->{'verbose'} > 3;
   my ( $error, $output ) = &run( $command );
   return -1 if $error;
   # the size is the second tab-separated column of the last output line
   my $lastLine = ( split( "\n", $output ) )[-1];
   return ( split( "\t", $lastLine ) )[1];
}

# Builds the shell command needed to replicate the ZFS snapshot(s) from
# source to target: an incremental 'zfs send' piped into 'zfs receive',
# with the remote side (at most one) wrapped in ssh, and 'pv' inserted
# into the pipeline when available (for rate limiting and/or a progress
# thermometer). Returns the pipeline string, or the shell comment
# '# Nothing new to sync' when source and target are already in sync.
sub createCommands {
   my $config = shift;
   # check for new snapshots to sync. If they are equal, we are up to date
   if ( $config->{'source'}->{'lastSnap'} ne $config->{'target'}->{'lastSnap'} ) {
      # incremental range: from the snapshot the target already has, up to
      # the newest snapshot on the source
      my $sourceCommand = sprintf( '%s@%s %s@%s', 
                               $config->{'source'}->{'dataset'},
                               $config->{'target'}->{'lastSnap'},
                               $config->{'source'}->{'dataset'},
                               $config->{'source'}->{'lastSnap'}
                           );
      # prepend 'zfs send' and the flags. Note that verbose is only for the one which is local
      $sourceCommand = 'zfs send -' . 
                  ( $config->{'recurse'} ? 'R' : '' ) . # recurse if they asked for it
                  # turn on verbose if they asked for level 2 AND if source is local
                  ( $config->{'verbose'} > 2 && ! $config->{'source'}->{'server'} ? 'v' : '' ) .
                  # this is the part that asks for incremental
                  'I ' .
                  $sourceCommand;
      # wrap the ssh call if this is remote
      $sourceCommand = "ssh $config->{source}->{server} '$sourceCommand'" if  $config->{'source'}->{'server'};
      # Now, build the target command
      my $targetCommand = 'zfs receive ' . 
                          ( ! $config->{'target'}->{'server'} && $config->{'verbose'} > 2 ? '-v ' : '') .
                          $config->{'target'}->{'dataset'};
      $targetCommand = "ssh $config->{target}->{server} '$targetCommand'" if  $config->{'target'}->{'server'};
      # if the command pv is installed, insert it into the pipeline
      if ( `which pv` ) {
         # start from an empty string so the .= below never warns on undef
         my $tags = '';
         # add bandwidth limits, if requested
         $tags = " --si -L $config->{bwlimit} " if $config->{'bwlimit'};
         # if interactive, or if we are in dry run, add the thermometer;
         # 'pv -s' needs the expected byte count, which is only calculated
         # when verbose, so skip the thermometer when it is unavailable
         # rather than emit a malformed 'pv -petrs ' with a missing size
         $tags .= '-petrs ' . $config->{'report'}->{'Bytes Transferred'}
            if ( -t *STDOUT || $config->{'dryrun'} )
               && defined $config->{'report'}->{'Bytes Transferred'};
         $sourceCommand .= " | pv $tags" if $tags;
      }
      # return the full send | receive pipeline
      return $sourceCommand . ' | ' . $targetCommand;
   } else { # source and target are in sync, so do nothing
      return '# Nothing new to sync';
   }
}
   
# Finds the most recent snapshot in a hash of snapshots, where each value
# carries a numeric 'key'. Returns the snapshot name with the largest key,
# or the empty string when the hash is empty.
sub getLastSnapshot {
   my $snapList = shift;
   my ( $bestName, $bestKey ) = ( '', 0 );
   while ( my ( $name, $meta ) = each %$snapList ) {
      ( $bestName, $bestKey ) = ( $name, $meta->{'key'} )
         if $meta->{'key'} > $bestKey;
   }
   return $bestName;
}

# Checks whether every child dataset carries every snapshot the parent has,
# for the given side ('source' or 'target') of $config.
# Returns the full list of datasets (one replication job each) when any
# child is missing a parent snapshot, or just the parent dataset when a
# single recursive job is safe.
sub check_child_snap_consistency {
    my ($config, $side) = @_;
    my $snaps = $config->{$side}->{'snapshots'};
    my @datasets = keys %$snaps;
    # a lone dataset is trivially consistent with itself
    return @datasets if @datasets == 1;

    # the parent sorts first: its root-stripped name is the shortest
    # (typically the empty string)
    my ($parent) = sort @datasets;
    my $parentSnaps = $snaps->{$parent}{'snaps'};

    for my $child (@datasets) {
        next if $child eq $parent;
        for my $snap ( keys %$parentSnaps ) {
            # a single missing snapshot forces per-dataset jobs for everything
            return @datasets unless exists $snaps->{$child}{'snaps'}{$snap};
        }
    }
    # every child has every parent snapshot: one recursive job suffices
    return ($parent);
}

# Calculates the last snapshot for source and target and checks two
# consistency conditions: every child dataset on a side must agree on what
# the newest snapshot is, and the source must still hold the snapshot the
# target last received (required for an incremental send).
# Returns (source_last_snap, target_last_snap, \@warnings); the snapshot
# names are empty strings when any inconsistency was found.
sub calculate {
   my $config = shift;

   my @warnings;
   
   # find the last snapshot date in each dataset, on each side
   foreach my $machine ( 'source', 'target' ) {
      $config->{$machine}->{'last'} = 0; # last snapshot seen across all children
      $config->{$machine}->{'allOk'} = 1; # falsified when children disagree
      foreach my $child ( keys %{ $config->{$machine}->{'snapshots'} } ) {
         $config->{$machine}->{'snapshots'}->{$child}->{'last'} = 
            &getLastSnapshot( $config->{$machine}->{'snapshots'}->{$child}->{'snaps'} );
         # seed the machine-wide last from the first child processed
         $config->{$machine}->{'last'} = $config->{$machine}->{'snapshots'}->{$child}->{'last'} unless $config->{$machine}->{'last'};
         # every child must agree on the newest snapshot
         if ( $config->{$machine}->{'last'} ne $config->{$machine}->{'snapshots'}->{$child}->{'last'} ) {
            $config->{$machine}->{'allOk'} = 0;
            push @warnings, "Warning: $machine does not have consistent snapshots at $child";
         }
      }
   }
   # make sure the source has a corresponding snap for target->last;
   # without it, an incremental send from that snapshot is impossible
   foreach my $child ( keys %{ $config->{'target'}->{'snapshots'} } ) {
      if (! exists ($config->{'source'}->{'snapshots'}->{$child}->{'snaps'}->{$config->{'target'}->{'snapshots'}->{$child}->{'last'}} ) ) {
         $config->{'source'}->{'allOk'} = 0;
         push @warnings, "Warning: We  do not have consistent snapshots";
      }
   }
   if ( $config->{'source'}->{'allOk'} and $config->{'target'}->{'allOk'} ) { # whew, they match
      return( $config->{'source'}->{'last'}, $config->{'target'}->{'last'}, \@warnings );
   } else {
      return( '','',\@warnings);
   }
} # sub calculate

# Prints usage/help message and exits with status 1.
# The message is written as an indented heredoc (terminator '   EOF');
# the indentation is stripped afterwards with the two substitutions below.
sub help {
   use File::Basename;
   # script name without the leading path, for the usage line
   my $me = fileparse( $0 );
   my $helpMessage = <<"   EOF";
      $me [flags] [source [target]]
      Version $VERSION
         Syncs source dataset to target dataset
      
      Parameters (optional)
         source - dataset syncing from
         target - dataset syncing to
         
      Flags
         --source|s  - Alternate way to pass source dataset
         --target|t  - Alternate way to pass target dataset
         --filter|f  - Filter (regex) to limit source snapshots to process
         --dryrun|n  - Only displays command(s) to be run
         --recurse|r - Process dataset and all child datasets
         --verbose|v - increase verbosity of output
         --bwlimit   - Limit the speed of the connect to # bytes/s. KMGT allowed 
         --version|V - display the version number and exit
      
      May use short flags with bundling, ie -nrvv is valid for 
      --dryrun --recurse --verbose --verbose
      
      Either source or target must contain a DNS name or IP address of a remote
      machine, separated from the dataset with a colon, ie
         --source fbsd:storage/mydata
      would use the dataset storage/mydata on the server fbsd. The other dataset
      is assumed to be the local machine
      
      filter is a string which is a valid regular expression. Only snapshots matching
      that string will be used from the source dataset
      
      By default, only error messages are displayed. verbose will display statistics
      on size and transfer time. Twice will give the commands, and three times will 
      display entire output of send/receive (whichever is the local machine)
      
      Example:
         $me -r prod.example.org:pool/mydata -t pool/backup/mydata \
            --bwlimit=5M --filter='(\\d{4}.\\d{2}.\\d{2}.\\d{2}.\\d{2})'

         Would sync pool/mydata and all child datasets on prod.example.org to
         pool/backup/mydata on the local server. Only the snapshots which had a
         datetime stamp matching the --filter rule would be used. The transfer
         would not exceed 5MB/s (40Mb/s) if the pv app was installed
   EOF
   # get rid of indentation: six spaces from the first line, then six
   # spaces after every newline
   $helpMessage =~ s/^      //;
   $helpMessage =~ s/\n      /\n/g;
   print $helpMessage;
   exit 1;
} # help
   

# parse command-line flags directly into the $config hashref;
# 'verbose|v+' increments, so repeated -v raises the verbosity level
GetOptions( $config,
   'source|s=s',
   'target|t=s',
   'filter|f=s',
   'dryrun|n',
   'recurse|r',
   'bwlimit=s',
   'verbose|v+',
   'version|V',
   'help|h'
);

# --help prints usage and exits 1; --version prints the version and exits 0
&help() if $config->{'help'};
if ($config->{'version'}) {
   print "replicate version $VERSION\n" ;
   exit 0;
}
# allow them to use positional, without flags, such as
# replicate source target --filter='regex' -n
$config->{'source'} = shift unless $config->{'source'};
$config->{'target'} = shift unless $config->{'target'};
die "You must enter a source and a target, at a minimum\n" unless $config->{'source'} && $config->{'target'};

# keep track of when we started this run
$config->{'report'}->{'Start Time'} = time;

# WARNING: this converts source and targets from a string to a hash
# '10.0.0.1:data/set' becomes ( 'server' => '10.0.0.1', 'dataset' => 'data/set')
# and 'data/set' becomes ( 'server' => '', 'dataset' => 'data/set')
$config->{'source'} = &parseDataSet( $config->{'source'} );
$config->{'target'} = &parseDataSet( $config->{'target'} );

# both source and target can not have a server portion; one must be local
die "Source and Target can not both be remote\n" if $config->{'source'}->{'server'} && $config->{'target'}->{'server'};

# connect to servers and get all existing snapshots
$config->{'target'}->{'snapshots'} = &getSnaps( $config->{'target'}, $config->{'filter'} );
$config->{'source'}->{'snapshots'} = &getSnaps( $config->{'source'}, $config->{'filter'} );

# Check for child dataset snapshot consistency on source and target
my @source_jobs = check_child_snap_consistency($config, 'source');
my @target_jobs = check_child_snap_consistency($config, 'target');

# If either side has inconsistencies, break into per-dataset jobs;
# the hash de-duplicates datasets named by both sides
my %all_jobs;
$all_jobs{$_}++ for (@source_jobs, @target_jobs);

foreach my $dataset (sort keys %all_jobs) {
    # Prepare a config for this dataset.
    # NOTE(review): %$config is a shallow copy, so nested hashrefs
    # (including 'report') stay shared between %job_config and $config;
    # each job therefore overwrites, not accumulates, the previous job's
    # 'Bytes Transferred' in the final report — confirm this is intended.
    my %job_config = %$config;
    $job_config{'source'} = { %{$config->{'source'}}, 'dataset' => $config->{'source'}{'dataset'} . $dataset, 'snapshots' => { $dataset => $config->{'source'}{'snapshots'}{$dataset} } };
    $job_config{'target'} = { %{$config->{'target'}}, 'dataset' => $config->{'target'}{'dataset'} . $dataset, 'snapshots' => { $dataset => $config->{'target'}{'snapshots'}{$dataset} } };

    # NOTE(review): calculate() also returns a warnings arrayref as its
    # third value, which is discarded here — those warnings are never shown
    ( $job_config{'source'}{'lastSnap'}, $job_config{'target'}{'lastSnap'} ) = &calculate( \%job_config );
    # size the transfer only when verbose (used for stats and pv's -s flag)
    $job_config{'report'}{'Bytes Transferred'} = &findSize( \%job_config ) if $config->{'verbose'};
    my $commands = &createCommands( \%job_config );
    print "$commands\n" if $config->{'verbose'} > 1 or $config->{'dryrun'};
    if ( $config->{'dryrun'} ) {
        print "Dry Run for $dataset\n";
    } else {
        # only execute real commands; the '# Nothing new to sync' marker
        # starts with '#' and so fails the leading-letter match
        print qx/$commands/ if $commands =~ m/^[a-zA-Z]/;
    }
}

# final statistics, printed only when verbose
$config->{'report'}->{'End Time'} = time;
$config->{'report'}->{'Elapsed Time'} = $config->{'report'}->{'End Time'} - $config->{'report'}->{'Start Time'};
if ( $config->{'verbose'}  ) {
   if ( $config->{'dryrun'} ) {
      print "Would have transferred $config->{'report'}->{'Bytes Transferred'} bytes\n";
   } elsif ( $config->{'report'}->{'Bytes Transferred'} ) {
      print "bytes\t$config->{'report'}->{'Bytes Transferred'}\nseconds\t$config->{'report'}->{'Elapsed Time'}\n";
   } else {
      print "Nothing to do, datasets up to date\n";
   }
}
1;