Subversion Repositories zfs_utils

Rev

Rev 7 | Rev 9 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 rodolico 1
#! /usr/bin/env perl
2
 
4 rodolico 3
# very simple script to replicate a ZFS snapshot to another server.
4
# no fancy bells and whistles, does not create snapshots, and does
5
# not prune them. No major error checking either
6
 
2 rodolico 7
use strict;
8
use warnings;
9
 
10
use Data::Dumper;
4 rodolico 11
use Getopt::Long;
12
Getopt::Long::Configure ("bundling");
2 rodolico 13
 
4 rodolico 14
# Default configuration. Every value here may be replaced from the command
# line (see the GetOptions call further down).
my $config = {
   # where we replicate from, given as '[server:]dataset'
   source  => '',
   # where we replicate to, given as '[server:]dataset'
   target  => '',
   # regular expression selecting the snapshots which take part. The first
   # capture group, stripped of non-digits, is the key snapshots are ordered by
   filter  => '(\d{4}.\d{2}.\d{2}.\d{2}.\d{2})',
   # if non-zero, only display the commands we would use, do not run them
   dryrun  => 0,
   # if non-zero, child datasets are sent as well (off unless requested)
   recurse => 0,
   # verbosity level; each -v on the command line adds one
   verbose => 0,
};
30
 
31
# Splits a '[server:]dataset' specification into its two parts.
#
# Parameter: the specification string, e.g. '10.0.0.1:data/set' or 'data/set'
# Returns:   a hash reference with the keys 'server' and 'dataset'. 'server'
#            is the empty string when no server part was given, i.e. the
#            dataset is on the local machine.
sub parseDataSet {
   my $data = shift;
   # the limit of 2 means only the first colon separates server from dataset;
   # any further colon stays in the dataset name instead of being dropped
   my ( $server, $dataset ) = split( ':', $data, 2 );
   # nothing (usable) after a colon, so the whole string is a local dataset
   return { 'server' => '', 'dataset' => $server } unless $dataset;
   # they passed server:dataset
   return { 'server' => $server, 'dataset' => $dataset };
}
44
 
45
# Executes $command through qx, with stderr redirected into stdout.
# Returns a two element list:
#    ( 0, captured output )             the command exited with status 0
#    ( -1, "failed to execute: ..." )   the command could not be started
#    ( $?, "child died with ..." )      the command was killed by a signal
#    ( exit code, "child exited ..." )  the command exited with non-zero status
# On any failure the captured output is discarded; only the description of
# the failure is returned.
sub run {
   my ($command) = @_;
   my $captured = qx/$command 2>&1/;
   my $status = $?;

   return ( -1, "failed to execute: $!" ) if $status == -1;

   # the low seven bits hold the signal number, bit eight the core dump flag
   my $signal = $status & 127;
   if ( $signal ) {
      my $core = ( $status & 128 ) ? 'with' : 'without';
      return ( $status, sprintf( "child died with signal %d, %s coredump", $signal, $core ) );
   }

   # the exit code proper lives in the high byte
   my $exitCode = $status >> 8;
   return ( $exitCode, sprintf( "child exited with value %d", $exitCode ) ) if $exitCode;

   return ( 0, $captured );
}
61
 
62
 
63
# Lists the snapshots of a dataset and of everything below it, keeping only
# the snapshots whose name matches $pattern.
#
# Parameters:
#    $config  - hash reference with the keys 'server' (empty for the local
#               machine) and 'dataset'
#    $pattern - regular expression applied to the snapshot name (the part
#               after the @). Capture group 1, or the whole match when the
#               pattern has no capture group, becomes the sort key once all
#               non-digits are removed.
# Returns a hash reference of the form
#    { child => { 'snaps' => { snapname => { 'key', 'refer', 'used' } } } }
# where child is the dataset name with the root dataset removed, so the root
# itself is the empty string. If the listing can not be retrieved a warning
# is printed and an empty hash reference is returned.
sub getSnaps {
   my ($config,$pattern) = @_;
   my %return;
   # actual command to run to get all snapshots, recursively, of the dataset
   my $command = 'zfs list -r -t snap ' . $config->{'dataset'};
   $command = "ssh $config->{server} '$command'" if $config->{'server'};
   my ( $error, $output ) = run( $command );
   if ( $error ) {
      # on failure $output describes the error; it is not a listing, so do
      # not try to parse it
      warn "Warning: unable to list snapshots using '$command': $output\n";
      return \%return;
   }
   # without a pattern nothing can match
   return \%return unless $pattern;

   # the root dataset name is data, not a regular expression
   my $root = quotemeta $config->{'dataset'};
   foreach my $line ( split( "\n", $output ) ) {
      # parse out the space delimited fields: name, used, avail, refer, mount
      my ( $fullname, $used, undef, $refer ) = split( /\s+/, $line );
      # the header and blank lines do not name a snapshot
      next unless defined( $fullname ) && index( $fullname, '@' ) >= 0;
      # break the name into dataset and snapname
      my ( $dataset, $snap ) = split( '@', $fullname, 2 );
      # skip anything not matching our regex
      next unless $snap && $snap =~ m/$pattern/;
      # grab the matched key and remove all non-numerics
      my $key = defined $1 ? $1 : $&;
      $key =~ s/[^0-9]//g;
      # remove the root dataset name
      $dataset =~ s/^$root//;
      $return{$dataset}{'snaps'}{$snap} = {
         'key'   => $key,
         'refer' => $refer, # the REFER column of the listing
         'used'  => $used   # the USED column of the listing
      };
   }
   return \%return;
}
94
 
6 rodolico 95
# Gets the number of bytes we will be syncing, by asking zfs send for a dry
# run (-n) with parsable output (-P) of the same incremental stream the real
# replication sends.
#
# Parameter: the configuration hash reference; the 'dataset', 'server' and
#            'lastSnap' entries of source, the 'lastSnap' entry of target,
#            plus 'recurse' and 'verbose' are read.
# Returns:   0 when source and target already have the same last snapshot,
#            -1 when the command fails or its output holds no size,
#            otherwise the size reported by zfs send.
sub findSize {
   my $config = shift;
   my $source = $config->{'source'};
   my $target = $config->{'target'};

   # If the last snapshots are equal, we are up to date; nothing to sync
   return 0 if $source->{'lastSnap'} eq $target->{'lastSnap'};

   # R recurses if they asked for it, P and n give a parsable dry run and I
   # asks for the incremental stream from target's last to source's last
   my $sourceCommand = sprintf( 'zfs send -%sPnI %s@%s %s@%s',
                                ( $config->{'recurse'} ? 'R' : '' ),
                                $source->{'dataset'}, $target->{'lastSnap'},
                                $source->{'dataset'}, $source->{'lastSnap'} );
   # wrap the ssh call if this is remote
   $sourceCommand = "ssh $source->{'server'} '$sourceCommand'" if $source->{'server'};
   print "Checking Size with\n$sourceCommand\n" if $config->{'verbose'} > 2;

   my ( $error, $output ) = run( $sourceCommand );
   return -1 if $error;

   # the size is the second column (tab separated) of the last line
   my @lines = split( "\n", $output );
   return -1 unless @lines;
   my $size = ( split( "\t", $lines[-1] ) )[1];
   # anything which is not a plain number means we could not find the size
   return -1 unless defined( $size ) && $size =~ m/^[0-9]+\z/;
   return $size;
}
2 rodolico 133
 
6 rodolico 134
# Creates the command necessary to do the replication, a shell pipeline of
# the form
#    zfs send ... [| pv ...] | zfs receive ...
# where either end is wrapped in an ssh call when it has a server.
#
# Parameter: the configuration hash reference; source and target (with
#            'server', 'dataset' and 'lastSnap'), 'recurse', 'verbose',
#            'dryrun', 'bwlimit' and report->'Bytes Transferred' are read.
# Returns:   the pipeline as a string, or the comment '# Nothing new to sync'
#            when source and target have the same last snapshot.
sub createCommands {
   my $config = shift;
   my $source = $config->{'source'};
   my $target = $config->{'target'};

   # If the last snapshots are equal, we are up to date, so do nothing
   return '# Nothing new to sync' if $source->{'lastSnap'} eq $target->{'lastSnap'};

   # verbose level 2 shows the zfs output, but only for the local side
   my $chatty = $config->{'verbose'} > 1;

   # Build the source command
   my $sendFlags = ( $config->{'recurse'} ? 'R' : '' )             # recurse if they asked for it
                 . ( $chatty && ! $source->{'server'} ? 'v' : '' )  # verbose if source is local
                 . 'I';                                             # incremental
   my $sourceCommand = sprintf( 'zfs send -%s %s@%s %s@%s',
                                $sendFlags,
                                $source->{'dataset'}, $target->{'lastSnap'},
                                $source->{'dataset'}, $source->{'lastSnap'} );
   # wrap the ssh call if this is remote
   $sourceCommand = "ssh $source->{'server'} '$sourceCommand'" if $source->{'server'};

   # Now, build the target command
   my $targetCommand = 'zfs receive '
                     . ( $chatty && ! $target->{'server'} ? '-v ' : '' )
                     . $target->{'dataset'};
   $targetCommand = "ssh $target->{'server'} '$targetCommand'" if $target->{'server'};

   # if the command pv is installed, it can limit bandwidth and show progress
   if ( `which pv 2>/dev/null` ) {
      my @pvArgs;
      # add bandwidth limits, if requested
      push @pvArgs, '--si', '-L', $config->{'bwlimit'} if $config->{'bwlimit'};
      # if interactive, or if we are in dry run, add thermometer
      if ( -t *STDOUT || $config->{'dryrun'} ) {
         push @pvArgs, '-petr';
         # -s needs the expected size as its argument, so it is only added
         # when the size was calculated and is usable
         my $size = $config->{'report'} ? $config->{'report'}->{'Bytes Transferred'} : undef;
         push @pvArgs, '-s', $size if defined( $size ) && $size =~ m/^[0-9]+\z/ && $size > 0;
      }
      $sourceCommand .= ' | pv ' . join( ' ', @pvArgs ) if @pvArgs;
   }

   # return the command
   return $sourceCommand . ' | ' . $targetCommand;
}
176
 
177
# Finds the last snapshot in a hash. Every value of the hash is assumed to
# be a hash reference with a subkey 'key' holding a string of digits; the
# snapshot with the numerically largest 'key' wins.
#
# Parameter: hash reference of snapshot name => { 'key' => digits, ... }
# Returns:   the name of the snapshot with the largest key, or the empty
#            string when the hash is empty or no entry has a numeric key.
sub getLastSnapshot {
   my $snapList = shift;
   my $lastKey;
   my $lastSnap = '';
   # sorted, so that equal keys always resolve to the same snapshot (the one
   # sorting last by name) rather than depending on hash order
   foreach my $snap ( sort keys %{ $snapList || {} } ) {
      my $key = $snapList->{$snap}->{'key'};
      # entries without a numeric key can not be ordered
      next unless defined( $key ) && $key =~ m/^[0-9]+\z/;
      # the first usable entry always counts, even if its key is 0
      if ( ! defined( $lastKey ) || $key >= $lastKey ) {
         $lastKey = $key;
         $lastSnap = $snap;
      }
   }
   return $lastSnap;
}
191
 
192
 
193
# Works out the snapshot to replicate from (the last one on the target) and
# the snapshot to replicate to (the last one on the source).
#
# Parameter: the configuration hash reference. For both 'source' and 'target'
#            the 'snapshots' tree is read; 'last' and 'allOk' are stored on
#            each machine and 'last' on every child dataset.
# Returns a three element list:
#    ( last snapshot on source, last snapshot on target, \@warnings )
# Both snapshot names are the empty string when the children of a machine do
# not all share the same last snapshot, or when the source does not have the
# last snapshot of a child on the target. A machine without any snapshots
# has 0 as its last snapshot.
sub calculate {
   my $config = shift;

   my @warnings;

   # find the last snapshot in each dataset, on each machine
   foreach my $machine ( 'source', 'target' ) {
      my $state = $config->{$machine};
      $state->{'last'} = 0;   # track the last entry in all children in dataset
      $state->{'allOk'} = 1;  # assumed to be true, becomes false if children disagree
      # sorted, so the child used as reference does not depend on hash order
      foreach my $child ( sort keys %{ $state->{'snapshots'} || {} } ) {
         my $entry = $state->{'snapshots'}->{$child};
         $entry->{'last'} = getLastSnapshot( $entry->{'snaps'} );
         # set the machine last if we haven't done so yet
         $state->{'last'} = $entry->{'last'} unless $state->{'last'};
         # every child must end on the same snapshot as the machine
         if ( $state->{'last'} ne $entry->{'last'} ) {
            $state->{'allOk'} = 0;
            push @warnings, "Warning: $machine does not have consistent snapshots at $child";
         }
      }
   }

   # make sure the source has a corresponding snap for target->last. The
   # child is looked up one level at a time so that checking for it does not
   # create entries in the source tree
   my $sourceSnaps = $config->{'source'}->{'snapshots'} || {};
   foreach my $child ( sort keys %{ $config->{'target'}->{'snapshots'} || {} } ) {
      my $targetLast = $config->{'target'}->{'snapshots'}->{$child}->{'last'};
      my $known = exists $sourceSnaps->{$child} ? $sourceSnaps->{$child}->{'snaps'} : undef;
      unless ( $known && exists $known->{$targetLast} ) {
         $config->{'source'}->{'allOk'} = 0;
         push @warnings, "Warning: We  do not have consistent snapshots";
      }
   }

   if ( $config->{'source'}->{'allOk'} and $config->{'target'}->{'allOk'} ) { # whew, they match
      return( $config->{'source'}->{'last'}, $config->{'target'}->{'last'}, \@warnings );
   }
   return( '', '', \@warnings );
} # sub calculate
2 rodolico 228
 
6 rodolico 229
# Prints the usage message on STDOUT and terminates the script with exit
# code 1. Takes no parameters and does not return.
sub help {
   use File::Basename;
   # the name we were called by, without any path
   my ($me) = fileparse( $0 );
   # NOTE: this here document interpolates, so a literal backslash must be
   # written as two backslashes
   my $helpMessage = <<"   EOF";
      $me [flags] [source [target]]
         Syncs source dataset to target dataset

      Parameters (optional)
         source - dataset syncing from
         target - dataset syncing to

      Flags
         --source|s  - Alternate way to pass source dataset
         --target|t  - Alternate way to pass target dataset
         --filter|f  - Filter (regex) to limit source snapshots to process
         --dryrun|n  - Only displays command(s) to be run
         --recurse|r - Process dataset and all child datasets
         --verbose|v - increase verbosity of output
         --bwlimit   - Limit the speed of the connect to # bytes/s. KMGT allowed 

      May use short flags with bundling, ie -nrvv is valid for 
      --dryrun --recurse --verbose --verbose

      Either source or target must contain a DNS name or IP address of a remote
      machine, separated from the dataset with a colon, ie
         --source fbsd:storage/mydata
      would use the dataset storage/mydata on the server fbsd. The other dataset
      is assumed to be the local machine

      filter is a string which is a valid regular expression. Only snapshots matching
      that string will be used from the source dataset

      By default, only error messages are displayed. verbose will display statistics
      on size and transfer time. Invoking twice will display entire output of
      send/receive (whichever is the local machine)

      Example:
         $me -r prod.example.org:pool/mydata -t pool/backup/mydata \\
            --bwlimit=5M --filter='(\\d{4}.\\d{2}.\\d{2}.\\d{2}.\\d{2})'

         Would sync pool/mydata and all child datasets on prod.example.org to
         pool/backup/mydata on the local server. Only the snapshots which had a
         datetime stamp matching the --filter rule would be used. The transfer
         would not exceed 5MB/s (40Mb/s) if the pv app was installed
   EOF
   # get rid of the six spaces of indentation in front of every line
   $helpMessage =~ s/^ {6}//mg;
   print $helpMessage;
   exit 1;
} # help
280
 
281
 
4 rodolico 282
# Parse the command line into $config. GetOptions complains on STDERR about
# anything it does not understand and returns false; show the usage then
# (help exits).
GetOptions( $config,
   'source|s=s',
   'target|t=s',
   'filter|f=s',
   'dryrun|n',
   'recurse|r',
   'bwlimit=s',
   'verbose|v+',
   'help|h'
) or help();

help() if $config->{'help'};
# allow them to use positional, without flags, such as
# replicate source target --filter='regex' -n
$config->{'source'} = shift @ARGV unless $config->{'source'};
$config->{'target'} = shift @ARGV unless $config->{'target'};
die "You must enter a source and a target, at a minimum\n" unless $config->{'source'} && $config->{'target'};

# keep track of when we started this run
$config->{'report'}->{'Start Time'} = time;

# WARNING: this converts source and targets from a string to a hash
# '10.0.0.1:data/set' becomes ( 'server' => '10.0.0.1', 'dataset' => 'data/set')
# and 'data/set' becomes ( 'server' => '', 'dataset' => 'data/set')
$config->{'source'} = parseDataSet( $config->{'source'} );
$config->{'target'} = parseDataSet( $config->{'target'} );

# both source and target can not have a server portion; one must be local
die "Source and Target can not both be remote\n" if $config->{'source'}->{'server'} && $config->{'target'}->{'server'};

# connect to servers and get all existing snapshots
$config->{'target'}->{'snapshots'} = getSnaps( $config->{'target'}, $config->{'filter'} );
$config->{'source'}->{'snapshots'} = getSnaps( $config->{'source'}, $config->{'filter'} );

# we sync from last snap on target machine to last snap on source machine. calculate
# finds the last snapshot on source and target, and tells us about inconsistencies
my ( $sourceLast, $targetLast, $warnings ) = calculate( $config );
# calculate hands back empty snapshot names whenever it has warnings. Without this
# check that would look exactly like "nothing new to sync", so say what is wrong
if ( @$warnings ) {
   warn "$_\n" foreach @$warnings;
   die "Snapshots are not consistent, nothing has been replicated\n";
}
$config->{'source'}->{'lastSnap'} = $sourceLast;
$config->{'target'}->{'lastSnap'} = $targetLast;

# calculate transfer size if they want any feedback at all. Since this does take a few seconds
# to calculate, we won't run it unless they want a report
$config->{'report'}->{'Bytes Transferred'} = findSize( $config ) if $config->{'verbose'};

# actually creates the commands to do the replicate
my $commands = createCommands( $config );
print "$commands\n" if $config->{'verbose'} or $config->{'dryrun'};
my $failed = 0;
if ( $config->{'dryrun'} ) {
   print "Dry Run\n";
} elsif ( $commands =~ m/^[a-zA-Z]/ ) { # a leading '#' means there is nothing to run
   my $output = qx/$commands/;
   my $status = $?;
   print $output;
   if ( $status != 0 ) {
      # do not let a failed replication pass as a success
      $failed = 1;
      warn "Replication command failed with wait status $status\n";
   }
}

$config->{'report'}->{'End Time'} = time;
$config->{'report'}->{'Elapsed Time'} = $config->{'report'}->{'End Time'} - $config->{'report'}->{'Start Time'};
if ( $config->{'verbose'} ) {
   if ( $config->{'dryrun'} ) {
      print "Would have transferred $config->{'report'}->{'Bytes Transferred'} bytes\n";
   } else {
      print "bytes\t$config->{'report'}->{'Bytes Transferred'}\nseconds\t$config->{'report'}->{'Elapsed Time'}\n";
   }
}
# report the failure to whoever called us (cron, a wrapper script, ...)
exit 1 if $failed;
1;