Subversion Repositories zfs_utils

Rev

Rev 8 | Rev 15 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 rodolico 1
#! /usr/bin/env perl
2
 
4 rodolico 3
# very simple script to replicate a ZFS snapshot to another server.
4
# no fancy bells and whistles, does not create snapshots, and does
5
# not prune them. No major error checking either
6
 
2 rodolico 7
use strict;
8
use warnings;
9
 
10
use Data::Dumper;
4 rodolico 11
use Getopt::Long;
12
Getopt::Long::Configure ("bundling");
2 rodolico 13
 
4 rodolico 14
# Runtime configuration with its defaults. GetOptions (further down) writes
# the command line values straight into this hash, overriding what is set here.
my $config = {
   # the source, where we're coming from, as [server:]dataset
   'source' => '',
   # the target, where we want to replicate to, as [server:]dataset
   'target' => '',
   # regex selecting the snapshots to consider; the text of its first
   # capture group, stripped of non-digits, is used to order the snapshots
   'filter' => '(\d{4}.\d{2}.\d{2}.\d{2}.\d{2})',
   # if non-zero, just display the commands we'd use, don't run them
   'dryrun' => 0,
   # if non-zero, also send all child datasets (off unless requested)
   'recurse' => 0,
   # show more information; every -v raises the level by one
   'verbose' => 0,
   # bandwidth limit handed to pv; empty means no limit
   'bwlimit' => '',
   # set when the usage message was requested
   'help' => 0
   };
30
 
31
# Splits a "[server:]dataset" specification into its two parts.
#
# Parameter: the specification, e.g. 'fbsd:storage/mydata' or 'storage/mydata'
# Returns:   hashref with the keys 'server' and 'dataset'; 'server' is the
#            empty string when only a dataset was given (assumed local)
sub parseDataSet {
   my $data = shift;
   # limit of 2: only the first colon separates server from dataset, so a
   # colon inside the dataset name is kept instead of being cut off
   my ( $server, $dataset ) = split( ':', $data, 2 );
   # test for presence, not truth, so a dataset literally named '0' survives
   if ( defined $dataset && length $dataset ) { # they passed a server:dataset
      return { 'server' => $server, 'dataset' => $dataset };
   }
   # only passing in dataset, so assume localhost
   return { 'server' => '', 'dataset' => $server };
}
44
 
9 rodolico 45
# Appends every argument, one per line, to /tmp/replicate.log.
# Dies if the log can not be opened or the write can not be completed.
sub logit {
   # lexical handle and three argument open; the handle can not clash with
   # any other LOG handle and the mode can not be influenced by the file name
   open( my $log, '>>', '/tmp/replicate.log' ) or die "Could not open replicate.log: $!\n";
   print $log join( "\n", @_ ) .  "\n";
   # buffered write errors only show up when the handle is closed
   close( $log ) or die "Could not write replicate.log: $!\n";
}
50
 
2 rodolico 51
# Executes a command through the shell, with stderr merged into stdout.
# Returns a two element list:
#    ( 0, output )           command exited with status 0
#    ( -1, message )         command could not be started
#    ( $?, message )         command was killed by a signal
#    ( exit code, message )  command exited with a non-zero status
# In the three failure cases the captured output is not returned, only a
# string describing the error.
sub run {
   my $command = shift;
   #&logit( $command );
   my $output = qx/$command 2>&1/;
   my $status = $?;
   return ( -1, "failed to execute: $!" ) if $status == -1;

   # the low seven bits carry the signal number, bit eight the core dump flag
   my $signal = $status & 127;
   if ( $signal ) {
      my $core = ( $status & 128 ) ? 'with' : 'without';
      return ( $status, sprintf( "child died with signal %d, %s coredump", $signal, $core ) );
   }

   # what remains after shifting is the exit code of the command
   my $exitCode = $status >> 8;
   return ( $exitCode, sprintf( "child exited with value %d", $exitCode ) ) if $exitCode;
   return ( 0, $output );
}
68
 
69
 
70
# Lists the snapshots of a dataset, and of everything below it, whose names
# match $pattern.
#
# Parameters:
#    $config  - hashref with 'dataset' and 'server'; a non-empty server
#               means the listing is fetched through ssh
#    $pattern - regex a snapshot name has to match. The text of its first
#               capture group (the whole match if it has none), reduced to
#               its digits, becomes the snapshot's 'key'
# Returns a hashref keyed by dataset name with the $config->{'dataset'}
# prefix removed ('' for the dataset itself):
#    { $child => { 'snaps' => { $snapName => { 'key', 'refer', 'used' } } } }
# The hashref is empty if the listing fails or nothing matches.
sub getSnaps {
   my ($config,$pattern) = @_;
   my %return;
   # without a pattern nothing can match, so there is nothing to ask for
   return \%return unless $pattern;
   # actual command to run to get all snapshots, recursively, of the dataset
   my $command = 'zfs list -r -t snap ' . $config->{'dataset'};
   $command = "ssh $config->{server} '$command'" if $config->{'server'};
   my ($error, $output ) = &run( $command );
   # on failure $output only describes the error, it is not a listing.
   # Deliberately not fatal, the caller simply sees no snapshots
   return \%return if $error;
   # dataset names may contain regex metacharacters such as '.', so the
   # prefix has to be matched literally
   my $root = quotemeta( $config->{'dataset'} );
   foreach my $line ( split( "\n", $output ) ) {
      # parse out the space delimited fields
      my ($fullname, $used, $avail, $refer, $mount) = split( /\s+/, $line );
      # the header line, and anything else without '@', is not a snapshot
      next unless defined $fullname && index( $fullname, '@' ) >= 0;
      # break the name into dataset and snapname
      my ($dataset, $snap) = split( '@', $fullname, 2 );
      # remove the root dataset name
      $dataset =~ s/^$root//;
      # skip anything not matching our regex
      next unless $snap && $snap =~ m/$pattern/;
      # grab the matched key; a filter without a capture group falls back
      # to the whole match (read before any other regex touches $1, @-, @+)
      my $key = defined $1 ? $1 : substr( $snap, $-[0], $+[0] - $-[0] );
      # and remove all non-numerics
      $key =~ s/[^0-9]//g;
      $return{$dataset}{'snaps'}{$snap} = {
         'key'   => $key,
         # fourth column of the listing, treated by this script as the transfer size
         'refer' => $refer,
         # second column of the listing, treated by this script as the disk space used
         'used'  => $used
      };
   }
   return \%return;
}
101
 
6 rodolico 102
# Get the number of bytes an incremental send from the target's last
# snapshot to the source's last snapshot would transfer.
#
# Parameter: $config - the configuration hashref. Reads 'lastSnap' of source
#            and target, 'dataset' and 'server' of the source, plus
#            'recurse' and 'verbose'
# Returns:   0 if both sides are at the same snapshot (nothing to sync),
#            -1 if the size could not be determined,
#            otherwise the size taken from the output of 'zfs send -Pn'
sub findSize {
   my $config = shift;
   my $source = $config->{'source'};
   my $fromSnap = $config->{'target'}->{'lastSnap'};
   my $toSnap = $source->{'lastSnap'};
   # check for new snapshots to sync. If they are equal, we are up to date
   return 0 if $toSnap eq $fromSnap;

   # Build the source command: R to recurse if they asked for it, Pn to get
   # a parsable estimate instead of an actual stream, I for incremental
   my $sourceCommand = sprintf( 'zfs send -%sPnI %s@%s %s@%s',
                            ( $config->{'recurse'} ? 'R' : '' ),
                            $source->{'dataset'}, $fromSnap,
                            $source->{'dataset'}, $toSnap
                        );
   # wrap the ssh call if this is remote
   $sourceCommand = "ssh $source->{server} '$sourceCommand'" if $source->{'server'};
   print "Checking Size with\n$sourceCommand\n" if $config->{'verbose'} > 2;
   my ( $error, $output ) = &run( $sourceCommand );
   return -1 if $error;

   # the size is the second column (tab separated) of the last line (\n separated) in $output
   my @lines = split( "\n", defined $output ? $output : '' );
   return -1 unless @lines;
   my $size = ( split( "\t", $lines[-1] ) )[1];
   # anything but a plain number means the output was not what we expected;
   # report "unknown" instead of handing undef or garbage to the caller
   return ( defined $size && $size =~ m/^\d+$/ ) ? $size : -1;
}
2 rodolico 140
 
6 rodolico 141
# create the command necessary to do the replication
#
# Parameter: $config - the configuration hashref. Reads 'lastSnap',
#            'dataset' and 'server' of source and target, plus 'recurse',
#            'verbose', 'dryrun', 'bwlimit' and
#            $config->{'report'}->{'Bytes Transferred'}
# Returns:   the shell pipeline "send [| pv] | receive" as a string, or the
#            comment '# Nothing new to sync' if both sides are at the same
#            snapshot
#
# NOTE(review): if the target has no matching snapshots, calculate hands
# back 0 as its last snapshot and the send below then names '@0' as the
# starting point - confirm how an initial replication is meant to work.
sub createCommands {
   my $config = shift;
   my $source = $config->{'source'};
   my $target = $config->{'target'};
   # check for new snapshots to sync. If they are equal, we are up to date
   return '# Nothing new to sync' if $source->{'lastSnap'} eq $target->{'lastSnap'};

   # zfs' own verbose output is only asked for at level 2, and only from
   # the side running on the local machine
   my $chatty = $config->{'verbose'} > 1;

   # Build the source command
   my $sourceCommand = 'zfs send -' .
                  ( $config->{'recurse'} ? 'R' : '' ) . # recurse if they asked for it
                  ( $chatty && ! $source->{'server'} ? 'v' : '' ) .
                  # this is the part that asks for incremental
                  'I ' .
                  sprintf( '%s@%s %s@%s',
                     $source->{'dataset'}, $target->{'lastSnap'},
                     $source->{'dataset'}, $source->{'lastSnap'}
                  );
   # wrap the ssh call if this is remote
   $sourceCommand = "ssh $source->{server} '$sourceCommand'" if $source->{'server'};

   # Now, build the target command
   my $targetCommand = 'zfs receive ' .
                       ( $chatty && ! $target->{'server'} ? '-v ' : '' ) .
                       $target->{'dataset'};
   $targetCommand = "ssh $target->{server} '$targetCommand'" if $target->{'server'};

   # if the command pv is installed
   if ( `which pv 2>/dev/null` ) {
      my $tags = '';
      # add bandwidth limits, if requested
      $tags .= " --si -L $config->{bwlimit} " if $config->{'bwlimit'};
      # if interactive, or if we are in dry run, add thermometer
      if ( ( -t STDOUT ) || $config->{'dryrun'} ) {
         my $size = $config->{'report'}->{'Bytes Transferred'};
         # -s needs a value. The size is only computed when verbose is set
         # and is -1 if it could not be determined, so without a usable
         # number leave -s off rather than build a broken pv call
         $tags .= ( defined $size && $size =~ m/^\d+$/ && $size > 0 )
                  ? "-petrs $size"
                  : '-petr';
      }
      $sourceCommand .= " | pv $tags" if $tags;
   }
   # return the command
   return $sourceCommand . ' | ' . $targetCommand;
}
183
 
184
# Picks the most recent snapshot out of a hashref of snapshots. Every entry
# is expected to carry a numeric 'key'; the name of the entry with the
# highest key is returned. If the hash is empty, or no key is greater than
# zero, the empty string is returned.
sub getLastSnapshot {
   my $snapList = shift;
   my ( $newest, $highest ) = ( '', 0 );
   for my $name ( keys %$snapList ) {
      my $candidate = $snapList->{$name}->{'key'};
      # only a strictly larger key replaces what we have so far
      next unless $candidate > $highest;
      ( $newest, $highest ) = ( $name, $candidate );
   }
   return $newest;
}
198
 
199
 
200
# Works out between which snapshots the replication has to run.
#
# Parameter: $config - the configuration hashref; 'source' and 'target'
#            must each carry a 'snapshots' hashref as built by getSnaps
# Side effects, for both 'source' and 'target' ($machine):
#    {$machine}{'snapshots'}{$child}{'last'} - newest snapshot of that dataset
#    {$machine}{'last'}  - newest snapshot of the first dataset looked at
#                          that has one (0 if there is none)
#    {$machine}{'allOk'} - 0 if some dataset's newest snapshot differs from
#                          {'last'}, or, for the source, if it lacks the
#                          snapshot the target is at
# Returns:   ( source last, target last, \@warnings ) if everything is
#            consistent, otherwise ( '', '', \@warnings )
sub calculate {
   my $config = shift;

   my @warnings;

   # find the last snapshot date in each dataset, on each target
   foreach my $machine ( 'source', 'target' ) {
      my $side = $config->{$machine};
      $side->{'last'} = 0; # track the last entry in all children in dataset
      $side->{'allOk'} = 1; # assumed to be true, becomes false if some children do not have snapshots
      foreach my $child ( keys %{ $side->{'snapshots'} } ) {
         my $entry = $side->{'snapshots'}->{$child};
         $entry->{'last'} = &getLastSnapshot( $entry->{'snaps'} );
         # set the machine last if we haven't done so yet
         $side->{'last'} = $entry->{'last'} unless $side->{'last'};
         # keep track of the last snapshot for each set
         if ( $side->{'last'} ne $entry->{'last'} ) {
            $side->{'allOk'} = 0;
            push @warnings, "Warning: $machine does not have consistent snapshots at $child";
         }
      }
   }
   # make sure the source has a corresponding snap for target->last
   my $sourceSnapshots = $config->{'source'}->{'snapshots'};
   foreach my $child ( keys %{ $config->{'target'}->{'snapshots'} } ) {
      my $targetLast = $config->{'target'}->{'snapshots'}->{$child}->{'last'};
      # look the child up step by step; a single chained exists() would
      # create empty entries in the source's snapshot list as a side effect
      my $sourceChild = $sourceSnapshots ? $sourceSnapshots->{$child} : undef;
      my $sourceSnaps = $sourceChild ? $sourceChild->{'snaps'} : undef;
      unless ( $sourceSnaps && exists $sourceSnaps->{$targetLast} ) {
         $config->{'source'}->{'allOk'} = 0;
         push @warnings, "Warning: We  do not have consistent snapshots";
      }
   }
   if ( $config->{'source'}->{'allOk'} and $config->{'target'}->{'allOk'} ) { # whew, they match
      return( $config->{'source'}->{'last'}, $config->{'target'}->{'last'}, \@warnings );
   } else {
      return( '','',\@warnings);
   }
} # sub calculate
2 rodolico 235
 
6 rodolico 236
# Prints the usage message to STDOUT and ends the script with exit code 1.
sub help {
   use File::Basename;
   my $me = fileparse( $0 );
   # The text is indented to line up with the code; the indentation is
   # removed again below. This heredoc interpolates, so every backslash that
   # is to show up in the output has to be doubled.
   my $helpMessage = <<"   EOF";
      $me [flags] [source [target]]
         Syncs source dataset to target dataset

      Parameters (optional)
         source - dataset syncing from
         target - dataset syncing to

      Flags
         --source|s  - Alternate way to pass source dataset
         --target|t  - Alternate way to pass target dataset
         --filter|f  - Filter (regex) to limit source snapshots to process
         --dryrun|n  - Only displays command(s) to be run
         --recurse|r - Process dataset and all child datasets
         --verbose|v - increase verbosity of output
         --bwlimit   - Limit the speed of the connect to # bytes/s. KMGT allowed

      May use short flags with bundling, ie -nrvv is valid for 
      --dryrun --recurse --verbose --verbose

      Either source or target must contain a DNS name or IP address of a remote
      machine, separated from the dataset with a colon, ie
         --source fbsd:storage/mydata
      would use the dataset storage/mydata on the server fbsd. The other dataset
      is assumed to be the local machine

      filter is a string which is a valid regular expression. Only snapshots matching
      that string will be used from the source dataset

      By default, only error messages are displayed. verbose will display statistics
      on size and transfer time. Invoking twice will display entire output of
      send/receive (whichever is the local machine)

      Example:
         $me -r prod.example.org:pool/mydata -t pool/backup/mydata \\
            --bwlimit=5M --filter='(\\d{4}.\\d{2}.\\d{2}.\\d{2}.\\d{2})'

         Would sync pool/mydata and all child datasets on prod.example.org to
         pool/backup/mydata on the local server. Only the snapshots which had a
         datetime stamp matching the --filter rule would be used. The transfer
         would not exceed 5MB/s (40Mb/s) if the pv app was installed
   EOF
   # get rid of indentation
   $helpMessage =~ s/^      //mg;
   print $helpMessage;
   exit 1;
} # help
287
 
288
 
4 rodolico 289
# Parse the command line straight into $config. GetOptions reports unknown
# or malformed flags itself and returns false; in that case show the usage
# and stop instead of carrying on with a half understood command line.
GetOptions( $config,
   'source|s=s',
   'target|t=s',
   'filter|f=s',
   'dryrun|n',
   'recurse|r',
   'bwlimit=s',
   'verbose|v+',
   'help|h'
) or &help();

&help() if $config->{'help'};
# allow them to use positional, without flags, such as
# replicate source target --filter='regex' -n
$config->{'source'} = shift unless $config->{'source'};
$config->{'target'} = shift unless $config->{'target'};
die "You must enter a source and a target, at a minimum\n" unless $config->{'source'} && $config->{'target'};

# keep track of when we started this run
$config->{'report'}->{'Start Time'} = time;

# WARNING: this converts source and targets from a string to a hash
# '10.0.0.1:data/set' becomes ( 'server' => '10.0.0.1', 'dataset' => 'data/set')
# and 'data/set' becomes ( 'server' => '', 'dataset' => 'data/set')
$config->{'source'} = &parseDataSet( $config->{'source'} );
$config->{'target'} = &parseDataSet( $config->{'target'} );

# both source and target can not have a server portion; one must be local
die "Source and Target can not both be remote\n" if $config->{'source'}->{'server'} && $config->{'target'}->{'server'};

# connect to servers and get all existing snapshots
$config->{'target'}->{'snapshots'} = &getSnaps( $config->{'target'}, $config->{'filter'} );
$config->{'source'}->{'snapshots'} = &getSnaps( $config->{'source'}, $config->{'filter'} );

# we sync from last snap on target machine to last snap on source machine. calculate simply
# finds the last snapshot on source and target
my $warnings;
( $config->{'source'}->{'lastSnap'}, $config->{'target'}->{'lastSnap'}, $warnings ) = &calculate( $config );
# calculate hands back empty snapshot names when the two sides are not
# consistent, which further down reads as "nothing to sync". Show its
# warnings so that case can be told apart from really being up to date
print STDERR map { "$_\n" } @{ $warnings || [] };

# calculate transfer size if they want any feedback at all. Since this does take a few seconds
# to calculate, we won't run it unless they want a report
$config->{'report'}->{'Bytes Transferred'} = &findSize( $config ) if $config->{'verbose'};

# actually creates the commands to do the replicate
my $commands = &createCommands( $config );
print "$commands\n" if $config->{'verbose'} or $config->{'dryrun'};
if ( $config->{'dryrun'} ) {
   print "Dry Run\n";
} elsif ( $commands =~ m/^[a-zA-Z]/ ) { # anything else is only a comment, there is nothing to run
   print qx/$commands/;
   # say so if the pipeline did not end cleanly instead of staying silent
   warn "Replication command failed with status $?\n" if $?;
}

$config->{'report'}->{'End Time'} = time;
$config->{'report'}->{'Elapsed Time'} = $config->{'report'}->{'End Time'} - $config->{'report'}->{'Start Time'};
if ( $config->{'verbose'} ) {
   if ( $config->{'dryrun'} ) {
      print "Would have transferred $config->{'report'}->{'Bytes Transferred'} bytes\n";
   } else {
      print "bytes\t$config->{'report'}->{'Bytes Transferred'}\nseconds\t$config->{'report'}->{'Elapsed Time'}\n";
   }
}
1;