| 2 |
rodolico |
1 |
#! /usr/bin/env perl
|
|
|
2 |
|
| 4 |
rodolico |
3 |
# very simple script to replicate a ZFS snapshot to another server.
|
|
|
4 |
# no fancy bells and whistles, does not create snapshots, and does
|
|
|
5 |
# not prune them. No major error checking either
|
|
|
6 |
|
| 2 |
rodolico |
7 |
use strict;
|
|
|
8 |
use warnings;
|
|
|
9 |
|
|
|
10 |
use Data::Dumper;
|
| 4 |
rodolico |
11 |
use Getopt::Long;
|
|
|
12 |
Getopt::Long::Configure( 'bundling' );

# Run-time settings. The values below are only defaults; anything given on
# the command line replaces them.
my $config = {
    # where we replicate from: 'dataset' or 'server:dataset'
    source  => '',
    # where we replicate to: 'dataset' or 'server:dataset'
    target  => '',
    # regular expression a snapshot name must match to be considered. The
    # text captured by the first group is reduced to its digits and used
    # to decide which snapshot is the newest
    filter  => '(\d{4}.\d{2}.\d{2}.\d{2}.\d{2})',
    # non-zero: only display the commands, do not run them
    dryrun  => 0,
    # non-zero: also process all child datasets (off unless requested)
    recurse => 0,
    # the higher the number, the more information is displayed
    verbose => 0,
};
|
|
|
30 |
|
|
|
31 |
# Split a dataset specification into server and dataset.
#
# Parameter: a string, either 'server:dataset' or just 'dataset'.
# Returns:   a hash reference with the keys 'server' and 'dataset'. When no
#            server part was given, 'server' is the empty string, which the
#            rest of the script treats as the local machine.
#
# Only the first colon separates the server from the dataset, so a colon
# inside the dataset name (server:pool/data:set) is preserved instead of
# silently cutting the name short.
sub parseDataSet {
    my $data = shift;
    my ( $server, $dataset ) = split( ':', $data, 2 );
    # nothing after a colon (or no colon at all): they passed only a
    # dataset, so assume localhost
    unless ( defined $dataset && length $dataset ) {
        return { 'server' => '', 'dataset' => $server };
    }
    # they passed a server:dataset
    return { 'server' => $server, 'dataset' => $dataset };
}
|
|
|
44 |
|
| 9 |
rodolico |
45 |
# Append the given messages, one per line, to /tmp/replicate.log.
# Dies if the log file can not be opened or if writing it fails.
sub logit {
    my $logFile = '/tmp/replicate.log';
    # three-argument open with a lexical handle: no global LOG bareword to
    # collide with, and the mode can never be taken from the file name
    open( my $log, '>>', $logFile ) or die "Could not open replicate.log: $!\n";
    print $log join( "\n", @_ ) . "\n";
    # buffered write errors (full disk, etc.) only surface at close
    close( $log ) or die "Could not write replicate.log: $!\n";
}
|
|
|
50 |
|
| 2 |
rodolico |
51 |
# Execute a shell command, capturing its stdout and stderr together.
#
# Returns a two element list:
#   ( 0, output )            the command exited with status 0
#   ( -1, message )          the command could not be started
#   ( raw status, message )  the command was killed by a signal
#   ( exit code, message )   the command exited with a non-zero status
# Note that the captured output is only returned on success.
sub run {
    my $command = shift;
    #logit( $command );
    my $output = qx/$command 2>&1/;
    my $status = $?;    # work on a copy of the child status

    return ( -1, "failed to execute: $!" ) if $status == -1;

    # low seven bits hold the signal number, bit eight the core dump flag
    my $signal = $status & 127;
    if ( $signal ) {
        my $core = ( $status & 128 ) ? 'with' : 'without';
        return ( $status, sprintf( "child died with signal %d, %s coredump", $signal, $core ) );
    }

    # the high byte is the exit code of the command
    my $exitCode = $status >> 8;
    return ( $exitCode, sprintf( "child exited with value %d", $exitCode ) ) if $exitCode;

    return ( 0, $output );
}
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
# List the snapshots of a dataset and of its children which match a pattern.
#
# Parameters:
#   $config  - hash reference with the keys 'dataset' and 'server' (as built
#              by parseDataSet); a non-empty server runs the listing over ssh
#   $pattern - regular expression a snapshot name must match to be included
#
# Returns a hash reference keyed by dataset name with the leading
# $config->{dataset} removed (so '' is the dataset itself). Each entry holds
# a 'snaps' hash keyed by snapshot name, with for every snapshot:
#   key   - the text captured by the first group of $pattern (the whole
#           match when the pattern has no group), non-digits removed
#   refer - fourth column of the zfs list output
#   used  - second column of the zfs list output
#
# A failing zfs command is not fatal: the result is simply empty.
sub getSnaps {
    my ( $config, $pattern ) = @_;
    my %return;
    my $root = $config->{'dataset'};
    # actual command to run to get all snapshots, recursively, of the dataset
    my $command = 'zfs list -r -t snap ' . $root;
    $command = "ssh $config->{server} '$command'" if $config->{'server'};
    my ( $error, $output ) = run( $command );
    # on failure $output only describes the error, there is nothing to parse
    return \%return if $error || ! defined $output;
    # without a pattern nothing can match
    return \%return unless $pattern;

    foreach my $line ( split( "\n", $output ) ) {
        # parse out the space delimited fields (third and fifth are unused)
        my ( $fullname, $used, undef, $refer ) = split( /\s+/, $line );
        # blank line
        next unless defined $fullname && length $fullname;
        # break the name into dataset and snapname
        my ( $dataset, $snap ) = split( '@', $fullname );
        # header line, or anything else which is not a snapshot
        next unless $snap;
        # remove the root dataset name; quoted so that characters such as
        # '.' in the name are taken literally
        $dataset =~ s/^\Q$root\E//;
        # skip anything not matching our regex
        next unless $snap =~ m/$pattern/;
        # grab the matched key: first capture group, or the whole match when
        # the pattern has no group
        my $key = defined $1 ? $1 : substr( $snap, $-[0], $+[0] - $-[0] );
        # and remove all non-numerics
        $key =~ s/[^0-9]//g;
        $return{$dataset}{'snaps'}{$snap} = {
            'key'   => $key,
            'refer' => $refer,
            'used'  => $used,
        };
    }
    return \%return;
}
|
|
|
101 |
|
| 6 |
rodolico |
102 |
# Get the number of bytes we will be syncing.
#
# Parameter: $config - the script configuration; reads 'dataset', 'server'
#            and 'lastSnap' of source and target, plus 'recurse' and 'verbose'.
# Returns:   0 when source and target are at the same snapshot (nothing to
#            sync), -1 when the size can not be determined (the command
#            failed or printed nothing usable), otherwise the second tab
#            separated column of the last line the command printed.
sub findSize {
    my $config = shift;
    my $source = $config->{'source'};
    my $from   = $config->{'target'}->{'lastSnap'};
    my $to     = $source->{'lastSnap'};
    # If they are equal, we are up to date
    return 0 if $to eq $from;

    # 'zfs send' with the flags: R to recurse if they asked for it, Pn to
    # have it tell us the size in bytes, I to ask for an incremental from
    # the target's last snapshot to the source's last snapshot
    my $sourceCommand = 'zfs send -'
        . ( $config->{'recurse'} ? 'R' : '' )
        . 'Pn'
        . 'I '
        . sprintf( '%s@%s %s@%s', $source->{'dataset'}, $from, $source->{'dataset'}, $to );
    # wrap the ssh call if this is remote
    $sourceCommand = "ssh $source->{server} '$sourceCommand'" if $source->{'server'};
    print "Checking Size with\n$sourceCommand\n" if $config->{'verbose'} > 3;

    my ( $error, $output ) = run( $sourceCommand );
    return -1 if $error;

    # the size is the second column (tab separated) of the last line
    my @lines = split( "\n", defined $output ? $output : '' );
    return -1 unless @lines;
    my $size = ( split( "\t", $lines[-1] ) )[1];
    # a last line without a second column gives us nothing to report
    return defined $size ? $size : -1;
}
|
| 2 |
rodolico |
140 |
|
| 6 |
rodolico |
141 |
# Create the shell pipeline which does the replication.
#
# Parameter: $config - the script configuration; reads 'dataset', 'server'
#            and 'lastSnap' of source and target, plus 'recurse', 'verbose',
#            'bwlimit', 'dryrun' and the 'Bytes Transferred' report entry.
# Returns:   the command as a string, "<send> [| pv ...] | <receive>", or
#            the comment '# Nothing new to sync' when source and target
#            are already at the same snapshot.
sub createCommands {
    my $config = shift;
    my $source = $config->{'source'};
    my $target = $config->{'target'};
    # source and target are in sync, so do nothing
    return '# Nothing new to sync' if $source->{'lastSnap'} eq $target->{'lastSnap'};

    # send/receive only get their verbose flag at level 3, and only the
    # side which runs on the local machine
    my $chatty = $config->{'verbose'} > 2;

    # Build the source command: R to recurse if they asked for it, I for an
    # incremental from the target's last snapshot to the source's last
    my $sourceCommand = 'zfs send -'
        . ( $config->{'recurse'} ? 'R' : '' )
        . ( $chatty && ! $source->{'server'} ? 'v' : '' )
        . 'I '
        . sprintf( '%s@%s %s@%s',
            $source->{'dataset'}, $target->{'lastSnap'},
            $source->{'dataset'}, $source->{'lastSnap'} );
    # wrap the ssh call if this is remote
    $sourceCommand = "ssh $source->{server} '$sourceCommand'" if $source->{'server'};

    # Now, build the target command
    my $targetCommand = 'zfs receive '
        . ( ! $target->{'server'} && $chatty ? '-v ' : '' )
        . $target->{'dataset'};
    $targetCommand = "ssh $target->{server} '$targetCommand'" if $target->{'server'};

    # if the command pv is installed
    if ( `which pv` ) {
        my $tags = '';
        # add bandwidth limits, if requested
        $tags = " --si -L $config->{bwlimit} " if $config->{'bwlimit'};
        # if interactive, or if we are in dry run, add thermometer
        if ( ( -t *STDOUT ) || $config->{'dryrun'} ) {
            my $size = $config->{'report'}->{'Bytes Transferred'};
            # the size is only calculated when verbose, and is -1 when that
            # failed; pv's s flag needs a real size or the pipeline breaks,
            # so leave s (and the ETA which depends on it) out without one
            $tags .= ( defined $size && $size =~ m/^\d+$/ ) ? "-petrs $size" : '-ptr';
        }
        $sourceCommand .= " | pv $tags" if $tags;
    }
    # return the command
    return $sourceCommand . ' | ' . $targetCommand;
}
|
|
|
183 |
|
|
|
184 |
# Pick the most recent snapshot out of a snapshot hash.
#
# Parameter: hash reference keyed by snapshot name; every value is a hash
#            reference holding a numeric 'key'.
# Returns:   the name whose 'key' is numerically the largest, or the empty
#            string when the hash is empty or no key is greater than zero.
sub getLastSnapshot {
    my $snapList = shift;
    my ( $newest, $highest ) = ( '', 0 );
    for my $name ( keys %$snapList ) {
        my $candidate = $snapList->{$name}->{'key'};
        # only a strictly larger key replaces what we have found so far
        next unless $candidate > $highest;
        ( $newest, $highest ) = ( $name, $candidate );
    }
    return $newest;
}
|
|
|
198 |
|
|
|
199 |
|
|
|
200 |
# Find the last snapshot on source and target and check that they line up.
#
# Parameter: $config - reads $config->{source}->{snapshots} and
#            $config->{target}->{snapshots} (hashes of child datasets, each
#            with a 'snaps' hash). Stores 'last' in every child entry and
#            'last' and 'allOk' in the source and target entries.
# Returns a three element list:
#   the last snapshot of the source
#   the last snapshot of the target
#   an array reference of warning messages
# Both snapshot names are '' when the children of a machine are not all at
# the same snapshot, or when the source does not have the snapshot a target
# child is at.
sub calculate {
    my $config = shift;
    my @warnings;

    # find the last snapshot date in each dataset, on each machine
    foreach my $machine ( 'source', 'target' ) {
        my $side = $config->{$machine};
        $side->{'last'}  = 0;    # track the last entry in all children in dataset
        $side->{'allOk'} = 1;    # becomes false if the children disagree
        foreach my $child ( keys %{ $side->{'snapshots'} } ) {
            my $entry = $side->{'snapshots'}->{$child};
            $entry->{'last'} = getLastSnapshot( $entry->{'snaps'} );
            # set the machine last if we haven't done so yet
            $side->{'last'} = $entry->{'last'} unless $side->{'last'};
            # every child must be at the same snapshot as the machine
            if ( $side->{'last'} ne $entry->{'last'} ) {
                $side->{'allOk'} = 0;
                push @warnings, "Warning: $machine does not have consistent snapshots at $child";
            }
        }
    }

    # make sure the source has a corresponding snap for target->last. The
    # lookup is done step by step so that a child which only exists on the
    # target is not created as an empty entry in the source data
    my $sourceSnaps = $config->{'source'}->{'snapshots'} || {};
    foreach my $child ( keys %{ $config->{'target'}->{'snapshots'} } ) {
        my $wanted = $config->{'target'}->{'snapshots'}->{$child}->{'last'};
        my $known  = exists $sourceSnaps->{$child} ? $sourceSnaps->{$child}->{'snaps'} : undef;
        unless ( $known && exists $known->{$wanted} ) {
            $config->{'source'}->{'allOk'} = 0;
            push @warnings, "Warning: We do not have consistent snapshots";
        }
    }

    if ( $config->{'source'}->{'allOk'} and $config->{'target'}->{'allOk'} ) {    # whew, they match
        return ( $config->{'source'}->{'last'}, $config->{'target'}->{'last'}, \@warnings );
    }
    return ( '', '', \@warnings );
} # sub calculate
|
| 2 |
rodolico |
235 |
|
| 6 |
rodolico |
236 |
# Print the usage message to standard output, then exit with status 1.
# Does not return.
sub help {
    use File::Basename;
    # name this script was called by, without the directory
    my ( $me ) = fileparse( $0 );
    # The text is kept flush left so it is printed exactly as written. This
    # is an interpolating here document, so a literal backslash has to be
    # written as two.
    print <<"EOF";
$me [flags] [source [target]]
Syncs source dataset to target dataset

Parameters (optional)
   source - dataset syncing from
   target - dataset syncing to

Flags
   --source|s  - Alternate way to pass source dataset
   --target|t  - Alternate way to pass target dataset
   --filter|f  - Filter (regex) to limit source snapshots to process
   --dryrun|n  - Only displays command(s) to be run
   --recurse|r - Process dataset and all child datasets
   --verbose|v - increase verbosity of output
   --bwlimit   - Limit the speed of the connect to # bytes/s. KMGT allowed

May use short flags with bundling, ie -nrvv is valid for
--dryrun --recurse --verbose --verbose

Either source or target must contain a DNS name or IP address of a remote
machine, separated from the dataset with a colon, ie
   --source fbsd:storage/mydata
would use the dataset storage/mydata on the server fbsd. The other dataset
is assumed to be the local machine

filter is a string which is a valid regular expression. Only snapshots matching
that string will be used from the source dataset

By default, only error messages are displayed. verbose will display statistics
on size and transfer time. Twice will give the commands, and three times will
display entire output of send/receive (whichever is the local machine)

Example:
   $me -r prod.example.org:pool/mydata -t pool/backup/mydata \\
      --bwlimit=5M --filter='(\\d{4}.\\d{2}.\\d{2}.\\d{2}.\\d{2})'

Would sync pool/mydata and all child datasets on prod.example.org to
pool/backup/mydata on the local server. Only the snapshots which had a
datetime stamp matching the --filter rule would be used. The transfer
would not exceed 5MB/s (40Mb/s) if the pv app was installed
EOF
    exit 1;
} # help
|
|
|
287 |
|
|
|
288 |
|
| 4 |
rodolico |
289 |
# Read the command line; anything given replaces the defaults in $config.
# A flag which can not be parsed stops the run: carrying on after, say, a
# misspelt --dryrun would start a real transfer.
GetOptions( $config,
    'source|s=s',
    'target|t=s',
    'filter|f=s',
    'dryrun|n',
    'recurse|r',
    'bwlimit=s',
    'verbose|v+',
    'help|h'
) or die "Invalid command line, use --help for usage\n";

help() if $config->{'help'};
# allow them to use positional, without flags, such as
# replicate source target --filter='regex' -n
$config->{'source'} = shift unless $config->{'source'};
$config->{'target'} = shift unless $config->{'target'};
die "You must enter a source and a target, at a minimum\n" unless $config->{'source'} && $config->{'target'};

# keep track of when we started this run
$config->{'report'}->{'Start Time'} = time;

# WARNING: this converts source and targets from a string to a hash
# '10.0.0.1:data/set' becomes ( 'server' => '10.0.0.1', 'dataset' => 'data/set')
# and 'data/set' becomes ( 'server' => '', 'dataset' => 'data/set')
$config->{'source'} = parseDataSet( $config->{'source'} );
$config->{'target'} = parseDataSet( $config->{'target'} );

# both source and target can not have a server portion; one must be local
die "Source and Target can not both be remote\n" if $config->{'source'}->{'server'} && $config->{'target'}->{'server'};

# connect to servers and get all existing snapshots
$config->{'target'}->{'snapshots'} = getSnaps( $config->{'target'}, $config->{'filter'} );
$config->{'source'}->{'snapshots'} = getSnaps( $config->{'source'}, $config->{'filter'} );

# we sync from last snap on target machine to last snap on source machine.
# calculate finds the last snapshot on source and target; when the snapshots
# are inconsistent it hands back empty names, which would otherwise look
# like "nothing to sync", so its warnings are shown on STDERR
my $warnings;
( $config->{'source'}->{'lastSnap'}, $config->{'target'}->{'lastSnap'}, $warnings ) = calculate( $config );
warn "$_\n" foreach @{ $warnings || [] };

# calculate transfer size if they want any feedback at all. Since this does take a few seconds
# to calculate, we won't run it unless they want a report
$config->{'report'}->{'Bytes Transferred'} = findSize( $config ) if $config->{'verbose'};

# actually creates the commands to do the replicate
my $commands = createCommands( $config );
print "$commands\n" if $config->{'verbose'} > 1 or $config->{'dryrun'};
my $failed = 0;
if ( $config->{'dryrun'} ) {
    print "Dry Run\n";
} elsif ( $commands =~ m/^[a-zA-Z]/ ) {    # not the '# Nothing new to sync' comment
    print qx/$commands/;
    # for a pipeline this is the status of its last command
    if ( $? != 0 ) {
        warn "Replication command failed with status $?\n";
        $failed = 1;
    }
}

$config->{'report'}->{'End Time'} = time;
$config->{'report'}->{'Elapsed Time'} = $config->{'report'}->{'End Time'} - $config->{'report'}->{'Start Time'};
if ( $config->{'verbose'} ) {
    if ( $config->{'dryrun'} ) {
        print "Would have transferred $config->{'report'}->{'Bytes Transferred'} bytes\n";
    } else {
        print "bytes\t$config->{'report'}->{'Bytes Transferred'}\nseconds\t$config->{'report'}->{'Elapsed Time'}\n";
    }
}
# let whoever started us (cron, a wrapper script) see that the transfer failed
exit 1 if $failed;
1;
|