| 24 |
rodolico |
1 |
#! /usr/bin/env perl
|
|
|
2 |
|
| 34 |
rodolico |
3 |
# Simplified BSD License (FreeBSD License)
|
|
|
4 |
#
|
|
|
5 |
# Copyright (c) 2025, Daily Data Inc.
|
|
|
6 |
# All rights reserved.
|
|
|
7 |
#
|
|
|
8 |
# Redistribution and use in source and binary forms, with or without
|
|
|
9 |
# modification, are permitted provided that the following conditions are met:
|
|
|
10 |
#
|
|
|
11 |
# 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
12 |
# list of conditions and the following disclaimer.
|
|
|
13 |
#
|
|
|
14 |
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
15 |
# this list of conditions and the following disclaimer in the documentation
|
|
|
16 |
# and/or other materials provided with the distribution.
|
|
|
17 |
#
|
|
|
18 |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
19 |
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
20 |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
21 |
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
|
22 |
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
23 |
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
24 |
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
25 |
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
|
26 |
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
27 |
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
28 |
|
|
|
29 |
# sneakernet.pl
|
|
|
30 |
# Script to perform sneakernet replication of ZFS datasets between two servers
|
|
|
31 |
# using an external transport drive.
|
|
|
32 |
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
|
|
|
33 |
# Optionally uses symmetric encryption to encrypt datasets during transport.
|
|
|
34 |
# On the target server, can optionally use GELI to encrypt the datasets on disk.
|
|
|
35 |
# Requires a configuration file in YAML format next to the script.
|
|
|
36 |
# Author: R. W. Rodolico <rodo@dailydata.net>
|
|
|
37 |
# Created: December 2025
|
|
|
38 |
# Revision History:
|
|
|
39 |
# Version: 0.1 2025-12-10 Initial version
|
|
|
40 |
|
| 24 |
rodolico |
41 |
use strict;
|
|
|
42 |
use warnings;
|
|
|
43 |
|
| 34 |
rodolico |
44 |
our $VERSION = '0.1';
|
|
|
45 |
|
| 24 |
rodolico |
46 |
use FindBin;
|
|
|
47 |
use lib "$FindBin::Bin/..";
|
| 27 |
rodolico |
48 |
use Data::Dumper;
|
| 35 |
rodolico |
49 |
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel mountGeli runCmd sendReport $logFileName $displayLogsOnConsole);
|
| 24 |
rodolico |
50 |
|
| 35 |
rodolico |
51 |
# Directory of the script as reported by FindBin (RealBin) and the script's
# full path; the config, status and log file names are all derived from it.
my $scriptDirectory = $FindBin::RealBin;
my $scriptFullPath  = join( '/', $scriptDirectory, $FindBin::Script );

# When set to a true value, replication commands are logged but not executed.
my $DEBUG = 0;

# Echo every log message to the console as well as writing it to the log file.
$displayLogsOnConsole = 1;

# YAML configuration file sits next to the script: <script name>.conf.yaml
my $configFileName = $scriptFullPath . '.conf.yaml';
|
| 24 |
rodolico |
62 |
|
|
|
63 |
# Built-in default configuration. It is handed to loadConfig() together with
# the YAML config file name in the main program, so anything defined here acts
# as a default value.
my $config = {
    # file created on source server to track last copied dataset
    'status_file' => "$scriptFullPath.status",
    'log_file' => "$scriptFullPath.log",
    # information about source server
    'source_server' => {
        'hostname' => '', # used to see if we are on source
        'poolname' => 'pool', # name of the ZFS pool to export
        # if set, will generate a report via email or by storing on a drive
        'report' => {
            'email' => 'tech@example.org',
            'subject' => 'AG Transport Report',
            'targetDrive' => {
                'fstype' => '', # filesystem type of the report drive
                # How often to check for the disk (seconds), message displayed every interval
                'check_interval' => 15,
                'label' => '',
                'mount_point' => '',
            }
        }
    },
    # information about target server
    'target_server' => {
        'hostname' => '', # used to see if we are on target
        'poolname' => 'backup', # name of the ZFS pool to import
        # if this is set, the dataset uses GELI, so we must decrypt and
        # mount it first
        'geli' => {
            # NOTE: this key used to be spelled 'secureKey ' (with a trailing
            # space), which can never match a lookup of 'secureKey'.
            'secureKey' => {
                'label' => 'replica', # the GPT label of the key disk
                'fstype' => 'ufs', # filesystem type of the key disk
                'check_interval' => 15,
                'wait_timeout' => 300,
                'keyfile' => 'geli.key', # the name of the key file on the secureKey disk
            },
            'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c', # hex representation of the local key part
            'target' => '/media/geli.key', # location to create the combined keyfile
            'poolname' => 'backup', # name of the ZFS pool to import
            'diskList' => [
                'da0',
                'da1'
            ], # list of disks to try to mount the dataset from
        },
        'report' => {
            'email' => '',
            'subject' => '',
            'targetDrive' => {
                'fstype' => 'msdos', # filesystem type of the report drive
                'label' => 'sneakernet',
                'mount_point' => '',
            }
        }
    },
    'transport' => {
        # this is the GPT label of the sneakernet disk
        'disk_label' => 'sneakernet',
        # this is the file system type. Not needed if ufs
        'fstype' => 'ufs',
        # where we want to mount it
        'mount_point' => '/mnt/sneakernet',
        # amount of time to wait for the disk to appear
        'timeout' => 600,
        # How often to check for the disk (seconds), message displayed every interval
        'check_interval' => 15,
        # if set, all files will be encrypted with this key/IV during transport
        'encryption' => {
            'key' => '', # openssl rand 32 | xxd -p | tr -d '\n' > test.key
            'IV' => '00000000000000000000000000000000',
        },
    },
    # datasets to replicate: ZFS source, ZFS target and transport file name
    'datasets' => {
        'dataset1' => {
            'source' => 'pool/dataset1',
            'target' => 'backup/dataset1',
            'filename' => 'dataset1'
        },
        'files_share' => {
            'source' => 'pool/files_share',
            'target' => 'backup/files_share',
            'filename' => 'files_share'
        },
    }
};
|
|
|
146 |
|
| 35 |
rodolico |
147 |
# Load the status/history file.
#   $filename - path of the status file
# Returns an array reference holding the file's lines with the line endings
# removed. If the file does not exist or cannot be opened, the problem is
# logged and a reference to an empty array is returned.
sub getStatusFile {
    my ($filename) = @_;
    my @lines;
    my $fh;

    # Guard clause: missing or unreadable file means we start with no history.
    unless ( -e $filename && open( $fh, '<', $filename ) ) {
        logMsg("Error: could not read status file '$filename', assuming a fresh start: $!");
        return \@lines;
    }

    @lines = <$fh>;
    chomp @lines;
    close $fh;
    logMsg("Read status file '$filename' with contents:\n" . join( "\n", @lines ) . "\n");
    return \@lines;
}
|
| 24 |
rodolico |
161 |
|
| 30 |
rodolico |
162 |
# Write the status list to a file, one entry per line.
#   $filename   - path of the status file
#   $statusList - array reference of entries to store (undef is treated as empty)
# An existing file is first renamed to "$filename.bak".
# Every failure (backup, open, write, close) is logged via logMsg and then
# raised with die, with the reason included in the exception text.
sub writeStatusFile {
    my ( $filename, $statusList ) = @_;
    my @entries = @{ $statusList // [] };

    # log the problem, then abort with the same text so callers see the reason
    my $fail = sub {
        my ($error) = @_;
        logMsg($error);
        die "$error\n";
    };

    # backup existing status file
    if ( -e $filename ) {
        rename( $filename, "$filename.bak" )
            or $fail->("Error: could not backup existing status file '$filename': $!");
    }

    # write new status file
    open( my $fh, '>', $filename )
        or $fail->("Error: could not write status file '$filename': $!");
    print {$fh} map { "$_\n" } @entries
        or $fail->("Error: could not write status file '$filename': $!");
    # buffered write errors only surface at close, so it must be checked too
    close($fh)
        or $fail->("Error: could not write status file '$filename': $!");

    logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @entries ) . "\n");
}
|
|
|
184 |
|
| 31 |
rodolico |
185 |
# Turn a slash separated path such as root/dataset/dataset/dataset into
# dataset.dataset.dataset: the first path element is dropped and the
# remaining elements are joined with dots.
#   $string - the slash separated name
# Returns the dotted name (an empty string when nothing follows the first element).
sub replaceSlashWithDot {
    my ($string) = @_;
    my @segments = split( m{/}, $string );
    # everything after the first element; empty when there are 0 or 1 elements
    return join( '.', @segments[ 1 .. $#segments ] );
}
|
|
|
193 |
|
| 30 |
rodolico |
194 |
# Replicate every configured dataset onto the transport (sneakernet) disk.
#   $config     - configuration hashref
#   $statusList - list of last snapshots replicated for each dataset in
#                 previous replications
# Returns the new status list, which is handed to makeReplicateCommands for
# every dataset so it can record the updated last snapshots.
# Each command from makeReplicateCommands is optionally piped through openssl
# (when a transport encryption key is configured) and redirected into a file
# on the transport mount point. Commands are only logged when $DEBUG is set.
sub doSourceReplication {
    my ( $config, $statusList ) = @_;
    my $newStatus = [];

    for my $dataset ( sort keys %{ $config->{datasets} } ) {
        logMsg("Processing dataset '$dataset'");

        # all snapshots currently present for this dataset
        my @snapshots = runCmd( "zfs list -rt snap -H -o name $config->{datasets}->{$dataset}->{source} " );
        my $commands = makeReplicateCommands( \@snapshots, $statusList, $newStatus );

        unless ( %$commands ) {
            logMsg( "Nothing to do for $dataset" );
            next;
        }

        for my $cmd ( keys %$commands ) {
            my $command = $commands->{$cmd};
            # encrypt in flight only when a key has been configured
            if ( $config->{transport}->{encryption}->{key} ) {
                $command .= " | openssl enc -aes-256-cbc -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} ";
            }
            # output file is named after the command's key, slashes turned into dots
            $command .= " > $config->{transport}->{mount_point}/" . replaceSlashWithDot($cmd);
            logMsg("Running command: $command");
            runCmd( $command ) if !$DEBUG;
        }
    }
    return $newStatus;
}
|
|
|
222 |
|
| 33 |
rodolico |
223 |
# Remove all plain files from a directory; subdirectories (and anything in
# them) are left alone.
#   $dirname - directory to clean
# Calls fatalError if the directory cannot be opened. A file that cannot be
# removed only produces a warning, and the remaining files are still processed.
sub cleanDirectory {
    my $dirname = shift;
    logMsg( "Cleaning up $dirname of all files" );
    # $! carries the OS error text (the messages used to contain a literal "#!")
    opendir( my $dh, $dirname ) || fatalError( "Can not open $dirname: $!" );
    # get all file names, but leave directories alone
    my @files = grep { -f $_ } map { "$dirname/$_" } readdir($dh);
    closedir $dh;
    foreach my $file (@files) {
        unlink $file or warn "Could not unlink $file: $!\n";
    }
    return;
}
|
|
|
236 |
|
|
|
237 |
|
|
|
238 |
|
| 31 |
rodolico |
239 |
# Handle a fatal error: write the message to the log, then abort.
#   $message - description of the problem
# Never returns. The message is also passed to die, so it is available in $@
# to anything that traps the exception and is printed on STDERR otherwise
# (a bare die would only report "Died").
sub fatalError {
    my ($message) = @_;
    $message //= 'Unspecified fatal error';
    logMsg( $message );
    # a trailing newline stops die from appending this sub's file/line
    $message .= "\n" unless $message =~ /\n\z/;
    die $message;
}
|
| 30 |
rodolico |
245 |
|
| 31 |
rodolico |
246 |
|
| 30 |
rodolico |
247 |
##################### main program starts here #####################
|
|
|
248 |
# Example to create a random key for encryption/decryption:
|
| 24 |
rodolico |
249 |
# generate a random key with
|
|
|
250 |
# openssl rand 32 | xxd -p | tr -d '\n' > test.key
|
|
|
251 |
|
|
|
252 |
# If a YAML config file exists next to the script, load and merge it
$config = loadConfig($configFileName, $config );

# set some defaults
$config->{'status_file'} //= "$scriptFullPath.status";
# set log file name for sub logMsg in ZFS_Utils, and remove the old log if it exists
# Log file is only valid for one run
$logFileName = $config->{'log_file'} //= "$scriptFullPath.log";
unlink ( $logFileName ) if -f $logFileName;

fatalError( "Invalid config file: missing source and/or target server" )
    unless (defined $config->{source_server} && defined $config->{target_server});

# mount the transport drive, fatal error if we can not find it
# (the value returned by mountDriveByLabel replaces the configured mount point)
fatalError( "Unable to mount tranport drive with label $config->{transport}->{disk_label}" )
    unless $config->{transport}->{mount_point} = mountDriveByLabel( $config->{transport} );

# decide our role by comparing the short hostname with the configured names
my $servername = `hostname -s`;
chomp $servername;
my $runningAs = $servername eq $config->{source_server}->{hostname} ? 'source' :
                $servername eq $config->{target_server}->{hostname} ? 'target' : 'unknown';

if ( $runningAs eq 'source' ) {
    logMsg( "Running as source server" );
    # remove all files from transport disk, but leave all subdirectories alone
    cleanDirectory( $config->{transport}->{mount_point} );
    my $statusList = getStatusFile($config->{status_file});
    $statusList = doSourceReplication($config, $statusList);
    writeStatusFile($config->{status_file}, $statusList);
} elsif ( $runningAs eq 'target' ) {
    logMsg( "Running as target server" );
    # target side is work in progress: the script stops here, so the two
    # statements below are currently never reached
    die "Target Server code not complete\n";
    die "GELI target server logic not yet implemented\n" if ( defined $config->{target_server}->{geli} );
    mountGeli( $config->{target_server}->{geli} ) if ( defined $config->{target_server}->{geli} );
} else {
    fatalError( "This server ($servername) is neither source nor target server as per config\n" );
}

# add disk space utilization information on transport to the log
logMsg( "Disk space utilization on transport disk:\n" . runCmd( "df -h $config->{transport}->{mount_point}" ) . "\n" );
# add information about the server (zpools) to the log
logMsg( "Zpools on server $servername:\n" . join( "\n", runCmd( "zpool list" ) ) . "\n" );

# unmount the sneakernet drive; list form of system avoids the shell and
# lets us notice (and log) a failed unmount
my $transportMountPoint = $config->{transport}->{mount_point};
system( 'umount', $transportMountPoint ) == 0
    or logMsg( "Warning: could not unmount $transportMountPoint (umount status $?)" );
# and remove the directory
rmdir $transportMountPoint
    or logMsg( "Warning: could not remove mount point $transportMountPoint: $!" );

# report settings live under 'source_server' / 'target_server' in the config,
# so the role name needs the '_server' suffix to find them
sendReport( $config->{"${runningAs}_server"}->{report}, "sneakernet replication completed on server $servername", $config->{log_file} );

1;
|
|
|
304 |
|
|
|
305 |
|
| 24 |
rodolico |
306 |
#`cat $config->{input} | openssl enc -aes-256-cbc -K $config->{key} -iv $config->{IV} > $config->{output}`;
|
|
|
307 |
|
|
|
308 |
# this will decrypt $config->{output} to stdout
|
|
|
309 |
#`cat $config->{output} | openssl enc -aes-256-cbc -d -K $config->{key} -iv $config->{IV} > test.out`;
|