| 24 |
rodolico |
1 |
#! /usr/bin/env perl
|
|
|
2 |
|
| 34 |
rodolico |
3 |
# Simplified BSD License (FreeBSD License)
|
|
|
4 |
#
|
|
|
5 |
# Copyright (c) 2025, Daily Data Inc.
|
|
|
6 |
# All rights reserved.
|
|
|
7 |
#
|
|
|
8 |
# Redistribution and use in source and binary forms, with or without
|
|
|
9 |
# modification, are permitted provided that the following conditions are met:
|
|
|
10 |
#
|
|
|
11 |
# 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
12 |
# list of conditions and the following disclaimer.
|
|
|
13 |
#
|
|
|
14 |
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
15 |
# this list of conditions and the following disclaimer in the documentation
|
|
|
16 |
# and/or other materials provided with the distribution.
|
|
|
17 |
#
|
|
|
18 |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
19 |
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
20 |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
21 |
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
|
22 |
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
23 |
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
24 |
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
25 |
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
|
26 |
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
27 |
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
28 |
|
|
|
29 |
# sneakernet.pl
|
|
|
30 |
# Script to perform sneakernet replication of ZFS datasets between two servers
|
|
|
31 |
# using an external transport drive.
|
|
|
32 |
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
|
|
|
33 |
# Optionally uses symmetric encryption to encrypt datasets during transport.
|
|
|
34 |
# On the target server, can optionally use GELI to encrypt the datasets on disk.
|
|
|
35 |
# Requires a configuration file in YAML format next to the script.
|
|
|
36 |
# Author: R. W. Rodolico <rodo@dailydata.net>
|
|
|
37 |
# Created: December 2025
|
|
|
38 |
# Revision History:
|
|
|
39 |
# Version: 0.1 2025-12-10 Initial version
|
|
|
40 |
|
| 24 |
rodolico |
41 |
use strict;
|
|
|
42 |
use warnings;
|
|
|
43 |
|
| 34 |
rodolico |
44 |
our $VERSION = '0.1';
|
|
|
45 |
|
| 24 |
rodolico |
46 |
use FindBin;
|
|
|
47 |
use lib "$FindBin::Bin/..";
|
| 27 |
rodolico |
48 |
use Data::Dumper;
|
| 42 |
rodolico |
49 |
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel unmountDriveByLabel mountGeli runCmd sendReport fatalError cleanDirectory $logFileName $displayLogsOnConsole);
|
| 24 |
rodolico |
50 |
|
| 35 |
rodolico |
51 |
# Directory containing the script (FindBin's RealBin, i.e. with links
# resolved) and the script's full path. The full path is the stem for the
# configuration, status and log file names.
my $scriptDirectory = $FindBin::RealBin;
my $scriptFullPath  = join( '/', $scriptDirectory, $FindBin::Script );

# Dry-run switch: while true, replication and shutdown commands are logged
# but not executed.
my $DEBUG = 1;

# Show all log messages on the console in addition to the log file.
$displayLogsOnConsole = 1;

# Configuration file: <script full path>.conf.yaml
my $configFileName = $scriptFullPath . '.conf.yaml';
|
| 24 |
rodolico |
62 |
|
|
|
63 |
# Built-in default configuration. The main program passes this structure to
# loadConfig() together with the YAML file name, so values from the YAML file
# are expected to override what is defined here.
my $config = {
    # file created on source server to track last copied dataset
    'status_file' => "$scriptFullPath.status",
    'log_file'    => "$scriptFullPath.log",

    # information about source server
    'source' => {
        'hostname' => '',        # used to see if we are on source
        'poolname' => 'pool',    # name of the ZFS pool to export
        # if set, will generate a report via email or by storing on a drive
        'report' => {
            'email'       => 'tech@example.org',
            'subject'     => 'AG Transport Report',
            'targetDrive' => {
                'fstype' => '',    # filesystem type of the report drive
                # How often to check for the disk (seconds), message displayed every interval
                'check_interval' => 15,
                'label'          => '',
                'mount_point'    => '',
            }
        }
    },

    # information about target server
    'target' => {
        'hostname' => '',          # used to see if we are on target
        'poolname' => 'backup',    # name of the ZFS pool to import
        'shutdown_after_replication' => 0,    # if set to 1, will shutdown the server after replication
        # if this is set, the dataset uses GELI, so we must decrypt and
        # mount it first
        'geli' => {
            # The original key was 'secureKey ' (with a trailing space), which
            # can never match a lookup of 'secureKey'.
            'secureKey' => {
                'label'          => 'replica',     # the GPT label of the key disk
                'fstype'         => 'ufs',         # filesystem type of the key disk
                'check_interval' => 15,
                'wait_timeout'   => 300,
                'keyfile'        => 'geli.key',    # the name of the key file on the secureKey disk
            },
            # NOTE(review): this looks like real key material committed to the
            # source; consider leaving it empty here and supplying it only in
            # the YAML configuration.
            'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c',    # hex representation of the local key part
            'target'   => '/media/geli.key',    # location to create the combined keyfile
            'poolname' => 'backup',             # name of the ZFS pool to import
            'diskList' => [
                'da0',
                'da1'
            ],    # list of disks to try to mount the dataset from
        },
        'report' => {
            'email'       => '',
            'subject'     => '',
            'targetDrive' => {
                'fstype'      => 'msdos',    # filesystem type of the report drive
                'label'       => 'sneakernet',
                'mount_point' => '',
            }
        }
    },

    'transport' => {
        # this is the GPT label of the sneakernet disk
        'label' => 'sneakernet',
        # this is the file system type. Not needed if ufs
        'fstype' => 'ufs',
        # where we want to mount it
        'mount_point' => '/mnt/sneakernet',
        # amount of time to wait for the disk to appear
        'timeout' => 600,
        # How often to check for the disk (seconds), message displayed every interval
        'check_interval' => 15,
        # if set, all files will be encrypted with this key/IV during transport
        # NOTE(review): a constant all-zero IV weakens CBC encryption when the
        # same key is reused; consider a per-file IV.
        'encryption' => {
            'key' => '',    # openssl rand 32 | xxd -p | tr -d '\n' > test.key
            'IV'  => '00000000000000000000000000000000',
        },
    },

    'datasets' => {
        'dataset1' => {
            'source'  => 'pool',        # the parent of the dataset on the source
            'target'  => 'backup',      # the parent of the dataset on the target
            'dataset' => 'dataset1',    # the dataset name
        },
        'files_share' => {
            'source'  => 'pool',
            'target'  => 'backup',
            'dataset' => 'files_share',
        },
    }
};
|
|
|
147 |
|
| 35 |
rodolico |
148 |
# Read the status file and return its lines (newlines removed) as an array
# reference.
#   $filename - path of the status file
# If the file does not exist or cannot be opened, the problem is logged and a
# reference to an empty list is returned.
sub getStatusFile {
    my $filename = shift;
    my @lines = ();
    my $fh;

    # guard clause: nothing readable means we start with an empty history
    unless ( -e $filename && open( $fh, '<', $filename ) ) {
        logMsg("Error: could not read status file '$filename', assuming a fresh start: $!");
        return \@lines;
    }

    @lines = <$fh>;
    chomp @lines;
    close $fh;
    logMsg("Read status file '$filename' with contents:\n" . join( "\n", @lines ) . "\n");
    return \@lines;
}
|
| 24 |
rodolico |
162 |
|
| 30 |
rodolico |
163 |
# Write the status list to file, one entry per line.
#   $filename   - path of the status file
#   $statusList - array reference of lines to write
# An existing status file is first renamed to "$filename.bak".
# Dies (after logging the reason) if the backup, the open, a write or the
# close fails; the die message now carries the same text that is logged.
sub writeStatusFile {
    my ( $filename, $statusList ) = @_;
    $statusList //= [];

    # backup existing status file
    if ( -e $filename ) {
        unless ( rename( $filename, "$filename.bak" ) ) {
            my $error = "Error: could not backup existing status file '$filename': $!";
            logMsg($error);
            die "$error\n";
        }
    }

    # write new status file
    my $fh;
    unless ( open $fh, '>', $filename ) {
        my $error = "Error: could not write status file '$filename': $!";
        logMsg($error);
        die "$error\n";
    }

    my $ioError;
    foreach my $line ( @$statusList ) {
        next if print {$fh} "$line\n";
        $ioError = "$!";
        last;
    }
    # buffered write errors only surface at close, so check it as well
    unless ( close $fh ) {
        $ioError //= "$!";
    }
    if ( defined $ioError ) {
        my $error = "Error: could not write status file '$filename': $ioError";
        logMsg($error);
        die "$error\n";
    }

    logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n");
    return;
}
|
|
|
185 |
|
| 31 |
rodolico |
186 |
# Convert a slash separated path such as root/dataset/dataset/dataset into
# dataset.dataset.dataset: the first path component is dropped and the
# remaining components are joined with dots.
#   $string - slash separated name
# Returns the dotted name (empty string when there is at most one component).
sub replaceSlashWithDot {
    my $string = shift;
    my @components = split m{/}, $string;
    # everything after the first component; an empty range yields an empty list
    return join '.', @components[ 1 .. $#components ];
}
|
|
|
194 |
|
| 30 |
rodolico |
195 |
# Perform replication on the source server.
#   $config     - configuration hashref
#   $statusList - list of last snapshots replicated for each dataset in
#                 previous replications
# Returns the new status list (array reference) that was handed to
# makeReplicateCommands for every dataset.
# For every configured dataset the snapshots are listed with zfs, the
# replication commands are obtained from makeReplicateCommands, and each
# command's output is redirected to a file on the transport mount point.
# While $DEBUG is set the commands are only logged, not run.
sub doSourceReplication {
    my ($config, $statusList) = @_;
    my $newStatus = [];
    foreach my $dataset ( sort keys %{$config->{datasets}} ) {
        logMsg("Processing dataset '$dataset'");
        my $datasetConfig = $config->{datasets}->{$dataset};

        # get list of all snapshots on dataset
        my $root = $datasetConfig->{source} . '/' . $datasetConfig->{dataset};
        my $sourceList = [ runCmd( "zfs list -rt snap -H -o name $root" ) ];

        # NOTE(review): the original applied s|<source>/|| to the array
        # reference itself, which never modified any snapshot name. That
        # statement was removed, so the names are still passed on unchanged.
        # The "^$root" strip below relies on names that keep their parent;
        # confirm against makeReplicateCommands before stripping per element.
        my $commands = makeReplicateCommands( $sourceList, $statusList, $newStatus );

        unless ( $commands && %$commands ) {
            logMsg( "Nothing to do for $dataset" );
            next;
        }

        # sorted so the order of the runs (and of the log) is reproducible
        foreach my $cmd ( sort keys %$commands ) {
            my $command = $commands->{$cmd};
            # the output file name is derived from the command key: strip the
            # dataset root (quoted, so names containing regex metacharacters
            # such as '.' match literally) and turn the rest into a dotted name
            my $outputFile = $cmd;
            $outputFile =~ s/^\Q$root\E//;
            $outputFile = replaceSlashWithDot($outputFile);
            #$command .= " | openssl enc -aes-256-cbc -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} " if $config->{transport}->{encryption}->{key};
            $command .= " > $config->{transport}->{mount_point}/" . $outputFile;
            logMsg("Running command: $command");
            runCmd( $command ) unless $DEBUG;
        }
    }
    return $newStatus;
}
|
|
|
230 |
|
| 42 |
rodolico |
231 |
# Perform cleanup actions at the end of a run (also passed to fatalError as
# a callback).
#   $config  - configuration hashref
#   $message - optional message to include in the report
#
# Logs disk usage of the transport disk and the zpool list, unmounts the
# transport drive, sends the report for the role this server runs as and,
# if the role's configuration requests it, shuts the server down (not while
# $DEBUG is set).
sub cleanup {
    my ( $config, $message ) = @_;

    # cleanup may be invoked before the role has been determined
    my $role = $config->{runningAs} // 'unknown';

    # add disk space utilization information on transport to the log;
    # the mount point is false when mounting the transport disk failed
    my $mountPoint = $config->{transport}->{mount_point};
    if ( $mountPoint ) {
        logMsg( "Disk space utilization on transport disk:\n" . join( "\n", runCmd( "df -h $mountPoint" ) ) . "\n" );
    } else {
        logMsg( "Transport disk is not mounted, skipping disk space utilization" );
    }

    # add information about the server (zpools) to the log
    my $servername = `hostname -s`;
    chomp $servername;
    logMsg( "Zpools on server $servername:\n" . join( "\n", runCmd( "zpool list" ) ) . "\n" );

    # ||= rather than //= so that an empty subject (the default for the
    # target server) is replaced as well
    my $report = $config->{$role}->{report} //= {};
    $report->{subject} ||= "Replication Report for $role server $servername";
    $message //= "Replication completed on $role server $servername.";

    # unmount the sneakernet drive
    unmountDriveByLabel( $config->{transport} );
    sendReport( $report, $message, $config->{log_file} );

    # If they have requested shutdown, do it now
    if ( $config->{$role}->{shutdown_after_replication} ) {
        logMsg( "Shutting down target server as per configuration" );
        runCmd( "shutdown -p now" ) unless $DEBUG;
    }
    return;
}
|
| 33 |
rodolico |
254 |
|
| 42 |
rodolico |
255 |
# Update the target datasets from the files on the transport drive.
#   $config - configuration hashref
#
# NOTE(review): this sub is unfinished. It only builds the command that
# produces the (optionally decrypted) stream for each file; nothing is run
# and nothing is received into ZFS yet.
# NOTE(review): getDirectoryList is not in the list of names imported from
# ZFS_Utils at the top of the file; confirm it exists and import it.
sub updateTarget {
    my $config = shift;
    my $transport  = $config->{transport};
    # key and IV live under transport->encryption, not at the top level
    my $encryption = $transport->{encryption} // {};

    my $files = getDirectoryList( $transport->{mount_point} );
    foreach my $filename ( @$files ) {
        # assumes getDirectoryList returns paths usable as-is - TODO confirm
        my $command = "cat $filename";
        # decrypt only when a transport key is configured
        $command .= " | openssl enc -aes-256-cbc -d -K $encryption->{key} -iv $encryption->{IV}"
            if $encryption->{key};
        # TODO: feed the stream to the receiving side and run the command
    }
    return;
}
|
| 30 |
rodolico |
263 |
|
|
|
264 |
##################### main program starts here #####################
|
|
|
265 |
# Example to create a random key for encryption/decryption:
|
| 24 |
rodolico |
266 |
# generate a random key with
|
|
|
267 |
# openssl rand 32 | xxd -p | tr -d '\n' > test.key
|
|
|
268 |
|
|
|
269 |
# If a YAML config file exists next to the script, load and merge it
$config = loadConfig($configFileName, $config );
exit 1 unless ref $config eq 'HASH' && keys %$config;

# set some defaults
$config->{'status_file'} //= "$scriptFullPath.status";
# set log file name for sub logMsg in ZFS_Utils, and remove the old log if it exists
# Log file is only valid for one run
$logFileName = $config->{'log_file'} //= "$scriptFullPath.log";
unlink ( $logFileName ) if -f $logFileName;

# cleanup() reads runningAs, and it can be invoked by the fatalError below
# before the role is known, so give it a defined value first
$config->{runningAs} = 'unknown';

fatalError( "Invalid config file: missing source and/or target server", $config, \&cleanup )
    unless (defined $config->{source} && defined $config->{target});

# decide which role this server plays by comparing its short hostname with
# the configured source and target hostnames
my $servername = `hostname -s`;
chomp $servername;
$config->{runningAs} = $servername eq $config->{source}->{hostname} ? 'source'
                     : $servername eq $config->{target}->{hostname} ? 'target'
                     :                                                'unknown';

#cleanup( $config, "Testing" );

# mount the transport drive, fatal error if we can not find it
# (the configuration key holding the label is 'label')
$config->{transport}->{mount_point} = mountDriveByLabel( $config->{transport} );
fatalError( "Unable to mount transport drive with label $config->{transport}->{label}", $config, \&cleanup )
    unless $config->{transport}->{mount_point};

# main program logic
if ( $config->{runningAs} eq 'source' ) {
    logMsg( "Running as source server" );
    # remove all files from transport disk, but leave all subdirectories alone
    fatalError( "Failed to clean transport directory $config->{transport}->{mount_point}", $config, \&cleanup )
        unless cleanDirectory( $config->{transport}->{mount_point} );
    my $statusList = getStatusFile($config->{status_file});
    $statusList = doSourceReplication($config, $statusList);
    writeStatusFile($config->{status_file}, $statusList);
} elsif ( $config->{runningAs} eq 'target' ) {
    logMsg( "Running as target server" );
    mountGeli( $config->{target}->{geli} ) if ( defined $config->{target}->{geli} );
    updateTarget( $config );
} else {
    fatalError( "This server ($servername) is neither source nor target server as per config\n" );
}

cleanup( $config );

1;
|