#! /usr/bin/env perl

# Simplified BSD License (FreeBSD License)
#
# Copyright (c) 2025, Daily Data Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# sneakernet.pl
# Script to perform sneakernet replication of ZFS datasets between two servers
# using an external transport drive.
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
# Optionally uses symmetric encryption to encrypt datasets during transport.
# On the target server, can optionally use GELI to encrypt the datasets on disk.
# Requires a configuration file in YAML format next to the script.
# Author: R. W. Rodolico <rodo@dailydata.net>
# Created: December 2025
# Revision History:
# Version: 0.1 2025-12-10 Initial version

| 24 |
rodolico |
41 |
use strict;
|
|
|
42 |
use warnings;
|
|
|
43 |
|
| 34 |
rodolico |
44 |
our $VERSION = '0.1';
|
|
|
45 |
|
| 24 |
rodolico |
46 |
use FindBin;
|
|
|
47 |
use lib "$FindBin::Bin/..";
|
| 27 |
rodolico |
48 |
use Data::Dumper;
|
|
|
49 |
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel mountGeli runCmd $logFileName $displayLogsOnConsole);
|
| 24 |
rodolico |
50 |
|
| 33 |
rodolico |
51 |
# If set to a true value, the replication commands are only logged, not run
# (see doSourceReplication).
my $DEBUG = 0;

# set the log file to be next to this script
$logFileName = "$FindBin::Bin/sneakernet.log";
# log only for one run
unlink ( $logFileName ) if -f $logFileName;

# display all log messages on console in addition to the log file
$displayLogsOnConsole = 1;

# name of the YAML configuration file: the script path with '.conf.yaml' appended
my $configFileName = "$0.conf.yaml";

# Built-in default configuration. It is handed to loadConfig() together with
# $configFileName in the main program below.
my $config = {
    # file created on source server to track last copied dataset
    'status_file' => "$0.status",
    # information about source server
    'source_server' => {
        'hostname' => '', # used to see if we are on source
        'poolname' => 'pool', # name of the ZFS pool to export
        # if set, will generate a report via email or by storing on a drive
        'report' => {
            'email' => 'tech@example.org',
            'subject' => 'AG Transport Report',
            'targetDrive' => {
                'label' => '',
                'mount_point' => '',
            }
        }
    },
    # information about target server
    'target_server' => {
        'hostname' => '', # used to see if we are on target
        'poolname' => 'backup', # name of the ZFS pool to import
        # if this is set, the dataset uses GELI, so we must decrypt and
        # mount it first
        'geli' => {
            'keydiskname' => 'replica', # the GPT label of the key disk
            'keyfile' => 'geli.key', # the name of the key file on keydiskname
            'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c', # hex representation of the local key part
            'target' => '/media/geli.key', # location to create the combined keyfile
            'poolname' => 'backup', # name of the ZFS pool to import
            'diskList' => [
                'da0',
                'da1'
            ], # list of disks to try to mount the dataset from
        },
        'report' => {
            'email' => '',
            'subject' => '',
            'targetDrive' => {
                'label' => 'sneakernet_report',
                'mount_point' => '/mnt/sneakernet_report',
            }
        }
    },
    'transport' => {
        # this is the GPT label of the sneakernet disk
        'disk_label' => 'sneakernet',
        # where we want to mount it
        'mount_point' => '/mnt/sneakernet',
        # amount of time to wait for the disk to appear
        'timeout' => 600,
        # if set, all files will be encrypted with this key/IV during transport
        'encryption' => {
            'key' => '', # openssl rand 32 | xxd -p | tr -d '\n' > test.key
            'IV' => '00000000000000000000000000000000',
        },
    },
    'datasets' => {
        'dataset1' => {
            'source' => 'pool/dataset1',
            'target' => 'backup/dataset1',
            'filename' => 'dataset1'
        },
        'files_share' => {
            'source' => 'pool/files_share',
            'target' => 'backup/files_share',
            'filename' => 'files_share'
        },
    }
};
|
|
|
133 |
|
| 30 |
rodolico |
134 |
# Read the status (history) file and return its contents.
#   $filename - path of the status file
# Returns a reference to an array holding one chomped line per element.
# If the file does not exist or cannot be opened, the problem is logged and
# a reference to an empty array is returned, i.e. a fresh start is assumed.
sub getStatusFile {
    my $filename = shift;
    # read in history/status file
    my @lines = ();
    if ( -e $filename && open( my $fh, '<', $filename ) ) {
        chomp( @lines = <$fh> );
        close $fh;
        logMsg("Read status file '$filename' with contents:\n" . join( "\n", @lines ) . "\n");
    } else {
        logMsg("Error: could not read status file '$filename', assuming a fresh start: $!");
    }
    return \@lines;
}
|
| 24 |
rodolico |
148 |
|
| 30 |
rodolico |
149 |
# Write the status list to the status file, one entry per line.
#   $filename   - path of the status file
#   $statusList - reference to an array of lines to write
# An existing status file is first renamed to "$filename.bak".
# Logs the error and dies if the backup, the open, a write or the close fails.
sub writeStatusFile {
    my ( $filename, $statusList ) = @_;
    # backup existing status file
    if ( -e $filename ) {
        rename( $filename, "$filename.bak" ) or do {
            logMsg("Error: could not backup existing status file '$filename': $!");
            die;
        };
    }
    # write new status file
    open( my $fh, '>', $filename ) or do {
        logMsg("Error: could not write status file '$filename': $!");
        die;
    };
    foreach my $line ( @$statusList ) {
        print {$fh} "$line\n" or do {
            logMsg("Error: could not write status file '$filename': $!");
            die;
        };
    }
    # output is buffered, so a failed write (e.g. disk full) may only be
    # reported when the handle is closed
    close( $fh ) or do {
        logMsg("Error: could not write status file '$filename': $!");
        die;
    };
    logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n");
}
|
|
|
171 |
|
| 31 |
rodolico |
172 |
# Turn a dataset path such as root/dataset/dataset/dataset into
# dataset.dataset.dataset
#   $string - slash separated path
# The first path component is dropped and the remaining components are
# joined with dots. A path with a single component yields an empty string.
sub replaceSlashWithDot {
    my $string = shift;
    my @parts = split( "/", $string );
    # discard the leading component
    shift @parts;
    return join( '.', @parts );
}
|
|
|
180 |
|
| 30 |
rodolico |
181 |
# Perform replication on the source server: for every configured dataset,
# build the replication commands and run them with their output redirected
# to a file on the transport (sneakernet) disk.
#   $config     - configuration hashref
#   $statusList - list of last snapshots replicated for each dataset in
#                 previous replications
# Returns $newStatus, an array reference that is handed to
# makeReplicateCommands for every dataset (presumably filled there with the
# updated last snapshots -- confirm against ZFS_Utils).
# When $DEBUG is set the commands are only logged, not run.
sub doSourceReplication {
    my ($config, $statusList) = @_;
    my $newStatus = [];
    foreach my $dataset ( sort keys %{$config->{datasets}} ) {
        logMsg("Processing dataset '$dataset'");
        # get list of all snapshots on dataset
        my $sourceList = [ runCmd( "zfs list -rt snap -H -o name $config->{datasets}->{$dataset}->{source} " ) ];
        # hashref: each value is a command string, each key is used below to
        # derive the name of the output file
        my $commands = makeReplicateCommands($sourceList, $statusList, $newStatus );
        if ( %$commands ) {
            foreach my $cmd ( keys %$commands ) {
                my $command = $commands->{$cmd};
                # copy of the command for the log; it must never contain the key
                my $loggedCommand = $command;
                if ( $config->{transport}->{encryption}->{key} ) {
                    $command .= " | openssl enc -aes-256-cbc -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} ";
                    $loggedCommand .= " | openssl enc -aes-256-cbc -K <redacted> -iv $config->{transport}->{encryption}->{IV} ";
                }
                my $redirect = " > $config->{transport}->{mount_point}/" . replaceSlashWithDot($cmd);
                $command .= $redirect;
                $loggedCommand .= $redirect;
                logMsg("Running command: $loggedCommand");
                runCmd( $command ) unless $DEBUG;
            }
        } else {
            logMsg( "Nothing to do for $dataset" );
        }
    }
    return $newStatus;
}
|
|
|
209 |
|
| 33 |
rodolico |
210 |
# Remove all plain files from a directory, leaving subdirectories (and their
# contents) alone.
#   $dirname - directory to clean
# Calls fatalError if the directory cannot be opened; a file that cannot be
# removed only produces a warning.
sub cleanDirectory {
    my $dirname = shift;
    logMsg( "Cleaning up $dirname of all files" );
    # clean up a directory
    opendir( my $dh, $dirname ) || fatalError( "Can not open $dirname: $!" );
    # get all file names, but leave directories alone
    my @files = map { "$dirname/$_" } grep { -f "$dirname/$_" } readdir($dh);
    closedir $dh;
    foreach my $file (@files) {
        unlink $file or warn "Could not unlink $file: $!\n";
    }
}
|
|
|
223 |
|
|
|
224 |
|
|
|
225 |
|
| 31 |
rodolico |
226 |
# Handle a fatal error: log the message, then terminate the script.
#   $message - text to pass to logMsg before dying
# Does not return.
sub fatalError {
    my $message = shift;
    logMsg( $message );
    die;
}
|
| 30 |
rodolico |
232 |
|
| 31 |
rodolico |
233 |
|
| 30 |
rodolico |
234 |
##################### main program starts here #####################
# Example to create a random key for encryption/decryption:
# generate a random key with
# openssl rand 32 | xxd -p | tr -d '\n' > test.key

# If a YAML config file exists next to the script, load and merge it
$config = loadConfig($configFileName, $config );

# set some defaults
$config->{'status_file'} //= "$0.status";

fatalError( "Invalid config file: missing source and/or target server" )
    unless (defined $config->{source_server} && defined $config->{target_server});

# mount the transport drive, fatal error if we can not find it
# the value returned by mountDriveByLabel replaces the configured mount point
fatalError( "Unable to mount tranport drive with label $config->{transport}->{disk_label}" )
    unless $config->{transport}->{mount_point} = mountDriveByLabel( $config->{transport}->{disk_label}, $config->{transport}->{mount_point}, $config->{transport}->{timeout} );

# decide which role to play by comparing the short host name with the config
# NOTE(review): the branches below that die leave the transport drive mounted
my $servername = `hostname -s`;
chomp $servername;
if ( $servername eq $config->{source_server}->{hostname} ) {
    logMsg( "Running as source server" );
    # remove all files from transport disk, but leave all subdirectories alone
    cleanDirectory( $config->{transport}->{mount_point} );
    my $statusList = getStatusFile($config->{status_file});
    $statusList = doSourceReplication($config, $statusList);
    writeStatusFile($config->{status_file}, $statusList);
} elsif ( $servername eq $config->{target_server}->{hostname} ) {
    logMsg( "Running as target server" );
    # target side is not implemented yet; the statements after this die are
    # unreachable until it is removed
    die "Target Server code not complete\n";
    die "GELI target server logic not yet implemented\n" if ( defined $config->{target_server}->{geli} );
    mountGeli( $config->{target_server}->{geli} ) if ( defined $config->{target_server}->{geli} );
} else {
    logMsg( "This server ($servername) is neither source nor target server as per config\n" );
    die;
}

# unmount the sneakernet drive
# list form of system avoids the shell; a failure is logged, not fatal
system( 'umount', $config->{transport}->{mount_point} ) == 0
    or logMsg( "Warning: could not unmount $config->{transport}->{mount_point} (status $?)" );
# and remove the directory
rmdir( $config->{transport}->{mount_point} )
    or logMsg( "Warning: could not remove directory $config->{transport}->{mount_point}: $!" );

1;


#`cat $config->{input} | openssl enc -aes-256-cbc -K $config->{key} -iv $config->{IV} > $config->{output}`;

# this will decrypt $config->{output} to stdout
#`cat $config->{output} | openssl enc -aes-256-cbc -d -K $config->{key} -iv $config->{IV} > test.out`;
|