Subversion Repositories zfs_utils

Rev

Rev 45 | Rev 48 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
24 rodolico 1
#! /usr/bin/env perl
2
 
34 rodolico 3
# Simplified BSD License (FreeBSD License)
4
#
5
# Copyright (c) 2025, Daily Data Inc.
6
# All rights reserved.
7
#
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are met:
10
#
11
# 1. Redistributions of source code must retain the above copyright notice, this
12
#    list of conditions and the following disclaimer.
13
#
14
# 2. Redistributions in binary form must reproduce the above copyright notice,
15
#    this list of conditions and the following disclaimer in the documentation
16
#    and/or other materials provided with the distribution.
17
#
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 
29
# sneakernet.pl
30
# Script to perform sneakernet replication of ZFS datasets between two servers
31
# using an external transport drive.
32
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
33
# Optionally uses symmetric encryption to encrypt datasets during transport.
34
# On the target server, can optionally use GELI to encrypt the datasets on disk.
35
# Requires a configuration file in YAML format next to the script.
36
# Author: R. W. Rodolico <rodo@dailydata.net>
37
# Created: December 2025
38
# Revision History:
39
# Version: 0.1 2025-12-10 Initial version
40
 
24 rodolico 41
use strict;
42
use warnings;
43
 
34 rodolico 44
our $VERSION = '0.1';
45
 
46 rodolico 46
use File::Basename;
24 rodolico 47
use FindBin;
48
use lib "$FindBin::Bin/..";
27 rodolico 49
use Data::Dumper;
46 rodolico 50
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel unmountDriveByLabel mountGeli runCmd sendReport fatalError getDirectoryList cleanDirectory $logFileName $displayLogsOnConsole);
44 rodolico 51
use Getopt::Long qw(GetOptions);
52
Getopt::Long::Configure ("bundling");
24 rodolico 53
 
35 rodolico 54
# Directory containing the script, as reported by FindBin's RealBin
my $scriptDirectory = $FindBin::RealBin;
# Script path; used as the common prefix for the config, status and log file names
my $scriptFullPath = join( '/', $scriptDirectory, $FindBin::Script );
# YAML configuration file, named after the script
my $configFileName = $scriptFullPath . '.conf.yaml';

# show every log message on the console in addition to writing it to the log file
$displayLogsOnConsole = 1;
24 rodolico 61
 
62
# Built-in default configuration. It is handed to loadConfig together with the
# YAML file name, and later doubles as the storage hash for GetOptions.
my $config = {
   'dryrun' => 0, # set by --dryrun/-n; commands are logged but not run
   'verbosity' => 1,
   # file created on source server to track last copied dataset
   'status_file' => "$scriptFullPath.status",
   'log_file' => "$scriptFullPath.log",
   # information about source server
   'source' => {
      'hostname' => '', # used to see if we are on source
      'poolname' => 'pool', # name of the ZFS pool to export
      # if set, will generate a report via email or by storing on a drive
      'report' => {
         'email' => 'tech@example.org',
         'subject' => 'AG Transport Report',
         'targetDrive' => {
            'fstype' => '', # filesystem type of the report drive
            # How often to check for the disk (seconds), message displayed every interval
            'check_interval' => 15,
            'label' => '',
            'mount_point' => '',
         }
      }
   },
   # information about target server
   'target' => {
      'hostname' => '', # used to see if we are on target
      'poolname' => 'backup', # name of the ZFS pool to import
      'shutdown_after_replication' => 0, # if set to 1, will shutdown the server after replication
      # if this is set, the dataset uses GELI, so we must decrypt and
      # mount it first
      'geli' => {
         # key must be exactly 'secureKey' (no trailing space): the main
         # program looks it up as $config->{target}->{geli}->{secureKey}
         'secureKey' => {
            'label' => 'replica', # the GPT label of the key disk
            'fstype' => 'ufs', # filesystem type of the key disk
            'check_interval' => 15,
            'wait_timeout' => 300,
            'keyfile' => 'geli.key', # the name of the key file on the secureKey disk
         },
         'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c', # hex representation of the local key part
         'target' => '/media/geli.key', # location to create the combined keyfile
         'poolname' => 'backup', # name of the ZFS pool to import
         'diskList' => [
            'da0',
            'da1'
         ], # list of disks to try to mount the dataset from
      },
      'report' => {
         'email' => '',
         'subject' => '',
         'targetDrive' => {
            'fstype' => 'msdos', # filesystem type of the report drive
            'label' => 'sneakernet',
            'mount_point' => '',
         }
      }
   },
   'transport' => {
      # this is the GPT label of the sneakernet disk
      'label' => 'sneakernet',
      # this is the file system type. Not needed if ufs
      'fstype' => 'ufs',
      # where we want to mount it
      'mount_point' => '/mnt/sneakernet',
      # amount of time to wait for the disk to appear
      'timeout' => 600,
      # How often to check for the disk (seconds), message displayed every interval
      'check_interval' => 15,
      # if set, all files will be encrypted with this key/IV during transport
      # NOTE(review): the same key and IV are used for every file (AES-256-CBC);
      # confirm this is acceptable for the data being transported
      'encryption' => {
         'key'    => '', # openssl rand 32 | xxd -p | tr -d '\n' > test.key
         'IV'     => '00000000000000000000000000000000',
      },
   },
   # datasets to replicate, keyed by dataset name
   'datasets' => {
      'dataset1' => {
         'source' => 'pool', # the parent of the dataset on the source
         'target' => 'backup', # the parent of the dataset on the target
         'dataset' => 'dataset1', # the dataset name
      },
      'files_share'  => {
         'source' => 'pool',
         'target' => 'backup',
         'dataset' => 'files_share',
      },
   }
};
148
 
35 rodolico 149
# Read the status file into memory.
# $filename - path of the status file
# Returns an arrayref holding the lines of the file with line endings removed.
# The arrayref is empty when the file does not exist or cannot be opened;
# that case is logged and treated as a fresh start.
sub getStatusFile {
   my ($statusPath) = @_;
   my @entries;
   my $fh;
   unless ( -e $statusPath && open( $fh, '<', $statusPath ) ) {
      logMsg("Error: could not read status file '$statusPath', assuming a fresh start: $!");
      return \@entries;
   }
   @entries = <$fh>;
   chomp @entries;
   close $fh;
   logMsg("Read status file '$statusPath' with contents:\n" . join( "\n", @entries ) . "\n");
   return \@entries;
}
24 rodolico 163
 
30 rodolico 164
# Write the status list to file, one entry per line.
# $filename   - path of the status file
# $statusList - arrayref of lines to write
# An existing status file is first renamed to "$filename.bak".
# Dies (after logging the reason) if the backup, open, write or close fails,
# so a truncated status file is never silently accepted.
sub writeStatusFile {
   my ( $filename, $statusList ) = @_;

   # small helper: log the problem, then abort with the same message
   my $fail = sub {
      my $error = shift;
      logMsg($error);
      die "$error\n";
   };

   # backup existing status file
   if ( -e $filename ) {
      rename( $filename, "$filename.bak" )
         or $fail->("Error: could not backup existing status file '$filename': $!");
   }

   # write new status file
   open( my $fh, '>', $filename )
      or $fail->("Error: could not write status file '$filename': $!");
   foreach my $line ( @$statusList ) {
      print {$fh} "$line\n"
         or $fail->("Error: could not write status file '$filename': $!");
   }
   # buffered write errors (e.g. disk full) only surface when the handle is closed
   close($fh)
      or $fail->("Error: could not write status file '$filename': $!");

   logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n");
}
186
 
31 rodolico 187
# Replace every occurrence of one delimiter in a string with another, e.g.
# pool/dataset/child becomes pool.dataset.child
# $string       - the string to convert
# $delimiter    - literal text to split on (default '/')
# $substitution - text used to re-join the pieces (default '.')
# Returns the converted string. Because the string is split and re-joined,
# trailing delimiters are dropped (a/b/ becomes a.b).
sub dirnameToFileName {
   my $path = shift;
   my $from = shift // '/';
   my $to   = shift // '.';
   my @pieces = split /\Q$from\E/, $path;
   return join $to, @pieces;
}
196
 
30 rodolico 197
# Perform replication on the source server: for every configured dataset,
# build the replication commands and run them with their output redirected
# to a file on the transport drive.
# $config     - configuration hashref
# $statusList - list of last snapshots replicated for each dataset in previous replications
# Returns the new status list (arrayref). It is passed to makeReplicateCommands
# for every dataset; presumably that sub fills it in - verify in ZFS_Utils.
# Commands are logged but not executed when $config->{dryrun} is set.
sub doSourceReplication {
   my ($config, $statusList) = @_;
   my $newStatus = [];
   foreach my $dataset ( sort keys %{$config->{datasets}} ) {
      logMsg("Processing dataset '$dataset'");
      my $datasetConfig = $config->{datasets}->{$dataset};

      # get list of all snapshots on dataset
      # testing hook: if a file named test.status exists next to the script,
      # its lines are used instead of querying zfs
      my $sourceList;
      if ( -e "$scriptDirectory/test.status") {
         $sourceList = getStatusFile( "$scriptDirectory/test.status" );
      } else {
         $sourceList = [ runCmd( "zfs list -rt snap -H -o name $datasetConfig->{source}" ) ];
      }

      # process dataset here
      my $commands = makeReplicateCommands(
                        $sourceList,
                        $statusList,
                        $dataset,
                        $datasetConfig->{source},
                        $datasetConfig->{target},
                        $newStatus
                     );
      if ( %$commands ) {
         foreach my $cmd ( keys %$commands ) {
            my $command = $commands->{$cmd};
            # the hash key names the output; turn it into a flat file name
            # (slashes become dots) for the transport drive
            my $outputFile = dirnameToFileName( $cmd );
            # optionally encrypt the stream before it reaches the transport drive
            $command .= " | openssl enc -aes-256-cbc -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} " if $config->{transport}->{encryption}->{key};
            $command .= " > $config->{transport}->{mount_point}/$outputFile";
            logMsg("Running command: $command");
            runCmd( $command ) unless $config->{dryrun};
         }
      } else {
         logMsg( "Nothing to do for $dataset" );
      }
   }
   return $newStatus;
}
244
 
42 rodolico 245
# Perform end-of-run actions: log disk and zpool information, unmount the
# transport drive, send the report and optionally shut the server down.
# $config  - configuration hashref; $config->{runningAs} selects which
#            section ('source' or 'target') supplies the report settings
# $message - optional message to include in the report
# Unmounting and shutdown are skipped when $config->{dryrun} is set.
sub cleanup{
   my ( $config, $message ) = @_;
   # cleanup may be invoked before the role has been determined
   my $role = $config->{runningAs} // 'unknown';

   # add disk space utilization information on transport to the log
   logMsg( "Disk space utilization on transport disk:\n" . join( "\n", runCmd( "df -h $config->{transport}->{mount_point}" ) ) . "\n" );
   # add information about the server (zpools) to the log
   my $servername = `hostname -s`;
   chomp $servername;
   logMsg( "Zpools on server $servername:\n" . join( "\n", runCmd( "zpool list" ) ) . "\n" );

   # supply a default subject when none is configured; the built-in defaults
   # use an empty string, so test for empty as well as undefined
   my $report = $config->{$role}->{report} //= {};
   $report->{subject} = "Replication Report for $role server $servername"
      unless defined $report->{subject} && length $report->{subject};
   $message //= "Replication completed on $role server $servername.";

   # unmount the sneakernet drive
   unmountDriveByLabel( $config->{transport} ) unless $config->{dryrun};
   sendReport( $report, $message, $config->{log_file} );

   # If they have requested shutdown, do it now
   if ( $config->{$role}->{shutdown_after_replication} ) {
      logMsg( "Shutting down target server as per configuration" );
      runCmd( "shutdown -p now" ) unless $config->{dryrun};
   }
}
33 rodolico 268
 
42 rodolico 269
# Update the target datasets from the files on the transport drive.
# $config - configuration hashref
# Each file name is expected to look like dataset.rest.of.name: the first
# dot-separated element selects the entry in $config->{datasets}, and the
# whole name, with dots turned back into slashes, is appended to that
# entry's target parent to form the dataset to receive into.
# NOTE(review): names that legitimately contain dots would be altered by
# this mapping - confirm against the names makeReplicateCommands produces.
# Files that match no configured dataset are logged and skipped.
# Commands are logged but not executed when $config->{dryrun} is set.
sub updateTarget {
   my $config = shift;
   my $files = getDirectoryList( $config->{transport}->{mount_point});
   foreach my $filename ( @$files ) {
      my $transportName = basename( $filename );
      # grab only the first element of a string which has internal delimiters
      my ($dataset) = split( /\./, $transportName, 2 );
      unless ( defined $dataset && exists $config->{datasets}->{$dataset} ) {
         logMsg( "Skipping '$filename': no matching dataset in configuration" );
         next;
      }
      my $targetDataset = $config->{datasets}->{$dataset}->{target} . '/' . dirnameToFileName( $transportName, '.', '/' );
      my $command = "cat $filename";
      # decrypt with the same key/IV the source used, when encryption is configured
      $command .= " | openssl enc -aes-256-cbc -d -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} " if $config->{transport}->{encryption}->{key};
      $command .= " | zfs receive -F $targetDataset";
      logMsg( $command );
      runCmd( $command ) unless $config->{dryrun};
   }
}
30 rodolico 284
 
285
##################### main program starts here #####################
286
# Example to create a random key for encryption/decryption:
24 rodolico 287
# generate a random key with
288
# openssl rand 32 | xxd -p | tr -d '\n' > test.key
289
 
290
# Load the YAML config file named after the script (if any) and merge it
# with the built-in defaults
$config = loadConfig( $configFileName, $config );
# an empty configuration came back; nothing sensible can be done
unless ( keys %$config ) {
   exit 1;
}

# parse CLI options; values are stored directly in the config hash under the
# first name of each option
# NOTE(review): --verbose is stored as $config->{verbose}, while the default
# config key is 'verbosity' - confirm which one the consumers read
my @optionSpecs = ( 'dryrun|n', 'verbose|v+', 'version|V', 'help|h' );
unless ( GetOptions( $config, @optionSpecs ) ) {
   print "Invalid options\n";
   exit 2;
}

# --help wins over --version when both are given
if ( defined $config->{help} ) {
   print "Usage: $FindBin::Script [--dryrun] [--verbose] [--help]\n";
   print "  --dryrun, -n   Run in dry-run mode (no writes)\n";
   print "  --verbose, -v  Run in verbose mode (more v's mean more verbose)\n";
   print "  --version, -V  Display version number\n";
   exit 0;
}
if ( defined $config->{version} ) {
   print "$FindBin::Script v$VERSION\n";
   exit 0;
}
311
 
25 rodolico 312
# fall back to file names derived from the script path when the config leaves them unset
$config->{'status_file'} //= "$scriptFullPath.status";
$config->{'log_file'} //= "$scriptFullPath.log";

# tell ZFS_Utils (sub logMsg) where to log; the log is only valid for one
# run, so remove any log left over from a previous run
$logFileName = $config->{'log_file'};
if ( -f $logFileName ) {
   unlink( $logFileName );
}

# both server sections are required
unless ( defined $config->{source} && defined $config->{target} ) {
   fatalError( "Invalid config file: missing source and/or target server", $config, \&cleanup );
}

# decide which role this machine plays by comparing its short hostname with
# the hostnames in the config; the source is checked first
my $servername = `hostname -s`;
chomp $servername;
if ( $servername eq $config->{source}->{hostname} ) {
   $config->{runningAs} = 'source';
} elsif ( $servername eq $config->{target}->{hostname} ) {
   $config->{runningAs} = 'target';
} else {
   $config->{runningAs} = 'unknown';
}

# mount the transport drive and record what mountDriveByLabel returned as
# the mount point; fatal error if we can not find it
my $mountedAt = mountDriveByLabel( $config->{transport} );
$config->{transport}->{mount_point} = $mountedAt;
unless ( $mountedAt ) {
   fatalError( "Unable to mount tranport drive with label $config->{transport}->{label}", $config, \&cleanup );
}
42 rodolico 333
 
44 rodolico 334
# main program logic: act according to the role determined from the hostname
if ( $config->{runningAs} eq 'source' ) {
   logMsg( "Running as source server" );
   # remove all files from transport disk, but leave all subdirectories alone
   # (skipped in dry-run mode)
   fatalError( "Failed to clean transport directory $config->{transport}->{mount_point}", $config, \&cleanup )
      unless $config->{dryrun} or cleanDirectory( $config->{transport}->{mount_point} );
   # replicate, then remember what was sent for the next run
   my $statusList = getStatusFile( $config->{status_file} );
   $statusList = doSourceReplication( $config, $statusList );
   writeStatusFile( $config->{status_file}, $statusList ) unless $config->{dryrun};
} elsif ( $config->{runningAs} eq 'target' ) {
   logMsg( "Running as target server" );
   if ( defined $config->{target}->{geli} ) {
      mountGeli( $config->{target}->{geli} );
      # release the key disk, unless it carries the same label as the
      # transport disk, which cleanup unmounts at the end of the run
      my $secureKey = $config->{target}->{geli}->{secureKey};
      if (   ref $secureKey eq 'HASH'
          && length( $secureKey->{label} // '' )
          && $secureKey->{label} ne ( $config->{transport}->{label} // '' ) ) {
         unmountDriveByLabel( $secureKey );
      }
   }
   print "Please insert device labeled REPORT\n" if $config->{target}->{report}->{targetDrive}->{label};
   updateTarget( $config );
} else {
   fatalError( "This server ($servername) is neither source nor target server as per config\n" );
}

# log system state, unmount the transport drive and send the report
cleanup( $config );

1;