Subversion Repositories zfs_utils

Rev

Rev 57 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
24 rodolico 1
#! /usr/bin/env perl
2
 
34 rodolico 3
# Simplified BSD License (FreeBSD License)
4
#
5
# Copyright (c) 2025, Daily Data Inc.
6
# All rights reserved.
7
#
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are met:
10
#
11
# 1. Redistributions of source code must retain the above copyright notice, this
12
#    list of conditions and the following disclaimer.
13
#
14
# 2. Redistributions in binary form must reproduce the above copyright notice,
15
#    this list of conditions and the following disclaimer in the documentation
16
#    and/or other materials provided with the distribution.
17
#
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 
60 rodolico 29
# sneakernet
34 rodolico 30
# Script to perform sneakernet replication of ZFS datasets between two servers
31
# using an external transport drive.
32
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
33
# Optionally uses symmetric encryption to encrypt datasets during transport.
34
# On the target server, can optionally use GELI to encrypt the datasets on disk.
35
# Requires a configuration file in YAML format next to the script.
36
# Author: R. W. Rodolico <rodo@dailydata.net>
37
# Created: December 2025
48 rodolico 38
#
34 rodolico 39
# Revision History:
48 rodolico 40
# Version: 0.1 RWR 2025-12-10
41
# Development version
42
#
43
# Version: 1.0 RWR 2025-12-15
44
# Tested and ready for initial release
51 rodolico 45
#
46
# Version: 1.0.1 RWR 2025-12-15
47
# Added verbose logging control to logMsg calls, controlled by ZFS_Utils::$verboseLoggingLevel
60 rodolico 48
#
49
# Version: 1.1 RWR 2025-12-17
50
# Added filtering so only the snapshots for the current dataset are considered when
51
# generating replication commands.
52
# added test.source.status (in script directory) file for dry-run testing of source replication.
53
# changed default name of status file to sneakernet_target.status to avoid confusion.
54
# added more logging of source/target snapshots in doSourceReplication for debugging.
55
#
56
# Version: 1.1.1 RWR 2025-12-18
57
# Fixed makeReplicateCommands call to pass actual dataset name from config rather than config key.
58
# Updated documentation to clarify how makeReplicateCommands filters snapshots by parent+dataset path
59
# to avoid false matches with similarly-named datasets in different locations.
34 rodolico 60
 
48 rodolico 61
 
24 rodolico 62
use strict;
63
use warnings;
64
 
60 rodolico 65
our $VERSION = '1.1.1';
34 rodolico 66
 
46 rodolico 67
use File::Basename;
24 rodolico 68
use FindBin;
69
use lib "$FindBin::Bin/..";
27 rodolico 70
use Data::Dumper;
51 rodolico 71
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel unmountDriveByLabel mountGeli runCmd sendReport fatalError getDirectoryList cleanDirectory $logFileName $displayLogsOnConsole $verboseLoggingLevel);
44 rodolico 72
use Getopt::Long qw(GetOptions);
73
Getopt::Long::Configure ("bundling");
24 rodolico 74
 
35 rodolico 75
# Directory containing the real script file, and the script's full path.
# The full path is used as the prefix for the config and log file names.
my $scriptDirectory = $FindBin::RealBin;
my $scriptFullPath = "$scriptDirectory/" . $FindBin::Script;

# display all log messages on console in addition to the log file
$displayLogsOnConsole = 1;

# YAML configuration file is expected next to the script: <script>.conf.yaml
my $configFileName = "$scriptFullPath.conf.yaml";

# Built-in default configuration. It is handed to loadConfig together with
# $configFileName in the main program.
my $config = {
   'dryrun' => 0,
   'verbosity' => 1,
   # file created on source server to track last copied dataset, assuming
   # the import goes well on the target server
   'status_file' => "$scriptDirectory/sneakernet_target.status",
   'log_file' => "$scriptFullPath.log",
   #information about source server
   'source' => {
      'hostname' => '', # used to see if we are on source
      'poolname' => 'pool', # name of the ZFS pool to export
      # if set, will generate a report via email or by storing on a drive
      'report' => {
         'email' => 'tech@example.org',
         'subject' => 'AG Transport Report',
         'targetDrive' => {
            'fstype' => '', # filesystem type of the report drive
            # How often to check for the disk (seconds), message displayed every interval
            'check_interval' => 15,
            'label' => '',
            'mount_point' => '',
         }
      }
   },
   #information about target server
   'target' => {
      'hostname' => '', # used to see if we are on target
      'poolname' => 'backup', # name of the ZFS pool to import
      'shutdown_after_replication' => 0, # if set to 1, will shutdown the server after replication
      # if this is set, the dataset uses GELI, so we must decrypt and
      # mount it first
      'geli' => {
         # NOTE: the key must be exactly 'secureKey' (no trailing blank); the
         # main program reads $config->{target}->{geli}->{secureKey}
         'secureKey' => {
            'label' => 'replica', # the GPT label of the key disk
            'fstype' => 'ufs', # filesystem type of the key disk
            'check_interval' => 15,
            'wait_timeout' => 300,
            'keyfile' => 'geli.key', # the name of the key file on the secureKey disk
         },
         'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c', # hex representation of the local key part
         'target' => '/media/geli.key', # location to create the combined keyfile
         'poolname' => 'backup', # name of the ZFS pool to import
         'diskList' => [
            'da0',
            'da1'
            ], # list of disks to try to mount the dataset from
      },
      'report' => {
         'email' => '',
         'subject' => '',
         'targetDrive' => {
            'fstype' => 'msdos', # filesystem type of the report drive
            'label' => 'sneakernet',
            'mount_point' => '',
         }
      }
   },
   'transport' => {
      # this is the GPT label of the sneakernet disk
      'label' => 'sneakernet',
      # this is the file system type. Not needed if ufs
      'fstype' => 'ufs',
      # where we want to mount it
      'mount_point' => '/mnt/sneakernet',
      # amount of time to wait for the disk to appear
      'timeout' => 600,
      # How often to check for the disk (seconds), message displayed every interval
      'check_interval' => 15,
      # if set, all files will be encrypted with this key/IV during transport
      'encryption' => {
         'key'    => '', # openssl rand 32 | xxd -p | tr -d '\n' > test.key
         'IV'     => '00000000000000000000000000000000',
      },
   },
   # datasets to replicate; the hash key identifies the entry, the values
   # give the parent on each side and the dataset name
   'datasets' => {
      'dataset1' => {
         'source' => 'pool', # the parent of the dataset on the source
         'target' => 'backup', # the parent of the dataset on the target
         'dataset' => 'dataset1', # the dataset name
      },
      'files_share'  => {
         'source' => 'pool',
         'target' => 'backup',
         'dataset' => 'files_share',
      },
   }
};
170
 
48 rodolico 171
## Load the status file into memory.
##
## Arguments:
##   $filename - path to the status file (string)
##
## Behavior:
##   - When the file exists and can be opened for reading, every line is read,
##     trailing newlines are removed, and the lines are returned as an ARRAYREF.
##     The contents are logged when $verboseLoggingLevel >= 3.
##   - Otherwise a message is logged (when $verboseLoggingLevel >= 2) and an
##     empty ARRAYREF is returned, which callers treat as a fresh start.
##
## Returns: ARRAYREF of lines (possibly empty).
sub getStatusFile {
   my $filename = shift;
   my @lines = ();
   my $fh;

   # guard clause: missing or unreadable file means there is no history
   unless ( -e $filename && open( $fh, '<', $filename ) ) {
      logMsg("Error: could not read status file '$filename', assuming a fresh start: $!") if $verboseLoggingLevel >= 2;
      return \@lines;
   }

   @lines = <$fh>;
   chomp @lines;
   close $fh;
   logMsg("Read status file '$filename' with contents:\n" . join( "\n", @lines ) . "\n") if $verboseLoggingLevel >= 3;
   return \@lines;
}
24 rodolico 195
 
48 rodolico 196
## Write the status list to disk, keeping the previous file as a backup.
##
## Arguments:
##   $filename   - path to the status file to write
##   $statusList - ARRAYREF of lines to write into the file (a missing list
##                 is treated as empty)
##
## Behavior:
##   - If an existing file is present, renames it to `$filename.bak` as a simple backup.
##   - Writes the provided lines to `$filename` (one per line).
##   - Calls fatalError when the backup, open, write or close fails.
##   - Logs the written contents when $verboseLoggingLevel >= 3.
sub writeStatusFile {
   my ( $filename, $statusList ) = @_;
   $statusList //= [];

   # backup existing status file
   if ( -e $filename ) {
      rename( $filename, "$filename.bak" )
         or fatalError("Error: could not backup existing status file '$filename': $!");
   }

   # write new status file
   my $fh;
   unless ( open( $fh, '>', $filename ) ) {
      fatalError("Error: could not write status file '$filename': $!");
      return;
   }

   my $error;
   foreach my $line ( @$statusList ) {
      next if print {$fh} "$line\n";
      $error = "$!";
      last;
   }
   # buffered write errors (e.g. a full disk) may only surface when the
   # handle is closed, so the result of close must be checked as well
   close( $fh ) or $error //= "$!";

   if ( defined $error ) {
      fatalError("Error: could not write status file '$filename': $error");
      return;
   }

   logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n") if $verboseLoggingLevel >= 3;
   return;
}
224
 
48 rodolico 225
## Turn a delimited, path-like name into a flat string by swapping the
## delimiter for another separator.
##
## Examples:
##   dirnameToFileName( 'pool/fs/sub' )           => 'pool.fs.sub'
##   dirnameToFileName( 'pool.fs.sub', '.', '/' ) => 'pool/fs/sub'
##
## Arguments:
##   $string       - input string to convert
##   $delimiter    - literal delimiter to split on (default: '/')
##   $substitution - separator used to join the pieces (default: '.')
##
## Returns: the pieces of $string joined with $substitution. Trailing empty
## pieces are dropped by split, so 'pool/fs/' yields 'pool.fs'.
sub dirnameToFileName {
   my ( $string, $delimiter, $substitution ) = @_;
   $delimiter    = '/' unless defined $delimiter;
   $substitution = '.' unless defined $substitution;

   # the delimiter is matched literally, never as a regular expression
   my $separator = quotemeta $delimiter;
   my @segments  = split /$separator/, $string;
   return join $substitution, @segments;
}
243
 
48 rodolico 244
## Perform replication for all configured datasets on the source server.
##
## Arguments:
##   $config     - configuration HASHREF (loaded from YAML). Must contain `datasets` and `transport` entries.
##   $targetList - ARRAYREF of previously replicated snapshot full names (the contents of the
##                 status file), passed to `makeReplicateCommands` as the target snapshot list
##
## Behavior:
##   - Iterates over the entries of `$config->{datasets}` in sorted key order.
##   - For each entry, lists the snapshots on the source with `zfs list -rt snap`. In dry-run
##     mode, if `test.source.status` exists in the script directory, its lines are used instead.
##   - Calls `makeReplicateCommands` (from ZFS_Utils) with the source and target snapshot lists,
##     the dataset name, the source and target parents and `$newStatus`. The returned HASHREF
##     is treated as name => command; presumably `makeReplicateCommands` also fills
##     `$newStatus` (verify against ZFS_Utils).
##   - Each command is optionally piped through `openssl enc` when `transport.encryption.key` is set,
##     and its output is redirected to a file on the transport mount point named after the hash key
##     (with '/' replaced by '.').
##   - The encryption key is masked in the logged command, because the log file is later
##     handed to `sendReport` by `cleanup`.
##   - Respects `$config->{dryrun}`: no commands are executed when dryrun is enabled.
##
## Returns: ARRAYREF `$newStatus` (the array passed to `makeReplicateCommands`).
sub doSourceReplication {
   my ($config, $targetList) = @_;
   my $newStatus = [];
   my $testStatusFile = "$scriptDirectory/test.source.status";

   foreach my $dataset ( sort keys %{$config->{datasets}} ) {
      logMsg("Processing dataset '$dataset'") if $verboseLoggingLevel >= 1;
      # $dataset is the config key; the actual dataset name is in {dataset}
      my $datasetConfig = $config->{datasets}->{$dataset};

      # get list of all snapshots on dataset
      my $sourceList;
      if ( -e $testStatusFile && $config->{dryrun} ) {
         logMsg("Using test.source.status file for source snapshots in dry-run mode") if $verboseLoggingLevel >= 2;
         $sourceList = getStatusFile( $testStatusFile );
      } else {
         $sourceList = [ runCmd( "zfs list -rt snap -H -o name $datasetConfig->{source}/$datasetConfig->{dataset}" ) ];
      }

      if ($verboseLoggingLevel >= 5) {
         logMsg("Source snapshots for dataset '$dataset': " . join( ', ', sort @$sourceList ) );
         logMsg("Target snapshots for dataset '$dataset': " . join( ', ', sort @$targetList ) );
      }

      my $commands = makeReplicateCommands(
                        $sourceList,
                        $targetList,
                        $datasetConfig->{dataset},
                        $datasetConfig->{source},
                        $datasetConfig->{target},
                        $newStatus
                     );

      # anything other than a non-empty HASHREF means there is nothing to send
      unless ( ref( $commands ) eq 'HASH' && %$commands ) {
         logMsg( "Nothing to do for $dataset" ) if $verboseLoggingLevel >= 1;
         next;
      }

      my $encryptionKey = $config->{transport}->{encryption}->{key};
      foreach my $cmd ( sort keys %$commands ) {
         my $command = $commands->{$cmd};
         my $outfile = dirnameToFileName( $cmd );
         $command .= " | openssl enc -aes-256-cbc -K $encryptionKey -iv $config->{transport}->{encryption}->{IV} " if $encryptionKey;
         $command .= " > $config->{transport}->{mount_point}/$outfile";

         # never write the key itself to the log
         my $loggedCommand = $command;
         $loggedCommand =~ s/ -K \Q$encryptionKey\E / -K <redacted> / if $encryptionKey;
         logMsg("Running command: $loggedCommand") if $verboseLoggingLevel >= 2;

         runCmd( $command ) unless $config->{dryrun};
      }
   }
   return $newStatus;
}
306
 
48 rodolico 307
## Perform cleanup and final reporting after replication.
##
## Arguments:
##   $config  - configuration HASHREF (required)
##   $message - OPTIONAL message to include in the report
##
## Behavior:
##   - Logs disk usage for the transport mount point (if it is a directory) and the
##     output of `zpool list` (if `which zpool` finds it), when $verboseLoggingLevel >= 1.
##   - Uses `$config->{runningAs}` to select the role ('source' or 'target') section of
##     the config; falls back to 'unknown' when it has not been set yet, which can
##     happen when this sub is handed to fatalError early in the main program.
##   - Fills in a default report subject when none is configured (undefined or empty)
##     and a default message when none was passed.
##   - Unmounts the transport drive (unless dryrun) and calls `sendReport` with the
##     role's report settings, the message and the log file name.
##   - If `shutdown_after_replication` is set in the running role's config, logs this and
##     runs `shutdown -p now` (skipped when `$config->{dryrun}` is set).
sub cleanup{
   my ( $config, $message ) = @_;
   my $role = $config->{runningAs} // 'unknown';

   # add disk space utilization information on transport to the log
   my $mountPoint = $config->{transport}->{mount_point};
   logMsg( "Disk space utilization on transport disk:\n" . runCmd( "df -h $mountPoint" ) . "\n" )
      if defined( $mountPoint ) && -d $mountPoint && $verboseLoggingLevel >= 1;

   # add information about the server (zpools) to the log
   my $servername = `hostname -s`;
   chomp $servername;
   logMsg( "Zpools on server $servername:\n" . join( "\n", runCmd( "zpool list" ) ) . "\n" ) if `which zpool` && $verboseLoggingLevel >= 1;

   # make sure the report has a subject; the built-in default for the target
   # is an empty string, so "defined" alone is not a sufficient test
   my $subject = $config->{$role}->{report}->{subject};
   $config->{$role}->{report}->{subject} = "Replication Report for $role server $servername"
      unless defined( $subject ) && length( $subject );
   $message //= "Replication completed on $role server $servername.";

   # unmount the sneakernet drive
   unmountDriveByLabel( $config->{transport} ) unless $config->{dryrun};
   sendReport( $config->{$role}->{report}, $message, $config->{log_file} );

   # If they have requested shutdown, do it now
   if ( $config->{$role}->{shutdown_after_replication} ) {
      logMsg( "Shutting down $role server as per configuration" ) if $verboseLoggingLevel >= 0;
      runCmd( "shutdown -p now" ) unless $config->{dryrun};
   }
}
33 rodolico 339
 
48 rodolico 340
## Update the target zfs datasets from files on the transport drive.
##
## Arguments:
##   $config - configuration HASHREF containing `transport` and `datasets` entries.
##
## Behavior:
##   - Obtains the list of files on the transport mount point via `getDirectoryList`.
##   - The part of each file's basename before the first '.' is looked up as a key in
##     `$config->{datasets}`; files without a matching entry are logged and skipped.
##   - The target dataset is that entry's `target` parent, a '/', and the basename with
##     every '.' converted back to '/' (via dirnameToFileName).
##   - The file is optionally decrypted via `openssl enc -d` (when
##     `transport.encryption.key` is set) and piped into `zfs receive -F`.
##   - The encryption key is masked in the logged command.
##   - Respects `$config->{dryrun}`: commands are logged but not executed.
##
## NOTE(review): the lookup uses the hash key of `datasets`; this presumably relies on
## the key being identical to the entry's `dataset` name - confirm against the file
## names written by the source side.
sub updateTarget {
   my $config = shift;
   my $encryptionKey = $config->{transport}->{encryption}->{key};
   my $files = getDirectoryList( $config->{transport}->{mount_point} );

   foreach my $filename ( @{ $files // [] } ) {
      my $baseName = basename( $filename );
      # grab only the first element of a string which has internal delimiters;
      # the dot must be escaped as /\./ (inside \Q..\E a backslash is itself quoted)
      my ($dataset) = split( /\./, $baseName );

      unless ( defined( $dataset ) && length( $dataset ) && exists $config->{datasets}->{$dataset} ) {
         logMsg( "Skipping '$filename': no matching entry in datasets configuration" ) if $verboseLoggingLevel >= 1;
         next;
      }

      my $targetDataset = $config->{datasets}->{$dataset}->{target} . '/' . dirnameToFileName( $baseName, '.', '/' );
      my $command = "cat $filename";
      $command .= " | openssl enc -aes-256-cbc -d -K $encryptionKey -iv $config->{transport}->{encryption}->{IV} " if $encryptionKey;
      $command .= " | zfs receive -F $targetDataset";

      # never write the key itself to the log
      my $loggedCommand = $command;
      $loggedCommand =~ s/ -K \Q$encryptionKey\E / -K <redacted> / if $encryptionKey;
      logMsg( $loggedCommand ) if $verboseLoggingLevel >= 2;

      # zfs receive -F is destructive, so it must not run during a dry run
      runCmd( $command ) unless $config->{dryrun};
   }
}
30 rodolico 365
 
366
##################### main program starts here #####################
# Example to create a random key for encryption/decryption:
# generate a random key with
# openssl rand 32 | xxd -p | tr -d '\n' > test.key

# If a YAML config file exists next to the script, load and merge it
$config = loadConfig($configFileName, $config );
exit 1 unless keys %$config;

# parse CLI options; values are stored directly in the config hash
GetOptions( $config,
   'dryrun|n',
   'verbosity|v+',
   'version|V',
   'help|h',
) or do { print "Invalid options\n"; exit 2 };
if (defined ($config->{help})) {
   print "Usage: $FindBin::Script [--dryrun] [--verbosity] [--help]\n";
   print "  --dryrun, -n   Run in dry-run mode (no writes)\n";
   print "  --verbosity, -v  Run in verbose mode (more v's mean more verbose)\n";
   print "  --version, -V  Display version number\n";
   exit 0;
} elsif (defined $config->{version}) {
   print "$FindBin::Script v$VERSION\n";
   exit 0;
}

# set some defaults in library from config
$verboseLoggingLevel = $config->{verbosity} // 0;
# status file path; same name as the built-in default
$config->{'status_file'} //= "$scriptDirectory/sneakernet_target.status";
# set log file name for sub logMsg in ZFS_Utils, and remove the old log if it exists
# Log file is only valid for one run
$logFileName = $config->{'log_file'} //= "$scriptFullPath.log";
# log only for one run
unlink ( $logFileName ) if -f $logFileName;

# cleanup() reads runningAs, and it may be invoked through fatalError before
# the role has been determined below
$config->{runningAs} //= 'unknown';

fatalError( "Invalid config file: missing source and/or target server", $config, \&cleanup )
   unless (defined $config->{source} && defined $config->{target});

# decide which role this machine plays by comparing its short hostname
# with the hostnames in the config
my $servername = `hostname -s`;
chomp $servername;
$config->{runningAs} = $servername eq $config->{source}->{hostname} ? 'source' :
                $servername eq $config->{target}->{hostname} ? 'target' : 'unknown';

# mount the transport drive, fatal error if we can not find it
# (skipped in dry-run mode; on success mount_point is replaced by the value
# returned from mountDriveByLabel)
fatalError( "Unable to mount tranport drive with label $config->{transport}->{label}", $config, \&cleanup )
   unless $config->{dryrun} or $config->{transport}->{mount_point} =  mountDriveByLabel( $config->{transport} );

# main program logic
if ( $config->{runningAs} eq 'source' ) {
   logMsg( "Running as source server" ) if $verboseLoggingLevel >= 1;
   # remove all files from transport disk, but leave all subdirectories alone
   fatalError( "Failed to clean transport directory $config->{transport}->{mount_point}", $config, \&cleanup )
      unless $config->{dryrun} or cleanDirectory( $config->{transport}->{mount_point} );
   my $statusList = getStatusFile($config->{status_file});
   $statusList = doSourceReplication($config, $statusList);
   writeStatusFile($config->{status_file}, $statusList) unless $config->{dryrun};
} elsif ( $config->{runningAs} eq 'target' ) {
   logMsg( "Running as target server" ) if $verboseLoggingLevel >= 1;
   if ( defined $config->{target}->{geli} ) {
      mountGeli( $config->{target}->{geli} );
      # release the key disk afterwards, unless it is the transport disk
      # itself, which is still needed and is unmounted by cleanup()
      my $secureKey = $config->{target}->{geli}->{secureKey};
      unmountDriveByLabel( $secureKey )
         if ref( $secureKey ) eq 'HASH'
            && defined( $secureKey->{label} )
            && $secureKey->{label} ne ( $config->{transport}->{label} // '' );
   }
   print "Please insert device labeled REPORT\n" if $config->{target}->{report}->{targetDrive}->{label};
   updateTarget( $config );
} else {
   fatalError( "This server ($servername) is neither source nor target server as per config\n" );
}

cleanup( $config );

1;