Subversion Repositories zfs_utils

Rev

Rev 42 | Rev 44 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
24 rodolico 1
#! /usr/bin/env perl
2
 
34 rodolico 3
# Simplified BSD License (FreeBSD License)
4
#
5
# Copyright (c) 2025, Daily Data Inc.
6
# All rights reserved.
7
#
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are met:
10
#
11
# 1. Redistributions of source code must retain the above copyright notice, this
12
#    list of conditions and the following disclaimer.
13
#
14
# 2. Redistributions in binary form must reproduce the above copyright notice,
15
#    this list of conditions and the following disclaimer in the documentation
16
#    and/or other materials provided with the distribution.
17
#
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 
29
# sneakernet.pl
30
# Script to perform sneakernet replication of ZFS datasets between two servers
31
# using an external transport drive.
32
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
33
# Optionally uses symmetric encryption to encrypt datasets during transport.
34
# On the target server, can optionally use GELI to encrypt the datasets on disk.
35
# Requires a configuration file in YAML format next to the script.
36
# Author: R. W. Rodolico <rodo@dailydata.net>
37
# Created: December 2025
38
# Revision History:
39
# Version: 0.1 2025-12-10 Initial version
40
 
24 rodolico 41
use strict;
42
use warnings;
43
 
34 rodolico 44
our $VERSION = '0.1';
45
 
24 rodolico 46
use FindBin;
47
use lib "$FindBin::Bin/..";
27 rodolico 48
use Data::Dumper;
42 rodolico 49
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel unmountDriveByLabel mountGeli runCmd sendReport fatalError cleanDirectory $logFileName $displayLogsOnConsole);
24 rodolico 50
 
35 rodolico 51
# real (symlink-resolved) directory of the script and the script's full path;
# the config, status and log file names are all derived from the full path
my $scriptDirectory = $FindBin::RealBin;
my $scriptFullPath = join( '/', $scriptDirectory, $FindBin::Script );

# if set, commands are logged but not run (see the 'unless $DEBUG' guards)
my $DEBUG = 1;

# display all log messages on console in addition to the log file
$displayLogsOnConsole = 1;

# YAML configuration file that lives next to the script
my $configFileName = $scriptFullPath . '.conf.yaml';
24 rodolico 62
 
63
# Built-in default configuration. It is passed to loadConfig() together with
# the YAML file name further down, so anything set here is only a fallback.
my $config = {
   # file created on source server to track last copied dataset
   'status_file' => "$scriptFullPath.status",
   'log_file' => "$scriptFullPath.log",
   #information about source server
   'source' => {
      'hostname' => '', # used to see if we are on source
      'poolname' => 'pool', # name of the ZFS pool to export
      # if set, will generate a report via email or by storing on a drive
      'report' => {
         'email' => 'tech@example.org',
         'subject' => 'AG Transport Report',
         'targetDrive' => {
            'fstype' => '', # filesystem type of the report drive
            # How often to check for the disk (seconds), message displayed every interval
            'check_interval' => 15,
            'label' => '',
            'mount_point' => '',
         }
      }
   },
   #information about target server
   'target' => {
      'hostname' => '', # used to see if we are on target
      'poolname' => 'backup', # name of the ZFS pool to import
      'shutdown_after_replication' => 0, # if set to 1, will shutdown the server after replication
      # if this is set, the dataset uses GELI, so we must decrypt and
      # mount it first
      'geli' => {
         # key was previously spelled 'secureKey ' (trailing space), which can
         # never match a lookup of 'secureKey'
         'secureKey' => {
            'label' => 'replica', # the GPT label of the key disk
            'fstype' => 'ufs', # filesystem type of the key disk
            'check_interval' => 15,
            'wait_timeout' => 300,
            'keyfile' => 'geli.key', # the name of the key file on the secureKey disk
         },
         # NOTE(review): presumably a sample value; a real key part should come
         # from the YAML config, not from the script source - confirm
         'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c', # hex representation of the local key part
         'target' => '/media/geli.key', # location to create the combined keyfile
         'poolname' => 'backup', # name of the ZFS pool to import
         'diskList' => [
            'da0',
            'da1'
            ], # list of disks to try to mount the dataset from
      },
      'report' => {
         'email' => '',
         'subject' => '',
         'targetDrive' => {
            'fstype' => 'msdos', # filesystem type of the report drive
            'label' => 'sneakernet',
            'mount_point' => '',
         }
      }
   },
   'transport' => {
      # this is the GPT label of the sneakernet disk
      'label' => 'sneakernet',
      # this is the file system type. Not needed if ufs
      'fstype' => 'ufs',
      # where we want to mount it
      'mount_point' => '/mnt/sneakernet',
      # amount of time to wait for the disk to appear
      'timeout' => 600,
      # How often to check for the disk (seconds), message displayed every interval
      'check_interval' => 15,
      # if set, all files will be encrypted with this key/IV during transport
      'encryption' => {
         'key'    => '', # openssl rand 32 | xxd -p | tr -d '\n' > test.key
         'IV'     => '00000000000000000000000000000000',
      },
   },
   'datasets' => {
      'dataset1' => {
         'source' => 'pool', # the parent of the dataset on the source
         'target' => 'backup', # the parent of the dataset on the target
         'dataset' => 'dataset1', # the dataset name
      },
      'files_share'  => {
         'source' => 'pool',
         'target' => 'backup',
         'dataset' => 'files_share',
      },
   }
};
147
 
35 rodolico 148
# Read the status file and return its lines (newlines removed) as an array
# reference. If the file does not exist or cannot be opened, the problem is
# logged and a reference to an empty list is returned.
# $filename - path of the status file
sub getStatusFile {
   my ($filename) = @_;
   my @entries = ();
   my $fh;
   # guard clause: missing or unreadable file means we start from scratch
   unless ( -e $filename && open( $fh, '<', $filename ) ) {
      logMsg("Error: could not read status file '$filename', assuming a fresh start: $!");
      return \@entries;
   }
   @entries = <$fh>;
   chomp @entries;
   close $fh;
   logMsg("Read status file '$filename' with contents:\n" . join( "\n", @entries ) . "\n");
   return \@entries;
}
24 rodolico 162
 
30 rodolico 163
# Write the status list to file, one entry per line.
# An existing status file is first renamed to "<filename>.bak".
# $filename   - path of the status file
# $statusList - array reference of lines to write
# Dies (after logging the reason) if the backup, the write or the close fails.
sub writeStatusFile {
   my ( $filename, $statusList ) = @_;
   # backup existing status file
   if ( -e $filename ) {
      rename( $filename, "$filename.bak" ) or do {
         # build the message first so $! cannot be clobbered by logMsg
         my $error = "Error: could not backup existing status file '$filename': $!";
         logMsg($error);
         die "$error\n";
      };
   }
   # write new status file, remembering the first failure we run into
   my $failure;
   open( my $fh, '>', $filename ) or $failure = $!;
   unless ( defined $failure ) {
      foreach my $line ( @$statusList ) {
         unless ( print {$fh} "$line\n" ) {
            $failure = $!;
            last;
         }
      }
      # buffered write errors (e.g. disk full) only surface at close
      close( $fh ) or $failure //= $!;
   }
   if ( defined $failure ) {
      my $error = "Error: could not write status file '$filename': $failure";
      logMsg($error);
      die "$error\n";
   }
   logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n");
}
185
 
31 rodolico 186
# Turn a slash separated path into a dot separated name, discarding the
# first component: root/dataset/dataset/dataset becomes
# dataset.dataset.dataset. A string with a leading slash loses only the
# empty component in front of it.
sub replaceSlashWithDot {
   my ($path) = @_;
   # the first field (the root, or the empty string before a leading slash) is thrown away
   my ( undef, @components ) = split( "/", $path );
   return join( '.', @components );
}
194
 
30 rodolico 195
# perform replication on source server
# $config - configuration hashref
# $statusList - list of last snapshots replicated for each dataset in previous replications
# return new status list after replication containing updated last snapshots
# For every configured dataset this lists its snapshots, asks
# makeReplicateCommands for the commands to run and redirects the output of
# each command to a file on the transport mount point. When $DEBUG is set
# the commands are only logged.
sub doSourceReplication {
   my ($config, $statusList) = @_;
   my $newStatus = [];
   foreach my $dataset ( sort keys %{$config->{datasets}} ) {
      logMsg("Processing dataset '$dataset'");
      my $datasetConfig = $config->{datasets}->{$dataset};
      # get list of all snapshots on dataset
      my $root = $datasetConfig->{source} . '/' . $datasetConfig->{dataset};
      my $sourceList = [ runCmd( "zfs list -rt snap -H -o name $root" ) ];
      # NOTE(review): the original applied s|<source>/|| to the array
      # reference itself, which never touched the snapshot names (and would
      # have replaced the list with a string had it matched). The names are
      # therefore passed on with the parent intact, exactly as before;
      # confirm whether makeReplicateCommands expects the parent stripped.
      my $commands = makeReplicateCommands( $sourceList, $statusList, $newStatus );

      # tolerate an undefined result as well as an empty hash
      if ( $commands && %$commands ) {
         # sorted so the order of the run (and of the log) is repeatable
         foreach my $cmd ( sort keys %$commands ) {
            my $command = $commands->{$cmd};
            # output file name: the key relative to $root, slashes turned into dots
            my $outputFile = $cmd;
            # \Q..\E: dataset names may contain regex metacharacters such as '.'
            $outputFile =~ s/^\Q$root\E//;
            $outputFile = replaceSlashWithDot($outputFile);
            #$command .= " | openssl enc -aes-256-cbc -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} " if $config->{transport}->{encryption}->{key};
            $command .= " > $config->{transport}->{mount_point}/" . $outputFile;
            logMsg("Running command: $command");
            runCmd(  $command  ) unless $DEBUG;
         }
      } else {
         logMsg( "Nothing to do for $dataset" );
      }
   }
   return $newStatus;
}
230
 
42 rodolico 231
# perform cleanup actions: log disk and pool information, unmount the
# transport drive, send the report and, if configured, shut the server down
# $config - configuration hashref
# $message - optional message to include in the report
#
sub cleanup{
   my ( $config, $message ) = @_;
   # fatalError can hand us over before the role has been worked out
   my $role = $config->{runningAs} // 'unknown';
   # use a scratch hash for an unknown role instead of autovivifying a
   # bogus section into the configuration
   my $roleConfig = ref( $config->{$role} ) eq 'HASH' ? $config->{$role} : {};
   my $report = $roleConfig->{report} //= {};

   # add disk space utilization information on transport to the log
   # (skipped when there is no mount point, a bare 'df -h' would list everything)
   my $mountPoint = $config->{transport}->{mount_point};
   if ( defined $mountPoint && length $mountPoint ) {
      logMsg( "Disk space utilization on transport disk:\n" . runCmd( "df -h $mountPoint" ) . "\n" );
   }
   # add information about the server (zpools) to the log
   my $servername = `hostname -s` // '';
   chomp $servername;
   logMsg( "Zpools on server $servername:\n" . join( "\n", runCmd( "zpool list" ) ) . "\n" );
   # the built-in defaults use '' for an unset subject, so treat empty like undefined
   $report->{subject} = "Replication Report for $role server $servername"
      unless defined $report->{subject} && length $report->{subject};
   $message //= "Replication completed on $role server $servername.";
   # unmount the sneakernet drive
   unmountDriveByLabel( $config->{transport} );
   sendReport( $report, $message, $config->{log_file} );
   # If they have requested shutdown, do it now
   if ( $roleConfig->{shutdown_after_replication} ) {
      logMsg( "Shutting down $role server as per configuration" );
      runCmd( "shutdown -p now" ) unless $DEBUG;
   }
}
33 rodolico 254
 
42 rodolico 255
# update the target datasets from the files on the transport drive
# $config - configuration hashref
# For every plain file on the transport mount point this builds the command
# that reads it (decrypting when a transport encryption key is configured)
# and logs it.
# NOTE(review): the receiving side (feeding the stream to zfs receive) is
# not implemented yet, so the commands are only logged, never run.
sub updateTarget {
   my $config = shift;
   my $mountPoint = $config->{transport}->{mount_point};
   my $encryption = $config->{transport}->{encryption} // {};
   # list the transport directory ourselves; getDirectoryList is neither
   # defined in this file nor imported from ZFS_Utils
   opendir( my $dh, $mountPoint ) or do {
      logMsg("Error: could not read transport directory '$mountPoint': $!");
      return;
   };
   my @files = sort grep { -f "$mountPoint/$_" } readdir $dh;
   closedir $dh;
   foreach my $filename ( @files ) {
      my $command = "cat $mountPoint/$filename";
      # only decrypt when a key is configured for the transport
      $command .= " | openssl enc -aes-256-cbc -d -K $encryption->{key} -iv $encryption->{IV}"
         if $encryption->{key};
      logMsg("Prepared command for '$filename' (not executed): $command");
   }
   return;
}
30 rodolico 263
 
264
##################### main program starts here #####################
265
# Example to create a random key for encryption/decryption:
24 rodolico 266
# generate a random key with
267
# openssl rand 32 | xxd -p | tr -d '\n' > test.key
268
 
269
# If a YAML config file exists next to the script, load and merge it
270
$config = loadConfig($configFileName, $config );
exit 1 unless keys %$config;

# set some defaults
$config->{'status_file'} //= "$scriptFullPath.status";
# set log file name for sub logMsg in ZFS_Utils, and remove the old log if it exists
# Log file is only valid for one run
$logFileName = $config->{'log_file'} //= "$scriptFullPath.log";
unlink ( $logFileName ) if -f $logFileName;

# cleanup() finds its report settings through runningAs, so make sure it has
# a value before the first fatalError that can call cleanup
$config->{runningAs} = 'unknown';

fatalError( "Invalid config file: missing source and/or target server", $config, \&cleanup )
   unless (defined $config->{source} && defined $config->{target});

# decide which role this server plays by comparing the short hostname
my $servername = `hostname -s` // '';
chomp $servername;
$config->{runningAs} = $servername eq $config->{source}->{hostname} ? 'source' :
                $servername eq $config->{target}->{hostname} ? 'target' : 'unknown';

#cleanup( $config, "Testing" );

# a server that is neither source nor target has nothing to do; check this
# before mounting so we neither wait for the transport drive nor leave it mounted
fatalError( "This server ($servername) is neither source nor target server as per config\n" )
   if $config->{runningAs} eq 'unknown';

# mount the transport drive, fatal error if we can not find it
# the configured mount point is only replaced once the mount has succeeded,
# so cleanup still sees a usable value on failure
my $mountPoint = mountDriveByLabel( $config->{transport} );
fatalError( "Unable to mount transport drive with label $config->{transport}->{label}", $config, \&cleanup )
   unless $mountPoint;
$config->{transport}->{mount_point} = $mountPoint;

# main program logic
if ( $config->{runningAs} eq 'source' ) {
   logMsg "Running as source server";
   # remove all files from transport disk, but leave all subdirectories alone
   fatalError( "Failed to clean transport directory $config->{transport}->{mount_point}", $config, \&cleanup )
      unless cleanDirectory( $config->{transport}->{mount_point} );
   my $statusList = getStatusFile($config->{status_file});
   $statusList = doSourceReplication($config, $statusList);
   writeStatusFile($config->{status_file}, $statusList);
} elsif ( $config->{runningAs} eq 'target' ) {
   logMsg "Running as target server";
   mountGeli( $config->{target}->{geli} ) if ( defined $config->{target}->{geli} );
   updateTarget( $config );
}

cleanup( $config );

1;