Subversion Repositories zfs_utils

Rev

Rev 35 | Rev 42 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
24 rodolico 1
#! /usr/bin/env perl
2
 
34 rodolico 3
# Simplified BSD License (FreeBSD License)
4
#
5
# Copyright (c) 2025, Daily Data Inc.
6
# All rights reserved.
7
#
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are met:
10
#
11
# 1. Redistributions of source code must retain the above copyright notice, this
12
#    list of conditions and the following disclaimer.
13
#
14
# 2. Redistributions in binary form must reproduce the above copyright notice,
15
#    this list of conditions and the following disclaimer in the documentation
16
#    and/or other materials provided with the distribution.
17
#
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 
29
# sneakernet.pl
30
# Script to perform sneakernet replication of ZFS datasets between two servers
31
# using an external transport drive.
32
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
33
# Optionally uses symmetric encryption to encrypt datasets during transport.
34
# On the target server, can optionally use GELI to encrypt the datasets on disk.
35
# Requires a configuration file in YAML format next to the script.
36
# Author: R. W. Rodolico <rodo@dailydata.net>
37
# Created: December 2025
38
# Revision History:
39
# Version: 0.1 2025-12-10 Initial version
40
 
24 rodolico 41
use strict;
42
use warnings;
43
 
34 rodolico 44
our $VERSION = '0.1';
45
 
24 rodolico 46
use FindBin;
47
use lib "$FindBin::Bin/..";
27 rodolico 48
use Data::Dumper;
35 rodolico 49
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel mountGeli runCmd sendReport $logFileName $displayLogsOnConsole);
24 rodolico 50
 
35 rodolico 51
my $scriptDirectory = $FindBin::RealBin;
52
my $scriptFullPath = "$scriptDirectory/" . $FindBin::Script;
53
 
54
 
33 rodolico 55
# if set, will not actually write files to disk
37 rodolico 56
my $DEBUG = 1;
33 rodolico 57
 
24 rodolico 58
# display all log messages on console in addition to the log file
59
$displayLogsOnConsole = 1;
60
 
35 rodolico 61
my $configFileName = "$scriptFullPath.conf.yaml";
24 rodolico 62
 
63
my $config = {
64
   # file created on source server to track last copyed dataset
35 rodolico 65
   'status_file' => "$scriptFullPath.status",
66
   'log_file' => "$scriptFullPath.log",
24 rodolico 67
   #information about source server
37 rodolico 68
   'source' => {
24 rodolico 69
      'hostname' => '', # used to see if we are on source
34 rodolico 70
      'poolname' => 'pool', # name of the ZFS pool to export
71
      # if set, will generate a report via email or by storing on a drive
72
      'report' => {
73
         'email' => 'tech@example.org',
74
         'subject' => 'AG Transport Report',
75
         'targetDrive' => {
35 rodolico 76
            'fstype' => '', # filesystem type of the report drive
77
            # How often to check for the disk (seconds), message displayed every interval
78
            'check_interval' => 15,
34 rodolico 79
            'label' => '',
80
            'mount_point' => '',
81
         }
82
      }
24 rodolico 83
   },
84
   #information about target server
37 rodolico 85
   'target' => {
24 rodolico 86
      'hostname' => '', # used to see if we are on target
34 rodolico 87
      'poolname' => 'backup', # name of the ZFS pool to import
24 rodolico 88
      # if this is set, the dataset uses GELI, so we must decrypt and
89
      # mount it first
90
      'geli' => {
35 rodolico 91
         'secureKey ' => {
92
            'label' => 'replica', # the GPT label of the key disk
93
            'fstype' => 'ufs', # filesystem type of the key disk
94
            'check_interval' => 15,
95
            'wait_timeout' => 300,
96
            'keyfile' => 'geli.key', # the name of the key file on the secureKey disk
97
         },
24 rodolico 98
         'localKey' => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c', # hex representation of the local key part
99
         'target' => '/media/geli.key', # location to create the combined keyfile
34 rodolico 100
         'poolname' => 'backup', # name of the ZFS pool to import
24 rodolico 101
         'diskList' => [ 
34 rodolico 102
            'da0',
103
            'da1'
24 rodolico 104
            ], # list of disks to try to mount the dataset from
34 rodolico 105
      },
106
      'report' => {
107
         'email' => '',
108
         'subject' => '',
109
         'targetDrive' => {
35 rodolico 110
            'fstype' => 'msdos', # filesystem type of the report drive
111
            'label' => 'sneakernet',
112
            'mount_point' => '',
34 rodolico 113
         }
24 rodolico 114
      }
115
   },
116
   'transport' => {
117
      # this is the GPT label of the sneakernet disk
37 rodolico 118
      'label' => 'sneakernet',
35 rodolico 119
      # this is the file system type. Not needed if ufs
120
      'fstype' => 'ufs',
24 rodolico 121
      # where we want to mount it
122
      'mount_point' => '/mnt/sneakernet',
123
      # amount of time to wait for the disk to appear
124
      'timeout' => 600,
35 rodolico 125
      # How often to check for the disk (seconds), message displayed every interval
126
      'check_interval' => 15,
24 rodolico 127
      # if set, all files will be encrypted with this key/IV during transport
128
      'encryption' => {
129
         'key'    => '', # openssl rand 32 | xxd -p | tr -d '\n' > test.key
130
         'IV'     => '00000000000000000000000000000000',
131
      },
132
   },
133
   'datasets' => {
34 rodolico 134
      'dataset1' => {
135
         'source' => 'pool/dataset1',
136
         'target' => 'backup/dataset1',
137
         'filename' => 'dataset1'
24 rodolico 138
      },
139
      'files_share'  => {
34 rodolico 140
         'source' => 'pool/files_share',
141
         'target' => 'backup/files_share',
24 rodolico 142
         'filename' => 'files_share'
143
      },
144
   }
145
};
146
 
35 rodolico 147
# read the status file and return as list. If the file doesn't exits, returns an empty list
30 rodolico 148
sub getStatusFile {
149
   my $filename = shift;
150
   # read in history/status file
34 rodolico 151
   my @lines = ();
30 rodolico 152
   if ( -e $filename && open my $fh, '<', $filename ) {
153
      chomp( @lines = <$fh> );
154
      close $fh;
155
      logMsg("Read status file '$filename' with contents:\n" . join( "\n", @lines ) . "\n");
156
   } else {
34 rodolico 157
      logMsg("Error: could not read status file '$filename', assuming a fresh start: $!");
30 rodolico 158
   }
159
   return \@lines;
160
}
24 rodolico 161
 
30 rodolico 162
# write the status list to file
163
sub writeStatusFile {
164
   my ( $filename, $statusList ) = @_;
165
   # backup existing status file
166
   if ( -e $filename ) {
167
      rename( $filename, "$filename.bak" ) or do {
168
         logMsg("Error: could not backup existing status file '$filename': $!");
169
         die;
170
      };
171
   }
172
   # write new status file
173
   if ( open my $fh, '>', $filename ) {
174
      foreach my $line ( @$statusList ) {
175
         print $fh "$line\n";
176
      }
177
      close $fh;
178
      logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n");
179
   } else {
180
      logMsg("Error: could not write status file '$filename': $!");
181
      die;
182
   }
183
}
184
 
31 rodolico 185
# simple sub to take root/dataset/datset/dataset and turn it into
186
# dataset.dataset.dataset
187
sub replaceSlashWithDot {
188
   my $string = shift;
189
   my @parts = split( "/", $string );
190
   shift @parts;
191
   return join( '.', @parts );
192
}
193
 
30 rodolico 194
# perform replication on source server
195
# $config - configuration hashref
196
# $statusList - list of last snapshots replicated for each dataset in previous replications
197
# return new status list after replication containing updated last snapshots
198
# this script will actually replicate the datasets to the sneakernet disk
199
sub doSourceReplication {
200
   my ($config, $statusList) = @_;
201
   my $newStatus = [];
202
   foreach my $dataset ( sort keys %{$config->{datasets}} ) {
31 rodolico 203
      logMsg("Processing dataset '$dataset'");
204
      # get list of all snapshots on dataset
33 rodolico 205
      my $sourceList = [ runCmd( "zfs list -rt snap -H -o name $config->{datasets}->{$dataset}->{source} " ) ];
30 rodolico 206
      # process dataset here
207
      my $commands = makeReplicateCommands($sourceList, $statusList, $newStatus );
31 rodolico 208
      if ( %$commands ) {
209
         foreach my $cmd ( keys %$commands ) {
210
            my $command = $commands->{$cmd};
211
            $command .= " | openssl enc -aes-256-cbc -K $config->{transport}->{encryption}->{key} -iv $config->{transport}->{encryption}->{IV} " if $config->{transport}->{encryption}->{key};
212
            $command .= " > $config->{transport}->{mount_point}/" . replaceSlashWithDot($cmd);
213
            logMsg("Running command: $command");
33 rodolico 214
            runCmd(  $command  ) unless $DEBUG;
31 rodolico 215
         }
216
      } else {
217
         logMsg( "Nothing to do for $dataset" ); 
30 rodolico 218
      }
219
   }
220
   return $newStatus;
221
}
222
 
33 rodolico 223
# clean all files from a directory, but not any subdirectories
224
sub cleanDirectory {
225
   my $dirname = shift;
226
   logMsg( "Cleaning up $dirname of all files" );
227
   # clean up a directory
228
   opendir( my $dh, $dirname ) || fatalError( "Can not open $dirname: #!" );
229
   # get all file names, but leave directories alone
230
   my @files = map{ $dirname . "/$_" } grep { -f "$dirname/$_" } readdir($dh);
231
   closedir $dh;
232
   foreach my $file (@files) {
233
      unlink $file or warn "Could not unlink $file: #!\n";
234
   }
235
 }
236
 
237
 
238
 
31 rodolico 239
# how to handle a fatal error
240
sub fatalError {
241
   my $message = shift;
242
   logMsg( $message );
243
   die;
244
}
30 rodolico 245
 
31 rodolico 246
 
30 rodolico 247
##################### main program starts here #####################
248
# Example to create a random key for encryption/decryption:
24 rodolico 249
# generate a random key with
250
# openssl rand 32 | xxd -p | tr -d '\n' > test.key
251
 
252
# If a YAML config file exists next to the script, load and merge it
253
$config = loadConfig($configFileName, $config );
27 rodolico 254
 
25 rodolico 255
# set some defaults
35 rodolico 256
$config->{'status_file'} //= "$scriptFullPath.status";
257
# set log file name for sub logMsg in ZFS_Utils, and remove the old log if it exists
258
# Log file is only valid for one run
259
$logFileName = $config->{'log_file'} //= "$scriptFullPath.log";
260
# log only for one run
261
unlink ( $logFileName ) if -f $logFileName;
24 rodolico 262
 
31 rodolico 263
fatalError( "Invalid config file: missing source and/or target server" )
37 rodolico 264
    unless (defined $config->{source} && defined $config->{target});
24 rodolico 265
 
31 rodolico 266
# mount the transport drive, fatal error if we can not find it
267
fatalError( "Unable to mount tranport drive with label $config->{transport}->{disk_label}" )
35 rodolico 268
   unless $config->{transport}->{mount_point} =  mountDriveByLabel( $config->{transport} );
31 rodolico 269
 
24 rodolico 270
my $servername = `hostname -s`;
271
chomp $servername;
37 rodolico 272
my $runningAs = $servername eq $config->{source}->{hostname} ? 'source' :
273
                $servername eq $config->{target}->{hostname} ? 'target' : 'unknown';
274
 
35 rodolico 275
if ( $runningAs eq 'source' ) {
33 rodolico 276
    logMsg "Running as source server";
277
    # remove all files from transport disk, but leave all subdirectories alone
278
    cleanDirectory( $config->{transport}->{mount_point} );
30 rodolico 279
    my $statusList = getStatusFile($config->{status_file});
37 rodolico 280
    $statusList = doSourceReplication($config, $statusList); 
30 rodolico 281
    writeStatusFile($config->{status_file}, $statusList);
35 rodolico 282
} elsif ( $runningAs eq 'target' ) {
33 rodolico 283
    logMsg "Running as target server";
37 rodolico 284
    mountGeli( $config->{target}->{geli} ) if ( defined $config->{target}->{geli} );
24 rodolico 285
} else {
37 rodolico 286
    fatalError( "This server ($servername) is neither source nor target server as per config\n" );
24 rodolico 287
}
288
 
35 rodolico 289
# add disk space utilization information on transport to the log
290
logMsg( "Disk space utilization on transport disk:\n" . runCmd( "df -h $config->{transport}->{mount_point}" ) . "\n" );
291
# add information about the server (zpools) to the log
292
logMsg( "Zpools on server $servername:\n" . join( "\n", runCmd( "zpool list" ) ) . "\n" );
293
 
31 rodolico 294
# unmount the sneakernet drive
295
`umount $config->{transport}->{mount_point}`;
296
# and remove the directory
33 rodolico 297
rmdir $config->{transport}->{mount_point};
31 rodolico 298
 
35 rodolico 299
 
25 rodolico 300
1;
301
 
302
 
24 rodolico 303
#`cat $config->{input} | openssl enc -aes-256-cbc -K $config->{key} -iv $config->{IV} > $config->{output}`;
304
 
305
# this will decrypt $config->{output} to stdout
306
#`cat $config->{output} | openssl enc -aes-256-cbc -d -K $config->{key} -iv $config->{IV} > test.out`;