Subversion Repositories zfs_utils

Rev

Rev 33 | Rev 35 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
24 rodolico 1
#! /usr/bin/env perl
2
 
34 rodolico 3
# Simplified BSD License (FreeBSD License)
4
#
5
# Copyright (c) 2025, Daily Data Inc.
6
# All rights reserved.
7
#
8
# Redistribution and use in source and binary forms, with or without
9
# modification, are permitted provided that the following conditions are met:
10
#
11
# 1. Redistributions of source code must retain the above copyright notice, this
12
#    list of conditions and the following disclaimer.
13
#
14
# 2. Redistributions in binary form must reproduce the above copyright notice,
15
#    this list of conditions and the following disclaimer in the documentation
16
#    and/or other materials provided with the distribution.
17
#
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 
29
# sneakernet.pl
30
# Script to perform sneakernet replication of ZFS datasets between two servers
31
# using an external transport drive.
32
# Uses ZFS send/receive to replicate datasets to/from the transport drive.
33
# Optionally uses symmetric encryption to encrypt datasets during transport.
34
# On the target server, can optionally use GELI to encrypt the datasets on disk.
35
# Requires a configuration file in YAML format next to the script.
36
# Author: R. W. Rodolico <rodo@dailydata.net>
37
# Created: December 2025
38
# Revision History:
39
# Version: 0.1 2025-12-10 Initial version
40
 
24 rodolico 41
use strict;
42
use warnings;
43
 
34 rodolico 44
our $VERSION = '0.1';
45
 
24 rodolico 46
use FindBin;
47
use lib "$FindBin::Bin/..";
27 rodolico 48
use Data::Dumper;
49
use ZFS_Utils qw(loadConfig shredFile logMsg makeReplicateCommands mountDriveByLabel mountGeli runCmd $logFileName $displayLogsOnConsole);
24 rodolico 50
 
33 rodolico 51
# Dry-run switch: when true, doSourceReplication() logs each replication
# command but does not execute it.
my $DEBUG = 0;

# Keep the log beside this script and start every run with a fresh file.
$logFileName = join( '/', $FindBin::Bin, 'sneakernet.log' );
if ( -f $logFileName ) {
   unlink $logFileName;
}

# Echo every log message to the console in addition to the log file.
$displayLogsOnConsole = 1;

# The YAML configuration file name is the invocation path of this script
# with ".conf.yaml" appended.
my $configFileName = $0 . '.conf.yaml';
63
 
64
# Built-in default configuration. Further down it is handed to loadConfig()
# together with $configFileName, and the result replaces this hashref.
my $config = {
   # status file kept on the source server, recording what was last copied
   status_file => $0 . '.status',

   # ---- source server ----
   source_server => {
      hostname => '',        # compared with `hostname -s` to detect the source
      poolname => 'pool',    # name of the ZFS pool to export
      # if set, a report is generated via e-mail or stored on a drive
      report => {
         email       => 'tech@example.org',
         subject     => 'AG Transport Report',
         targetDrive => {
            label       => '',
            mount_point => '',
         },
      },
   },

   # ---- target server ----
   target_server => {
      hostname => '',        # compared with `hostname -s` to detect the target
      poolname => 'backup',  # name of the ZFS pool to import
      # when present the dataset lives on GELI and must be decrypted and
      # mounted before use
      geli => {
         keydiskname => 'replica',          # GPT label of the key disk
         keyfile     => 'geli.key',         # key file name on keydiskname
         # hex representation of the local key part
         # NOTE(review): looks like key material committed as a default --
         # confirm it is only a placeholder
         localKey    => 'e98c660cccdae1226550484d62caa2b72f60632ae0c607528aba1ac9e7bfbc9c',
         target      => '/media/geli.key',  # where the combined keyfile is created
         poolname    => 'backup',           # name of the ZFS pool to import
         diskList    => [ qw( da0 da1 ) ],  # disks to try to mount the dataset from
      },
      report => {
         email       => '',
         subject     => '',
         targetDrive => {
            label       => 'sneakernet_report',
            mount_point => '/mnt/sneakernet_report',
         },
      },
   },

   # ---- transport (sneakernet) drive ----
   transport => {
      disk_label  => 'sneakernet',       # GPT label of the sneakernet disk
      mount_point => '/mnt/sneakernet',  # where we want to mount it
      timeout     => 600,                # how long to wait for the disk to appear
      # if key is set, all files are encrypted with this key/IV in transit
      encryption => {
         key => '',  # openssl rand 32 | xxd -p | tr -d '\n' > test.key
         IV  => '00000000000000000000000000000000',
      },
   },

   # ---- datasets to replicate ----
   datasets => {
      dataset1 => {
         source   => 'pool/dataset1',
         target   => 'backup/dataset1',
         filename => 'dataset1',
      },
      files_share => {
         source   => 'pool/files_share',
         target   => 'backup/files_share',
         filename => 'files_share',
      },
   },
};
133
 
30 rodolico 134
# Read the status file and hand its lines back.
#
# Parameters:
#   $statusPath - path of the status file
# Returns:
#   arrayref of the file's lines with line endings removed; an empty
#   arrayref when the file does not exist or cannot be opened (that case is
#   logged and treated as a fresh start).
sub getStatusFile {
   my ($statusPath) = @_;
   my @entries;

   my $fh;
   unless ( -e $statusPath && open( $fh, '<', $statusPath ) ) {
      logMsg("Error: could not read status file '$statusPath', assuming a fresh start: $!");
      return \@entries;
   }

   @entries = <$fh>;
   chomp @entries;
   close $fh;
   logMsg("Read status file '$statusPath' with contents:\n" . join( "\n", @entries ) . "\n");
   return \@entries;
}
24 rodolico 148
 
30 rodolico 149
# Write the status list to file, one entry per line.
#
# Parameters:
#   $filename   - path of the status file
#   $statusList - arrayref of lines to store
# Behaviour:
#   An existing status file is first renamed to "$filename.bak". Any failure
#   (backup rename, open, print or close) is logged and then raised with die,
#   carrying the same text that was logged.
sub writeStatusFile {
   my ( $filename, $statusList ) = @_;

   # backup existing status file
   if ( -e $filename ) {
      unless ( rename( $filename, "$filename.bak" ) ) {
         my $message = "Error: could not backup existing status file '$filename': $!";
         logMsg($message);
         die $message;
      }
   }

   # write new status file, remembering the first error we hit
   my $error;
   if ( open my $fh, '>', $filename ) {
      foreach my $line ( @$statusList ) {
         next if print {$fh} "$line\n";
         $error = "$!";
         last;
      }
      # buffered write errors (e.g. a full disk) may only surface at close
      close $fh or $error //= "$!";
   } else {
      $error = "$!";
   }

   if ( defined $error ) {
      my $message = "Error: could not write status file '$filename': $error";
      logMsg($message);
      die $message;
   }

   logMsg("Wrote status file '$filename' with contents:\n" . join( "\n", @$statusList ) . "\n");
   return;
}
171
 
31 rodolico 172
# Turn a slash separated dataset path into a dot separated name, dropping
# the first component: root/dataset/dataset/dataset becomes
# dataset.dataset.dataset
#
# Parameters:
#   $string - slash separated path
# Returns:
#   the components after the first one joined with '.'; the empty string
#   when there is nothing after the first component or the input is undef.
sub replaceSlashWithDot {
   my $string = shift;
   # an undefined name has no components; avoid an uninitialized warning
   return '' unless defined $string;
   # discard the leading (root) component, keep the rest
   my ( undef, @parts ) = split( m{/}, $string );
   return join( '.', @parts );
}
180
 
30 rodolico 181
# Perform replication on the source server.
#
# For every dataset in $config->{datasets} (in sorted key order):
#   1. list its snapshots with `zfs list -rt snap -H -o name <source>`
#   2. ask makeReplicateCommands() which commands are needed
#   3. run each command with stdout redirected to a file on the transport
#      drive, piping it through `openssl enc -aes-256-cbc` first when
#      $config->{transport}->{encryption}->{key} is set
# When $DEBUG is true the commands are logged but not executed.
#
# Parameters:
#   $config     - configuration hashref
#   $statusList - list of last snapshots replicated for each dataset in
#                 previous replications
# Returns:
#   $newStatus, an arrayref handed to makeReplicateCommands() on every call.
#   This sub never modifies it itself; presumably that helper fills in the
#   updated last snapshots -- confirm in ZFS_Utils.
sub doSourceReplication {
   my ($config, $statusList) = @_;
   my $newStatus = [];
   my $key = $config->{transport}->{encryption}->{key};
   my $iv  = $config->{transport}->{encryption}->{IV};

   foreach my $dataset ( sort keys %{$config->{datasets}} ) {
      logMsg("Processing dataset '$dataset'");
      # get list of all snapshots on dataset
      my $sourceList = [ runCmd( "zfs list -rt snap -H -o name $config->{datasets}->{$dataset}->{source} " ) ];
      my $commands = makeReplicateCommands($sourceList, $statusList, $newStatus );

      unless ( $commands && %$commands ) {
         logMsg( "Nothing to do for $dataset" );
         next;
      }

      # sorted so the order of work (and of the log) is the same on every run
      foreach my $cmd ( sort keys %$commands ) {
         my $command  = $commands->{$cmd};
         # $loggable mirrors $command but never contains the encryption key,
         # so the key does not end up in the log file or on the console
         my $loggable = $command;
         if ( $key ) {
            # NOTE(review): the key is still visible in the process list
            # while openssl runs, and runCmd may log the command itself --
            # confirm in ZFS_Utils.
            $command  .= " | openssl enc -aes-256-cbc -K $key -iv $iv ";
            $loggable .= " | openssl enc -aes-256-cbc -K <redacted> -iv $iv ";
         }
         my $redirect = " > $config->{transport}->{mount_point}/" . replaceSlashWithDot($cmd);
         $command  .= $redirect;
         $loggable .= $redirect;

         logMsg("Running command: $loggable");
         runCmd(  $command  ) unless $DEBUG;
      }
   }
   return $newStatus;
}
209
 
33 rodolico 210
# Remove all plain files from a directory, leaving subdirectories (and
# anything inside them) alone.
#
# Parameters:
#   $dirname - directory to clean
# Behaviour:
#   Calls fatalError() when the directory cannot be opened. A file that can
#   not be removed only produces a warning; the remaining files are still
#   processed.
sub cleanDirectory {
   my $dirname = shift;
   logMsg( "Cleaning up $dirname of all files" );

   opendir( my $dh, $dirname ) or fatalError( "Can not open $dirname: $!" );
   # build full paths, keeping only entries that are plain files
   my @files = grep { -f } map { "$dirname/$_" } readdir($dh);
   closedir $dh;

   foreach my $file (@files) {
      unlink $file or warn "Could not unlink $file: $!\n";
   }
   return;
}
223
 
224
 
225
 
31 rodolico 226
# Fatal error handler: send the message to the log via logMsg(), then abort
# with a bare die.
#
# Parameters:
#   $message - text describing the failure
sub fatalError {
   my ($message) = @_;
   logMsg( $message );
   die;
}
30 rodolico 232
 
31 rodolico 233
 
30 rodolico 234
##################### main program starts here #####################
# Example to create a random key for encryption/decryption:
#   openssl rand 32 | xxd -p | tr -d '\n' > test.key

# If a YAML config file exists next to the script, load and merge it
$config = loadConfig($configFileName, $config );

# set some defaults
$config->{'status_file'} //= "$0.status";

fatalError( "Invalid config file: missing source and/or target server" )
    unless (defined $config->{source_server} && defined $config->{target_server});

# mount the transport drive, fatal error if we can not find it.
# mountDriveByLabel's return value becomes the mount point used from here on.
fatalError( "Unable to mount tranport drive with label $config->{transport}->{disk_label}" )
   unless $config->{transport}->{mount_point} =  mountDriveByLabel( $config->{transport}->{disk_label}, $config->{transport}->{mount_point}, $config->{transport}->{timeout} );

# work out which role this machine plays
my $servername = `hostname -s`;
$servername = '' unless defined $servername;
chomp $servername;
# an empty name must never match an empty (unset) hostname in the config,
# otherwise the source branch below would wipe the transport drive
fatalError( "Unable to determine the hostname of this server" )
   unless length $servername;

if ( $servername eq ( $config->{source_server}->{hostname} // '' ) ) {
    logMsg("Running as source server");
    # remove all files from transport disk, but leave all subdirectories alone
    # NOTE(review): this also runs when $DEBUG is set -- confirm that is intended
    cleanDirectory( $config->{transport}->{mount_point} );
    my $statusList = getStatusFile($config->{status_file});
    $statusList = doSourceReplication($config, $statusList);
    if ( $DEBUG ) {
        # nothing was transferred, so recording the new snapshots would make
        # the next real run skip them
        logMsg("DEBUG is set, not updating status file '$config->{status_file}'");
    } else {
        writeStatusFile($config->{status_file}, $statusList);
    }
} elsif ( $servername eq ( $config->{target_server}->{hostname} // '' ) ) {
    logMsg("Running as target server");
    die "Target Server code not complete\n";
    # work in progress: everything below in this branch is unreachable
    # until the die above is removed
    die "GELI target server logic not yet implemented\n" if ( defined $config->{target_server}->{geli} );
    mountGeli( $config->{target_server}->{geli} ) if ( defined $config->{target_server}->{geli} );
} else {
    fatalError( "This server ($servername) is neither source nor target server as per config\n" );
}

# unmount the sneakernet drive; list-form system keeps the path away from the shell
my $mountPoint = $config->{transport}->{mount_point};
if ( system( 'umount', $mountPoint ) == 0 ) {
    # and remove the directory
    rmdir $mountPoint
       or logMsg("Could not remove mount point '$mountPoint': $!");
} else {
    logMsg("Could not unmount '$mountPoint' (umount returned status $?)");
}

1;
278
 
279
 
24 rodolico 280
#`cat $config->{input} | openssl enc -aes-256-cbc -K $config->{key} -iv $config->{IV} > $config->{output}`;
281
 
282
# this will decrypt $config->{output} to stdout
283
#`cat $config->{output} | openssl enc -aes-256-cbc -d -K $config->{key} -iv $config->{IV} > test.out`;