# Subversion Repositories zfs_utils
#
# Rev 48

# Simplified BSD License (FreeBSD License)
#
# Copyright (c) 2025, Daily Data Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package ZFS_Utils;

use strict;
use warnings;
use Exporter 'import';
use Data::Dumper;
use POSIX qw(strftime);
use File::Path qw(make_path);

# library of ZFS related utility functions
# Copyright 2025 Daily Data Inc. <rodo@dailydata.net>

# currently used for sneakernet scripts, but plans to expand to other ZFS related tasks
# functions include:
#   runCmd: execute a command and return its output (captures exit status in $lastRunError;
#           supports optional stderr merge via $merge_stderr)
#   shredFile: securely delete a file using gshred (note: not effective on ZFS due to COW)
#   logMsg: timestamped logging to a file and optionally to console
#   loadConfig: load a YAML configuration file into a hashref; will create the file from a
#           provided default hashref if the file does not exist (uses YAML::XS or YAML::Tiny)
#   mountDriveByLabel: find and mount a drive by its GPT label (supports ufs/msdos; waits
#           for device and creates mountpoint)
#   unmountDriveByLabel: unmount a drive found by GPT label and remove the mountpoint if empty
#   mountGeli: high level orchestrator to decrypt multiple GELI devices and import/mount a ZFS pool
#   decryptAndMountGeli: attach GELI devices, optionally build a combined key, import the pool
#           and mount ZFS datasets
#   makeGeliKey: create a GELI key by XOR'ing a remote binary keyfile and a local 256-bit hex key;
#           writes a 32-byte binary key file with mode 0600
#   findGeliDisks: discover candidate disks suitable for GELI on the host
#   makeReplicateCommands: build zfs send/receive command lists from snapshot lists and prior status
#   sendReport: deliver a replication report by copying the log file to a labelled drive and/or e-mailing it
#   fatalError: log a fatal condition, optionally run a cleanup routine, then die
#   getDirectoryList: list the regular files in a directory (non-recursive)
#   cleanDirectory: remove all regular files from a directory (non-recursive)
#   exported package variables: $logFileName, $displayLogsOnConsole, $lastRunError, $verboseLoggingLevel
#
# v1.0 RWR 20251215
# This is the initial, tested release
#
# v1.0.1 RWR 20251215
# Added verbose logging control to logMsg calls, controlled by $verboseLoggingLevel

# Exported functions and variables.
# Nothing is exported by default; callers name what they need, e.g.
#    use ZFS_Utils qw(runCmd logMsg $verboseLoggingLevel);
# $merge_stderr is included so callers can switch stderr capture in runCmd on and off
# the same way they adjust the other control variables.
our @EXPORT_OK = qw(
   loadConfig shredFile mountDriveByLabel unmountDriveByLabel mountGeli
   logMsg runCmd makeReplicateCommands sendReport fatalError
   getDirectoryList cleanDirectory
   $logFileName $displayLogsOnConsole $lastRunError $verboseLoggingLevel
   $merge_stderr
);

# NOTE(review): the change log above mentions v1.0.1 while this still says 1.0 - confirm which is intended
our $VERSION = '1.0';

# these are variables which affect the flow of the program and are exported so they can be modified by the caller
our $logFileName = '/tmp/zfs_utils.log'; # log file used by logMsg; can be overridden by the caller, and turned off with empty string
our $displayLogsOnConsole = 1; # if true, log messages are also printed to STDOUT
our $merge_stderr = 0; # if true, runCmd appends ' 2>&1' so stderr is captured along with stdout
our $lastRunError = 0; # raw wait status ($?) of the last command run by runCmd; 0 means success
our $verboseLoggingLevel = 0; # higher values enable more logMsg output (callers in this file test against 0 through 3)

# Execute a shell command and return its output.
# All arguments are joined with single spaces and the result is run via backticks,
# so the command line is interpreted by the shell.
# If called in scalar context, returns the full output as a single string.
# If called in list context, returns the output split into lines (newlines removed),
# or an empty list if there was no output.
# If $merge_stderr is true (it defaults to 0), ' 2>&1' is appended so stderr is
# captured together with stdout.
# The raw wait status ($?) of the command is always stored in $lastRunError.
# Failure handling:
#   command could not be started - logged; returns '' (scalar) or an empty list (list)
#   command killed by a signal   - logged, then dies with a descriptive message
#   non-zero exit code           - logged; whatever output was captured is still returned
sub runCmd {
   my $cmd = join( ' ', @_ );
   $merge_stderr = 1 unless defined $merge_stderr; # only matters if a caller undef'd it

   logMsg( "Running command [$cmd]" ) if $verboseLoggingLevel >= 2;
   $cmd .= ' 2>&1' if $merge_stderr;
   my $output = `$cmd`;
   # capture status and errno immediately; logMsg performs I/O of its own
   my $status = $?;
   my $osError = "$!";
   $lastRunError = $status;

   if ( $status == -1 ) {
      logMsg( "failed to execute: $osError" );
      # an empty list (not a single '') so list callers do not see a bogus blank line
      return wantarray ? () : '';
   }
   if ( $status & 127 ) { # fatal error, exit program
      my $reason = sprintf( "child died with signal %d, %s coredump", ( $status & 127 ), ( $status & 128 ) ? 'with' : 'without' );
      logMsg( $reason );
      die "runCmd: [$cmd] $reason\n";
   }
   if ( $status >> 8 ) { # it had some return code other than 0
      logMsg( sprintf( "child exited with value %d", $status >> 8 ) );
   }
   $output //= '';

   return $output unless wantarray;
   return $output eq '' ? () : split( /\n/, $output );
}

# Overwrite and remove a file using gshred.
# Options used: -u removes the file after overwriting, -f forces permission changes
# if needed, -s 32 limits the overwrite to 32 bytes. gshred overwrites the file
# 3 times (per the original author's note), then removes it.
# NOTE(review): with -s 32 only the first 32 bytes are overwritten, which matches the
# 32-byte key files written by makeGeliKey - confirm no larger files are shredded.
# NOTE: this will not work on ZFS, since ZFS is CopyOnWrite (COW)
# so assuming file is on something without COW (ramdisk, UFS, etc)
# Returns 1 if gshred ran and exited 0, '' otherwise (including when the file
# does not exist).
sub shredFile {
   my $filename = shift;
   return '' unless defined $filename && $filename ne '' && -e $filename;
   # list form of system bypasses the shell, so spaces or shell metacharacters in
   # the file name cannot alter the command; '--' stops option parsing
   my $status = system( '/usr/local/bin/gshred', '-u', '-f', '-s', '32', '--', $filename );
   return $status == 0 ? 1 : '';
}

# Write a timestamped message to the log file and, optionally, to the console.
# Arguments:
#   $msg             - text to log; trailing newlines are removed because one is always added
#   $filename        - OPTIONAL log file to append to (default: $logFileName); an empty
#                      string disables file logging
#   $timeStampFormat - OPTIONAL strftime format (default: '%Y-%m-%d %H:%M:%S')
# Each line is written as "<timestamp>\t<message>\n". The same line is printed to
# STDOUT when $displayLogsOnConsole is true.
# Dies if the log file cannot be opened for appending.
sub logMsg {
    my $msg = shift // '';
    my $filename = shift // $logFileName;
    my $timeStampFormat = shift // '%Y-%m-%d %H:%M:%S';
    # many callers pass messages that already end in "\n"; strip it so the log
    # does not fill up with blank lines
    $msg =~ s/[\r\n]+\z//;
    my $line = strftime($timeStampFormat, localtime()) . "\t$msg\n";
    if (defined $filename && $filename ne '' ) {
       open my $logfh, '>>', $filename or die "Could not open log file $filename: $!\n";
       print {$logfh} $line;
       close $logfh;
    }
    print $line if ($displayLogsOnConsole);
}

# Find a drive by its label and mount it.
# $driveInfo is a hashref with the following keys:
#   label          - the label of the drive (required; letters, digits, '_' and '-' only)
#   fstype         - filesystem type passed to 'mount -t' (default: ufs). For 'msdos' the
#                    device is looked up as /dev/msdosfs/<label>, otherwise /dev/gpt/<label>
#   mountPath      - where to mount the drive (default: /mnt/<label>)
#   timeout        - how long to wait for the device to appear (default: 600 seconds)
#   check_interval - how often to check for the device (default: 15 seconds)
# Missing mountPath, fstype, timeout and check_interval are filled in on the caller's
# hash with their defaults; the configured timeout itself is never modified, so the
# same hash can be passed again later.
# If the drive is found (or is already mounted), returns the mountPath.
# Returns an empty string on any failure (bad label, device never appeared, mount
# point could not be created, mount failed).
sub mountDriveByLabel {
   my ( $driveInfo ) = @_;
   unless ($driveInfo->{label}) {
      logMsg("mountDriveByLabel: No drive label provided");
      return '';
   }
   unless ( $driveInfo->{label} =~ /^[a-zA-Z0-9_\-]+$/ ) {
      logMsg("mountDriveByLabel: Invalid label '$driveInfo->{label}'");
      return '';
   }

   logMsg("mountDriveByLabel: Looking for drive with label '$driveInfo->{label}'") if $verboseLoggingLevel >= 1;
   # default to /mnt/label if not provided
   $driveInfo->{mountPath} //= "/mnt/$driveInfo->{label}"; # this is where we'll mount it if we find it
   $driveInfo->{fstype} //= 'ufs'; # default to mounting ufs
   # fstype ends up on a shell command line, so restrict it to a plain word
   unless ( $driveInfo->{fstype} =~ /^[a-zA-Z0-9_]+$/ ) {
      logMsg("mountDriveByLabel: Invalid filesystem type '$driveInfo->{fstype}'");
      return '';
   }
   # The location for the label depends on filesystem:
   # labeled drives for msdos are in /dev/msdosfs/, everything else in /dev/gpt/
   my $labelPath = $driveInfo->{fstype} eq 'msdos' ? "/dev/msdosfs/$driveInfo->{label}" : "/dev/gpt/$driveInfo->{label}";

   # drive already mounted, just return the path. Fixed-string match on ' on <path> '
   # so /mnt/key is not mistaken for /mnt/key2
   runCmd( "mount | grep -F ' on $driveInfo->{mountPath} '" );
   return $driveInfo->{mountPath} if ( $lastRunError == 0 ); # grep found it for us

   # default to 10 minutes (600 seconds) if not provided
   $driveInfo->{timeout} //= 600;
   # default to checking every 15 seconds if not provided
   $driveInfo->{check_interval} //= 15;
   # count down on a copy so the caller's timeout survives for the next call,
   # and never sleep for a non-positive interval (that would loop forever)
   my $remaining = $driveInfo->{timeout};
   my $interval = $driveInfo->{check_interval} > 0 ? $driveInfo->{check_interval} : 15;
   while ( $remaining > 0 && ! -e $labelPath ) {
      print "Waiting for drive labeled $driveInfo->{label}, looking in $labelPath\n";
      sleep $interval;
      $remaining -= $interval;
   }
   # device never showed up
   return '' unless -e $labelPath;

   # ensure mount point; make_path croaks on hard failures, so trap that and
   # report it through the normal "return ''" path
   unless ( -d $driveInfo->{mountPath} ) {
      my $created = eval { make_path( $driveInfo->{mountPath} ); 1 };
      unless ( $created && -d $driveInfo->{mountPath} ) {
         logMsg("Failed to create $driveInfo->{mountPath}: " . ( $@ || $! ));
         return '';
      }
   }
   # mount device
   runCmd( "mount -t $driveInfo->{fstype} $labelPath '$driveInfo->{mountPath}'" );
   if ( $lastRunError ) {
      logMsg("Failed to mount $labelPath on $driveInfo->{mountPath} (exit status " . ( $lastRunError >> 8 ) . ")") if $verboseLoggingLevel >= 0;
      return '';
   }
   return $driveInfo->{mountPath};
}

# Unmount the drive described by $driveInfo.
# $driveInfo is a hashref with the keys:
#   label     - the label of the drive (required; letters, digits, '_' and '-' only)
#   mountPath - where the drive is mounted (default: /mnt/<label>, stored back into the hash)
# On success, removes the mount point directory if it is empty and returns the mountPath.
# Returns an empty string if the label is missing/invalid, the path is not currently
# mounted, or umount fails.
sub unmountDriveByLabel {
   my ( $driveInfo ) = @_;
   unless ($driveInfo->{label}) {
      logMsg("unmountDriveByLabel: No drive label provided");
      return '';
   }
   unless ( $driveInfo->{label} =~ /^[a-zA-Z0-9_\-]+$/ ) {
      logMsg("unmountDriveByLabel: Invalid label '$driveInfo->{label}'");
      return '';
   }

   logMsg("unmountDriveByLabel: Looking for drive with label '$driveInfo->{label}'") if $verboseLoggingLevel >= 1;
   # default to /mnt/label if not provided
   $driveInfo->{mountPath} //= "/mnt/$driveInfo->{label}";

   # fixed-string match on ' on <path> ' so /mnt/key is not mistaken for /mnt/key2
   runCmd( "mount | grep -F ' on $driveInfo->{mountPath} '" );
   if ( $lastRunError ) {
     logMsg("Drive with label '$driveInfo->{label}' is not mounted") if $verboseLoggingLevel >= 2;
     return '';
   }

   # unmount device
   runCmd( "umount '$driveInfo->{mountPath}'" );
   if ( $lastRunError ) {
     logMsg("Failed to unmount $driveInfo->{mountPath} (exit status " . ( $lastRunError >> 8 ) . ")");
     return '';
   }

   # rmdir only ever removes an empty directory, so no separate emptiness check is
   # needed; a non-empty mount point is simply left in place
   rmdir $driveInfo->{mountPath};
   return $driveInfo->{mountPath};
}

## Load a YAML configuration file into a hashref.
##
## Arguments:
##   $filename - path of the YAML file (required; dies if undefined)
##   $default  - OPTIONAL hashref of default settings
##
## Behavior:
##   - If the file does not exist and a default hashref is provided, the default is
##     written to the file as YAML and the default hashref is returned.
##   - If the file does not exist and no default is provided, an empty hashref is returned.
##   - Otherwise the file is loaded with YAML::XS, falling back to YAML::Tiny. If neither
##     can load it, the problem is logged and the default (or an empty hashref) is returned.
##
## Dies if the default file cannot be written or the file does not contain a hash.
sub loadConfig {
   my ($filename, $default) = @_;

   die "No filename provided to loadConfig\n" unless defined $filename;

   my $haveDefault = ( $default && ref $default eq 'HASH' ) ? 1 : 0;

   unless (-e $filename) {
      unless ( $haveDefault ) {
         # No default provided; nothing to create
         logMsg("Config file $filename does not exist and no default was provided.");
         return {};
      }
      logMsg("Config file $filename does not exist. Creating it with default values.");
      my @errors;
      my $wrote = eval {
         require YAML::XS;
         YAML::XS::DumpFile($filename, $default);
         1;
      };
      unless ( $wrote ) {
         push @errors, "YAML::XS: $@";
         $wrote = eval {
            require YAML::Tiny;
            YAML::Tiny->new($default)->write($filename)
               or die "write failed\n";
            1;
         };
         unless ( $wrote ) {
            push @errors, "YAML::Tiny: $@";
            logMsg("No YAML writer available (YAML::XS or YAML::Tiny). Could not create $filename");
         }
      }
      unless ( $wrote ) {
         chomp @errors;
         die "Failed to write default config to $filename: " . join( '; ', @errors ) . "\n";
      }
      # the file now holds exactly these values, so hand them back to the caller
      return $default;
   } # unless -e $filename

   my $yaml;
   my @errors;

   # Try YAML::XS first, fall back to YAML::Tiny
   my $loaded = eval {
      require YAML::XS;
      $yaml = YAML::XS::LoadFile($filename);
      logMsg("using YAML::XS to load $filename") if $verboseLoggingLevel >= 3;
      1;
   };
   unless ( $loaded ) {
      push @errors, "YAML::XS: $@";
      $loaded = eval {
         require YAML::Tiny;
         $yaml = YAML::Tiny->read($filename);
         $yaml = $yaml->[0] if $yaml;  # YAML::Tiny returns an arrayref of documents
         logMsg("using YAML::Tiny to load $filename") if $verboseLoggingLevel >= 3;
         1;
      };
      unless ( $loaded ) {
         push @errors, "YAML::Tiny: $@";
         chomp @errors;
         # keep the reasons: a module that is installed but rejected the file
         # looks the same as a missing module otherwise
         logMsg("No YAML parser installed (YAML::XS or YAML::Tiny). Skipping config load from $filename (" . join( '; ', @errors ) . ")");
         return $haveDefault ? $default : {};
      }
   }
   # Ensure we have a hashref
   die "Config file $filename did not produce a HASH.\n" unless (defined $yaml && ref $yaml eq 'HASH');

   return $yaml;
}


## High-level entry point: unlock a GELI-encrypted ZFS pool and mount it.
##
## Arguments:
##   $geliConfig - HASHREF with the GELI/ZFS mounting configuration. Keys used here or
##                 by the helpers this routine calls:
##       poolname        - name of the zpool to import (required)
##       secureKey       - HASHREF { label, keyfile, path } describing the keyfile disk;
##                         path is overwritten with the result of mountDriveByLabel
##       target          - path where the combined keyfile will be written
##       diskList        - OPTIONAL arrayref of disk device names (eg: ['ada0','ada1'])
##
## Steps:
##   1. mountDriveByLabel mounts the keyfile disk
##   2. makeGeliKey builds the combined key
##   3. decryptAndMountGeli attaches the geli devices and imports/mounts the zpool
##
## Returns:
##   Pool name (string) on success, empty string when a step reports failure.
##   makeGeliKey dies on unrecoverable errors; that is not trapped here.
sub mountGeli {
   my ( $geliConfig ) = @_;

   logMsg( "geli config detected, attempting to mount geli disks" ) if $verboseLoggingLevel >= 0;

   # without a pool name there is nothing to import
   if ( !$geliConfig->{'poolname'} ) {
      logMsg( "Could not find pool name in configuration file\n" );
      return '';
   }

   # step 1: locate and mount the disk that carries the remote keyfile
   $geliConfig->{secureKey}->{path} = mountDriveByLabel( $geliConfig->{secureKey} );
   if ( !$geliConfig->{secureKey}->{path} ) {
      logMsg( "Could not find or mount keyfile disk with label: " . $geliConfig->{secureKey}->{label} );
      return '';
   }

   # step 2: write the combined geli keyfile to the target location
   if ( !makeGeliKey( $geliConfig ) ) {
      logMsg( "Could not create geli keyfile\n" );
      return '';
   }

   # step 3: attach the geli disks, then import and mount the pool
   return scalar decryptAndMountGeli( $geliConfig );
}

## Discover disks suitable for GELI/ZFS use on the host.
##
## Returns a sorted list of device names (eg: qw( ada0 ada1 )) that appear free for use
## (in scalar context: the number of such disks).
## The routine collects all disks reported by 'geom disk list', then excludes disks
## that 'gpart show' lists (they carry a partition table) and disks referenced by
## 'zpool status'.
sub findGeliDisks {
   logMsg("Finding available disks for GELI/ZFS use") if $verboseLoggingLevel >= 2;
   # get all disks in system; every disk starts out as a candidate (value 1).
   # Blank entries (eg: when the command produced nothing usable) are dropped so
   # an empty name is never offered as a disk.
   my %allDisks = map { $_ => 1 }
                  grep { $_ ne '' }
                  map  { s/^\s+|\s+$//gr }
                  runCmd( "geom disk list | grep 'Geom name:' | rev | cut -d' ' -f1 | rev" );
   # lines starting with '=>' are the header lines gpart prints for each
   # partitioned disk; the device name follows the numeric columns
   my @temp = runCmd( "gpart show -p | grep '^=>'" );
   # remove them from the list
   foreach my $disk ( @temp ) {
      $allDisks{$1} = 0 if ( $disk =~ m/^=>[\t\s0-9]+([a-z][a-z0-9]+)/ ) ;
   }

   # get disks which are currently used for zpools
   @temp = runCmd( "zpool status -LP | grep '/dev/'" );
   foreach my $disk ( @temp ) {
      $allDisks{$1} = 0 if  $disk =~ m|/dev/([a-z]+\d+)|;
   }

   # return only the disks which are free (value 1), in a stable order
   my @freeDisks = sort grep { $allDisks{$_} == 1 } keys %allDisks;
   return @freeDisks;
}

## Decrypt GELI-encrypted disks and import/mount the ZFS pool.
##
## Arguments:
##   $geliConfig - HASHREF expected to contain:
##       poolname - zpool name to import
##       target   - path to the combined GELI keyfile created by makeGeliKey
##       diskList - OPTIONAL arrayref of disk device names (if omitted, findGeliDisks() is
##                  used and the result is stored back into the hash)
##
## Behavior:
##   - Returns immediately if a pool with exactly this name is already imported
##   - Attaches (geli attach) each supplied disk using the keyfile; disks that fail to
##     attach are skipped so the import can still be attempted
##   - Attempts to import the specified pool and runs `zfs mount -a` to mount datasets
##   - Disk names in diskList without a /dev prefix are rewritten in place to /dev/<name>
##
## Returns:
##   Pool name (string) on success; empty string on failure.
sub decryptAndMountGeli {
   my ( $geliConfig ) = @_;

   # if no list of disks provided, try to find them
   $geliConfig->{'diskList'} //= [ findGeliDisks() ];

   my $poolname = $geliConfig->{'poolname'};
   my $keyfile = $geliConfig->{'target'} // '';

   unless ( defined $poolname && $poolname ne '' ) {
      logMsg( "No pool name provided to decryptAndMountGeli" );
      return '';
   }

   # check if the pool is already imported (grep returns 0 on found, something else on not).
   # -F -x compares whole lines literally, so pool 'tank' does not match 'tank2'
   runCmd( "zpool list -H -o name | grep -Fx '$poolname'" );
   return $poolname unless $lastRunError;

   unless ( $keyfile ne '' && -e $keyfile ) {
      logMsg( "GELI keyfile $keyfile does not exist\n" );
      return '';
   }

   # Decrypt each disk in the list
   foreach my $disk (@{$geliConfig->{'diskList'}}) {
      $disk = '/dev/' . $disk unless $disk =~ m|/dev|;
      unless ( -e $disk ) {
         logMsg( "Disk $disk does not exist\n" );
         return '';
      }

      # Derive the decrypted device name (.eli suffix on FreeBSD)
      my $decrypted = $disk . '.eli';

      # Decrypt using geli attach with the keyfile
      logMsg("Decrypting $disk with keyfile $keyfile") if $verboseLoggingLevel >= 2;
      runCmd("geli attach -p -k $keyfile $disk");
      if ( $lastRunError ) {
         logMsg( "Failed to decrypt $disk (exit $lastRunError)\n" ) if $verboseLoggingLevel >= 3;
         next; # ignore failed disks and continue to see if we can import the pool
      }

      unless ( -e $decrypted ) {
         logMsg( "Decrypted device $decrypted does not exist after geli attach\n" ) if $verboseLoggingLevel >= 0;
         return '';
      }
   }

   # Import the ZFS pool
   logMsg("Importing ZFS pool $poolname") if $verboseLoggingLevel >= 0;
   runCmd( "zpool import $poolname" );
   unless ( $lastRunError == 0 ) {
      logMsg("Failed to import zfs pool $poolname (exit $lastRunError)\n");
      return '';
   }

   # Mount the ZFS pool (zfs mount -a mounts all filesystems in the pool)
   logMsg("Mounting ZFS pool $poolname") if $verboseLoggingLevel >= 1;
   runCmd('zfs mount -a');
   unless ( $lastRunError == 0 ) {
      logMsg("Failed to mount zfs pool $poolname (exit $lastRunError)\n");
      return '';
   }

   logMsg("Successfully decrypted and mounted pool $poolname") if $verboseLoggingLevel >= 2;
   return $poolname;
}

## Create a GELI key by XOR'ing a remote binary keyfile and a local key (hex string).
##
## Expected input (via $geliConfig HASHREF):
##   $geliConfig->{secureKey}->{path} - directory where the remote keyfile resides
##   $geliConfig->{secureKey}->{keyfile} - filename of the remote 32-byte binary key
##   $geliConfig->{localKey} - 64-hex char string OR path to a file containing the hex
##   $geliConfig->{target} - path to write the resulting 32-byte binary key
##
## Behavior:
##   - If the target already exists as a regular file it is left alone and 1 is returned
##   - Reads 32 bytes from the remote binary key
##   - Reads/cleans the 64-hex local key and converts it to 32 bytes
##   - XORs the two 32-byte buffers and writes the 32-byte result to the target. The file
##     is created under a 077 umask so it is never readable by others, then set to 0600
##   - Missing parent directories of the target are created
##
## Returns: 1 on success. Dies on unrecoverable errors (missing configuration, unreadable
## or short keyfile, malformed local key, write failure).
sub makeGeliKey {
   my ( $geliConfig ) = @_;

   $geliConfig->{secureKey}->{keyfile} //= '';
   $geliConfig->{localKey} //= '';
   $geliConfig->{target} //= '';

   my $target = $geliConfig->{target};
   my $keyfileName = $geliConfig->{secureKey}->{keyfile};
   my $localKeyHexOrPath = $geliConfig->{localKey};

   if ( $target && -f $target ) {
      logMsg( "GELI target keyfile $target already exists. Not overwriting.\n" ) if $verboseLoggingLevel >= 2;
      return 1;
   }

   # fail early with a clear message instead of a confusing open/read error later
   die "No GELI target keyfile path configured (target)\n" if $target eq '';
   die "No remote keyfile name configured (secureKey->keyfile)\n" if $keyfileName eq '';
   die "No local key configured (localKey)\n" if $localKeyHexOrPath eq '';

   my $remote_keyfile = ( $geliConfig->{secureKey}->{path} // '' ) . "/$keyfileName";
   die "Remote keyfile $keyfileName does not exist\n" unless -f $remote_keyfile;
   logMsg( "Creating GELI keyfile at $target using remote keyfile $keyfileName and local key\n" )
      if $verboseLoggingLevel >= 2;

   # Read remote binary key
   open my $rh, '<:raw', $remote_keyfile or die "Unable to open $remote_keyfile: $!\n";
   my $rbuf;
   my $read = read($rh, $rbuf, 32);
   close $rh;
   die "Failed to read 32 bytes from $remote_keyfile (got " . ( $read // 'a read error' ) . ")\n"
      unless defined $read && $read == 32;

   # Get local hex string (either direct string or file contents)
   my $hex;
   if (-e $localKeyHexOrPath) {
      open my $lh, '<', $localKeyHexOrPath or die "Unable to open local key file $localKeyHexOrPath: $!\n";
      local $/ = undef;
      $hex = <$lh>;
      close $lh;
      $hex //= '';
   } else {
      $hex = $localKeyHexOrPath;
   }
   # clean hex (remove whitespace/newlines and optional 0x)
   $hex =~ s/0x//g;
   $hex =~ s/[^0-9a-fA-F]//g;

   die "Local key must be 64 hex characters (256-bit)\n" unless length($hex) == 64;

   my $lbuf = pack('H*', $hex);
   die "Local key decoded to unexpected length " . length($lbuf) . "\n" unless length($lbuf) == 32;

   # XOR the two buffers byte by byte
   my $out = join '', map { chr( ord( substr($rbuf, $_, 1) ) ^ ord( substr($lbuf, $_, 1) ) ) } 0 .. 31;

   # Ensure target directory exists. Everything before the last '/' is the directory,
   # keeping a leading '/' so absolute targets are not treated as relative paths.
   if ( $target =~ m{^(.+)/[^/]+$} ) {
      my $dir = $1;
      unless (-d $dir) {
         require File::Path;
         File::Path::make_path($dir) or die "Failed to create directory $dir: $!\n";
      }
   }

   # Write out binary key. A restrictive umask makes the file private from the
   # moment it is created, instead of only after the chmod below.
   my $oldUmask = umask( 077 );
   my $opened = open my $oh, '>:raw', $target;
   my $openError = "$!";
   umask( $oldUmask ) if defined $oldUmask;
   die "Unable to open $target for writing: $openError\n" unless $opened;
   print {$oh} $out or die "Failed to write to $target: $!\n";
   # buffered write errors only surface at close
   close $oh or die "Failed to write to $target: $!\n";
   chmod 0600, $target or logMsg( "Could not set mode 0600 on $target: $!" );

   return 1;
}

# Build the zfs send commands needed to bring the target up to date with the source.
# $sourceSnapsRef - arrayref of snapshots on source machine (pool/fs@snap, first token of each line)
# $targetSnapsRef - arrayref of snapshots on target machine
# $dataset - The name of the dataset we are working on (same on both source and target)
# $sourceParent - The parent dataset of $dataset on source (trailing '/' optional)
# $targetParent - The parent dataset of $dataset on target (trailing '/' optional)
# $newStatusRef - arrayref which receives the updated target status: one
#                 "<targetParent>/<fs>@<newest snap>" entry per filesystem
# Only snapshots whose name contains $dataset are considered, and the parent prefix is
# removed before processing. For each filesystem the newest source snapshot is the "to"
# and the last target snapshot for that filesystem (if any) is the "from".
#   - all filesystems share the same newest snapshot and at least one has no "from":
#       one full recursive send (zfs send -R) of the root filesystem
#   - all share the same newest snapshot and the same "from":
#       one incremental recursive send (zfs send -R -I), unless from and to are equal
#   - otherwise: one send per filesystem (incremental where a "from" exists, skipped
#       when from and to are equal, full send where there is no "from")
# Always returns a hashref of commands to execute (empty if there is nothing to do), of form
#    {$dataset} = "zfs send command"
# where $dataset above can be a child of $dataset.
# Calls fatalError (which dies) if $dataset is not given.
sub makeReplicateCommands {
   my ( $sourceSnapsRef, $targetSnapsRef, $dataset, $sourceParent, $targetParent, $newStatusRef ) = @_;
   $sourceSnapsRef ||= [];
   $targetSnapsRef ||= [];
   $newStatusRef   ||= [];
   $sourceParent //= '';
   $sourceParent .= '/' unless $sourceParent eq '' or substr($sourceParent, -1) eq '/';
   $targetParent //= '';
   $targetParent .= '/' unless $targetParent eq '' or substr($targetParent, -1) eq '/';

   my %commands; # this will hold the commands (and the dataset as key) for return

   fatalError( "No dataset defined in makeReplicateCommands, can not continue") unless $dataset;

   # filter only the target and source snapshots which have this dataset in them, then remove
   # the parent of each. \Q..\E keeps characters such as '.' in dataset names literal.
   # NOTE(review): the dataset match is a substring match, so 'data' also selects 'database'
   my $targetSnaps = [ map { s/^\Q$targetParent\E//r } grep { defined $_ && /\Q$dataset\E/ } @$targetSnapsRef ];
   my $sourceSnaps = [ map { s/^\Q$sourceParent\E//r } grep { defined $_ && /\Q$dataset\E/ } @$sourceSnapsRef ];

   # parse snapshots: each line is expected to have snapshot fullname as first token: pool/fs@snap ...
   # The filesystem of the first non-blank line is taken as the requested root.
   my %snaps_by_fs;
   my $root_fs;
   my $lines_seen = 0;
   foreach my $line (@$sourceSnaps) {
      next unless defined $line && $line =~ /\S/;
      my $is_first = !$lines_seen++;
      my ($tok) = split /\s+/, $line;
      next unless $tok && $tok =~ /@/;
      my ($fs, $snap) = split /@/, $tok, 2;
      $root_fs = $fs if $is_first;
      push @{ $snaps_by_fs{$fs} }, $snap;
   }

   # nothing to do; still a hashref so callers can treat every result the same way
   return \%commands unless keys %snaps_by_fs;

   # first line was unusable: fall back to the alphabetically first filesystem
   $root_fs ||= (sort keys %snaps_by_fs)[0];

   # find last status entry for a filesystem (status lines contain full snapshot names pool/fs@snap)
   my %last_status_for;
   for my $s (@$targetSnaps) {
      next unless $s && $s =~ /@/;
      my ($fs, $snap) = split /@/, $s, 2;
      $last_status_for{$fs} = $snap;    # later entries override earlier ones -> last occurrence kept
   }

   # build per-filesystem "from" and "to"
   my %from_for;
   my %to_for;
   foreach my $fs (keys %snaps_by_fs) {
      my $arr = $snaps_by_fs{$fs};
      next unless @$arr;
      $to_for{$fs} = $arr->[-1];
      $from_for{$fs} = $last_status_for{$fs};    # may be undef -> full send required
   }

   # command for a single filesystem; returns undef when the target is already current
   my $send_for = sub {
      my ($fs) = @_;
      my $to   = $to_for{$fs};
      my $from = $from_for{$fs};
      return sprintf('zfs send %s%s@%s', $sourceParent, $fs, $to) unless $from;
      return undef if $from eq $to;
      return sprintf('zfs send -I %s%s@%s %s%s@%s', $sourceParent, $fs, $from, $sourceParent, $fs, $to);
   };

   # decide if we can do a single recursive send:
   # condition: all 'to' snapshot names are identical
   my %to_names = map { $_ => 1 } values %to_for;
   my $single_to_name = ( scalar( keys %to_names ) == 1 ) ? (keys %to_names)[0] : undef;

   my @from_values = values %from_for;
   my $any_from_missing = grep { !defined $_ } @from_values;
   my %from_names = map { $_ => 1 } grep { defined $_ } @from_values;
   my $single_from_name = ( scalar( keys %from_names ) == 1 ) ? (keys %from_names)[0] : undef;

   if ( $single_to_name && $any_from_missing ) {
      # full recursive send from root
      $commands{$root_fs} = sprintf('zfs send -R %s%s@%s', $sourceParent, $root_fs, $single_to_name);
   }
   elsif ( $single_to_name && $single_from_name ) {
      # incremental recursive send, but don't do it if they are the same
      $commands{$root_fs} = sprintf('zfs send -R -I %s%s@%s %s%s@%s',
                        $sourceParent, $root_fs, $single_from_name, $sourceParent, $root_fs, $single_to_name)
                        unless $single_from_name eq $single_to_name;
   }
   else {
      # newest or last-sent snapshots differ across children -> per-filesystem sends
      foreach my $fs (sort keys %to_for) {
         my $cmd = $send_for->($fs);
         $commands{$fs} = $cmd if defined $cmd;
      }
   }

   # update new status: record newest snap for every filesystem
   foreach my $fs (sort keys %to_for) {
      push @$newStatusRef, sprintf('%s%s@%s', $targetParent, $fs, $to_for{$fs});
   }

   # return hashref of commands, keyed by filesystem
   return \%commands;
}

# Send report via email and/or copy to target drive.
# Arguments:
#   $reportConfig - hashref with optional keys:
#       email       - email address to send report to
#       subject     - email subject (default: 'Replication Report from <hostname>',
#                     stored back into the hash)
#       logFile     - log file to use when $logFile is not passed
#       targetDrive - hashref passed to mountDriveByLabel/unmountDriveByLabel, with keys:
#           label     - label of the target drive
#           mountPath - optional mount point to use (if not provided, /mnt/label is used)
#   $message - the message to include in the email body
#   $logFile - the path to the log file to include in the report
# Does nothing if $reportConfig is undefined.
sub sendReport {
   my ( $reportConfig, $message, $logFile ) = @_;
   return unless defined $reportConfig;
   $logFile //= $reportConfig->{logFile};
   logMsg( "Beginning sendReport" ) if $verboseLoggingLevel >= 0;
   # if targetDrive defined and there is a valid label for it, try to mount it and write the report there
   my $targetDrive = $reportConfig->{targetDrive};
   if ( defined $targetDrive && $targetDrive->{label} ) {
      logMsg( "Saving report to disk with label $targetDrive->{label}" ) if $verboseLoggingLevel >= 2;
      # keep the result in a local: storing a failed ('') result into the config
      # would wipe the configured mount path for every later call
      my $mountedAt = mountDriveByLabel( $targetDrive );
      if ( $mountedAt ) {
         copyReportToDrive( $logFile, $mountedAt );
         unmountDriveByLabel( $targetDrive );
      } else {
         logMsg( "Warning: could not mount report target drive with label '$targetDrive->{label}'" ) if $verboseLoggingLevel >= 1;
      }
   }
   # if they have set an e-mail address, try to e-mail the report
   if ( defined $reportConfig->{email} && $reportConfig->{email} ne '' ) {
      logMsg( "Sending report via e-mail to $reportConfig->{email}" ) if $verboseLoggingLevel >= 1;
      unless ( defined $reportConfig->{subject} ) {
         # hostname prints a trailing newline, which must not end up in a mail header
         my $host = `hostname` // '';
         $host =~ s/\s+\z//;
         $reportConfig->{subject} = 'Replication Report from ' . $host;
      }
      sendEmailReport( $reportConfig->{email}, $reportConfig->{subject}, $message, $logFile );
   }
}

## Place a copy of the report log file on a mounted target drive.
##
## Arguments:
##   $logFile    - path to the log file to copy; nothing happens unless it exists
##   $mountPoint - mount point of the target drive; nothing happens unless it is a directory
##
## Behavior:
##   - The copy keeps the log file's own name (the part after the last '/') and is
##     placed directly in $mountPoint, using File::Copy::copy
##   - A failed copy is reported via logMsg
sub copyReportToDrive {
   my ( $logFile, $mountPoint ) = @_;

   # source file and destination directory must both be present
   return unless defined $logFile;
   return unless -e $logFile;
   return unless defined $mountPoint;
   return unless -d $mountPoint;

   # destination = mount point + base name of the log file
   my @pathParts = split( /\//, $logFile );
   my $targetFile = "$mountPoint/" . $pathParts[-1];

   logMsg( "Copying report log file $logFile to drive at $mountPoint" ) if $verboseLoggingLevel >= 2;
   use File::Copy;
   unless ( copy( $logFile, $targetFile ) ) {
      logMsg( "Could not copy report log file to target drive: $!" ) if $verboseLoggingLevel >= 0;
   }
}

## Send an email report with the log file contents appended to the body.
##
## Arguments:
##   $to      - recipient email address (string); nothing is sent if empty
##   $subject - subject line (default: 'Sneakernet Replication Report from <hostname>')
##   $message - optional message body (string)
##   $logFile - optional path to log file whose contents will be appended to the email body
##
## Behavior:
##   - Line breaks are removed from $to and $subject so they cannot end the header
##     section early or add extra headers.
##   - Opens /usr/sbin/sendmail -t and writes a simple plain-text email including the
##     supplied message and, when $logFile is given, the contents of that file.
##   - Logs failures to open sendmail, to read the log file, or to finish the sendmail pipe.
##
## Returns: the result of closing the sendmail pipe (true on success); nothing if the
## mail could not be started.
sub sendEmailReport {
   my ( $to, $subject, $message, $logFile ) = @_;
   return unless defined $to && $to ne '';
   $subject //= 'Sneakernet Replication Report from ' . ( `hostname` // '' );
   $message //= '';
   $logFile //= '';

   # header values must stay on one line (hostname output ends in a newline)
   for my $headerValue ( $to, $subject ) {
      $headerValue =~ s/[\r\n]+/ /g;
      $headerValue =~ s/\s+\z//;
   }
   return if $to eq '';

   logMsg( "Sending email report to $to with subject '$subject'" ) if $verboseLoggingLevel >= 2;
   # if sendmail exits early, a write must fail with an error instead of killing us
   local $SIG{PIPE} = 'IGNORE';
   open my $mailfh, '|-', '/usr/sbin/sendmail -t' or do {
      logMsg( "Could not open sendmail: $!" ) if $verboseLoggingLevel >= 0;
      return;
   };
   print {$mailfh} "To: $to\n";
   print {$mailfh} "Subject: $subject\n";
   print {$mailfh} "MIME-Version: 1.0\n";
   print {$mailfh} "Content-Type: text/plain; charset=\"utf-8\"\n";
   print {$mailfh} "\n"; # end of headers

   print {$mailfh} "$message\n";
   if ( $logFile ne '' ) {
      print {$mailfh} "\nLog contents:\n\n";
      if ( open my $logfh, '<', $logFile ) {
         while ( my $line = <$logfh> ) {
            print {$mailfh} $line;
         }
         close $logfh;
      } else {
         logMsg( "Could not open log file [$logFile] for reading: $!" ) if $verboseLoggingLevel >= 0;
      }
   }

   # closing a pipe waits for sendmail and reports its failure
   my $sent = close $mailfh;
   unless ( $sent ) {
      logMsg( "sendmail did not complete successfully (status $?): $!" ) if $verboseLoggingLevel >= 0;
   }
   return $sent;
}

## List the regular files found directly inside a directory (no recursion).
##
## Arguments:
##   $dirname - directory to scan
##
## Returns: ARRAYREF of "$dirname/<name>" paths, one per regular file, in the order
## readdir reports them; 0 if the directory cannot be opened.
sub getDirectoryList {
   my ( $dirname ) = @_;

   my $dh;
   return 0 unless opendir( $dh, $dirname );

   # keep plain files only; sub-directories and other entry types are skipped
   my @files;
   while ( defined( my $entry = readdir $dh ) ) {
      my $fullPath = "$dirname/$entry";
      push @files, $fullPath if -f $fullPath;
   }
   closedir $dh;

   return \@files;
}

## Remove all regular files from the specified directory (non-recursive).
##
## Arguments:
##   $dirname - directory to clean
##
## Behavior:
##   - Calls getDirectoryList to obtain files and unlinks each file. Directories are left untouched.
##   - Logs the cleanup operation via logMsg.
##
## Returns: 1 on completion, 0 if the directory could not be read. Individual unlink
## failures are reported via warn (including the system error) and do not stop the run.
sub cleanDirectory {
   my $dirname = shift;
   my $shownName = $dirname // '';
   logMsg( "Cleaning up $shownName of all files" ) if $verboseLoggingLevel >= 2;
   my $files = getDirectoryList( $dirname );
   # getDirectoryList returns 0 (not an arrayref) when the directory cannot be opened
   unless ( ref $files eq 'ARRAY' ) {
      logMsg( "Could not read directory $shownName, nothing cleaned" );
      return 0;
   }
   # clean up a directory
   foreach my $file (@$files) {
      unlink $file or warn "Could not unlink $file: $!\n";
   }
   return 1;
}

## Handle a fatal error: log, optionally run a cleanup routine, then die.
##
## Arguments:
##   $message        - string message describing the fatal condition
##   $config         - OPTIONAL configuration HASHREF (passed to cleanupRoutine)
##   $cleanupRoutine - OPTIONAL CODE ref to run prior to dying; will be called as
##                     $cleanupRoutine->($config, $message)
##
## Behavior:
##   - Logs the fatal message via logMsg, runs the cleanup code if provided (errors in the cleanup
##     are logged), then terminates via die. The die carries "FATAL ERROR: <message>" so
##     a caller trapping it with eval can see what went wrong.
sub fatalError {
   my ( $message, $config, $cleanupRoutine ) = @_;
   $message //= 'unspecified error';
   logMsg( "FATAL ERROR: $message" ) if $verboseLoggingLevel >= 0;
   if ( defined $cleanupRoutine && ref $cleanupRoutine eq 'CODE' ) {
      logMsg( "Running cleanup routine before fatal error" ) if $verboseLoggingLevel >= 2;
      eval {
         $cleanupRoutine->( $config, $message );
         1;
      } or do {
         logMsg( "Cleanup routine failed: $@" ) if $verboseLoggingLevel >= 0;
      };
   }
   # trailing newline keeps perl from appending this helper's file/line
   die "FATAL ERROR: $message\n";
}


1;