Subversion Repositories sysadmin_scripts

Rev

Rev 12 | Blame | Last modification | View Log | Download | RSS feed

#! /usr/bin/env perl

# archiveDirectories.pl
# Author: R. W. Rodolico
# Date: 20180603
# Copyright: 2018, Vanduzen Enterprises, Dallas TX

# Script designed to be run from a cron job, which checks if any directories
# are ready to be archived. A directory is defined as a directory under
# the root of $config{'local root dir'}.

# If found, all directories are moved into the staging area and 
# an md5 checksum is calculated for the entire tree.
# After all directories are moved, a second process looks in the staging
# area and copies the files (using rsync for reliability) into the staging
# area of $config{'target server'}. When a directory has been copied, a checksum is
# calculated on the remote copy and compared to the checksum calculated
# in the first stage and, if it passes, the directory is then moved to the 
# $config{'target final directory'}.
# After the copy and move, the directory and its MD5 sum file are moved
# to the $config{'local trash dir'} (which is cleaned by a later invocation
# of the script when $config{'trash cleanup'} is non-zero).

# Script does NOT handle the situation where directories are being moved
# while the script is running, so the script should be run at a time
# when there is no other activity on the server.
#
# Version: 1.0

use warnings;
use strict;
use Cwd qw();
use File::Copy qw(move);
use File::Basename;
use File::stat;

# Debug verbosity. Progress messages in this file are guarded by tests
# ranging from "if $DEBUG" up to "if $DEBUG > 4", so 5 prints everything.
my $DEBUG = 5;

# Paths, server names, file suffixes and timing values used by the script.
my %config = (
   # --- local directories ---
   # drop area: end users place the directories to be archived here
   'local root dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/ArchiveProjects',
   # directories are moved here while processing
   'local work dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/transfer_area',
   # directories are moved here once the job is completed
   'local trash dir'        => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Trash',
   # directories wait here while being transferred
   'local staging area'     => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Staging',

   # --- remote side ---
   # target server name/ip; must be reachable via ssh with no password
   'target server'          => 'davinci',
   # where directories are placed on the target server while copying
   'target staging area'    => '/home/samba/archives/fromDenver/.Staging/',
   # where directories finally end up on the target server
   'target final directory' => '/home/samba/archives/fromDenver/',

   # --- file name suffixes ---
   # checksum file kept next to each directory
   'md5 suffix'             => 'md5sum',
   # file recording the actions taken for a directory
   'log suffix'             => 'log',
   # error log for a directory
   'error suffix'           => 'err',

   # --- timing, in seconds ---
   # how long a directory must be undisturbed (quiescent) before it is
   # ready to work on: five minutes
   'quiesent seconds'       => 5 * 60,
   # how long to leave things in the trash directory: seven days.
   # 0 means the trash is never cleaned.
   'trash cleanup'          => 7 * 86400,
);

# names of the directories found ready to be moved; filled in by the main script
my @DirectoriesToMove;

# Read an entire file into a single string.
#
# Parameters:
#    $filename - path of the file to read
# Returns the file contents, or '' if the file does not exist.
# Dies if the file exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # Three-argument open with a lexical handle. The parentheses make sure
   # "or die" applies to open() itself and not to the mode/filename string.
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # undefined record separator: read everything in one go
   my $contents = <$fh>;
   close $fh;
   return defined $contents ? $contents : '';
}

# Write data to a file, replacing any previous contents.
#
# Parameters:
#    $filename - path of the file to (over)write
#    @_        - remaining arguments are concatenated and written as is
#                (no separators or newline are added)
# Dies if the file cannot be opened or the data cannot be flushed to it.
sub writeData {
   my $filename = shift;
   # Three-argument open with a lexical handle. The parentheses make sure
   # "or die" applies to open() itself and not to the mode/filename string.
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print $fh join( '', @_ );
   # buffered write errors (e.g. disk full) only show up at close
   close( $fh ) or die "could not write to $filename: $!\n";
}

# Scan $rootDir for directories that are ready to be archived.
#
# For every non-hidden directory directly under $rootDir the checksum of
# the tree is calculated (calcMD5) and compared with the value stored in
# the companion file "<directory>.<md5 suffix>":
#   - no companion file yet: it is created and the directory waits
#   - companion file written less than $config{'quiesent seconds'} ago:
#     the directory waits
#   - checksum differs from the stored one: the companion file is
#     rewritten with the new checksum and the directory waits
#   - checksum unchanged: the directory is ready
#
# Parameters:
#    $rootDir - directory to scan
# Returns the list of ready directory names (relative to $rootDir).
# Dies if $rootDir cannot be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir ( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }
      # find out when it was last written to; File::stat's stat() returns
      # an object, so use its accessor rather than its internal layout
      my $fileInfo = stat( $md5Name );
      unless ( $fileInfo ) {
         # the file disappeared or is unreadable; try again on the next run
         print "\tCould not stat $md5Name: $!\n" if $DEBUG;
         next;
      }
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }
      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # the checksum has changed, so record the new one; the age check
         # above will then make the directory wait again
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}

# Calculate the checksum of a directory tree by
# 1. calculating the md5sum of each individual file in the entire tree
# 2. grabbing the first column, which is the checksum
# 3. sorting the result, since find does not always return the files in
#    the same order
# 4. doing a checksum of the sorted checksums
#
# This is highly unlikely to give the same answer if any file changes
# in the process of the copy.
#
# Parameters:
#    $directory - path of the directory to checksum
# Returns the checksum as a hex string, or -1 if $directory is not a
# directory.
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # Directory names come from end users, so make the name safe inside the
   # single quotes of the shell command: each ' becomes '\'' (close the
   # quote, an escaped quote, reopen the quote).
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}

# Move a directory, together with its checksum file, into the staging area.
#
# Parameters:
#    $directory - name of the directory (no path)
#    $fullPath  - current full path of the directory; the checksum file is
#                 expected next to it as "$fullPath.<md5 suffix>"
#    $staging   - staging area; created if it is not there yet
# Returns '' on success, or an explanatory message when the staging area
# already holds an entry with that name (nothing is moved in that case).
# Dies if either move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";
   my $destination = "$staging/$directory";

   if ( ! -d $staging ) {
      mkdir $staging;
   }
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }
   # the directory goes first, then its checksum file follows it
   unless ( move( $fullPath, $destination ) ) {
      die "Error moving $fullPath to $staging/$directory: $!\n";
   }
   unless ( move( $checksumFile, $staging ) ) {
      die "Error moving $checksumFile to $staging: $!\n";
   }
   return '';
}
   
# Verify that a directory arrived intact on the remote server by comparing
# the checksum calculated locally with one calculated on the remote copy.
# If they match, the directory is moved from the remote staging area into
# its final location on the remote server.
#
# Parameters:
#    $remoteServer  - server name/ip, reached via ssh
#    $remoteStaging - staging directory on the remote server
#    $remoteTarget  - final directory on the remote server
#    $directory     - name of the directory (no path)
#    $checksum      - checksum calculated locally
# Returns 1 if the directory was validated and moved, 0 otherwise.
# Failures are recorded in the directory's log file,
# "$config{'local root dir'}/<directory>.<log suffix>".
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   # logit() takes the log file as its first argument
   my $logFile = "$config{'local root dir'}/$directory.$config{'log suffix'}";

   # Make the paths safe inside single quotes for the remote shell: each '
   # becomes '\''. ssh is run in list form below, so no local shell gets to
   # interpret the command a second time.
   ( my $remoteSource = "$remoteStaging/$directory" ) =~ s/'/'\\''/g;
   ( my $remoteDest = $remoteTarget ) =~ s/'/'\\''/g;

   # same pipeline as calcMD5, but run on the remote server
   my $sumCommand = "find '$remoteSource' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";
   my $md5sum = '';
   if ( open( my $ssh, '-|', 'ssh', $remoteServer, $sumCommand ) ) {
      $md5sum = join( '', <$ssh> );
      close $ssh;
      chomp $md5sum;
   } else {
      logit( $logFile, "Unable to run ssh to $remoteServer: $!" );
      return 0;
   }

   # an empty remote answer means the check did not run; never accept it
   if ( $md5sum eq '' || $checksum ne $md5sum ) {
      logit( $logFile, "Invalid checksum moving directory $directory" );
      return 0;
   }
   if ( system( 'ssh', $remoteServer, "mv '$remoteSource' '$remoteDest'" ) == 0 ) {
      return 1;
   }
   logit( $logFile, "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}

# Read the stored checksum of a staged directory.
#
# Parameters:
#    $directory - name of the directory (no path)
#    $staging   - staging area holding "<directory>.<md5 suffix>"
# Returns the first line of the checksum file without its line ending, or
# '' if the file is empty or cannot be opened. A file which cannot be
# opened is recorded in the directory's log file,
# "$config{'local root dir'}/<directory>.<log suffix>".
sub getCheckSum {
   my ( $directory, $staging )  = @_;
   # the checksum file is named "<directory>.<suffix>", with a dot
   my $md5File = "$staging/$directory.$config{'md5 suffix'}";
   if ( open( my $fh, '<', $md5File ) ) {
      my $cksum = <$fh>;
      close $fh;
      return '' unless defined $cksum;   # empty file
      chomp $cksum;
      return $cksum;
   }
   my $reason = "$!";
   # logit() takes the log file as its first argument
   logit( "$config{'local root dir'}/$directory.$config{'log suffix'}", "Could not open $md5File: $reason" );
   return '';
}
   
# Simple little logger: appends time-stamped messages to a log file.
#
# Parameters:
#    $logfile - path of the log file (created if necessary)
#    @_       - remaining arguments are the messages; each one is written
#               on its own line as "YYYY-MM-DD HH:MM:SS<tab>message".
#               Undefined messages are skipped.
# Dies if the log file cannot be opened.
sub logit {
   my $logfile = shift;
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded, so 12:05:03 is not written as "12:5 :03"
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   # Three-argument open with a lexical handle. The parentheses make sure
   # "or die" applies to open() itself and not to the mode/filename string.
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # iterate over the list rather than testing each message for truth, so
   # a message of '0' or '' does not silently end the loop
   foreach my $message ( @_ ) {
      next unless defined $message;
      print $log "$now\t$message\n";
   }
   close $log;
}
   
# Remove old entries from the trash directory.
#
# Parameters:
#    $trashDir - trash directory; created if it is not there yet
#    $age      - minimum age in seconds before an entry is removed. When
#                it is 0 or not given, every entry is removed.
#
# The age of an entry is measured from the more recent of its modification
# and inode change times, so only entries for which both are at least $age
# seconds old are removed.
# NOTE(review): this relies on the move into the trash refreshing one of
# those timestamps; whether a rename does so depends on the filesystem -
# confirm on the production system.
# Entries whose name starts with a dot are left alone.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   # list form of system: the path never passes through a shell
   system( 'mkdir', '-p', '--', $trashDir ) unless -d $trashDir;
   opendir( my $trash, $trashDir ) or return;
   my @entries = grep { ! /^\./ } readdir( $trash );
   closedir( $trash );
   my $now = time;
   foreach my $entry ( @entries ) {
      my $path = "$trashDir/$entry";
      if ( $age ) {
         # CORE::lstat always returns the plain list, even though the file
         # imports File::stat, which overrides stat/lstat
         my @info = CORE::lstat( $path );
         next unless @info;
         my $newest = $info[9] > $info[10] ? $info[9] : $info[10]; # mtime, ctime
         next if $now - $newest < $age;
      }
      system( 'rm', '-fR', '--', $path );
   }
}

# ----------------------------------------------------------------------
# Main script
# ----------------------------------------------------------------------

# make sure the drop area exists and is writable by everybody
unless ( -d $config{'local root dir'} ) {
   system( 'mkdir', '-p', $config{'local root dir'} );
   system( 'chmod', '777', $config{'local root dir'} );
}
# clean the trash if $config{'trash cleanup'} is non-zero
cleanTrash( $config{'local trash dir'}, $config{'trash cleanup'} ) if $config{'trash cleanup'};

# Stage 1: check if we have any directories which are ready to be moved,
# and move them (with their checksum files) into the local staging area.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";

foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = $config{'local root dir'} . "/$directory";
   my $logFile = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;
   # an error file left by an earlier run blocks the directory until
   # somebody has looked at it
   if ( -e $errorFile ) {
      logit( $logFile, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }
   logit( $logFile, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( ! $error ) {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $logFile, "Successfully moved to $config{'local staging area'}" );
   } else {
      logit( $logFile, "Error, move aborted" );
      logit( $errorFile, $error );
   }
}

# Stage 2: done with that, now we need to see if there is anything in the
# staging area that needs to be sent to the remote server. Every checksum
# file found there stands for one staged directory.

# the staging area does not exist until something has been staged
system( 'mkdir', '-p', $config{'local staging area'} ) unless -d $config{'local staging area'};
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# only names ending in ".<md5 suffix>" are checksum files
my $suffixPattern = qr/\.\Q$config{'md5 suffix'}\E\z/;
my @toMove = grep { /$suffixPattern/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";

foreach my $md5File ( @toMove ) {
   # the directory name is the checksum file name without its suffix
   ( my $directory = $md5File ) =~ s/$suffixPattern//;
   # the log file stays in the drop area, next to where the directory was
   my $logFile = "$config{'local root dir'}/$directory.$config{'log suffix'}";
   my $md5sum = getCheckSum( $directory, $config{'local staging area'} );
   next unless $md5sum;
   # make the path safe inside single quotes for the shell: ' becomes '\''
   ( my $source = "$config{'local staging area'}/$directory" ) =~ s/'/'\\''/g;
   my $rsync = "rsync -av '$source' $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   logit( $logFile, $rsync );
   if ( system ( $rsync ) == 0 ) { # we succeeded
      if ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $directory, $md5sum ) ) {
         # the remote copy is verified and in place, so the local copy
         # and its checksum file can go to the trash
         system( 'mkdir', '-p', $config{'local trash dir'} ) unless -d $config{'local trash dir'};
         move( "$config{'local staging area'}/$directory", "$config{'local trash dir'}/$directory" )
            or logit( $logFile, "Could not move $directory to $config{'local trash dir'}: $!" );
         move( "$config{'local staging area'}/$md5File", "$config{'local trash dir'}/$md5File" )
            or logit( $logFile, "Could not move $md5File to $config{'local trash dir'}: $!" );
         logit( $logFile, "Successfully moved directory $directory to $config{'target server'}" );
      } else {
         logit( $logFile, "Unable to validate target for $directory" );
      }
   } else {
      logit( $logFile, "Unknown error attempting to rsync $directory" );
   }
}


1;