Subversion Repositories sysadmin_scripts

Rev

Rev 19 | Rev 21 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

#! /usr/bin/env perl

# archiveDirectories.pl
# Author: R. W. Rodolico
# Date: 20180603

# Copyright (c) 2018, Daily Data, Inc
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the <project name> project.

# Script designed to be run from a cron job, which checks if any directories
# are ready to be archived. A directory is defined as a directory under
# the root of $config{'local root dir'}.

# If found, all directories are moved into the staging area and 
# an md5 checksum is calculated for the entire tree.
# After all directories are moved, a second process looks in the staging
# area and copies the files (using rsync for reliability) into the staging
# area of $config{'target server'}. When a directory has been copied, a checksum is
# calculated on the remote copy and compared to the checksum calculated
# in the first stage and, if it passes, the directory is then moved to the 
# $config{'target final directory'}.
# After the copy and move, the directory and its MD5 sum file are moved
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
# the script).

#
# Version: 1.0

use warnings;
use strict;
use Cwd qw();
use File::Copy qw(move);
use File::Basename;
use File::stat;

our $VERSION = '1.0';

my $DEBUG = 3;

my %config;

my @DirectoriesToMove;

# Locate and evaluate the configuration file that sits next to this script.
# The file name is the script's own path with a trailing ".pl" replaced by
# ".conf" (or with ".conf" appended when there is no ".pl" suffix).
# The file contents are executed as Perl code through a string eval, so the
# file must be trusted.
# NOTE(review): presumably the file fills in %config -- confirm against the
# actual .conf file.
# Dies if the file does not exist or fails to evaluate.
sub loadConfig {
   use FindBin;
   my $configFileName = "$FindBin::Bin/$FindBin::Script";
   $configFileName .= '.conf' unless $configFileName =~ s/\.pl$/\.conf/;

   die "Could not locate config file $configFileName\n" unless -e $configFileName;

   my $configFileContents = &slurpFile( $configFileName );
   eval( $configFileContents );
   die "Error interpreting $configFileName: $@\n" if $@;
} #loadConfig

# Read an entire file and return its contents as one string.
#   $filename - path of the file to read
# Returns '' when the file does not exist (or is empty); dies when the file
# exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # three-argument open with a lexical handle: the name is used verbatim
   # (no whitespace stripping, no mode characters taken from the name) and
   # the handle cannot collide with a handle used by another sub
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # slurp mode, restored automatically when the sub returns
   my $contents = <$fh>;
   close $fh;
   return defined $contents ? $contents : '';
}

# Write data to a file, replacing any previous contents.
#   $filename - path of the file to (over)write
#   remaining arguments - strings, concatenated without a separator
# Dies if the file cannot be opened or the write cannot be completed.
sub writeData {
   my $filename = shift;
   # three-argument open with a lexical handle so the name is used verbatim
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ );
   # buffered write errors (e.g. a full disk) only surface at close
   close( $fh ) or die "could not write to $filename: $!\n";
}

# Return how many seconds ago a file was last modified (its mtime).
#   $filename - path of the file to examine
# Dies with a readable message if the file cannot be stat'ed.
sub fileAge {
   my $filename = shift;
   # File::stat (imported at the top of the file) makes stat() return an
   # object; use its accessor rather than indexing into its internals
   my $info = stat( $filename ) or die "could not stat $filename: $!\n";
   my $modified = $info->mtime;
   print "$modified\t$filename\n" if $DEBUG > 3;
   return time - $modified;
}
   

# Scan $rootDir for sub-directories (names starting with a dot are ignored)
# and decide which of them are ready to be moved.
#
# For every directory the current checksum is calculated, then:
#   - no checksum file yet: the checksum file is created, directory waits
#   - checksum file younger than $config{'quiesent seconds'}: directory waits
#   - stored checksum equals current checksum: directory is ready
#   - stored checksum differs: the checksum file is rewritten, directory waits
#
#   $rootDir - directory to scan
# Returns the list of ready directory names (relative to $rootDir).
# Dies if $rootDir cannot be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @candidates;
   foreach my $entry ( readdir( $dh ) ) {
      next if $entry =~ /^\./;
      next unless -d "$rootDir/$entry";
      push @candidates, $entry;
   }
   closedir( $dh );
   print "Directories Found\n" . join( "\n", @candidates ) . "\n" if $DEBUG > 1;

   my @ready;
   foreach my $name ( @candidates ) {
      my $path = "$rootDir/$name";
      my $currentSum = calcMD5( $path );
      print "\tFound Dir $path with MD5 of $currentSum\n" if $DEBUG > 2;
      my $sumFile = "$path.$config{'md5 suffix'}";

      # first sighting: record the checksum and look again on a later run
      unless ( -e $sumFile ) {
         print "\t\tCreating MD5 File $sumFile with value $currentSum\n" if $DEBUG > 4;
         &writeData( $sumFile, $currentSum );
         next;
      }

      print "\tFound existing MD5 file $sumFile\n" if $DEBUG > 3;
      # the checksum file must have been left alone long enough
      if ( &fileAge( $sumFile ) < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because it is less than $config{'quiesent seconds'} seconds old\n" if $DEBUG > 4;
         next;
      }

      my $recordedSum = &slurpFile( $sumFile );
      if ( $currentSum eq $recordedSum ) {
         print "\t\tAdding, md5 not changed, $currentSum same as $recordedSum\n" if $DEBUG > 4;
         push @ready, $name;
         next;
      }

      print "\t\tWaiting, md5 changed, $currentSum and $recordedSum\n" if $DEBUG > 4;
      # contents changed since the last run: store the new checksum
      # (the file is re-read right before it is overwritten)
      &writeData( $sumFile, $currentSum ) if $currentSum ne &slurpFile( $sumFile );
   } # foreach
   return @ready;
}

# Calculate a checksum for a whole directory tree:
# 1. calculate the md5sum of every regular file in the tree
# 2. keep only the first column, which is the checksum
# 3. sort the result, since find does not guarantee any particular order
# 4. calculate the md5sum of that sorted list of checksums
#
# Only file contents take part in the result; file names do not.
#
#   $directory - path of the tree to checksum
# Returns the checksum as a hex string, or -1 if $directory is not a
# directory.
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # make the path safe inside single quotes for the shell: every ' becomes
   # '\'' (close the quote, emit an escaped quote, reopen the quote)
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}

# Move a directory, together with its checksum file, into the staging area.
#   $directory - name of the directory (no path)
#   $fullPath  - current full path of the directory; the checksum file is
#                expected at "$fullPath.<md5 suffix>"
#   $staging   - staging directory (created if it does not exist)
# Returns '' on success, or an error message when the directory is already
# present in staging. Dies if either move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";
   my $destination = "$staging/$directory";

   mkdir( $staging ) unless -d $staging;
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }

   move( $fullPath, $destination )
      or die "Error moving $fullPath to $destination: $!\n";
   move( $checksumFile, $staging )
      or die "Error moving $checksumFile to $staging: $!\n";
   return '';
}

# Fetch the checksum stored for a staged project.
#   $project - project (directory) name inside $config{'local staging area'}
# Returns the contents of "<staging>/<project>.<md5 suffix>", or '' when
# that file does not exist.
sub getCheckSum {
   my $project = shift;
   my $checkSumFile = join( '.', "$config{'local staging area'}/$project", $config{'md5 suffix'} );
   return -e $checkSumFile ? &slurpFile( $checkSumFile ) : '';
}

# Verify a copied directory on the remote server by comparing the checksum
# calculated locally with one calculated on the remote copy. If they match,
# move the directory into its final location on the remote server.
#   $remoteServer  - host passed to ssh
#   $remoteStaging - staging directory on the remote server
#   $remoteTarget  - final directory on the remote server
#   $directory     - name of the directory being validated
#   $checksum      - checksum calculated locally
# Returns 1 when the checksums match and the remote move succeeded, 0
# otherwise. Failures are recorded in the project's error file.
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   # same pipeline as calcMD5, executed on the remote side
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   chomp $md5sum;

   if ( $checksum ne $md5sum ) {
      # logit takes (project, suffix, messages...); passing only a message
      # would use the message as a file name and log nothing
      &logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }

   my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
   return 1 if system( $command ) == 0;

   &logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}

# Simple logger: appends time-stamped lines to a per-project file.
#   $projectName - project (directory) name
#   $suffix      - file suffix; the file written is
#                  "$config{'local root dir'}/$projectName.$suffix"
#   remaining arguments - messages, written one per line as
#                  "YYYY-MM-DD HH:MM:SS<tab>message"
# Dies if the file cannot be opened for appending.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded, minutes included
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # iterate over the list itself so a message such as "0" or '' does not
   # silently end the loop
   foreach my $message ( grep { defined } @_ ) {
      print {$log} "$now\t$message\n";
   }
   close( $log );
}

# Run one or more shell commands on a remote host through ssh, in order.
#   $server - host passed to ssh
#   remaining arguments - command strings; each one is wrapped in single
#             quotes on the ssh command line. A false entry ends the list.
# After each command only the low seven bits of $? are examined; a non-zero
# value there stops processing and returns ( output, that value ). The
# command's exit status (the high byte of $?) is NOT examined.
# Returns ( '', 0 ) when every command got past that check.
sub runRemoteCommand {
   my ( $server, @commands ) = @_;
   foreach my $command ( @commands ) {
      last unless $command;
      my $output = qx/ssh $server '$command'/;
      my $error = $? & 127;
      return ( $output, $error ) if $error;
   }
   return ( '', 0 );
}
      
   
# Copy a directory to the remote server with rsync.
#   $path         - local directory containing the directory to copy
#   $dirname      - name of the directory to copy
#   $remoteServer - host to copy to
#   $remotePath   - destination path on the remote host
# rsync's standard output is written to /tmp/lastrsync.log.
# Returns '' on success, otherwise a message describing how rsync failed.
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   # quote the local path for the shell: every ' becomes '\''
   ( my $source = "$path/$dirname" ) =~ s/'/'\\''/g;
   qx"rsync -a '$source' $remoteServer:$remotePath > /tmp/lastrsync.log";
   my $status = $?;
   return '' if $status == 0;

   # $? is -1 when the command could not be started, carries a signal
   # number in its low seven bits and the exit code in its high byte
   my $detail;
   if ( $status == -1 ) {
      $detail = "could not run rsync: $!";
   } elsif ( $status & 127 ) {
      $detail = 'signal ' . ( $status & 127 );
   } else {
      $detail = 'exit code ' . ( $status >> 8 );
   }
   # build the message separately: "." binds tighter than "&", so mixing
   # them in one expression turns the whole message into a number
   return "rsync failed with error :$detail";
}

# Remove everything from the trash directory that is more than $age
# seconds old. Age is taken from the "<name>.<md5 suffix>" file; both that
# file and the entry "<name>" next to it are removed.
#   $trashDir - trash directory to clean
#   $age      - minimum age in seconds before an entry is removed
# Does nothing if the trash directory cannot be opened.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   my $md5Suffix = $config{'md5 suffix'};
   my @expired = ();
   if ( opendir( my $dh, $trashDir ) ) {
      foreach my $entry ( readdir( $dh ) ) {
         # only "<name>.<suffix>" with a non-empty name qualifies; the
         # suffix is matched literally and anchored so an unrelated entry
         # can never supply the name that is handed to rm
         next unless $entry =~ m/\A(.+)\.\Q$md5Suffix\E\z/;
         my $base = $1;
         next if $base eq '.' || $base eq '..';
         push @expired, $base if &fileAge( "$trashDir/$entry" ) > $age;
      }
      closedir( $dh );
   }
   print "You should remove the following files\n" if $DEBUG > 1;
   foreach my $base ( @expired ) {
      print "\t$trashDir/$base\n" if $DEBUG > 1;
      # list form: no shell involved, so names need no quoting
      system( 'rm', '-fR', '--', "$trashDir/$base", "$trashDir/$base.$md5Suffix" );
   }
}

# Create a directory (including missing parents) if it does not exist yet
# and set its permissions.
#   $directory   - path to create
#   $permissions - mode handed to chmod; defaults to '777'
#                  NOTE(review): the default is world-writable -- confirm
#                  this is intended
# Nothing is done when the directory already exists (permissions of an
# existing directory are left alone). Failures are reported as warnings.
sub makeDirectories {
   my $directory = shift;
   my $permissions = shift;
   $permissions = '777' unless $permissions;
   unless ( defined $directory && length $directory ) {
      warn "makeDirectories called without a directory name\n";
      return;
   }
   return if -d $directory;

   print "Making directory $directory\n" if $DEBUG > 1;
   # list form of system: the path never passes through a shell, so
   # spaces and quotes in it are harmless
   system( 'mkdir', '-p', '--', $directory ) == 0
      or warn "could not create directory $directory\n";
   system( 'chmod', $permissions, $directory ) == 0
      or warn "could not set permissions $permissions on $directory\n";
   return;
}
         


###############################################################################
# Main
###############################################################################

&loadConfig();
#use Data::Dumper;
#print Dumper( \%config );
#die;

# make sure the local working directories exist
foreach my $dirKey ( 'local root dir', 'local trash dir', 'local staging area' ) {
   &makeDirectories( $config{$dirKey} );
}

# clean the trash, but only when $config{ 'trash cleanup' } is non-zero
if ( $config{ 'trash cleanup' } ) {
   &cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } );
}

# find the directories which are ready to be moved
@DirectoriesToMove = &getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n" if $DEBUG > 1;

# move every ready directory into the local staging area
foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = "$config{'local root dir'}/$directory";
   my $logFile = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;

   # an error file left by an earlier run blocks the directory
   if ( -e $errorFile ) {
      &logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }

   &logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = &moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( $error ) {
      # note the failure in the log and put the reason in the error file
      &logit( $directory, $config{'log suffix'},  "Error, move aborted" );
      &logit( $directory, $config{'error suffix'},  $error );
      next;
   }

   print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
   &logit( $directory, $config{'log suffix'},  "Successfully moved to $config{'local staging area'}" );
}

# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server
unless ( -d $config{'local staging area'} ) {
   # list form of system: the path never passes through a shell
   system( 'mkdir', '-p', '--', $config{'local staging area'} );
}
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# get all the checksum files, i.e. "<name>.<md5 suffix>" with a non-empty
# name; the suffix is matched literally
my $md5Suffix = $config{'md5 suffix'};
my @toMove = grep { /^.+\.\Q$md5Suffix\E$/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n" if $DEBUG > 1;
# create the target directories on the server if they don't exist
&runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
   );


# now, process each directory in turn
foreach my $md5Name ( @toMove ) {
   print "Processing $md5Name\n";
   # strip the suffix to get the directory name; never fall back on a
   # capture left over from some earlier match
   next unless $md5Name =~ m/^(.+)\.\Q$md5Suffix\E$/;
   my $dirname = $1;

   my $error = &copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      &logit( $dirname, $config{'error suffix'}, $error );
      next;
   }
   &logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );

   my $md5sum = &getCheckSum( $dirname );
   next unless $md5sum;

   # second rsync pass over the same tree; its exit status decides whether
   # the remote copy gets validated
   my $rsync = "rsync -av '$config{'local staging area'}/$dirname' $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   &logit( $dirname, $config{'log suffix'}, $rsync );
   unless ( system ( $rsync ) == 0 ) {
      &logit( $dirname, $config{'error suffix'}, "Unknown error attempting to rsync $dirname" );
      next;
   }

   unless ( &validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
      &logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      next;
   }

   # the remote copy is verified and in place: retire the local copy and
   # its checksum file to the trash
   unless ( -d $config{'local trash dir'} ) {
      system( 'mkdir', '-p', '--', $config{'local trash dir'} );
   }
   my $trashError = '';
   foreach my $item ( $dirname, "$dirname.$md5Suffix" ) {
      next if move( "$config{'local staging area'}/$item", "$config{'local trash dir'}/$item" );
      $trashError = "Could not move $item to $config{'local trash dir'}: $!";
      last;
   }
   if ( $trashError ) {
      # report it instead of logging a success that did not happen
      &logit( $dirname, $config{'error suffix'}, $trashError );
   } else {
      &logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
   }
}


1;