#! /usr/bin/env perl
# archiveDirectories.pl
# Author: R. W. Rodolico
# Date: 20180603
# Copyright (c) 2018, Daily Data, Inc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the <project name> project.
#
# Script designed to be run from a cron job, which checks if any directories
# are ready to be archived. A candidate is any non-hidden directory directly
# under $config{'local root dir'}. An MD5 checksum is calculated for the
# entire tree of each candidate and stored in a file beside it; a directory
# is considered ready once that checksum file is at least
# $config{'quiesent seconds'} old and the checksum has not changed since it
# was recorded.
# Ready directories are moved, together with their checksum files, into
# $config{'local staging area'}.
# After all directories are moved, a second pass looks in the staging
# area and copies each directory (using rsync for reliability) into
# $config{'target staging area'} on $config{'target server'}. When a directory
# has been copied, a checksum is calculated on the remote copy and compared to
# the checksum calculated in the first stage and, if it passes, the directory
# is then moved to the $config{'target final directory'}.
# After the copy and move, the directory and its MD5 sum file are moved
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
# the script).
#
# Version: 1.0
use warnings;
use strict;
use Cwd qw();
use File::Copy qw(move);
use File::Basename;
use File::stat;
# Guard against concurrent runs (e.g. overlapping cron invocations).
# Technique from
# http://computer-programming-forum.com/53-perl/843e6090fe295ffc.htm
# An exclusive advisory lock is taken on a lock file and held for the life of
# the process. The lock belongs to the open handle, so it is released when the
# script exits, even if it dies; no cleanup is required.
use Fcntl qw(:flock); # imports the LOCK_* constants
my $LOCKFILE = '/tmp/archiveDirectories.lock';
# File-scoped lexical handle: it must stay open (and therefore locked) until
# the script ends, so it is deliberately never closed.
open( my $LOCK, '>>', $LOCKFILE ) or die ("Can't open lockfile $LOCKFILE: $!");
# LOCK_NB makes flock fail immediately when another instance holds the lock.
# Without it the call blocks until the other instance finishes, so the
# "already running" branch could never be taken.
flock( $LOCK, LOCK_EX | LOCK_NB ) or die ("I'm already running");
our $VERSION = '1.0';
# Verbosity of diagnostic output on STDOUT; 0 is silent, higher values print more
my $DEBUG = 0;
# Populated by loadConfig() from the .conf file that sits beside the script
my %config;
# Names of the directories found ready to be moved into the staging area
my @DirectoriesToMove;
# Locate and evaluate the configuration file.
# The file lives beside the script and has the script's name with a trailing
# .pl replaced by .conf (or .conf appended when there is no .pl suffix).
# Its contents are executed as Perl code via string eval, so it must come
# from a trusted source.
# Dies if the file is missing or does not evaluate cleanly.
sub loadConfig {
    use FindBin;
    # NOTE: the local variable names are kept as they are because code in the
    # evaluated file can see them.
    my $configFileName = join( '/', $FindBin::Bin, $FindBin::Script );
    # swap the extension, or add one when the script name has no .pl
    $configFileName =~ s/\.pl$/\.conf/ or $configFileName .= '.conf';
    die "Could not locate config file $configFileName\n"
        unless -e $configFileName;
    my $configFileContents = &slurpFile( $configFileName );
    eval( $configFileContents );
    die "Error interpreting $configFileName: $@\n" if $@;
} #loadConfig
# Read an entire file into a single string.
#   $filename - path of the file to read
# Returns the file contents, or '' if the file does not exist.
# Dies if the file exists but can not be opened.
sub slurpFile {
    my $filename = shift;
    return '' unless -e $filename;
    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters or whitespace trimming) and the handle
    # can not collide with other users of a global bareword handle.
    open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
    # with the record separator undefined a single read returns everything
    my $contents = do { local $/; <$fh> };
    close( $fh );
    # be defensive: callers compare the result with eq, so never return undef
    return defined $contents ? $contents : '';
}
# Write data to a file, replacing any previous contents.
#   $filename - path of the file to (over)write
#   @data     - remaining arguments, concatenated with no separator
# Dies if the file can not be opened or the data can not be written.
sub writeData {
    my $filename = shift;
    # Three-argument open with a lexical handle: the file name is taken
    # literally and no global bareword handle is shared with other subs.
    open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
    print {$fh} join( '', @_ );
    # buffered write errors (e.g. disk full) only surface at close
    close( $fh ) or die "could not write to $filename: $!\n";
}
# Return how many seconds ago a file was last modified.
#   $filename - path of the file to examine
# Returns current time minus the file's modification time, in seconds.
# Dies with a descriptive message if the file can not be stat'ed.
# Relies on File::stat being loaded at file level, so stat() returns an
# object rather than a list.
sub fileAge {
    my $filename = shift;
    my $info = stat( $filename )
        or die "could not stat $filename: $!\n";
    # use the documented accessor instead of indexing the object's internals
    my $modified = $info->mtime;
    print "$modified\t$filename\n" if $DEBUG > 3;
    return time - $modified;
}
# Find the directories under $rootDir which are ready to be archived.
#
# Every non-hidden directory directly under $rootDir is a candidate. Each
# candidate has a companion file "<dir>.<md5 suffix>" holding the checksum of
# its tree (see calcMD5):
#   - no companion file yet: it is created, and the directory waits;
#   - companion file younger than $config{'quiesent seconds'}: the directory
#     waits and is not examined further;
#   - companion file old enough and checksum unchanged: directory is ready;
#   - companion file old enough but checksum changed: the file is rewritten
#     (which restarts the waiting period) and the directory waits.
#
#   $rootDir - directory to scan
# Returns the list of ready directory names (relative to $rootDir).
# Dies if $rootDir can not be opened.
sub getDirectories {
    my $rootDir = shift;
    print "In getDirectories with dir of $rootDir\n" if $DEBUG;
    opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
    my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
    closedir ( $dh );
    print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
    my @dirsToMove;
    foreach my $thisDir ( @dirs ) {
        my $fullyQualified = "$rootDir/$thisDir";
        my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
        my $haveOldSum = -e $md5Name;
        if ( $haveOldSum ) {
            print "\tFound existing MD5 file $md5Name\n" if $DEBUG > 3;
            # Too recent: skip before paying for a checksum of the whole
            # tree, since the result would not be used on this run anyway.
            if ( fileAge( $md5Name ) < $config{'quiesent seconds'} ) {
                print "\t\tBlowing it off because it is less than $config{'quiesent seconds'} seconds old\n" if $DEBUG > 4;
                next;
            }
        }
        my $md5 = calcMD5( $fullyQualified );
        print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
        unless ( $haveOldSum ) { # first sighting, so record the checksum
            print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
            writeData( $md5Name, $md5 );
            next;
        }
        my $oldMD5 = slurpFile( $md5Name );
        if ( $md5 eq $oldMD5 ) {
            print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
            push @dirsToMove, $thisDir;
        } else {
            print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
            # record the new checksum; the rewrite also resets the file's age
            writeData( $md5Name, $md5 );
        }
    } # foreach
    return @dirsToMove;
}
# Calculate the checksum of a directory tree by
# 1. calculating the md5sum of each regular file in the entire tree
# 2. keeping only the first column of the output, which is the checksum
# 3. sorting the result, since find does not always return files in the
#    same order
# 4. calculating the md5sum of that sorted list of checksums
#
# This is highly unlikely to give the same answer if any file changes
# in the process of the copy. File names and empty directories do not
# contribute to the result.
#
#   $directory - path of the directory to checksum
# Returns the checksum as a hex string, or -1 if $directory is not a
# directory. Requires find, md5sum, cut and sort on the PATH.
sub calcMD5 {
    my $directory = shift;
    return -1 unless -d $directory;
    # make the path safe inside the shell's single quotes: ' becomes '\''
    ( my $quoted = $directory ) =~ s/'/'\\''/g;
    my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
    # backticks give undef when the command could not be started at all
    $md5 = '' unless defined $md5;
    chomp $md5;
    return $md5;
}
# Move a directory, and the checksum file that sits beside it, into the
# staging area.
#   $directory - name of the directory (no path)
#   $fullPath  - current full path of the directory
#   $staging   - staging area to move it into (created if missing)
# Returns '' on success, or a message if the staging area already holds an
# entry of that name. Dies if either move fails.
sub moveToStaging {
    my ( $directory, $fullPath, $staging ) = @_;
    # the checksum file has the directory's path plus the md5 suffix
    my $md5File = "$fullPath.$config{'md5 suffix'}";
    my $destination = "$staging/$directory";
    if ( ! -d $staging ) {
        mkdir( $staging );
    }
    if ( -e $destination ) {
        return 'Directory already exists in staging';
    }
    unless ( move( $fullPath, $destination ) ) {
        die "Error moving $fullPath to $staging/$directory: $!\n";
    }
    unless ( move( $md5File, $staging ) ) {
        die "Error moving $md5File to $staging: $!\n";
    }
    return '';
}
# Fetch the checksum recorded for a project in the local staging area.
#   $project - directory name of the project
# Returns the contents of "<staging>/<project>.<md5 suffix>", or '' when
# that file does not exist.
sub getCheckSum {
    my $project = shift;
    my $checkSumFile = join( '.',
        "$config{'local staging area'}/$project",
        $config{'md5 suffix'} );
    return ( -e $checkSumFile ) ? &slurpFile( $checkSumFile ) : '';
}
# Verify a directory arrived intact on the remote server by comparing the
# checksum calculated locally with one calculated on the remote copy (same
# algorithm as calcMD5, run over ssh). If they match and
# $config{'final procedure'} is defined, that code reference is called to
# finish the job on the remote side.
#   $remoteServer  - ssh target
#   $remoteStaging - staging directory on the remote server
#   $remoteTarget  - final directory on the remote server
#   $directory     - name of the directory being validated
#   $checksum      - checksum calculated locally
# Returns 1 if the checksums match, 0 otherwise.
sub validateTarget {
    my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
    my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
    # backticks give undef when the command could not be started at all
    $md5sum = '' unless defined $md5sum;
    chomp $md5sum;
    if ( $checksum ne $md5sum ) {
        # logit needs the project name and a file suffix ahead of the message
        logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
        return 0;
    }
    if ( defined ( $config{ 'final procedure' } ) ) {
        my $result = $config{ 'final procedure' }->( $remoteServer, $remoteStaging, $remoteTarget, $directory );
        # NOTE(review): the meaning of the final procedure's return value is
        # set by the config file and not visible here; a true value is
        # treated as a message to record. Confirm whether it can signal a
        # failure that should make this sub return 0.
        logit( $directory, $config{'log suffix'}, $result ) if ( $result );
    } # do the final procedure, if it exists
    # Explicit success value. Without it the result was whatever the last
    # evaluated expression happened to be, which is false when no final
    # procedure is defined or when it returns a false value.
    return 1;
}
# Simple logger: append timestamped messages to a per-project file.
#   $projectName - project (directory) name
#   $suffix      - file suffix, e.g. $config{'log suffix'}
#   @messages    - remaining arguments, written one per line
# The file is $config{'local root dir'}/$projectName.$suffix. It is created
# if needed and then made readable and writable by everyone.
# Returns 1. Dies if the file can not be opened or written.
sub logit {
    my $projectName = shift;
    my $suffix = shift;
    my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime(time);
    # every field is zero padded (minutes used to be rendered with %-2d,
    # which pads with a trailing space, giving e.g. "12:5 :07")
    my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
    my $isNew = ! -e $logfile;
    open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
    # a newly created logfile is set to rw by everyone, whatever the umask
    chmod( 0666, $logfile ) if $isNew;
    foreach my $message ( @_ ) {
        # nothing to record for undefined or empty messages, but a message
        # such as "0" is still written
        next unless defined $message && length $message;
        print {$log} "$now\t$message\n";
    }
    close( $log ) or die "could not write to $logfile: $!\n";
    return 1;
}
# Run one or more shell commands on a remote server through ssh.
#   $server   - ssh target
#   @commands - remaining arguments, run in order; each is wrapped in single
#               quotes, so a command must not itself contain single quotes
# Processing stops at the first false (empty) command. Only the low seven
# bits of $? are examined, so a non-zero exit status of the remote command
# is not treated as an error here.
# Returns ( $output, $error ) for the first command with those bits set,
# otherwise ( '', 0 ).
sub runRemoteCommand {
    my ( $server, @commands ) = @_;
    foreach my $command ( @commands ) {
        last unless $command;
        my $output = `ssh $server '$command'`;
        my $error = $? & 127;
        return ( $output, $error ) if $error;
    }
    return ( '', 0 );
}
# Copy a directory to the remote server with rsync.
#   $path         - local directory containing $dirname
#   $dirname      - name of the directory to copy
#   $remoteServer - rsync/ssh target
#   $remotePath   - directory on the remote server to copy into
# rsync's standard output is saved in /tmp/lastrsync.log.
# Returns '' on success, otherwise a message describing the failure.
sub copyToRemote {
    my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
    qx"rsync -a '$path/$dirname' $remoteServer:$remotePath > /tmp/lastrsync.log";
    my $status = $?;
    return '' if $status == 0;
    # $? is -1 when the command could not be run; otherwise the low seven
    # bits hold a terminating signal and the high byte holds the exit status.
    # Every one of these is a failure. The earlier test looked only at the
    # signal bits and, because . binds tighter than &, built its message as a
    # number that evaluated to 0, so no failure was ever reported.
    if ( $status == -1 ) {
        return 'rsync failed with error :' . "could not be run: $!";
    }
    if ( $status & 127 ) {
        return 'rsync failed with error :' . 'killed by signal ' . ( $status & 127 );
    }
    return 'rsync failed with error :' . ( $status >> 8 );
}
# Remove archived projects from the trash directory once they are old enough.
# A project is the pair "<name>" and "<name>.<md5 suffix>"; its age is the
# age of the checksum file (see fileAge).
#   $trashDir - directory to clean
#   $age      - minimum age, in seconds, before a project is removed
# Nothing is removed if $trashDir can not be opened.
sub cleanTrash {
    my ( $trashDir, $age ) = @_;
    my $md5Suffix = $config{'md5 suffix'};
    my @entries = ();
    if ( opendir( my $dh, $trashDir ) ) {
        @entries = readdir( $dh );
        closedir( $dh );
    }
    print "You should remove the following files\n" if $DEBUG > 1;
    # loaded on demand; remove_tree deletes files and whole trees without
    # handing the path to a shell
    require File::Path;
    foreach my $entry ( @entries ) {
        # Only checksum files identify a project. The suffix is matched
        # literally and at least one character must precede it, otherwise a
        # file called ".<suffix>" would make the trash directory itself the
        # removal target.
        next unless $entry =~ m/\A(.+)\.\Q$md5Suffix\E\z/;
        my $project = "$trashDir/$1";
        my $md5File = "$trashDir/$entry";
        next unless fileAge( $md5File ) > $age;
        print "\t$project\n" if $DEBUG > 1;
        File::Path::remove_tree( $project, $md5File );
    }
}
# Create a directory, including any missing parents, and set its permissions.
#   $directory   - path to create; nothing is done if it already exists
#   $permissions - mode handed to chmod(1) for $directory itself
#                  (optional, defaults to '777')
# Failures are reported with warn and do not stop the script.
sub makeDirectories {
    my $directory = shift;
    my $permissions = shift;
    $permissions = '777' unless $permissions;
    unless ( defined $directory && length $directory ) {
        warn "makeDirectories called without a directory\n";
        return;
    }
    return if -d $directory;
    print "Making directory $directory\n" if $DEBUG > 1;
    # List-form system bypasses the shell, so paths containing spaces or
    # shell metacharacters are passed through untouched.
    system( 'mkdir', '-p', '--', $directory ) == 0
        or warn "could not create directory $directory\n";
    system( 'chmod', $permissions, '--', $directory ) == 0
        or warn "could not set permissions $permissions on $directory\n";
    return;
}
###############################################################################
# Main
###############################################################################
# read the configuration into %config; dies if it can not be loaded
&loadConfig();
# uncomment the following to dump the loaded configuration and stop
#use Data::Dumper;
#print Dumper( \%config );
#die;
# make sure the local working directories exist
foreach my $localDir ( map { $config{$_} } 'local root dir', 'local trash dir', 'local staging area' ) {
    &makeDirectories( $localDir );
}
# empty the trash only when $config{ 'trash cleanup' } is non-zero; its value
# is the age handed to cleanTrash
if ( $config{ 'trash cleanup' } ) {
    &cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } );
}
# Stage 1: find the directories which are ready and move each of them, with
# its checksum file, into the local staging area.
@DirectoriesToMove = &getDirectories( $config{'local root dir'} );
if ( $DEBUG > 1 ) {
    print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";
}
foreach my $directory ( @DirectoriesToMove ) {
    my $fullPath = "$config{'local root dir'}/$directory";
    # the log and error files sit beside the directory, same name plus suffix
    my ( $logFile, $errorFile ) = map { "$fullPath.$config{$_}" } ( 'log suffix', 'error suffix' );
    print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;
    # an existing error file blocks the directory until someone removes it
    if ( -e $errorFile ) {
        &logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
        print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
        next;
    }
    &logit( $directory, $config{'log suffix'}, "Processing $directory" );
    my $error = &moveToStaging( $directory, $fullPath, $config{'local staging area'} );
    if ( $error ) {
        # note it in the log, and put the reason in the error file
        &logit( $directory, $config{'log suffix'}, "Error, move aborted" );
        &logit( $directory, $config{'error suffix'}, $error );
        next;
    }
    print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
    &logit( $directory, $config{'log suffix'}, "Successfully moved to $config{'local staging area'}" );
}
# Stage 2 preparation: see if there is anything in the staging area that
# needs to be sent to the remote server.
# makeDirectories does nothing when the directory already exists
makeDirectories( $config{'local staging area'} );
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# Get all the checksum files; each one stands for a staged directory of the
# same name without the suffix. The suffix is matched literally, must follow
# a dot, and must be preceded by at least one character so that every entry
# yields a non-empty directory name.
my $md5SuffixPattern = quotemeta( $config{'md5 suffix'} );
my @toMove = grep { /\A.+\.$md5SuffixPattern\z/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n" if $DEBUG > 1;
# Create the target directories on the server if they don't exist.
# runRemoteCommand only reports a command that was killed by a signal or
# could not be run at all; "directory already exists" is not an error.
my ( $remoteOutput, $remoteError ) = runRemoteCommand( $config{'target server'},
    "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
    "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
);
warn "Could not prepare directories on $config{'target server'} (error $remoteError): $remoteOutput\n" if $remoteError;
# Stage 2: process each staged directory in turn: copy it to the remote
# staging area, validate the remote copy against the recorded checksum and,
# when it validates, move the local copy and its checksum file to the trash.
foreach my $md5Entry ( @toMove ) {
    print "Processing $md5Entry\n";
    # @toMove holds checksum file names; strip the suffix for the directory
    my $md5Suffix = $config{'md5 suffix'};
    unless ( $md5Entry =~ m/\A(.+)\.\Q$md5Suffix\E\z/ ) {
        # never carry on with a stale or empty capture from a failed match
        warn "Skipping $md5Entry, it is not a <directory>.$md5Suffix file\n";
        next;
    }
    my $dirname = $1;
    my $error = copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
    if ( $error ) {
        logit( $dirname, $config{'error suffix'}, $error );
        next;
    }
    logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );
    my $md5sum = getCheckSum( $dirname );
    # without a recorded checksum the copy can not be validated
    next unless $md5sum;
    unless ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
        logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
        next;
    }
    # makeDirectories does nothing when the directory already exists
    makeDirectories( $config{'local trash dir'} );
    # Move the directory and then its checksum file into the trash, and only
    # report success when both moves worked.
    my @moveErrors;
    foreach my $item ( $dirname, "$dirname.$md5Suffix" ) {
        my $from = "$config{'local staging area'}/$item";
        my $to = "$config{'local trash dir'}/$item";
        move( $from, $to ) or push @moveErrors, "Error moving $from to $to: $!";
    }
    if ( @moveErrors ) {
        logit( $dirname, $config{'error suffix'}, @moveErrors );
        next;
    }
    logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
}
1;