#! /usr/bin/env perl
# archiveDirectories.pl
# Author: R. W. Rodolico
# Date: 20180603
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
# Script designed to be run from a cron job, which checks if any directories
# are ready to be archived. A directory is defined as a directory under
# the root of $config{'local root dir'}.
# If found, all directories are moved into the staging area and
# an md5 checksum is calculated for the entire tree.
# After all directories are moved, a second process looks in the staging
# area and copies the files (using rsync for reliability) into the staging
# area of $config{'target server'}. When a directory has been copied, a checksum is
# calculated on the remote copy and compared to the checksum calculated
# in the first stage and, if it passes, the directory is then moved to the
# $config{'target final directory'}.
# After the copy and move, the directory and its MD5 sum file are moved
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
# the script).
# Script does NOT handle the situation where directories are being moved
# while the script is running, so the script should be run at a time
# when there is no other activity on the server.
#
# Version: 1.0
use warnings;
use strict;
use Cwd qw();
use File::Basename;
use File::Copy qw(move);
use File::Path qw(make_path remove_tree);
use File::stat;
# Verbosity of the progress messages printed to STDOUT. The debug prints
# compare against this value; 0 suppresses them and anything above 4 shows
# all of them.
my $DEBUG = 5;

# All tunable settings of the script live in this one hash.
my %config = (
   # --- local side ---------------------------------------------------
   # where end users drop the directories they want archived
   'local root dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/ArchiveProjects',
   # scratch location for directories while they are processed
   'local work dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/transfer_area',
   # where a directory ends up once its job has completed
   'local trash dir'        => "/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Trash",
   # where a directory waits while it is being transferred
   'local staging area'     => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Staging',

   # --- remote side --------------------------------------------------
   # name or ip of the target server; ssh must work without a password
   'target server'          => 'davinci',
   # directory on the target server that receives the copy
   'target staging area'    => '/home/samba/archives/fromDenver/.Staging/',
   # directory on the target server holding the finished archives
   'target final directory' => '/home/samba/archives/fromDenver/',

   # --- companion file suffixes --------------------------------------
   # checksum file of a directory
   'md5 suffix'             => 'md5sum',
   # file recording the actions taken for a directory
   'log suffix'             => 'log',
   # file recording an error for a directory
   'error suffix'           => 'err',

   # --- timing -------------------------------------------------------
   # seconds a directory must stay unchanged before it is worked on
   'quiesent seconds'       => 5 * 60,      # five minutes
   # seconds to keep things in the trash directory; 0 disables cleaning
   'trash cleanup'          => 7 * 86400,   # 7 days
);

# names of the directories found ready during the current run
my @DirectoriesToMove;
# Reads an entire file into one string.
#
# Parameters:
#    $filename - path of the file to read
# Returns:
#    the complete contents of the file, or an empty string if the file
#    does not exist
# Dies if the file exists but can not be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless defined $filename && -e $filename;
   # three argument open: the name is taken literally, so blanks or mode
   # characters in it can not change what gets opened
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   my $contents = do { local $/; <$fh> };
   close( $fh );
   return defined $contents ? $contents : '';
}
# Writes values to a file, replacing whatever the file contained before.
#
# Parameters:
#    $filename - path of the file to (over)write
#    @data     - values to write; they are joined without any separator
# Dies if the file can not be opened or the data can not be flushed to it.
sub writeData {
   my ( $filename, @data ) = @_;
   # three argument open: the name is taken literally
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @data );
   # buffered write errors only show up when the handle is closed
   close( $fh ) or die "could not write to $filename: $!\n";
}
# Scans $rootDir for directories that are ready to be archived.
#
# Every non-hidden directory directly under $rootDir gets a checksum (see
# calcMD5) which is remembered in a companion file named
# "<directory>.<md5 suffix>" next to it. A directory is reported as ready
# only when that file is at least $config{'quiesent seconds'} old and the
# checksum stored in it still equals the current one. A changed checksum is
# written back to the file, which also renews its modification time.
#
# Parameters:
#    $rootDir - directory whose immediate sub directories are examined
# Returns:
#    list of directory names (relative to $rootDir) that are ready to move
# Dies if $rootDir can not be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir ( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # calcMD5 gives -1 when the directory is gone and an empty string
      # when the checksum command produced nothing; recording either one
      # would later look like a stable checksum
      unless ( $md5 =~ /^[0-9a-f]{32}$/i ) {
         warn "Could not calculate checksum of $fullyQualified, skipping\n";
         next;
      }
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      unless ( -e $md5Name ) { # first time we see it, so remember the checksum
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         &writeData( $md5Name, $md5 );
         next;
      }
      # find out when the checksum file was last written to
      my $fileInfo = stat( $md5Name );
      unless ( $fileInfo ) {
         warn "Could not stat $md5Name: $!\n";
         next;
      }
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }
      my $oldMD5 = &slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # remember the new checksum for the next run
         &writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
# Calculates one checksum for a whole directory tree by
# 1. calculating the md5 checksum of every regular file in the tree
# 2. keeping only the first column of the output, which is the checksum
# 3. sorting the result, since find does not promise any particular order
# 4. calculating the md5 checksum of that sorted list of checksums
#
# Only file contents take part; a renamed or moved file does not change
# the result.
#
# Parameters:
#    $directory - path of the directory to checksum
# Returns:
#    the checksum as printed by md5sum, -1 if $directory is not a
#    directory, or an empty string if the command printed nothing
sub calcMD5 {
   my $directory = shift;
   return -1 unless defined $directory && -d $directory;
   # directory names are chosen by end users, so a single quote in the name
   # must not be able to end the quoted shell argument
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
# Moves a directory and its checksum file into the staging area.
#
# The checksum file is expected next to the directory under the name
# "<fullPath>.<md5 suffix>" and lands in the staging area under its own name.
#
# Parameters:
#    $directory - name of the directory (no path)
#    $fullPath  - current full path of the directory
#    $staging   - staging area; created if missing (one level only)
# Returns:
#    an empty string on success, or a message if a directory of the same
#    name is already waiting in the staging area (nothing is moved then)
# Dies if the staging area can not be created or either move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   # and let's get the md5 file name also
   my $md5File = $fullPath . ".$config{'md5 suffix'}";
   unless ( -d $staging ) {
      mkdir( $staging ) or die "Error creating $staging: $!\n";
   }
   return 'Directory already exists in staging' if -e "$staging/$directory";
   move( $fullPath, "$staging/$directory" ) or die "Error moving $fullPath to $staging/$directory: $!\n";
   # the checksum file goes last; it is what marks the directory as staged
   move( $md5File, $staging ) or die "Error moving $md5File to $staging: $!\n";
   return '';
}
# Verifies a copied directory on the remote server and, if it is intact,
# moves it from the remote staging area to its final location.
#
# The remote checksum is calculated with the same find/md5sum pipeline as
# calcMD5 and compared to the checksum calculated locally.
#
# Parameters:
#    $remoteServer  - host to reach via ssh
#    $remoteStaging - directory on the remote server holding the copy
#    $remoteTarget  - directory on the remote server to move the copy into
#    $directory     - name of the directory (no path)
#    $checksum      - checksum calculated locally
#    $logFile       - optional; file handed to logit for failure messages.
#                     Without it the messages go to STDERR.
# Returns:
#    1 if the checksum matched and the remote move worked, 0 otherwise
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum, $logFile ) = @_;
   # logit needs a file name as its first argument
   my $report = sub {
      my $message = shift;
      if ( defined $logFile && length $logFile ) {
         &logit( $logFile, $message );
      } else {
         warn "$message\n";
      }
   };
   # single quotes a value for the remote shell, escaping embedded quotes
   my $quote = sub {
      ( my $text = shift ) =~ s/'/'\\''/g;
      return "'$text'";
   };
   my $remoteCopy = $quote->( "$remoteStaging/$directory" );
   my $sumCommand = "find $remoteCopy -type f -exec md5sum {} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";
   # list form runs ssh without a local shell, so only the remote shell
   # gets to interpret the command
   my $md5sum = '';
   if ( open( my $ssh, '-|', 'ssh', $remoteServer, $sumCommand ) ) {
      my $answer = <$ssh>;
      close( $ssh );
      $md5sum = $answer if defined $answer;
      chomp $md5sum;
   }
   # an empty checksum on both sides means both failed, not that they agree
   unless ( defined $checksum && length $checksum && $checksum eq $md5sum ) {
      $report->( "Invalid checksum moving directory $directory" );
      return 0;
   }
   my $moveCommand = "mv $remoteCopy " . $quote->( $remoteTarget );
   if ( system( 'ssh', $remoteServer, $moveCommand ) == 0 ) {
      return 1;
   }
   $report->( "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
# Reads the checksum stored for a staged directory.
#
# The checksum lives in "<staging>/<directory>.<md5 suffix>"; only its first
# line is used.
#
# Parameters:
#    $directory - name of the directory (no path, no suffix)
#    $staging   - directory holding the checksum file
# Returns:
#    the checksum, or an empty string if the file is empty or can not be
#    opened (a warning goes to STDERR in the latter case)
sub getCheckSum {
   my ( $directory, $staging ) = @_;
   # the suffix is stored without its leading dot
   my $md5File = "$staging/$directory.$config{'md5 suffix'}";
   if ( open( my $fh, '<', $md5File ) ) {
      my $cksum = <$fh>;
      close( $fh );
      return '' unless defined $cksum;
      chomp $cksum;
      return $cksum;
   }
   # no log file name is known here, and logit requires one
   warn "Could not open $md5File: $!\n";
   return '';
}
# Appends time stamped messages to a log file.
#
# Each message becomes one line of the form
# "YYYY-MM-DD HH:MM:SS<tab>message" using the local time of the call.
#
# Parameters:
#    $logfile  - file to append to; created if it does not exist
#    @messages - messages to record; undefined entries are skipped
# Dies if the log file can not be opened or written.
sub logit {
   my ( $logfile, @messages ) = @_;
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # go by the list itself so that messages such as '0' are not lost
   foreach my $message ( grep { defined } @messages ) {
      print {$log} "$now\t$message\n";
   }
   close( $log ) or die "could not write to $logfile: $!\n";
}
# Removes old entries from the trash directory.
#
# Parameters:
#    $trashDir - directory to clean; it is created if it does not exist
#    $age      - optional; only entries untouched for at least this many
#                seconds are removed. Without it (or with 0) every entry
#                is removed.
# Entries whose name starts with a dot are never removed.
# Dies if no trash directory is given or it can not be created or read.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   # with an empty path "everything below the trash" would mean
   # "everything below /"
   die "cleanTrash called without a trash directory\n" unless defined $trashDir && length $trashDir;
   unless ( -d $trashDir ) {
      make_path( $trashDir );
      return; # brand new, so there is nothing to clean
   }
   opendir( my $dh, $trashDir ) or die "Could not open directory $trashDir: $!\n";
   my @entries = grep { ! /^\./ } readdir( $dh );
   closedir( $dh );
   my $now = time;
   foreach my $entry ( @entries ) {
      my $path = "$trashDir/$entry";
      if ( $age ) {
         # CORE::lstat returns the plain list even where File::stat is loaded
         my @info = CORE::lstat( $path ) or next;
         # the later of mtime and ctime; presumably moving something into
         # the trash renews its ctime - TODO confirm on the file system used
         my $lastTouched = $info[9] > $info[10] ? $info[9] : $info[10];
         next if $now - $lastTouched < $age;
      }
      if ( -d $path && ! -l $path ) {
         remove_tree( $path );
      } else {
         unlink $path;
      }
   }
}
# Make sure the drop off directory exists. It is opened up to everybody
# (mode 777) when it is created here.
unless ( -d $config{'local root dir'} ) {
   # no shell involved, so blanks in the path are fine; make_path dies if
   # the directory can not be created
   make_path( $config{'local root dir'} );
   chmod( 0777, $config{'local root dir'} ) or warn "Could not change mode of $config{'local root dir'}: $!\n";
}
# clean the trash if $config{ 'trash cleanup' } is non-zero
&cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } ) if $config{ 'trash cleanup' };
# Stage one: ask getDirectories which directories are ready, then move each
# of them (and its checksum file) into the local staging area. What happens
# to a directory is recorded in "<directory>.<log suffix>" next to it; a
# failure is additionally recorded in "<directory>.<error suffix>", and a
# directory with such an error file is left alone.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );
print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";
for my $name ( @DirectoriesToMove ) {
   my $source   = "$config{'local root dir'}/$name";
   my $actions  = "$source.$config{'log suffix'}";
   my $problems = "$source.$config{'error suffix'}";
   print "Path for $name is $source\n\tLog File is $actions\n\tError file is $problems\n" if $DEBUG > 3;

   # an error from an earlier run has to be dealt with by a human first
   if ( -e $problems ) {
      logit( $actions, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }

   logit( $actions, "Processing $name" );
   my $failure = moveToStaging( $name, $source, $config{'local staging area'} );
   if ( $failure ) {
      logit( $actions, "Error, move aborted" );
      logit( $problems, $failure );
      next;
   }
   print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
   logit( $actions, "Successfully moved to $config{'local staging area'}" );
}
# Stage two: every directory in the local staging area that has a checksum
# file "<directory>.<md5 suffix>" next to it is copied to the staging area
# of the target server with rsync, verified there by validateTarget and,
# once that succeeded, moved to the local trash directory together with its
# checksum file. Progress is appended to "<directory>.<log suffix>" in the
# local root directory, the same file stage one writes to.
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
my @directories;
# the dot belongs to the suffix, and the suffix comes from the configuration
my $suffixPattern = qr/\.\Q$config{'md5 suffix'}\E$/;
my @toMove = grep { $_ =~ $suffixPattern } readdir( $dh );
closedir( $dh );
# the configured staging area may or may not end in a slash
( my $targetStaging = $config{'target staging area'} ) =~ s{/+$}{};
my $targetPath = "$config{'target server'}:$targetStaging/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";
# single quotes a value for the shell, escaping embedded single quotes
my $shellQuote = sub {
   ( my $text = shift ) =~ s/'/'\\''/g;
   return "'$text'";
};
foreach my $md5File ( @toMove ) {
   ( my $directory = $md5File ) =~ s/$suffixPattern//;
   next unless length $directory;
   my $staged = "$config{'local staging area'}/$directory";
   my $logFile = "$config{'local root dir'}/$directory.$config{'log suffix'}";
   unless ( -d $staged ) {
      &logit( $logFile, "Found $md5File in $config{'local staging area'} but no directory $directory" );
      next;
   }
   # the checksum file holds nothing but the checksum
   my $md5sum = &slurpFile( "$config{'local staging area'}/$md5File" );
   chomp $md5sum;
   next unless $md5sum;
   my $rsync = 'rsync -av ' . $shellQuote->( $staged ) . ' ' . $shellQuote->( $targetPath ) . ' > /tmp/lastrsync.log';
   &logit( $logFile, $rsync );
   if ( system ( $rsync ) == 0 ) { # we succeeded
      if ( &validateTarget( $config{'target server'}, $targetStaging, $config{'target final directory'}, $directory, $md5sum, $logFile ) ) {
         make_path( $config{'local trash dir'} ) unless -d $config{'local trash dir'};
         my $movedDirectory = move( $staged, "$config{'local trash dir'}/$directory" );
         my $movedChecksum = move( "$config{'local staging area'}/$md5File", "$config{'local trash dir'}/$md5File" );
         if ( $movedDirectory && $movedChecksum ) {
            &logit( $logFile, "Successfully moved directory $directory to $config{'target server'}" );
         } else {
            &logit( $logFile, "Copied $directory to $config{'target server'} but could not move it to $config{'local trash dir'}: $!" );
         }
      } else {
         &logit( $logFile, "Unable to validate target for $directory" );
      }
   } else {
      &logit( $logFile, "Unknown error attempting to rsync $directory" );
   }
}
1;