# Rev 11 | Blame | Last modification | View Log | Download | RSS feed
#! /usr/bin/env perl
# archiveDirectorys.pl
# Author: R. W. Rodolico
# Date: 20180603
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
# Script designed to be run from a cron job, which checks if any directories
# are ready to be archived. A candidate is any non-hidden directory directly
# under the root of $localDirectoryDirectory.
# If found, all directories are moved into the staging area and
# an md5 checksum is calculated for the entire tree.
# After all directories are moved, a second pass looks in the staging
# area and copies the files (using rsync for reliability) into the staging
# area of $targetServer. When a directory has been copied, a checksum is
# calculated on the remote copy and compared to the checksum calculated
# in the first stage and, if it passes, the directory is then moved to the
# $targetDirectoryDirectory.
# After the copy and move, the directory and its MD5 sum file are moved
# to the $trashDirectory (which is meant to be cleaned on the next invocation
# of the script; that clean-up call is currently commented out).
# Script does NOT handle the situation where directories are being moved
# while the script is running, so the script should be run at a time
# when there is no other activity on the server.
#
# Version: 1.0
use warnings;
use strict;
use Cwd qw();
use File::Copy qw(move);
use File::Basename;
# ---------------------------------------------------------------------
# Configuration. All paths are absolute; the trash and staging areas are
# hidden sub-directories of the drop directory, so they are never picked
# up as candidates themselves (candidates must not start with a dot).
# ---------------------------------------------------------------------
# location where directories are put by end users
my $localDirectoryDirectory = '/home/samba/transfers/denver_to_dallas/Archive_to_DaVinci';
# location where directories are moved while processing
# NOTE(review): not referenced anywhere else in the visible script
my $rootWorkDirectory = '/home/transfer_work_area';
# location where directories are moved when job is completed
my $trashDirectory = "$localDirectoryDirectory/.Trash";
# location where directories (and their checksum files) wait while being
# transferred to the target server
my $stagingArea = "$localDirectoryDirectory/.Staging";
# target server name/ip. Must be accessible via ssh with no password
my $targetServer = 'davinci';
# location on target server where directories are placed while copying
my $targetStagingArea = '/home/samba/archives/fromDenver/.Staging/';
# location on target server where directories are finally put
my $targetDirectoryDirectory = '/home/samba/archives/fromDenver/';
# suffix of the file holding the md5 checksum of a staged directory
my $md5suffix = '.md5sum';
# names (not full paths) of the directories found in the drop directory
my @DirectorysToMove;
# List the candidate directories found directly under a root directory.
#
# Parameter: $rootDir - directory to scan
# Returns:   the names (not full paths) of every entry that is a directory
#            and whose name does not start with a dot, in the order the
#            filesystem reports them
# Dies if $rootDir cannot be opened.
sub getDirectorys {
    my ( $rootDir ) = @_;
    opendir( my $handle, $rootDir ) or die "Could not open directory $rootDir: $!\n";
    my @found;
    while ( defined( my $entry = readdir( $handle ) ) ) {
        next if $entry =~ /^\./;             # hidden entries, '.' and '..'
        next unless -d "$rootDir/$entry";    # plain files are not candidates
        push @found, $entry;
    }
    return @found;
}
# calculate the checksum of a directory by
# 1. calculate checksum of each individual file in the entire tree
# 2. Grab the first column, which is the checksum
# 3. sort the result since Linux will not always return them in the same order
# 4. do a checksum of the checksums
#
# This is highly unlikely to give the same answer if any file changes
# in the process of the copy
#
# Parameter: $directory - path of the tree to checksum
# Returns:   the checksum printed by the pipeline (without its trailing
#            newline), or -1 if $directory is missing or not a directory
sub calcMD5 {
    my $directory = shift;
    return -1 unless defined $directory and -d $directory;
    # keep find from reading a relative name with a leading dash as an option
    my $path = $directory =~ /^-/ ? "./$directory" : $directory;
    # Directory names come from end users, so make the name safe inside the
    # single quotes used below: every ' becomes '\'' (close, escaped quote,
    # reopen). Without this an apostrophe breaks, or injects into, the command.
    ( my $quoted = $path ) =~ s/'/'\\''/g;
    my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
    chomp $md5;
    return $md5;
}
# moves directory to staging area and puts the md5 sum into a file
# with the same name, but a .md5sum suffix
#
# Parameters: $directory   - name of the directory, relative to the file-level
#                            $localDirectoryDirectory
#             $stagingArea - directory to move it into (created if missing)
#             $md5         - checksum to record next to the moved directory
# Returns nothing.
# Dies if the staging area or the checksum file cannot be created.
# If the directory itself cannot be moved, the failure is logged and no
# checksum file is written, so a stale or foreign copy in the staging area
# is never paired with this checksum.
sub moveToStaging {
    my ( $directory, $stagingArea, $md5 ) = @_;
    unless ( -d $stagingArea or mkdir $stagingArea ) {
        die "Could not create staging area [$stagingArea]: $!\n";
    }
    unless ( move( "$localDirectoryDirectory/$directory", "$stagingArea/$directory" ) ) {
        logit( "Could not move $directory to $stagingArea: $!" );
        return;
    }
    my $md5File = "$stagingArea/$directory" . $md5suffix;
    # three-argument open with a lexical handle: the file name is never parsed
    # for a mode, and the special DATA handle is left alone
    open my $out, '>', $md5File or die "Could not create md5sum file [$md5File]: $!\n";
    print {$out} "$md5\n";
    # buffered write errors only show up when the handle is closed
    close $out or die "Could not write md5sum file [$md5File]: $!\n";
    return;
}
# verifies the directory is correct on the server by comparing the checksums
# calculated locally and remote server. If valid, moves it into the final
# location on the remote server
#
# Parameters: $remoteServer  - host to reach with ssh
#             $remoteStaging - directory on the host holding the fresh copy
#             $remoteTarget  - directory on the host to move the copy into
#             $directory     - name of the directory that was copied
#             $checksum      - checksum calculated before the copy
# Returns:    1 if the checksums match and the remote move succeeded,
#             0 otherwise (the reason is logged)
sub validateTarget {
    my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
    # Quote a value for the REMOTE shell: wrap in single quotes and turn every
    # embedded ' into '\''. ssh is started in list form below, so no local
    # shell ever sees the name and only this one level of quoting is needed.
    my $quote = sub {
        my $text = shift;
        $text =~ s/'/'\\''/g;
        return "'$text'";
    };
    my $remotePath = $quote->( "$remoteStaging/$directory" );
    my $sumCommand = "find $remotePath -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";
    my $ssh;
    unless ( open $ssh, '-|', 'ssh', $remoteServer, $sumCommand ) {
        &logit( "Unable to run ssh to $remoteServer: $!" );
        return 0;
    }
    my $md5sum = do { local $/; <$ssh> };    # slurp, as backticks would
    close $ssh;
    $md5sum = '' unless defined $md5sum;
    chomp $md5sum;
    # an empty answer means the remote command produced nothing; never let
    # that count as a match
    unless ( length $md5sum and $checksum eq $md5sum ) {
        &logit( "Invalid checksum moving directory $directory" );
        return 0;
    }
    my $moveCommand = 'mv ' . $remotePath . ' ' . $quote->( $remoteTarget );
    if ( system( 'ssh', $remoteServer, $moveCommand ) == 0 ) {
        return 1;
    }
    &logit( "Unable to move $directory to $remoteServer:$remoteTarget" );
    return 0;
}
# reads the checksum file
#
# Parameters: $directory - name of the staged directory (the file-level
#                          $md5suffix is appended to find its checksum file)
#             $staging   - directory holding the checksum file
# Returns:    the first line of the checksum file without its newline, or
#             an empty string if the file cannot be opened (logged) or is empty
sub getCheckSum {
    my ( $directory, $staging ) = @_;
    my $md5File = "$staging/$directory$md5suffix";
    my $in;
    # three-argument open with a lexical handle: the file name is never parsed
    # for a mode, and the special DATA handle is left alone
    unless ( open $in, '<', $md5File ) {
        &logit( "Could not open $md5File: $!" );
        return '';
    }
    my $cksum = <$in>;
    close $in;
    # an empty file gives undef; report it the same way as a missing file
    return '' unless defined $cksum;
    chomp $cksum;
    return $cksum;
}
# simple little logger that records some information
#
# Appends one line, "YYYY-MM-DD HH:MM:SS<tab>message", to
# /tmp/archiveDirectorys.log using local time.
#
# Parameter: $message - text to record
# Returns:   the result of closing the log file
# Dies if the log file cannot be opened for appending.
sub logit {
    my $message = shift;
    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
    # every field is zero padded; the minutes used to be written with %-2d,
    # which produced stamps such as "12:5 :03"
    my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $mday, $hour, $min, $sec );
    open my $log, '>>', '/tmp/archiveDirectorys.log' or die "could not write to archiveDirectorys.log: $!\n";
    print {$log} "$now\t$message\n";
    return close $log;
}
# simply remove everything from the trash directory
#
# Removes every non-hidden entry found directly inside the trash directory
# (the same set the shell pattern "*" selects); the directory itself is kept.
#
# Parameter: $trashDir - directory to empty
# Returns nothing.
# Does nothing when $trashDir is undefined, empty or not a directory. That
# guard matters: with an empty value the former "rm -fR $trashDir/*" would
# have expanded to "rm -fR /*".
sub cleanTrash {
    my $trashDir = shift;
    return unless defined $trashDir and length $trashDir and -d $trashDir;
    my $dh;
    unless ( opendir( $dh, $trashDir ) ) {
        warn "Could not read trash directory $trashDir: $!\n";
        return;
    }
    my @doomed = map { "$trashDir/$_" } grep { ! /^\./ } readdir( $dh );
    closedir( $dh );
    # list form: no shell is involved, so spaces or quotes in names are safe
    system( 'rm', '-fR', '--', @doomed ) if @doomed;
    return;
}
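# NOTE: automatic trash clean-up is disabled. The call below also refers to
# getDirectorysToMove, which is not defined in this script (getDirectorys is).
#&cleanTrash( $trashDirectory ) if &getDirectorysToMove( $trashDirectory );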
# First phase: look for new directories in the drop directory, checksum each
# one and move it, together with its checksum file, into the staging area.
@DirectorysToMove = getDirectorys( $localDirectoryDirectory );
foreach my $directory ( @DirectorysToMove ) {
    my $md5 = calcMD5( "$localDirectoryDirectory/$directory" );
    # calcMD5 gives -1 when the directory has vanished and an empty string
    # when the pipeline failed; staging either would leave a checksum file
    # that can never validate, so leave the directory for the next run
    unless ( defined $md5 and $md5 =~ /\A[0-9a-f]{32}\z/ ) {
        logit( "Could not calculate checksum for $directory, skipping" );
        next;
    }
    logit( "New Directory $md5\t$directory" );
    moveToStaging( $directory, $stagingArea, $md5 );
}
# Second phase: every checksum file in the staging area marks a directory
# that still has to be sent to the remote server. For each one: rsync it to
# the remote staging area, let validateTarget compare checksums and move it
# into place remotely, then move the local copy and its checksum file to the
# trash directory. Anything that fails is logged and stays in the staging
# area, so it is tried again on the next run.
my @toMove;
# the staging area is only created once something has been staged, so its
# absence simply means there is nothing to transfer
if ( -d $stagingArea ) {
    opendir( my $dh, $stagingArea ) or die "Could not read $stagingArea: $!\n";
    @toMove = grep { /\Q$md5suffix\E$/ } readdir( $dh );
    closedir( $dh );
}
foreach my $md5File ( @toMove ) {
    # strip the checksum suffix to get the name of the staged directory
    ( my $directory = $md5File ) =~ s/\Q$md5suffix\E$//;
    next unless length $directory;
    my $md5sum = getCheckSum( $directory, $stagingArea );
    next unless $md5sum;
    # the redirect needs a shell, so make the user-supplied name safe inside
    # single quotes: every ' becomes '\''
    ( my $quotedSource = "$stagingArea/$directory" ) =~ s/'/'\\''/g;
    my $rsync = "rsync -av '$quotedSource' $targetServer:$targetStagingArea/ > /tmp/lastrsync.log";
    logit( $rsync );
    if ( system( $rsync ) != 0 ) {
        logit( "Unknown error attempting to rsync $directory" );
        next;
    }
    unless ( validateTarget( $targetServer, $targetStagingArea, $targetDirectoryDirectory, $directory, $md5sum ) ) {
        logit( "Unable to validate target for $directory" );
        next;
    }
    # the remote copy is verified and in place; retire the local copy
    system( 'mkdir', '-p', $trashDirectory ) unless -d $trashDirectory;
    my $movedDirectory = move( "$stagingArea/$directory", "$trashDirectory/$directory" );
    my $movedChecksum  = move( "$stagingArea/$md5File", "$trashDirectory/$md5File" );
    if ( $movedDirectory and $movedChecksum ) {
        logit( "Successfully moved directory $directory to $targetServer" );
    } else {
        logit( "Transferred $directory to $targetServer but could not move it to $trashDirectory: $!" );
    }
}
1;