Subversion Repositories sysadmin_scripts

Rev

Rev 11 | Blame | Last modification | View Log | Download | RSS feed

#! /usr/bin/env perl

# archiveDirectorys.pl
# Author: R. W. Rodolico
# Date: 20180603
# Copyright: 2018, Vanduzen Enterprises, Dallas TX

# Script designed to be run from a cron job, which checks whether any
# directories are ready to be archived. A directory is considered ready
# when it is a non-hidden directory directly under $localDirectoryDirectory.

# For each directory found, an md5 checksum is calculated for the entire
# tree and the directory is then moved into the staging area, with the
# checksum saved beside it in a file carrying the $md5suffix suffix.
# After all directories are moved, a second pass looks in the staging
# area and copies each directory (using rsync for reliability) into the
# staging area of $targetServer. When a directory has been copied, a
# checksum is calculated on the remote copy and compared to the checksum
# calculated in the first stage and, if it matches, the remote copy is
# moved to $targetDirectoryDirectory.
# After the copy and move, the local directory and its MD5 sum file are
# moved to $trashDirectory (the call that would empty the trash on the
# next invocation of the script is currently commented out).

# Script does NOT handle the situation where directories are still being
# written or moved while the script is running, so the script should be
# run at a time when there is no other activity on the server.
#
# Version: 1.0

use warnings;
use strict;
use Cwd qw();
use File::Copy qw(move);
use File::Basename;

# ---------------------------------------------------------------------
# Configuration. All paths are hard-coded; edit here to retarget the job.
# ---------------------------------------------------------------------

# Local drop directory: end users place directories to be archived here.
# Every non-hidden subdirectory found directly under it is picked up.
my $localDirectoryDirectory = '/home/samba/transfers/denver_to_dallas/Archive_to_DaVinci';
# location where directories are moved while processing
# NOTE(review): not referenced anywhere else in this file -- confirm
# whether it is still needed.
my $rootWorkDirectory = '/home/transfer_work_area';
# Local directory that receives a directory (and its checksum file) once
# the transfer has been verified. Hidden (dot-prefixed), so it is never
# itself picked up as something to archive.
my $trashDirectory = "$localDirectoryDirectory/.Trash";
# Local directory that holds directories, plus their checksum files,
# while they wait to be transferred. Hidden for the same reason as above.
my $stagingArea = "$localDirectoryDirectory/.Staging";
# target server name/ip. Must be accessible via ssh with no password
my $targetServer = 'davinci';
# Location on the target server where directories are placed while
# copying. Note the value already ends in '/'; the rsync command built
# later appends another '/' after it.
my $targetStagingArea = '/home/samba/archives/fromDenver/.Staging/';
# location on target server where directories are finally put
my $targetDirectoryDirectory = '/home/samba/archives/fromDenver/';
# Suffix appended to a directory's name to form the name of the file
# holding that directory's md5 checksum.
my $md5suffix = '.md5sum';

# Names of the directories found in $localDirectoryDirectory on this run.
my @DirectorysToMove;

# Lists the subdirectories waiting directly under the given directory.
#
# Parameter: $rootDir - directory to scan.
# Returns:   the names (not full paths) of every entry that is a
#            directory and whose name does not start with a dot, in
#            whatever order readdir delivers them.
# Dies if $rootDir cannot be opened.
sub getDirectorys {
   my $rootDir = shift;
   opendir( my $handle, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @found;
   while ( defined( my $entry = readdir( $handle ) ) ) {
      next if $entry =~ /^\./;            # skips ., .. and hidden entries
      next unless -d "$rootDir/$entry";   # plain files are ignored
      push @found, $entry;
   }
   return @found;
}

# Calculates a single checksum for a whole directory tree by
# 1. calculating the md5 checksum of each individual file in the tree
# 2. keeping only the first column of md5sum's output (the checksum)
# 3. sorting the result, since find does not always return files in the
#    same order
# 4. taking the md5 checksum of that sorted list of checksums
#
# Only file contents contribute; file names and empty directories do not.
# It is highly unlikely to give the same answer if any file's content
# changes during the copy.
#
# Parameter: $directory - path of the tree to checksum.
# Returns:   the checksum as a hex string, or -1 if $directory is not a
#            directory.
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # The path is handed to the shell inside single quotes, so any single
   # quote in the name must be written as '\'' or it would end the quoting
   # and break (or alter) the command.
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}

# Moves a directory from $localDirectoryDirectory into the staging area
# and records its checksum in a file beside it, named after the directory
# with $md5suffix appended.
#
# Parameters:
#    $directory   - name of the directory (relative to $localDirectoryDirectory)
#    $stagingArea - staging directory to move it into; created if missing
#    $md5         - checksum string to store
# Returns nothing.
# Dies if the staging area or the checksum file cannot be created.
sub moveToStaging {
   my ( $directory, $stagingArea, $md5 ) = @_;
   unless ( -d $stagingArea ) {
      mkdir $stagingArea or die "Could not create staging area [$stagingArea]: $!\n";
   }
   # If the move fails, do not write a checksum file: the second phase
   # treats every checksum file as a directory ready to send, so a stray
   # one would be retried (and fail) on every run. Leaving the directory
   # where it is lets the next run pick it up again.
   unless ( move( "$localDirectoryDirectory/$directory", "$stagingArea/$directory" ) ) {
      logit( "Unable to move $directory to $stagingArea: $!" );
      return;
   }
   my $md5File = "$stagingArea/$directory" . $md5suffix;
   open( my $out, '>', $md5File ) or die "Could not create md5sum file [$md5File]: $!\n";
   print {$out} "$md5\n";
   # buffered write errors only show up when the handle is closed
   close $out or die "Could not write md5sum file [$md5File]: $!\n";
   return;
}
   
# Verifies that a directory arrived intact on the remote server by
# recomputing its checksum there (same find/md5sum pipeline as the local
# calculation) and comparing it with the locally recorded one. If they
# match, the directory is moved from the remote staging area into its
# final location.
#
# Parameters:
#    $remoteServer  - host to ssh to
#    $remoteStaging - staging directory on the remote host
#    $remoteTarget  - final directory on the remote host
#    $directory     - name of the directory being verified
#    $checksum      - checksum calculated locally before the transfer
# Returns 1 if the checksum matched and the remote move succeeded,
# 0 otherwise (the reason is written to the log).
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;

   # Quote a string for the REMOTE shell. ssh is run in list form below,
   # so no local shell ever sees the directory name and characters such
   # as $, `, " or ' in it cannot change either command.
   my $shellQuote = sub {
      ( my $text = shift ) =~ s/'/'\\''/g;
      return "'$text'";
   };
   my $remoteCopy = $shellQuote->( "$remoteStaging/$directory" );

   my $sumCommand = "find $remoteCopy -type f -exec md5sum {} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";
   my $ssh;
   unless ( open( $ssh, '-|', 'ssh', $remoteServer, $sumCommand ) ) {
      logit( "Unable to run ssh to $remoteServer: $!" );
      return 0;
   }
   my $md5sum = do { local $/; <$ssh> };   # slurp everything ssh printed
   close $ssh;
   $md5sum = '' unless defined $md5sum;    # no output at all, e.g. ssh failed
   chomp $md5sum;

   if ( $checksum ne $md5sum ) {
      logit( "Invalid checksum moving directory $directory" );
      return 0;
   }

   my $moveCommand = "mv $remoteCopy " . $shellQuote->( $remoteTarget );
   if ( system( 'ssh', $remoteServer, $moveCommand ) == 0 ) {
      return 1;
   }
   logit( "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}

# Reads the checksum previously stored for a staged directory.
#
# Parameters:
#    $directory - name of the staged directory (without $md5suffix)
#    $staging   - staging directory holding the checksum file
# Returns the first line of "$staging/$directory$md5suffix" with its
# newline removed, or '' if the file cannot be opened or is empty (the
# reason is written to the log).
sub getCheckSum {
   my ( $directory, $staging )  = @_;
   my $md5File = "$staging/$directory" . $md5suffix;
   my $in;
   unless ( open( $in, '<', $md5File ) ) {
      logit( "Could not open $md5File: $!" );
      return '';
   }
   my $cksum = <$in>;
   close $in;
   # An empty file yields undef; report it instead of chomping undef and
   # handing undef back to the caller.
   unless ( defined $cksum ) {
      logit( "Checksum file $md5File is empty" );
      return '';
   }
   chomp $cksum;
   return $cksum;
}
   
# Simple logger: appends one line to /tmp/archiveDirectorys.log in the
# form "YYYY-MM-DD HH:MM:SS<tab>message", using local time.
#
# Parameter: $message - text to record.
# Returns nothing.
# Dies if the log file cannot be opened for appending.
sub logit {
   my $message = shift;
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded; the minutes were previously formatted
   # with %-2d, which produced timestamps such as "12:5 :07"
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', '/tmp/archiveDirectorys.log' ) or die "could not write to archiveDirectorys.log: $!\n";
   print {$log} "$now\t$message\n";
   close $log;
   return;
}
   
# Removes every non-hidden entry from the trash directory, leaving the
# directory itself (and any dot-prefixed entries) in place.
#
# Parameter: $trashDir - directory to empty.
# Returns nothing. Does nothing if $trashDir is undefined, empty, the
# filesystem root, or not an existing directory.
sub cleanTrash {
   my $trashDir = shift;
   return unless defined $trashDir;
   ( my $dir = $trashDir ) =~ s{/+\z}{};
   # An empty value here would otherwise turn into "remove everything
   # under /", so refuse outright.
   return if $dir eq '';
   return unless -d $dir;

   opendir( my $handle, $dir ) or return;
   my @doomed = map { "$dir/$_" } grep { ! /^\./ } readdir( $handle );
   closedir( $handle );
   return unless @doomed;

   # List form: no shell is involved, so spaces or quotes in names are
   # safe; "--" stops rm treating a name as an option.
   system( 'rm', '-fR', '--', @doomed );
   return;
}


#&cleanTrash( $trashDirectory ) if &getDirectorysToMove( $trashDirectory );
   
   
# Phase one: find every directory waiting in the drop area, checksum it,
# log it, and move it (with its checksum) into the staging area.
@DirectorysToMove = getDirectorys( $localDirectoryDirectory );

for my $newArrival ( @DirectorysToMove ) {
   my $sourcePath = "$localDirectoryDirectory/$newArrival";
   my $checksum = calcMD5( $sourcePath );
   logit( "New Directory $checksum\t$newArrival" );
   moveToStaging( $newArrival, $stagingArea, $checksum );
}

# Phase two: send everything in the staging area to the remote server.
# Each checksum file found in the staging area names one directory to
# send. The directory is copied with rsync, verified and moved into place
# remotely by validateTarget, and only then moved (together with its
# checksum file) into the local trash directory. Anything that fails is
# logged and left in staging, so it is tried again on the next run.
opendir( my $dh, $stagingArea ) or die "Could not read $stagingArea: $!\n";
# \Q..\E makes the suffix match literally (its '.' is not a wildcard) and
# -f skips a staged directory whose own name happens to end in the suffix.
my @toMove = grep { /\Q$md5suffix\E\z/ && -f "$stagingArea/$_" } readdir( $dh );
closedir( $dh );
foreach my $md5File ( @toMove ) {
   # the directory name is the checksum file name minus the suffix
   ( my $directory = $md5File ) =~ s/\Q$md5suffix\E\z//;
   next unless length $directory;   # a file named exactly like the suffix
   my $md5sum = getCheckSum( $directory, $stagingArea );
   next unless $md5sum;
   # the source path goes to the shell inside single quotes, so embedded
   # single quotes have to be written as '\''
   ( my $quotedSource = "$stagingArea/$directory" ) =~ s/'/'\\''/g;
   my $rsync = "rsync -av '$quotedSource' $targetServer:$targetStagingArea/ > /tmp/lastrsync.log";
   logit( $rsync );
   if ( system ( $rsync ) == 0 ) { # we succeeded
      if ( validateTarget( $targetServer, $targetStagingArea, $targetDirectoryDirectory, $directory, $md5sum ) ) {
         system( 'mkdir', '-p', $trashDirectory ) unless -d $trashDirectory;
         # Keep the directory and its checksum file together: if the
         # directory cannot be moved, leave the checksum file in staging
         # as well so the pair is still recognised on the next run.
         unless ( move( "$stagingArea/$directory", "$trashDirectory/$directory" ) ) {
            logit( "Unable to move $stagingArea/$directory to $trashDirectory: $!" );
            next;
         }
         unless ( move( "$stagingArea/$md5File", "$trashDirectory/$md5File" ) ) {
            logit( "Unable to move $stagingArea/$md5File to $trashDirectory: $!" );
            next;
         }
         logit( "Successfully moved directory $directory to $targetServer" );
      } else {
         logit( "Unable to validate target for $directory" );
      }
   } else {
      logit( "Unknown error attempting to rsync $directory" );
   }
}


1;