Subversion Repositories sysadmin_scripts

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
13 rodolico 3
# archiveDirectories.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
7
 
13 rodolico 8
# Script designed to be run from a cron job, which checks if any directories
12 rodolico 9
# are ready to be archived. A directory is defined as a directory under
13 rodolico 10
# the root of $config{'local root dir'}.
11 rodolico 11
 
12
# If found, all directories are moved into the staging area and 
13
# an md5 checksum is calculated for the entire tree.
13 rodolico 14
# After all directories are moved, a second process looks in the staging
11 rodolico 15
# area and copies the files (using rsync for reliability) into the staging
13 rodolico 16
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 rodolico 17
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 18
# in the first stage and, if it passes, the directory is then moved to the 
13 rodolico 19
# $config{'target final directory'}.
12 rodolico 20
# After the copy and move, the directory and its MD5 sum file are moved
13 rodolico 21
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 rodolico 22
# the script).
23
 
13 rodolico 24
# Script does NOT handle the situation where directories are being moved
11 rodolico 25
# while the script is running, so the script should be run at a time
26
# when there is no other activity on the server.
27
#
28
# Version: 1.0
29
 
30
use warnings;
31
use strict;
32
use Cwd qw();
33
use File::Copy qw(move);
34
use File::Basename;
13 rodolico 35
use File::stat;
11 rodolico 36
 
13 rodolico 37
my $DEBUG = 5;
11 rodolico 38
 
16 rodolico 39
my %config;
11 rodolico 40
 
16 rodolico 41
my @DirectoriesToMove;
13 rodolico 42
 
16 rodolico 43
sub loadConfig {
   # Locate and execute the configuration file that sits next to this script.
   #
   # The file name is the script's own path with a trailing ".pl" replaced by
   # ".conf" (or ".conf" appended when the script has no ".pl" suffix). The
   # file's contents are run through a string eval, so it is expected to be
   # Perl code; dies when the file is missing or fails to evaluate.
   #
   # NOTE(review): string eval executes whatever is in the config file with
   # the privileges of this script - keep the file writable by trusted users
   # only.
   use FindBin;
   my $configFileName = join( '/', $FindBin::Bin, $FindBin::Script );
   $configFileName .= '.conf' unless $configFileName =~ s/\.pl$/\.conf/;

   # guard clause: nothing to do without a config file
   die "Could not locate config file $configFileName\n" unless -e $configFileName;

   # local names are deliberately unchanged: the eval'd text runs in this
   # lexical scope and can see them
   my $configFileContents = &slurpFile( $configFileName );
   eval( $configFileContents );
   die "Error interpreting $configFileName: $@\n" if $@;
} # loadConfig
13 rodolico 58
 
59
# simply read the entire file into a string
60
sub slurpFile {
   # Read an entire file and return its contents as one string.
   #
   # Parameters: $filename - path of the file to read
   # Returns:    the file contents, or '' when the file does not exist
   # Dies:       when the file exists but cannot be opened
   my $filename = shift;
   return '' unless defined $filename && -e $filename;
   # three-argument open with a lexical handle: the name is taken literally,
   # so trailing whitespace or mode characters in it cannot be misparsed
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   my $contents = do { local $/; <$fh> };   # undef $/ reads the file in one go
   close( $fh );
   return defined $contents ? $contents : '';
}
68
 
69
# print a value to a file
70
sub writeData {
   # Overwrite a file with the concatenation of the remaining arguments.
   #
   # Parameters: $filename - file to create or truncate
   #             @data     - values written back to back, no separators
   # Returns:    1 on success
   # Dies:       when the file cannot be opened, written or closed
   my ( $filename, @data ) = @_;
   # three-argument open with a lexical handle so the name is taken literally
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @data ) or die "could not write to $filename: $!\n";
   # buffered write errors (e.g. disk full) only surface at close
   close( $fh ) or die "could not write to $filename: $!\n";
   return 1;
}
76
 
77
# look in the directories to move directory and see if there is anything 
78
# new in there. If so, check MD5 Sum file (create if necessary) and ensure
79
# we have waited long enough and the sums match
80
sub getDirectories {
   # Find the sub-directories of $rootDir that are ready to be archived.
   #
   # For every non-hidden directory a checksum is calculated (calcMD5) and
   # compared with the value stored in "<dir>.<md5 suffix>":
   #   - no checksum file yet      -> create it, not ready
   #   - file younger than $config{'quiesent seconds'} -> not ready
   #   - stored checksum differs   -> store the new one, not ready
   #   - stored checksum identical -> ready
   #
   # Parameters: $rootDir - directory whose children are examined
   # Returns:    list of directory names (relative to $rootDir) ready to move
   # Dies:       when $rootDir cannot be opened
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }
      # find out when it was last written to; use the File::stat accessor
      # rather than indexing into the object's internal array
      my $fileInfo = File::stat::stat( $md5Name );
      unless ( $fileInfo ) {
         # file vanished or is unreadable; try again on the next run
         warn "Could not stat $md5Name: $!\n";
         next;
      }
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }
      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # the checksum has changed, so record the new one; this also resets
         # the file's modification time and restarts the quiet period
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
121
 
122
# calculate the checksum of a directory by
123
# 1. calculate checksum of each individual file in the entire tree
124
# 2. Grab the first column, which is the checksum
125
# 3. sort the result since Linux will not always return them in the same order
126
# 4. do a checksum of the checksums
127
#
128
# This is highly unlikely to give the same answer if any file changes
129
# in the process of the copy
130
sub calcMD5 {
   # Calculate a single checksum for a whole directory tree: md5sum every
   # file, keep only the checksum column, sort it and md5sum the result.
   #
   # Parameters: $directory - root of the tree to checksum
   # Returns:    the checksum as a hex string, or -1 when $directory is not
   #             a directory
   my $directory = shift;
   return -1 unless defined $directory && -d $directory;
   # the name goes inside single quotes in a shell command, so any single
   # quote it contains must be written as '\'' to keep the shell from
   # misparsing (or executing) part of the name
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   $md5 = '' unless defined $md5;   # backticks give undef if the shell could not be run
   chomp $md5;
   return $md5;
}
137
 
12 rodolico 138
# moves directory to staging area and puts the md5 sum into a file
11 rodolico 139
# with the same name, but a .md5sum suffix
140
sub moveToStaging {
   # Move a directory and its checksum file into the staging area.
   #
   # Parameters: $directory - name of the directory (no path)
   #             $fullPath  - current full path of the directory
   #             $staging   - staging area to move it into
   # Returns:    '' on success, or a message when the directory is already
   #             present in staging (nothing is moved in that case)
   # Dies:       when either move fails
   my ( $directory, $fullPath, $staging ) = @_;
   # the checksum file sits next to the directory, same name plus suffix
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";
   my $destination = "$staging/$directory";

   if ( ! -d $staging ) {
      mkdir $staging;
   }
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }
   unless ( move( $fullPath, $destination ) ) {
      die "Error moving $fullPath to $staging/$directory: $!\n";
   }
   unless ( move( $checksumFile, $staging ) ) {
      die "Error moving $checksumFile to $staging: $!\n";
   }
   return '';
}
16 rodolico 150
 
151
sub getCheckSum {
   # Return the stored checksum for a project in the local staging area.
   #
   # Parameters: $project - directory name whose checksum file is wanted
   # Returns:    contents of "<staging>/<project>.<md5 suffix>", or '' when
   #             that file does not exist
   my $project = shift;
   my $sumPath = join( '.', "$config{'local staging area'}/$project", $config{'md5 suffix'} );
   return '' unless -e $sumPath;
   return &slurpFile( $sumPath );
}
159
 
12 rodolico 160
# verifies the directory is correct on the server by comparing the checksums
11 rodolico 161
# calculated locally and remote server. If valid, moves it into the final
162
# location on the remote server
163
sub validateTarget {
   # Verify a copied directory on the remote server and, when its checksum
   # matches the locally calculated one, move it to its final location.
   #
   # Parameters: $remoteServer  - host passed to ssh
   #             $remoteStaging - staging directory on the remote server
   #             $remoteTarget  - final directory on the remote server
   #             $directory     - name of the directory being validated
   #             $checksum      - checksum calculated locally
   # Returns:    1 when the checksum matched and the remote move succeeded,
   #             0 otherwise (the reason is written to the error log)
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   $md5sum = '' unless defined $md5sum;   # backticks give undef if ssh could not be run
   chomp $md5sum;

   # logit expects ( project, suffix, messages... ); the original calls
   # passed only the message, so nothing was ever logged
   if ( $checksum ne $md5sum ) {
      logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }

   my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
   if ( system( $command ) != 0 ) {
      logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
      return 0;
   }
   return 1;
}
180
 
181
# simple little logger that records some information   
182
sub logit {
   # Append time-stamped messages to a per-project log file.
   #
   # Parameters: $projectName - project (directory) name
   #             $suffix      - file suffix selecting which log to write
   #             @messages    - one line is written per message
   # The file written is "<local root dir>/<projectName>.<suffix>".
   # Dies:       when the log file cannot be opened
   my ( $projectName, $suffix, @messages ) = @_;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded; minutes used %-2d, which gave "12:5 :03"
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   foreach my $message ( @messages ) {
      # skip empty entries without dropping the messages that follow them
      next unless defined $message && length $message;
      print {$log} "$now\t$message\n";
   }
   close( $log );
}
15 rodolico 194
 
195
sub runRemoteCommand {
   # Run one or more commands on a remote server through ssh, in order.
   #
   # Parameters: $server   - host passed to ssh
   #             @commands - commands to run; each is wrapped in single
   #                         quotes on the ssh command line
   # Returns:    ( $output, $error ) for the first command whose status has
   #             a non-zero low seven bits, otherwise ( '', 0 )
   #
   # NOTE(review): "$? & 127" is the signal that ended ssh, not its exit
   # code ("$? >> 8"), so a command that merely exits non-zero is not
   # treated as an error here - confirm that is intended.
   my ( $server, @commands ) = @_;
   foreach my $command ( @commands ) {
      # processing stops at the first empty/false command
      last unless $command;
      my $output = `ssh $server '$command'`;
      my $error = $? & 127;
      return ( $output, $error ) if $error;
   }
   return ( '', 0 );
}
205
 
11 rodolico 206
 
207
# simply remove everything from the trash directory
208
sub cleanTrash {
   # Remove everything (except dot files, as before) from the trash
   # directory, creating the directory first when it does not exist.
   #
   # Parameters: $trashDir - directory to empty
   #             $age      - accepted for compatibility; currently unused
   # Returns:    nothing useful
   my ( $trashDir, $age ) = @_;

   # An empty, undefined or "/" trash directory would turn the clean up into
   # "rm -fR /*", so refuse to do anything in that case.
   unless ( defined $trashDir && $trashDir =~ m{[^/\s]} ) {
      warn "cleanTrash: refusing to clean, trash directory is not set\n";
      return;
   }

   # list-form system() bypasses the shell, so spaces or shell
   # metacharacters in the path are harmless
   unless ( -d $trashDir ) {
      system( 'mkdir', '-p', '--', $trashDir ) == 0
         or warn "cleanTrash: could not create $trashDir\n";
      return;   # freshly created, nothing to remove
   }

   my $dh;
   unless ( opendir( $dh, $trashDir ) ) {
      warn "cleanTrash: could not read $trashDir: $!\n";
      return;
   }
   # same selection as the shell glob "*": everything not starting with a dot
   my @entries = grep { ! /^\./ } readdir( $dh );
   closedir( $dh );
   return unless @entries;

   system( 'rm', '-fR', '--', map { "$trashDir/$_" } @entries ) == 0
      or warn "cleanTrash: could not remove everything from $trashDir\n";
   return;
}
213
 
15 rodolico 214
sub copyToRemote {
   # Copy a directory to the remote server with rsync.
   #
   # Parameters: $path         - local directory containing $dirname
   #             $dirname      - name of the directory to copy
   #             $remoteServer - target host
   #             $remotePath   - target directory on the host
   # Returns:    '' on success, otherwise a message describing the failure
   # rsync's standard output is written to /tmp/lastrsync.log.
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;

   # the source goes inside single quotes, so escape any it contains
   ( my $source = "$path/$dirname" ) =~ s/'/'\\''/g;
   system( "rsync -a '$source' $remoteServer:$remotePath > /tmp/lastrsync.log" );
   return '' if $? == 0;

   # The original "'...' . $? & 127" parsed as ( '...' . $? ) & 127, which is
   # numerically 0, so failures were never reported. The low seven bits of $?
   # are also only the terminating signal; the exit code is $? >> 8.
   return "rsync failed with error : could not be started ($!)" if $? == -1;
   my $signal = $? & 127;
   return "rsync failed with error : killed by signal $signal" if $signal;
   return 'rsync failed with error :' . ( $? >> 8 );
}
223
 
16 rodolico 224
###############################################################################
225
# Main
226
###############################################################################
15 rodolico 227
 
16 rodolico 228
&loadConfig();
# debugging aid: uncomment to dump the loaded configuration and stop
#use Data::Dumper;
#print Dumper( \%config );
#die;

# make sure the directory we watch exists and is writable by everybody
my $localRoot = $config{'local root dir'};
if ( ! -d $localRoot ) {
   qx{mkdir -p $localRoot};
   qx{chmod 777 $localRoot};
}

# clean the trash if $config{ 'trash cleanup' } is non-zero
if ( $config{'trash cleanup'} ) {
   &cleanTrash( $config{'local trash dir'}, $config{'trash cleanup'} );
}

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = &getDirectories( $localRoot );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";

# first pass: move every ready directory into the local staging area
foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = "$localRoot/$directory";
   my ( $logFile, $errorFile ) = map { "$fullPath.$config{$_}" } ( 'log suffix', 'error suffix' );
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;

   # an error file left by an earlier run blocks further processing
   if ( -e $errorFile ) {
      &logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }

   &logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = &moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( $error ) {
      &logit( $directory, $config{'log suffix'},  "Error, move aborted" );
      &logit( $directory, $config{'error suffix'},  $error );
      next;
   }

   print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
   &logit( $directory, $config{'log suffix'},  "Successfully moved to $config{'local staging area'}" );
}
265
 
266
# done with that, now we need to see if there is anything in the staging area
267
# that needs to be sent to the remote server
15 rodolico 268
# Second pass: every checksum file in the local staging area names a
# directory waiting to be sent. Copy it to the remote staging area, validate
# it there and, on success, move the local copy and its checksum file to the
# trash directory.
`mkdir -p $config{'local staging area'}` unless -d $config{'local staging area'};
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# get all the .md5 files; the suffix is matched literally, after a dot, and
# the part before it is captured as the directory name
my $md5Suffix = quotemeta( $config{'md5 suffix'} );
my $md5Pattern = qr/\A(.+)\.${md5Suffix}\z/;
my @toMove = grep { $_ =~ $md5Pattern } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";
# create the target directories on the server if they don't exist;
# "mkdir -p" succeeds when the directory is already there
&runRemoteCommand( $config{'target server'},
   "mkdir -p $config{'target staging area'}",
   "mkdir -p $config{'target final directory'}"
   );

# now, process each directory in turn
foreach my $md5Name ( @toMove ) {
   print "Processing $md5Name\n";
   my ( $dirname ) = $md5Name =~ $md5Pattern;
   next unless defined $dirname;

   my $error = &copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      &logit( $dirname, $config{'error suffix'}, $error );
      next;
   }
   &logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );

   my $md5sum = &getCheckSum( $dirname );
   next unless $md5sum;

   # second rsync pass; its exit status decides whether we go on to validate
   my $rsync = "rsync -av '$config{'local staging area'}/$dirname' $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   &logit( $dirname, $config{'log suffix'}, $rsync );
   if ( system ( $rsync ) != 0 ) {
      &logit( $dirname, $config{'error suffix'}, "Unknown error attempting to rsync $dirname" );
      next;
   }

   unless ( &validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
      &logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      next;
   }

   # Validated on the server: retire the local copy and its checksum file.
   # The checksum file is "<dirname>.<suffix>"; the original appended the
   # suffix without the dot, so the file was never moved and the success
   # message went to a wrongly named log.
   `mkdir -p $config{'local trash dir'}` unless -d $config{'local trash dir'};
   foreach my $item ( $dirname, $md5Name ) {
      move( "$config{'local staging area'}/$item", "$config{'local trash dir'}/$item" )
         or &logit( $dirname, $config{'error suffix'}, "Could not move $item to $config{'local trash dir'}: $!" );
   }
   &logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
}
314
 
315
 
316
1;