Subversion Repositories sysadmin_scripts

Rev

Rev 28 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
13 rodolico 3
# archiveDirectories.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
 
18 rodolico 7
# Copyright (c) 2018, Daily Data, Inc
8
# All rights reserved.
9
# 
10
# Redistribution and use in source and binary forms, with or without
11
# modification, are permitted provided that the following conditions are met:
12
# 
13
# 1. Redistributions of source code must retain the above copyright notice, this
14
#    list of conditions and the following disclaimer.
15
# 2. Redistributions in binary form must reproduce the above copyright notice,
16
#    this list of conditions and the following disclaimer in the documentation
17
#    and/or other materials provided with the distribution.
18
# 
19
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
# 
30
# The views and conclusions contained in the software and documentation are those
31
# of the authors and should not be interpreted as representing official policies,
32
# either expressed or implied, of the <project name> project.
33
 
13 rodolico 34
# Script designed to be run from a cron job, which checks if any directories
12 rodolico 35
# are ready to be archived. A directory is defined as a directory under
13 rodolico 36
# the root of $config{'local root dir'}.
11 rodolico 37
 
38
# If found, all directories are moved into the staging area and 
39
# an md5 checksum is calculated for the entire tree.
13 rodolico 40
# After all directories are moved, a second process looks in the staging
11 rodolico 41
# area and copies the files (using rsync for reliability) into the staging
13 rodolico 42
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 rodolico 43
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 44
# in the first stage and, if it passes, the directory is then moved to the 
13 rodolico 45
# $config{'target final directory'}.
12 rodolico 46
# After the copy and move, the directory and its MD5 sum file are moved
13 rodolico 47
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 rodolico 48
# the script).
49
 
50
#
51
# Version: 1.0
52
 
53
use warnings;
54
use strict;
55
use Cwd qw();
56
use File::Copy qw(move);
57
use File::Basename;
13 rodolico 58
use File::stat;
11 rodolico 59
 
22 rodolico 60
# http://computer-programming-forum.com/53-perl/843e6090fe295ffc.htm
61
# how to verify this script is not already running
62
# creates a lock file which only lasts the length of the script
63
# if we die before removing it, the lock is released.
64
use Fcntl qw(:flock);  #  imports some constants
65
my $LOCKFILE = '/tmp/archiveDirectories.lock';
# The handle lives in a file-scoped lexical so the lock is held for the whole
# run; the operating system drops it when the process ends, even after a die.
open( my $lockHandle, '>>', $LOCKFILE ) or die ("Can't open lockfile $LOCKFILE: $!");
# LOCK_NB makes flock fail at once when another instance holds the lock.
# Without it the call simply waits for the other instance to finish, so the
# "already running" exit below could never trigger.
flock( $lockHandle, LOCK_EX | LOCK_NB ) or die ("I'm already running");
68
 
18 rodolico 69
# version of this script
our $VERSION = '1.0';
11 rodolico 70
 
22 rodolico 71
# debug verbosity: 0 is silent; the print statements in this file are guarded
# by thresholds from "$DEBUG" (any non-zero value) up to "$DEBUG > 4"
my $DEBUG = 0;
18 rodolico 72
 
16 rodolico 73
# settings read throughout the script; presumably filled in by the config
# file that loadConfig evaluates -- confirm against the .conf file
my %config;
11 rodolico 74
 
16 rodolico 75
# names of the directories getDirectories reported as ready to be moved
my @DirectoriesToMove;
13 rodolico 76
 
16 rodolico 77
# Find the configuration file that sits beside this script and evaluate it
# as Perl source. The file name is the script's own path with a trailing
# ".pl" replaced by ".conf" (or ".conf" appended when there is no ".pl").
# Takes no arguments. Dies when the file is missing or fails to evaluate.
sub loadConfig {
   use FindBin;
   my $configFileName = "$FindBin::Bin/$FindBin::Script";
   # swap the extension; when nothing was swapped, just tack .conf on the end
   $configFileName .= '.conf' unless $configFileName =~ s/\.pl$/\.conf/;

   die "Could not locate config file $configFileName\n" unless -e $configFileName;

   # The config is evaluated in this scope, so it can see the lexical
   # variables visible here (presumably it assigns to %config -- confirm
   # against the .conf file).
   my $configFileContents = slurpFile( $configFileName );
   eval( $configFileContents );
   die "Error interpreting $configFileName: $@\n" if $@;
} #loadConfig
13 rodolico 92
 
18 rodolico 93
# Read an entire file into a single string.
#   $filename - path of the file to read
# Returns the file's contents, or '' when the file does not exist.
# Dies if the file exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # Three-argument open takes the name literally; with the two-argument form
   # leading/trailing whitespace is stripped and mode characters in the name
   # are interpreted, so a different file could be opened.
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # no record separator: one read returns the whole file
   my $contents = <$fh>;
   close $fh;
   return defined $contents ? $contents : '';
}
102
 
103
# Write a value to a file, replacing whatever the file held before.
#   $filename - path of the file to create or overwrite
#   remaining arguments are concatenated (no separator) and written as-is
# Dies if the file cannot be opened, or if closing it reports a write error.
sub writeData {
   my $filename = shift;
   # three-argument open: the name is used literally, never parsed for a mode
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ );
   # buffered write failures (e.g. a full disk) only show up at close
   close( $fh ) or die "could not write to $filename: $!\n";
}
110
 
18 rodolico 111
# returns how many seconds ago a file was last modified
#   $filename - path of the file to examine
# Dies if the file cannot be stat'ed (for example, it does not exist).
sub fileAge {
   my $filename = shift;
   # File::stat replaces the built-in stat with one that returns an object;
   # its mtime accessor is the field the old code reached by array index 9
   my $info = stat( $filename ) or die "could not stat $filename: $!\n";
   my $modified = $info->mtime;
   print "$modified\t$filename\n" if $DEBUG > 3;
   return time - $modified;
}
119
 
120
 
13 rodolico 121
# Scan $rootDir for sub-directories that are ready to be archived.
# Each directory has a companion checksum file, "<dir>.<md5 suffix>", kept
# beside it in $rootDir:
#   - no checksum file yet: compute the checksum, record it, and wait
#   - checksum file younger than $config{'quiesent seconds'}: wait
#   - recorded checksum differs from the current one: the tree is still
#     changing, so record the new checksum and wait (rewriting the file
#     updates its modification time, which restarts the quiet period)
#   - recorded checksum equals the current one: the directory is ready
#   $rootDir - directory whose immediate, non-hidden sub-directories are examined
# Returns the names (not full paths) of the directories that are ready.
# Dies if $rootDir cannot be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir ( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      my $haveMD5File = -e $md5Name;

      if ( $haveMD5File ) {
         print "\tFound existing MD5 file $md5Name\n" if $DEBUG > 3;
         # Check the age before doing any checksumming: hashing every file
         # in the tree is the expensive step, and the result would be thrown
         # away for a directory that has not been quiet long enough.
         if ( fileAge( $md5Name ) < $config{'quiesent seconds'} ) {
            print "\t\tBlowing it off because it is less than $config{'quiesent seconds'} seconds old\n" if $DEBUG > 4;
            next;
         }
      }

      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;

      unless ( $haveMD5File ) {
         # first time we have seen this directory, so start tracking it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }

      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # we already know the sums differ, so no need to read the file again
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
162
 
163
# calculate the checksum of a directory by
# 1. calculate checksum of each individual file in the entire tree
# 2. Grab the first column, which is the checksum
# 3. sort the result since find will not always return them in the same order
# 4. do a checksum of the checksums
#
# File names are not part of the result, only file contents are.
#   $directory - path of the directory to checksum
# Returns the checksum as printed by md5sum, or -1 if $directory is not a
# directory. Relies on the external find, md5sum, cut and sort commands.
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # The path is placed inside single quotes for the shell, so any single
   # quote in it must be written as '\'' (close quote, escaped quote, reopen).
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
178
 
12 rodolico 179
# Relocate a directory, together with its checksum file, into the staging area.
#   $directory - bare name of the directory
#   $fullPath  - where the directory currently lives
#   $staging   - staging directory; created when it does not exist yet
# Returns '' on success, or a message when a directory of that name is
# already present in staging. Dies if either move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   my $destination = "$staging/$directory";
   # the checksum file sits beside the directory: same path plus the md5 suffix
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";

   unless ( -d $staging ) {
      mkdir( $staging );
   }
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }

   move( $fullPath, $destination )
      or die "Error moving $fullPath to $staging/$directory: $!\n";
   move( $checksumFile, $staging )
      or die "Error moving $checksumFile to $staging: $!\n";
   return '';
}
16 rodolico 191
 
192
# Fetch the checksum that was recorded for a staged project.
#   $project - name of the directory in the local staging area
# Returns the contents of "<local staging area>/<project>.<md5 suffix>",
# or '' when that file does not exist.
sub getCheckSum {
   my $project = shift;
   my $checkSumFile = join( '', $config{'local staging area'}, '/', $project, '.', $config{'md5 suffix'} );
   return '' unless -e $checkSumFile;
   return slurpFile( $checkSumFile );
}
200
 
12 rodolico 201
# Verifies a directory arrived intact on the remote server: the checksum of
# the copy in the remote staging area is computed over ssh (same recipe as
# calcMD5) and compared with the checksum calculated locally.
# This routine only validates; it does not move anything on the server.
#   $remoteServer  - host handed to ssh
#   $remoteStaging - staging directory on the server
#   $remoteTarget  - final directory on the server (accepted but not used here)
#   $directory     - name of the directory inside $remoteStaging
#   $checksum      - checksum calculated locally
# Returns 1 when the checksums match, 0 otherwise. The outcome is written to
# the directory's log file.
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   $md5sum = '' unless defined $md5sum;   # ssh could not be started at all
   chomp $md5sum;
   # logit expects ( project, suffix, messages... ); called with the message
   # alone it treats the text as a project name and writes no message
   if ( $checksum eq $md5sum ) {
      logit( $directory, $config{'log suffix'}, "checksums match" );
      return 1;
   }
   logit( $directory, $config{'log suffix'}, "Invalid checksum moving directory $directory" );
   return 0;
}
216
 
217
# simple little logger that appends timestamped lines to a per-project file
#   $projectName - project (directory) the messages belong to
#   $suffix      - file suffix; falls back to $config{'log suffix'} when false
#   remaining arguments are the messages, written one per line
# The file is "<local root dir>/<projectName>.<suffix>". It is created (and
# made read/write for everyone) if it does not exist, even when no messages
# are given. Dies if the file cannot be opened.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   $suffix = $config{'log suffix'} unless $suffix;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime(time);
   # every field is zero padded; minutes used to be formatted with %-2d,
   # which produced timestamps such as "09:5 :03"
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   my $isNew = ! -e $logfile;
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # a freshly created logfile is set to rw by everyone
   chmod( 0666, $logfile ) if $isNew;
   # iterate over the arguments rather than shifting until a false value, so
   # a message of "0" or '' does not silently drop the messages after it
   foreach my $message ( @_ ) {
      next unless defined $message;
      print {$log} "$now\t$message\n";
   }
   close $log;
}
15 rodolico 236
 
237
# Run one or more shell commands on a remote server through ssh, in order.
#   $server - host handed to ssh
#   remaining arguments are the commands; processing stops at the first
#   false (empty) one
# Returns the list ( $output, $error ):
#   $output - standard output of the last command that was run
#   $error  - 0 on success; otherwise the signal number that ended ssh, or
#             255 when ssh itself failed (e.g. could not connect). The
#             remaining commands are not run after an error.
# A non-zero exit status of the remote command is deliberately NOT an error:
# callers in this file pass "[ test ] && command" lines, which exit with 1
# whenever the test is false.
sub runRemoteCommand {
   my $server = shift;
   my $output = '';
   foreach my $command ( @_ ) {
      last unless $command;
      # the command travels inside single quotes, so a single quote in it
      # has to be written as '\'' (close quote, escaped quote, reopen)
      ( my $quoted = $command ) =~ s/'/'\\''/g;
      my $result = qx/ssh $server '$quoted'/;
      $output = defined $result ? $result : '';
      # low 7 bits of $? hold the signal (also non-zero when ssh could not be started)
      my $signal = $? & 127;
      return ( $output, $signal ) if $signal;
      # ssh reserves exit status 255 for its own failures
      return ( $output, 255 ) if ( $? >> 8 ) == 255;
   }
   return ( $output, 0 );
}
247
 
11 rodolico 248
 
15 rodolico 249
# Copy a directory to the remote server with rsync -a.
#   $path         - local directory that contains $dirname
#   $dirname      - name of the directory to copy
#   $remoteServer - destination host
#   $remotePath   - destination directory on that host
# rsync's standard output is written to /tmp/lastrsync.log.
# Returns '' on success, otherwise a message describing the failure.
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   # the local path sits inside single quotes, so escape any quote it contains
   ( my $source = "$path/$dirname" ) =~ s/'/'\\''/g;
   qx"rsync -a '$source' $remoteServer:$remotePath > /tmp/lastrsync.log";
   my $status = $?;
   return '' if $status == 0;
   # The old check was  'text' . $? & 127 : "." binds tighter than "&", so it
   # evaluated to 0 (read as "no error" by callers), and it only looked at
   # the signal bits, never at rsync's exit code in the high byte.
   return "rsync failed with error :could not run rsync ($!)" if $status == -1;
   return 'rsync failed with error :signal ' . ( $status & 127 ) if $status & 127;
   return 'rsync failed with error :' . ( $status >> 8 );
}
258
 
18 rodolico 259
# Remove everything from the trash directory that is over $age seconds old.
# Age is judged by the checksum file ("<name>.<md5 suffix>"); when one is old
# enough, both it and the entry it belongs to ("<name>") are deleted.
#   $trashDir - trash directory to clean
#   $age      - minimum age, in seconds, before something is deleted
# Does nothing when $trashDir cannot be opened.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   use File::Path qw(remove_tree);
   my $md5Suffix = $config{'md5 suffix'};
   my @expired = ();
   if ( opendir( my $dh, $trashDir ) ) {
      # Only names of the form "<something>.<suffix>" qualify. The suffix is
      # matched literally, and at least one character is required before the
      # dot: a file named just ".<suffix>" would otherwise make the entry to
      # delete the trash directory itself.
      @expired = grep { fileAge( $_ ) > $age }
                 map  { "$trashDir/$_" }
                 grep { /.\.\Q$md5Suffix\E\z/ }
                 readdir( $dh );
      closedir( $dh );
   }
   print "You should remove the following files\n" if $DEBUG > 1;
   foreach my $md5File ( @expired ) {
      # drop ".<suffix>" from the end to get the entry the checksum belongs to
      my $entry = substr( $md5File, 0, length( $md5File ) - length( $md5Suffix ) - 1 );
      # delete with Perl rather than "rm" so odd characters in names are harmless
      remove_tree( $entry ) if -e $entry || -l $entry;
      unlink( $md5File );
   }
}
276
 
20 rodolico 277
# Create a directory (and any missing parents) if it does not exist yet.
#   $directory   - path to create
#   $permissions - mode handed to chmod for the new directory; '777' when omitted
# An existing directory is left untouched, permissions included.
sub makeDirectories {
   my $directory = shift;
   my $permissions = shift;
   $permissions = '777' unless $permissions;
   unless ( -d $directory ) {
      print "Making directory $directory\n" if $DEBUG > 1;
      # list form runs the commands without a shell, so spaces or shell
      # metacharacters in the path cannot split or alter the command;
      # "--" stops a path that starts with a dash being read as an option
      system( 'mkdir', '-p', '--', $directory );
      system( 'chmod', $permissions, '--', $directory );
   }
}
287
 
18 rodolico 288
 
289
 
16 rodolico 290
###############################################################################
291
# Main
292
###############################################################################
15 rodolico 293
 
16 rodolico 294
# read the configuration; the script dies here when it is missing or invalid
loadConfig();

# make sure each local working directory exists before anything touches it
for my $key ( 'local root dir', 'local trash dir', 'local staging area' ) {
   makeDirectories( $config{$key} );
}

# empty the trash, unless 'trash cleanup' is unset or zero
if ( $config{ 'trash cleanup' } ) {
   cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } );
}

# find the directories under the root that are ready to be moved
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

if ( $DEBUG > 1 ) {
   print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";
}
13 rodolico 310
 
311
# Stage 1: move every ready directory (and its checksum file) into the local
# staging area. A directory that already has an error file is left alone.
for my $directory ( @DirectoriesToMove ) {
   my $fullPath  = "$config{'local root dir'}/$directory";
   my $logFile   = $fullPath . '.' . $config{'log suffix'};
   my $errorFile = $fullPath . '.' . $config{'error suffix'};
   if ( $DEBUG > 3 ) {
      print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n";
   }

   # an earlier failure has to be dealt with before we try this one again
   if ( -e $errorFile ) {
      logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }

   logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );

   if ( $error ) {
      # note the failure in the log and put the reason in the error file
      logit( $directory, $config{'log suffix'},  "Error, move aborted" );
      logit( $directory, $config{'error suffix'},  $error );
      next;
   }

   print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
   logit( $directory, $config{'log suffix'},  "Successfully moved to $config{'local staging area'}" );
}
331
 
29 rodolico 332
 
11 rodolico 333
# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server

# list form of system: no shell involved, so the path is used literally
system( 'mkdir', '-p', '--', $config{'local staging area'} ) unless -d $config{'local staging area'};
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
my @directories;
# Get all the checksum files: names of the form "<directory>.<md5 suffix>".
# The suffix is matched literally and must follow a dot, so a name that
# merely ends with the same letters is not mistaken for a checksum file.
my @toMove = do {
   my $suffixPattern = quotemeta( $config{'md5 suffix'} );
   grep { /.\.$suffixPattern\z/ } readdir( $dh );
};
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n" if $DEBUG > 1;
# create the target directories on the server if they don't exist
runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
   );
16 rodolico 347
 
348
 
15 rodolico 349
# Stage 2: process each staged directory in turn. Copy it to the server's
# staging area, validate the remote copy against the local checksum and, when
# it is good, move the local copy and its checksum file to the trash.
foreach my $md5FileName ( @toMove ) {
   print "Processing $md5FileName\n";
   # the directory name is the checksum file name without ".<md5 suffix>";
   # skip anything that does not have that shape rather than reuse a stale $1
   my $suffixPattern = quotemeta( $config{'md5 suffix'} );
   next unless $md5FileName =~ m/^(.+)\.$suffixPattern$/;
   my $dirname = $1;

   my $error = copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      logit( $dirname, $config{'error suffix'}, $error );
      next;
   }
   logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );

   my $md5sum = getCheckSum( $dirname );
   unless ( $md5sum ) {
      # nothing to validate against; leave it in staging and say so
      logit( $dirname, $config{'log suffix'}, "No checksum recorded for $dirname, skipping validation" );
      next;
   }

   unless ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
      logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      next;
   }

   # the remote copy is good, so the local copy can go to the trash
   system( 'mkdir', '-p', '--', $config{'local trash dir'} ) unless -d $config{'local trash dir'};
   my $md5File = $dirname . '.' . $config{'md5 suffix'};
   my $trashed = move( "$config{'local staging area'}/$dirname", "$config{'local trash dir'}/$dirname" )
              && move( "$config{'local staging area'}/$md5File", "$config{'local trash dir'}/$md5File" );
   if ( $trashed ) {
      logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
   } else {
      # do not report success when the local cleanup did not happen
      logit( $dirname, $config{'error suffix'}, "Copied and validated $dirname, but could not move it to $config{'local trash dir'}: $!" );
   }
}
375
 
29 rodolico 376
# Optional last step: when the configuration supplies a code reference under
# 'final procedure', call it once for every directory found in the server's
# staging area. It receives ( server, remote staging area, remote final
# directory, directory name ); a true return value is written to that
# directory's log file.
# NOTE(review): the argument list is inferred from the undeclared variable
# names the previous version passed -- confirm against the config file.
if ( ref( $config{ 'final procedure' } ) eq 'CODE' ) {
   # "ls -p" marks directories with a trailing slash and grep keeps only
   # those. The listing is captured directly here so it does not depend on
   # what runRemoteCommand returns.
   my $listCommand = "ls -p $config{'target staging area'} | grep /";
   my $listing = `ssh $config{'target server'} '$listCommand'`;
   $listing = '' unless defined $listing;
   foreach my $thisDir ( split( "\n", $listing ) ) {
      $thisDir =~ s/\/$//;   # drop the marker slash added by ls -p
      next unless length $thisDir;
      my $result = $config{ 'final procedure' }->(
         $config{'target server'},
         $config{'target staging area'},
         $config{'target final directory'},
         $thisDir
      );
      # logit needs the project name first; the message goes last
      logit( $thisDir, $config{'log suffix'}, $result ) if ( $result );
   }
} # do the final procedure, if it exist
11 rodolico 384
 
29 rodolico 385
 
386
 
11 rodolico 387
1;