Subversion Repositories sysadmin_scripts

Rev

Rev 21 | Rev 27 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
13 rodolico 3
# archiveDirectories.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
 
18 rodolico 7
# Copyright (c) 2018, Daily Data, Inc
8
# All rights reserved.
9
# 
10
# Redistribution and use in source and binary forms, with or without
11
# modification, are permitted provided that the following conditions are met:
12
# 
13
# 1. Redistributions of source code must retain the above copyright notice, this
14
#    list of conditions and the following disclaimer.
15
# 2. Redistributions in binary form must reproduce the above copyright notice,
16
#    this list of conditions and the following disclaimer in the documentation
17
#    and/or other materials provided with the distribution.
18
# 
19
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
# 
30
# The views and conclusions contained in the software and documentation are those
31
# of the authors and should not be interpreted as representing official policies,
32
# either expressed or implied, of the <project name> project.
33
 
13 rodolico 34
# Script designed to be run from a cron job, which checks if any directories
12 rodolico 35
# are ready to be archived. A directory is defined as a directory under
13 rodolico 36
# the root of $config{'local root dir'}.
11 rodolico 37
 
38
# If found, all directories are moved into the staging area and 
39
# an md5 checksum is calculated for the entire tree.
13 rodolico 40
# After all directories are moved, a second process looks in the staging
11 rodolico 41
# area and copies the files (using rsync for reliability) into the staging
13 rodolico 42
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 rodolico 43
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 44
# in the first stage and, if it passes, the directory is then moved to the 
13 rodolico 45
# $config{'target final directory'}.
12 rodolico 46
# After the copy and move, the directory and its MD5 sum file are moved
13 rodolico 47
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 rodolico 48
# the script).
49
 
50
#
51
# Version: 1.0
52
 
53
use warnings;
54
use strict;
55
use Cwd qw();
56
use File::Copy qw(move);
57
use File::Basename;
13 rodolico 58
use File::stat;
11 rodolico 59
 
22 rodolico 60
# http://computer-programming-forum.com/53-perl/843e6090fe295ffc.htm
61
# how to verify this script is not already running
62
# creates a lock file which only lasts the length of the script
63
# if we die before removing it, the lock is released.
64
use Fcntl qw(:flock);  #  imports some constants
65
my $LOCKFILE = '/tmp/archiveDirectories.lock';
# Take an exclusive advisory lock so two invocations (e.g. overlapping cron
# runs) never process the same directories at once. LOCK is a package-global
# handle on purpose: it is never closed, so the lock is held until the
# process exits, at which point the operating system releases it.
open( LOCK, '>>', $LOCKFILE ) or die ("Can't open lockfile $LOCKFILE: $!");
# LOCK_NB makes flock fail immediately when another instance holds the lock.
# Without it flock simply waits, and the "already running" branch is never taken.
flock( LOCK, LOCK_EX | LOCK_NB ) or die ("I'm already running");

our $VERSION = '1.0';

# Verbosity of diagnostic output on STDOUT: 0 is silent, and the code below
# prints progressively more detail for values above 0, 1, 2, 3 and 4.
my $DEBUG = 0;

# Settings used throughout the script; expected to be filled in by the Perl
# code in the .conf file that loadConfig() evaluates.
my %config;

# Directory names (relative to $config{'local root dir'}) found ready to archive.
my @DirectoriesToMove;
13 rodolico 76
 
16 rodolico 77
# Locate and evaluate the configuration file that lives beside this script.
# The file name is the script's own path with a trailing ".pl" replaced by
# ".conf", or with ".conf" appended when there is no ".pl" to replace.
# NOTE(review): the file is executed as Perl through a string eval, so anyone
# able to write to it can run arbitrary code as the invoking user.
# Takes no arguments. Dies if the file is missing or does not evaluate cleanly.
sub loadConfig {
   use FindBin;
   my $confPath = "$FindBin::Bin/$FindBin::Script";
   $confPath .= '.conf' unless $confPath =~ s/\.pl$/\.conf/;

   # nothing sensible can be done without a configuration
   die "Could not locate config file $confPath\n" unless -e $confPath;

   my $perlSource = &slurpFile( $confPath );
   eval( $perlSource );
   die "Error interpreting $confPath: $@\n" if $@;
} #loadConfig
13 rodolico 92
 
18 rodolico 93
# Read an entire file into a single string.
#   $filename - path of the file to read
# Returns the file's contents, or '' when no name is given or the file does
# not exist. Dies if the file exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless defined $filename && -e $filename;
   # three-argument open: the name is used exactly as given, so leading or
   # trailing whitespace and characters such as '>' or '|' are never
   # interpreted as part of the open mode
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # no record separator, so one read returns everything
   my $contents = <$fh>;
   close $fh;
   return defined $contents ? $contents : '';
}
102
 
103
# Write a value to a file, replacing whatever the file held before.
#   $filename - path of the file to (over)write
#   @_        - remaining arguments are concatenated and written as-is
# Dies if the file cannot be opened, written or closed; a failure here would
# otherwise leave a truncated checksum file behind unnoticed.
sub writeData {
   my $filename = shift;
   # three-argument open so the name is never parsed for mode characters
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ ) or die "could not write to $filename: $!\n";
   # buffered write errors only surface at close, so check it as well
   close( $fh ) or die "could not write to $filename: $!\n";
}
110
 
18 rodolico 111
# Returns how many seconds ago a file was last modified.
#   $filename - path of the file to examine
# Relies on File::stat's object-returning stat(). Dies with a readable
# message when the file cannot be examined.
sub fileAge {
   my $filename = shift;
   my $info = stat( $filename ) or die "could not stat $filename: $!\n";
   # use the named accessor rather than reaching into the object's internals
   my $modified = $info->mtime;
   print "$modified\t$filename\n" if $DEBUG > 3;
   return time - $modified;
}
119
 
120
 
13 rodolico 121
# Look through $rootDir for sub-directories that are ready to be archived.
#
# A directory is ready when a checksum file named
# "<directory>.<md5 suffix>" exists beside it, that file is at least
# $config{'quiesent seconds'} old, and the checksum stored in it still
# matches the directory's current checksum (i.e. nothing changed while we
# waited). A directory with no checksum file gets one created; a directory
# whose checksum changed gets its file rewritten, which restarts the wait.
#
#   $rootDir - directory to scan; entries starting with '.' are ignored
# Returns the list of ready directory names (relative to $rootDir).
# Dies if $rootDir cannot be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir ( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      my $haveMD5File = -e $md5Name;
      if ( $haveMD5File ) {
         print "\tFound existing MD5 file $md5Name\n" if $DEBUG > 3;
         # Too recent to decide anything yet. Checking this first avoids
         # hashing the entire tree on runs where the result would be discarded.
         if ( &fileAge( $md5Name ) < $config{'quiesent seconds'} ) {
            print "\t\tBlowing it off because it is less than $config{'quiesent seconds'} seconds old\n" if $DEBUG > 4;
            next;
         }
      }
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      unless ( $haveMD5File ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         &writeData( $md5Name, $md5 );
         next;
      }
      my $oldMD5 = &slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # record the new checksum; the rewrite also resets the file's age
         &writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
162
 
163
# Calculate a single checksum for an entire directory tree by
# 1. calculating the md5sum of every regular file in the tree
# 2. keeping only the first column of the output, which is the checksum
# 3. sorting the result, since find does not guarantee any particular order
# 4. taking the md5sum of that sorted list of checksums
#
# Only file contents take part; file names and empty directories do not.
#
#   $directory - path of the tree to checksum
# Returns the checksum as a hex string, or -1 if $directory is not a directory.
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # make the name safe inside the single-quoted shell word below: close the
   # quote, emit an escaped quote, and reopen it
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   # "-exec ... +" hands many files to each md5sum process rather than
   # starting one per file; the per-file output lines are the same
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} + | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
178
 
12 rodolico 179
# Move a directory, together with its checksum file, into the staging area.
#   $directory - name of the directory (no path)
#   $fullPath  - current full path of that directory
#   $staging   - staging area to move it into (created if missing)
# The checksum file is expected at "$fullPath.<md5 suffix>".
# Returns '' on success, or a message when the staging area already holds an
# entry of the same name. Dies if either move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   my $destination = "$staging/$directory";
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";

   if ( ! -d $staging ) {
      mkdir( $staging );
   }
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }

   unless ( move( $fullPath, $destination ) ) {
      die "Error moving $fullPath to $destination: $!\n";
   }
   unless ( move( $checksumFile, $staging ) ) {
      die "Error moving $checksumFile to $staging: $!\n";
   }
   return '';
}
16 rodolico 191
 
192
# Fetch the stored checksum of a project sitting in the local staging area.
#   $project - directory name of the project
# Reads "<local staging area>/<project>.<md5 suffix>" and returns its
# contents, or '' when that file does not exist.
sub getCheckSum {
   my $project = shift;
   my $checkSumFile = join( '',
      $config{'local staging area'}, '/', $project, '.', $config{'md5 suffix'}
   );
   return ( -e $checkSumFile ) ? &slurpFile( $checkSumFile ) : '';
}
200
 
12 rodolico 201
# Verify that a directory arrived intact on the remote server by comparing the
# checksum calculated locally with one calculated over ssh on the remote
# copy. If they match, move the directory from the remote staging area into
# its final location on the remote server.
#   $remoteServer  - ssh destination
#   $remoteStaging - staging directory on the remote server
#   $remoteTarget  - final directory on the remote server
#   $directory     - name of the directory being verified
#   $checksum      - checksum calculated locally
# Returns 1 when the checksums match and the remote move succeeds, otherwise
# 0. Failures are recorded in the directory's error file.
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   # same pipeline as calcMD5, executed on the remote side
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   $md5sum = '' unless defined $md5sum;   # backticks give undef if ssh cannot be started
   chomp $md5sum;
   unless ( $checksum eq $md5sum ) {
      # logit needs the project name and suffix ahead of the message; with the
      # message alone it becomes the file name and nothing is written
      &logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }
   my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
   return 1 if system( $command ) == 0;
   &logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
221
 
222
# Simple logger: appends timestamped lines to a per-project file.
#   $projectName - project (directory) the messages are about
#   $suffix      - file suffix, selecting which file to append to
#   @_           - remaining arguments are the messages, one line each
# The file is "<local root dir>/<projectName>.<suffix>". It is created
# read/write for everyone if it does not exist yet.
# Dies if the file cannot be opened for appending.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);
   # every field is zero padded, minutes included, so stamps line up and sort
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   my $isNew = ! -e $logfile;
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # set a newly created file to rw by everyone
   chmod( 0666, $logfile ) if $isNew;
   # walk the argument list itself, so a message of '0' or '' does not end
   # the loop early and drop whatever follows it
   foreach my $message ( @_ ) {
      next unless defined $message;
      print {$log} "$now\t$message\n";
   }
   close( $log );
}
15 rodolico 240
 
241
# Run one or more shell commands on a remote server over ssh, in order.
#   $server - ssh destination
#   @_      - remaining arguments are the commands; processing stops at the
#             first empty (false) one
# Each command is wrapped in single quotes on the ssh command line.
# Only the low seven bits of $? (the number of the signal that ended ssh) are
# examined, so a command that merely exits non-zero is not treated as an error.
# Returns ( output, signal ) for the first command ended by a signal,
# otherwise ( '', 0 ).
sub runRemoteCommand {
   my ( $server, @commands ) = @_;
   foreach my $command ( @commands ) {
      last unless $command;
      my $output = `ssh $server '$command'`;
      my $signal = $? & 127;
      return ( $output, $signal ) if $signal;
   }
   return ( '', 0 );
}
251
 
11 rodolico 252
 
15 rodolico 253
# Copy a directory to the remote server with rsync.
#   $path         - local directory containing the directory to copy
#   $dirname      - name of the directory to copy
#   $remoteServer - rsync/ssh destination host
#   $remotePath   - directory on the remote server to copy into
# rsync's standard output is saved in /tmp/lastrsync.log.
# Returns '' on success, otherwise a message describing the failure.
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   my $status = system( "rsync -a '$path/$dirname' $remoteServer:$remotePath > /tmp/lastrsync.log" );
   return '' if $status == 0;
   return "rsync could not be started: $!" if $status == -1;
   # low seven bits: number of the signal that ended the process, if any
   my $signal = $status & 127;
   return 'rsync killed by signal ' . $signal if $signal;
   # high byte: rsync's own exit code. Parentheses are needed because '.'
   # binds tighter than the bit operators.
   return 'rsync failed with error :' . ( $status >> 8 );
}
262
 
18 rodolico 263
# Remove everything from the trash directory that is over $age seconds old.
#   $trashDir - directory to clean; silently does nothing if it cannot be read
#   $age      - minimum age in seconds, judged by the checksum file's age
# Trash entries come in pairs: a directory and "<directory>.<md5 suffix>".
# The checksum file decides whether the pair is old enough; both are then
# deleted.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   my $md5Suffix = $config{'md5 suffix'};
   # Require at least one character before ".<suffix>" and treat the suffix
   # as literal text. A file named just ".<suffix>" would otherwise strip
   # down to the trash directory itself and get the whole directory deleted.
   my $md5Pattern = qr/^(.+)\.\Q$md5Suffix\E$/;
   my @toRemove = ();
   if ( opendir( my $dh, $trashDir ) ) {
      # get all the md5sum files which are older than $age seconds old
      @toRemove = grep { &fileAge( $_ ) > $age }
                  map  { "$trashDir/$_" }
                  grep { /$md5Pattern/ } readdir( $dh );
      closedir( $dh );
   }
   print "You should remove the following files\n" if $DEBUG > 1;
   foreach my $md5File ( @toRemove ) {
      # take the directory name from this match only; never reuse a stale $1
      my ( $thisDir ) = $md5File =~ $md5Pattern;
      next unless defined $thisDir;
      print "\t$thisDir\n\t$md5File\n" if $DEBUG > 1;
      # list form: no shell involved, so quotes or spaces in names are harmless
      system( 'rm', '-fR', $thisDir, $md5File );
   }
}
280
 
20 rodolico 281
# Create a directory, including any missing parents, if it does not exist.
#   $directory   - path to create
#   $permissions - optional mode handed to chmod(1); defaults to '777'
# The mode is only applied when the directory is created here; an existing
# directory is left untouched. Warns and does nothing when no directory is
# given.
sub makeDirectories {
   my $directory = shift;
   my $permissions = shift;
   $permissions = '777' unless $permissions;
   unless ( defined $directory && length $directory ) {
      warn "makeDirectories called without a directory name\n";
      return;
   }
   unless ( -d $directory ) {
      print "Making directory $directory\n" if $DEBUG > 1;
      # list form bypasses the shell, so spaces or quotes in the path are safe
      system( 'mkdir', '-p', $directory );
      system( 'chmod', $permissions, $directory );
   }
}
291
 
18 rodolico 292
 
293
 
16 rodolico 294
###############################################################################
295
# Main
296
###############################################################################
15 rodolico 297
 
16 rodolico 298
loadConfig();

# make sure every local working directory is there before we start
for my $configKey ( 'local root dir', 'local trash dir', 'local staging area' ) {
   makeDirectories( $config{$configKey} );
}

# clean the trash if $config{ 'trash cleanup' } is non-zero
if ( $config{ 'trash cleanup' } ) {
   cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } );
}

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

if ( $DEBUG > 1 ) {
   print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";
}

# move each ready directory into the local staging area, keeping a per
# directory log; a directory with an existing error file is left alone
for my $directory ( @DirectoriesToMove ) {
   my $logSuffix = $config{'log suffix'};
   my $fullPath = "$config{'local root dir'}/$directory";
   my $logFile = "$fullPath.$logSuffix";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;

   if ( -e $errorFile ) {
      logit( $directory, $logSuffix, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }

   logit( $directory, $logSuffix, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( $error ) {
      logit( $directory, $logSuffix, "Error, move aborted" );
      logit( $directory, $config{'error suffix'}, $error );
   } else {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $directory, $logSuffix, "Successfully moved to $config{'local staging area'}" );
   }
}
335
 
336
# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server
&makeDirectories( $config{'local staging area'} );
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# A staged project is identified by its checksum file "<name>.<md5 suffix>".
# The suffix is matched as literal text and the name must not be empty.
my $stagedMD5Pattern = qr/^(.+)\.\Q$config{'md5 suffix'}\E$/;
# get all the .md5 files
my @toMove = grep { /$stagedMD5Pattern/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n" if $DEBUG > 1;
# create the target directory on the server if it doesn't exist
&runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
   );

# now, process each directory in turn
foreach my $md5Name ( @toMove ) {
   print "Processing $md5Name\n";
   # strip the suffix to get the directory name, using this match's capture only
   my ( $dirname ) = $md5Name =~ $stagedMD5Pattern;
   next unless defined $dirname;

   my $error = &copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      &logit( $dirname, $config{'error suffix'}, $error );
      next;
   }
   &logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );

   my $md5sum = &getCheckSum( $dirname );
   next unless $md5sum;
   unless ( &validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
      &logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      next;
   }

   # The remote copy is verified and in place; retire the local copy to the
   # trash. Only report success if both moves actually happened.
   &makeDirectories( $config{'local trash dir'} );
   my $md5File = $dirname . '.' . $config{'md5 suffix'};
   if (    move( "$config{'local staging area'}/$dirname", "$config{'local trash dir'}/$dirname" )
        && move( "$config{'local staging area'}/$md5File", "$config{'local trash dir'}/$md5File" ) ) {
      &logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
   } else {
      &logit( $dirname, $config{'error suffix'}, "Copied $dirname to $config{'target server'} but could not move the local copy to $config{'local trash dir'}: $!" );
   }
}
378
 
379
 
380
1;