Subversion Repositories sysadmin_scripts

Rev

Rev 19 | Rev 21 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
13 rodolico 3
# archiveDirectories.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
 
18 rodolico 7
# Copyright (c) 2018, Daily Data, Inc
8
# All rights reserved.
9
# 
10
# Redistribution and use in source and binary forms, with or without
11
# modification, are permitted provided that the following conditions are met:
12
# 
13
# 1. Redistributions of source code must retain the above copyright notice, this
14
#    list of conditions and the following disclaimer.
15
# 2. Redistributions in binary form must reproduce the above copyright notice,
16
#    this list of conditions and the following disclaimer in the documentation
17
#    and/or other materials provided with the distribution.
18
# 
19
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
# 
30
# The views and conclusions contained in the software and documentation are those
31
# of the authors and should not be interpreted as representing official policies,
32
# either expressed or implied, of the <project name> project.
33
 
13 rodolico 34
# Script designed to be run from a cron job, which checks if any directories
12 rodolico 35
# are ready to be archived. A directory is defined as a directory under
13 rodolico 36
# the root of $config{'local root dir'}.
11 rodolico 37
 
38
# If found, all directories are moved into the staging area and 
39
# an md5 checksum is calculated for the entire tree.
13 rodolico 40
# After all directories are moved, a second process looks in the staging
11 rodolico 41
# area and copies the files (using rsync for reliability) into the staging
13 rodolico 42
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 rodolico 43
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 44
# in the first stage and, if it passes, the directory is then moved to the 
13 rodolico 45
# $config{'target final directory'}.
12 rodolico 46
# After the copy and move, the directory and its MD5 sum file are moved
13 rodolico 47
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 rodolico 48
# the script).
49
 
50
#
51
# Version: 1.0
52
 
53
use warnings;
54
use strict;
55
use Cwd qw();
56
use File::Copy qw(move);
57
use File::Basename;
13 rodolico 58
use File::stat;
11 rodolico 59
 
18 rodolico 60
our $VERSION = '1.0';
11 rodolico 61
 
20 rodolico 62
my $DEBUG = 3;
18 rodolico 63
 
16 rodolico 64
my %config;
11 rodolico 65
 
16 rodolico 66
my @DirectoriesToMove;
13 rodolico 67
 
16 rodolico 68
# Find the configuration file that lives beside this script and run it.
#
# The file name is the full path of the running script with a trailing
# ".pl" replaced by ".conf"; when the script name has no ".pl" ending,
# ".conf" is simply appended.
#
# The contents are executed as Perl code with a string eval (presumably to
# fill in %config -- the config file itself is not visible from here).
# NOTE(review): string eval runs whatever the file contains, so the config
# file must only be writable by trusted users.
#
# The lexical names below are kept as they are on purpose: code evaluated
# by the string eval can see them.
#
# Dies when the file does not exist or when evaluating it raises an error.
sub loadConfig {
   use FindBin;
   my $configFileName = "$FindBin::Bin/$FindBin::Script";
   # swap the extension, or tack one on when there was nothing to swap
   $configFileName .= '.conf' unless $configFileName =~ s/\.pl$/\.conf/;

   # guard clause: nothing to do without a config file
   die "Could not locate config file $configFileName\n" unless -e $configFileName;

   my $configFileContents = slurpFile( $configFileName );
   eval( $configFileContents );
   die "Error interpreting $configFileName: $@\n" if $@;
} #loadConfig
13 rodolico 83
 
18 rodolico 84
# Read an entire file into a single string.
#
# Parameters:
#    $filename - path of the file to read
# Returns the complete contents of the file, or the empty string when the
# file does not exist (or is empty).
# Dies when the file exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # Three-argument open with a lexical handle: the name is taken literally
   # (no mode characters, no stripping of surrounding whitespace) and the
   # handle cannot collide with a handle of the same name elsewhere.
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   my $contents = do { local $/; <$fh> };   # undef $/ reads everything at once
   close $fh;
   return defined $contents ? $contents : '';
}
93
 
94
# Write a value to a file, replacing whatever the file held before.
#
# Parameters:
#    $filename - path of the file to (over)write
#    @_        - remaining arguments are concatenated, with no separator,
#                and written as the new file contents
# Returns a true value on success.
# Dies when the file cannot be opened, written or closed.
sub writeData {
   my $filename = shift;
   # three-argument open: the file name is never parsed for mode characters
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ ) or die "could not write to $filename: $!\n";
   # buffered write errors (e.g. disk full) only show up when closing
   close( $fh ) or die "could not write to $filename: $!\n";
   return 1;
}
101
 
18 rodolico 102
# Returns how many seconds ago a file was last modified.
#
# Parameters:
#    $filename - path of the file to examine
# Returns the current time minus the file's modification time, in seconds.
# Dies with a readable message when the file cannot be examined.
#
# Relies on the object-returning stat() exported by File::stat, which this
# script loads at the top of the file.
sub fileAge {
   my $filename = shift;
   my $info = stat( $filename )
      or die "could not stat $filename: $!\n";
   my $modified = $info->mtime;
   print "$modified\t$filename\n" if $DEBUG > 3;
   return time - $modified;
}
110
 
111
 
13 rodolico 112
# Look in $rootDir and work out which directories are ready to be archived.
#
# Every non-hidden subdirectory has a checksum file next to it, named
# "<directory>.<md5 suffix>":
#   - no checksum file yet: one is created and the directory has to wait
#   - checksum file newer than $config{'quiesent seconds'}: skipped for now
#   - stored checksum differs from the current one: the file is rewritten
#     (which also refreshes its modification time) and the directory waits
#   - stored checksum equals the current one: the directory is ready
#
# Parameters:
#    $rootDir - directory whose immediate subdirectories are examined
# Returns the names (relative to $rootDir) of the directories that are ready.
# Dies when $rootDir cannot be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir ( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      my $haveMD5File = -e $md5Name;
      if ( $haveMD5File ) {
         # find out when it was last written to
         print "\tFound existing MD5 file $md5Name\n" if $DEBUG > 3;
         # and blow it off if it is too recent; this is decided before the
         # checksum is calculated so a tree we are going to skip anyway is
         # not read from end to end
         if ( fileAge( $md5Name ) < $config{'quiesent seconds'} ) {
            print "\t\tBlowing it off because it is less than $config{'quiesent seconds'} seconds old\n" if $DEBUG > 4;
            next;
         }
      }
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      unless ( $haveMD5File ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }
      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # overwrite, since the checksum has changed
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
153
 
154
# calculate the checksum of a directory by
# 1. calculate checksum of each individual file in the entire tree
# 2. Grab the first column, which is the checksum
# 3. sort the result since Linux will not always return them in the same order
# 4. do a checksum of the checksums
#
# This is highly unlikely to give the same answer if any file changes
# in the process of the copy
#
# Note that only file contents take part in the result; file names do not.
#
# Parameters:
#    $directory - root of the tree to checksum
# Returns the checksum as a hex string, or -1 when $directory is not a
# directory.
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # The path sits inside single quotes on the shell command line, so any
   # single quote in the name has to be written as '\'' or the quoting
   # (and therefore the command) falls apart.
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
169
 
12 rodolico 170
# Moves a directory into the staging area, together with its checksum file
# (same name as the directory plus "." and $config{'md5 suffix'}).
#
# Parameters:
#    $directory - name of the directory, without any path
#    $fullPath  - current full path of the directory
#    $staging   - staging area to move it into (created when missing)
# Returns '' on success, or a message when a file or directory of that name
# is already present in the staging area (nothing is moved in that case).
# Dies when the staging area cannot be created or a move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   # and let's get the md5 file name also
   my $md5File = $fullPath . ".$config{'md5 suffix'}";
   unless ( -d $staging ) {
      # the second -d test keeps this from failing when something else
      # created the directory between our test and our mkdir
      mkdir( $staging ) or -d $staging
         or die "Error creating staging area $staging: $!\n";
   }
   return 'Directory already exists in staging' if -e "$staging/$directory";
   move( $fullPath, "$staging/$directory" ) or die "Error moving $fullPath to $staging/$directory: $!\n";
   move( $md5File, $staging ) or die "Error moving $md5File to $staging: $!\n";
   return '';
}
16 rodolico 182
 
183
# Fetch the checksum that was stored for a project in the local staging area.
#
# Parameters:
#    $project - name of the staged directory
# Returns the contents of "<local staging area>/<project>.<md5 suffix>",
# or '' when that file does not exist.
sub getCheckSum {
   my $project = shift;
   my $sumPath = join( '.', "$config{'local staging area'}/$project", $config{'md5 suffix'} );
   return ( -e $sumPath ) ? slurpFile( $sumPath ) : '';
}
191
 
12 rodolico 192
# Verifies the directory is correct on the server by comparing the checksum
# calculated locally with one calculated on the remote server. If valid,
# moves it into the final location on the remote server.
#
# Parameters:
#    $remoteServer  - host to ssh to
#    $remoteStaging - staging directory on the remote server
#    $remoteTarget  - final directory on the remote server
#    $directory     - name of the directory inside $remoteStaging
#    $checksum      - checksum calculated locally for the directory
# Returns 1 when the checksums match and the remote move succeeded, else 0.
# Each failure is recorded in the project's error file.
#
# NOTE(review): the paths are placed inside single quotes that themselves
# sit inside a double-quoted ssh argument, so names containing quotes, "$"
# or backticks will not survive; confirm such names cannot occur.
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   # same recipe as the local checksum: per-file md5sum, sorted, then summed
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   $md5sum = '' unless defined $md5sum;   # backticks give undef when ssh could not be run
   chomp $md5sum;
   unless ( $checksum eq $md5sum ) {
      # logit wants ( project, suffix, messages... )
      logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }
   my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
   return 1 if system( $command ) == 0;
   logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
212
 
213
# Simple little logger that appends time-stamped messages to a project file.
#
# Parameters:
#    $projectName - name of the project (directory) the messages belong to
#    $suffix      - file suffix; the file written is
#                   "<local root dir>/<projectName>.<suffix>"
#    @_           - the messages, one output line each
# Each line is "YYYY-MM-DD HH:MM:SS<tab>message". Undefined and empty
# messages are skipped.
# Dies when the file cannot be opened for appending.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded, minutes included
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   foreach my $message ( @_ ) {
      # test for "nothing to say" rather than for truth, so that a
      # message of "0" is still written
      next unless defined $message && length $message;
      print {$log} "$now\t$message\n";
   }
   close $log;
}
15 rodolico 226
 
227
# Run one or more shell commands on a remote server through ssh.
#
# Parameters:
#    $server - host to ssh to
#    @_      - the commands, run one after the other; the list ends at the
#              first empty (false) command
# Returns ( $output, $error ) for the first command that was killed by a
# signal or could not be started, where $error is the low seven bits of $?;
# returns ( '', 0 ) otherwise.
#
# Only the signal bits of $? are looked at, so a command that merely exits
# with a non-zero status does NOT stop the sequence. Callers in this script
# pass "[ ! -d X ] && mkdir -p X" style commands, which exit non-zero when
# the directory is already there, so this must stay as it is.
sub runRemoteCommand {
   my $server = shift;
   while ( my $command = shift ) {
      # The command travels inside single quotes; write any single quote it
      # contains as '\'' so the remote side receives the command unchanged.
      ( my $quoted = $command ) =~ s/'/'\\''/g;
      my $output = qx/ssh $server '$quoted'/;
      if ( my $error = $? & 127 ) {
         return ( $output, $error );
      }
   }
   return ('', 0);
}
237
 
11 rodolico 238
 
15 rodolico 239
# Copy a directory to the remote server with rsync.
#
# Parameters:
#    $path         - local directory holding the directory to copy
#    $dirname      - name of the directory to copy
#    $remoteServer - host to copy to
#    $remotePath   - directory on the remote host to copy into
# Returns '' on success, or a message starting with
# 'rsync failed with error :' followed by the rsync exit code (or a note
# about the signal / start-up failure) when the copy did not succeed.
# rsync's standard output is written to /tmp/lastrsync.log.
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   # the source goes inside single quotes so names with spaces survive;
   # a single quote inside the name is written as '\''
   ( my $source = "$path/$dirname" ) =~ s/'/'\\''/g;
   system( "rsync -a '$source' $remoteServer:$remotePath > /tmp/lastrsync.log" );
   my $status  = $?;
   my $osError = "$!";   # only meaningful when $status is -1
   return '' if $status == 0;
   # $? holds the exit code in its high byte and the signal in the low bits
   my $reason = $status == -1  ? "could not run rsync ($osError)"
              : $status & 127  ? 'signal ' . ( $status & 127 )
              :                  $status >> 8;
   return 'rsync failed with error :' . $reason;
}
248
 
18 rodolico 249
# Simply remove everything from the trash directory over $age seconds old.
#
# The age of an item is the age of its checksum file
# ("<name>.<md5 suffix>"); the checksum file and the directory of the same
# name are removed together.
#
# Parameters:
#    $trashDir - the trash directory to clean
#    $age      - minimum age, in seconds, before an item is removed
# A trash directory that cannot be opened is silently treated as empty.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   my $md5Suffix = $config{'md5 suffix'};
   my @toRemove = ();
   if ( opendir( my $dh, $trashDir ) ) {
      # get all the md5sum files which are older than $age seconds old
      foreach my $entry ( readdir( $dh ) ) {
         # Insist on a non-empty name in front of ".<suffix>": a file
         # called just ".<suffix>" would otherwise name the trash directory
         # itself as the thing to delete. \Q..\E keeps characters in the
         # suffix from acting as regex syntax.
         next unless $entry =~ m/^(.+)\.\Q$md5Suffix\E$/;
         my $item = "$trashDir/$1";
         push @toRemove, $item if fileAge( "$item.$md5Suffix" ) > $age;
      }
      closedir( $dh );
   }
   print "You should remove the following files\n" if $DEBUG > 1;
   foreach my $item ( @toRemove ) {
      # list form: no shell involved, so odd characters in names are harmless
      system( 'rm', '-fR', '--', $item, "$item.$md5Suffix" );
   }
}
266
 
20 rodolico 267
# Create a directory, and any missing parents, when it does not exist yet.
#
# Parameters:
#    $directory   - the directory to create
#    $permissions - mode handed to chmod for a newly created directory;
#                   defaults to '777'
# A directory that already exists is left completely alone (no chmod).
# A missing directory name only produces a warning.
sub makeDirectories {
   my $directory = shift;
   my $permissions = shift;
   $permissions = '777' unless $permissions;
   unless ( defined $directory && length $directory ) {
      # typically a key that is missing from the configuration
      warn "makeDirectories called without a directory name\n";
      return;
   }
   unless ( -d $directory ) {
      print "Making directory $directory\n" if $DEBUG > 1;
      # list form: the name goes to mkdir/chmod untouched by any shell
      system( 'mkdir', '-p', '--', $directory );
      system( 'chmod', $permissions, '--', $directory );
   }
}
277
 
18 rodolico 278
 
279
 
16 rodolico 280
###############################################################################
281
# Main
282
###############################################################################
15 rodolico 283
 
16 rodolico 284
# read the configuration; everything below depends on it
loadConfig();
#use Data::Dumper;
#print Dumper( \%config );
#die;

# make sure the local working directories are there
foreach my $dirsToMake ( 'local root dir', 'local trash dir', 'local staging area' ) {
   makeDirectories( $config{$dirsToMake} );
}

# clean the trash if $config{ 'trash cleanup' } is non-zero
cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } ) if $config{ 'trash cleanup' };

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n" if $DEBUG > 1;

# stage 1: move every directory that is ready into the local staging area
foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = $config{'local root dir'} . "/$directory";
   my $logFile = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;
   # an error file left by an earlier run blocks the directory
   if ( -e $errorFile ) {
      logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }
   logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( ! $error ) {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $directory, $config{'log suffix'},  "Successfully moved to $config{'local staging area'}" );
   } else {
      logit( $directory, $config{'log suffix'},  "Error, move aborted" );
      logit( $directory, $config{'error suffix'},  $error );
   }
}

# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server
`mkdir -p $config{'local staging area'}` unless -d $config{'local staging area'};
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# get all the .md5 files; the pattern insists on a non-empty directory name
# in front of ".<suffix>" and treats the suffix as literal text
my $md5Pattern = qr/^(.+)\.\Q$config{'md5 suffix'}\E$/;
my @toMove = grep { $_ =~ $md5Pattern } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n" if $DEBUG > 1;
# create the target directory on the server if it doesn't exist
runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
   );

# stage 2: now, process each staged directory in turn
foreach my $md5Name ( @toMove ) {
   print "Processing $md5Name\n";
   # the directory has the name of its checksum file, minus the suffix
   my ( $dirname ) = $md5Name =~ $md5Pattern;
   next unless defined $dirname;
   my $error = copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      logit( $dirname, $config{'error suffix'}, $error );
      next;
   } else {
      logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );
   }

   my $md5sum = getCheckSum( $dirname );
   next unless $md5sum;
   my $rsync = "rsync -av '$config{'local staging area'}/$dirname' $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   logit( $dirname, $config{'log suffix'}, $rsync );
   if ( system ( $rsync ) == 0 ) { # we succeeded
      if ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
         # the remote copy is verified and in place; retire the local copy
         `mkdir -p $config{'local trash dir'}` unless -d $config{'local trash dir'};
         my $stagedDir = "$config{'local staging area'}/$dirname";
         move( $stagedDir, "$config{'local trash dir'}/$dirname" )
            or logit( $dirname, $config{'log suffix'}, "Warning: could not move $stagedDir to $config{'local trash dir'}: $!" );
         my $md5File = $dirname . '.' . $config{'md5 suffix'};
         move( "$config{'local staging area'}/$md5File", "$config{'local trash dir'}/$md5File" )
            or logit( $dirname, $config{'log suffix'}, "Warning: could not move $md5File to $config{'local trash dir'}: $!" );
         logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
      } else {
         logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      }
   } else {
      logit( $dirname, $config{'error suffix'}, "Unknown error attempting to rsync $dirname" );
   }
}

1;