Subversion Repositories sysadmin_scripts

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
13 rodolico 3
# archiveDirectories.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
7
 
13 rodolico 8
# Script designed to be run from a cron job, which checks if any directories
12 rodolico 9
# are ready to be archived. A directory is defined as a directory under
13 rodolico 10
# the root of $config{'local root dir'}.
11 rodolico 11
 
12
# If found, all directories are moved into the staging area and 
13
# an md5 checksum is calculated for the entire tree.
13 rodolico 14
# After all directories are moved, a second process looks in the staging
11 rodolico 15
# area and copies the files (using rsync for reliability) into the staging
13 rodolico 16
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 rodolico 17
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 18
# in the first stage and, if it passes, the directory is then moved to the 
13 rodolico 19
# $config{'target final directory'}.
12 rodolico 20
# After the copy and move, the directory and its MD5 sum file are moved
13 rodolico 21
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 rodolico 22
# the script).
23
 
13 rodolico 24
# Script does NOT handle the situation where directories are being moved
11 rodolico 25
# while the script is running, so the script should be run at a time
26
# when there is no other activity on the server.
27
#
28
# Version: 1.0
29
 
30
use warnings;
31
use strict;
32
use Cwd qw();
33
use File::Copy qw(move);
34
use File::Basename;
13 rodolico 35
use File::stat;
11 rodolico 36
 
13 rodolico 37
# Verbosity of the diagnostic output. The conditional print statements in
# this script compare it against thresholds from 0 up to 4, so 5 shows
# everything and 0 hides all of them.
my $DEBUG = 5;

# All site specific settings live in this one hash.
my %config = (
   # ---- directories on this machine ----
   # where end users drop the directories they want archived
   'local root dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/ArchiveProjects',
   # where directories are moved while processing
   # NOTE(review): no code in this file reads this key -- confirm it is still needed
   'local work dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/transfer_area',
   # where directories end up once the job has completed
   'local trash dir'        => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Trash',
   # where directories wait while they are being transferred
   'local staging area'     => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Staging',

   # ---- the machine the archives are sent to ----
   # name or ip of the target; must accept ssh logins without a password
   'target server'          => 'bsd',
   # directory on the target which receives the copy while it is in progress
   'target staging area'    => '/home/test/.Staging',
   # directory on the target where verified directories are finally placed
   'target final directory' => '/home/archives/',

   # ---- names of the bookkeeping files kept next to each directory ----
   # suffix of the file holding the md5 of a directory
   'md5 suffix'             => 'md5sum',
   # suffix of the file recording the actions taken
   'log suffix'             => 'log',
   # suffix of the file recording errors
   'error suffix'           => 'err',

   # ---- timing ----
   # seconds a directory must remain undisturbed before it is worked on
   'quiesent seconds'       => 5 * 60,     # five minutes
   # seconds to leave things in the trash directory; 0 means never clean it
   'trash cleanup'          => 7 * 86400,  # 7 days
);

# names of the directories found ready to archive during this run
my @DirectoriesToMove;
69
 
70
# Read an entire file into a single string.
#
# Parameters:
#    $filename - path of the file to read
# Returns:
#    the complete contents of the file, or the empty string if the file
#    does not exist
# Dies if the file exists but can not be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # three argument open, so that whitespace or mode characters in the
   # file name are never interpreted by open itself
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # no record separator: a single read returns the whole file
   my $contents = <$fh>;
   close( $fh );
   return defined $contents ? $contents : '';
}
79
 
80
# Write values to a file, replacing whatever the file held before.
#
# Parameters:
#    $filename - path of the file to create or overwrite
#    @_        - the remaining arguments are concatenated, with nothing
#                added between or after them, and written to the file
# Dies if the file can not be opened, or if the data can not be flushed
# to it when the file is closed.
sub writeData {
   my $filename = shift;
   # three argument open, so that whitespace or mode characters in the
   # file name are never interpreted by open itself
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ );
   # buffered write errors (disk full, etc) only show up at close
   close( $fh ) or die "could not write to $filename: $!\n";
}
87
 
88
# Look in the directory where users drop things and decide which of the
# directories found there are ready to be archived.
#
# A checksum file ("<directory>.<md5 suffix>") is kept beside each
# directory. For each directory (names starting with a dot are ignored):
#    - no checksum file yet: create it, directory is not ready
#    - checksum file newer than 'quiesent seconds': not ready
#    - stored checksum equals the current one: ready
#    - stored checksum differs: store the new checksum, which also
#      restarts the waiting period, not ready
#
# Parameters:
#    $rootDir - directory to scan
# Returns:
#    list of directory names (relative to $rootDir) which are ready
# Dies if $rootDir can not be opened.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }
      # find out when it was last written to. stat here is the File::stat
      # version, which returns an object (or undef if the file vanished
      # between the test above and now)
      my $fileInfo = stat( $md5Name );
      next unless $fileInfo;
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }
      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # the checksum has changed, so record the new one; the fresh
         # modification time makes the next run wait again
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
132
 
133
# calculate the checksum of a directory by
# 1. calculate checksum of each individual file in the entire tree
# 2. Grab the first column, which is the checksum
# 3. sort the result since Linux will not always return them in the same order
# 4. do a checksum of the checksums
#
# This is highly unlikely to give the same answer if any file changes
# in the process of the copy. Only file contents take part in the
# result; file and directory names do not.
#
# Parameters:
#    $directory - path of the directory to checksum
# Returns:
#    the checksum as printed by md5sum, or -1 if $directory is not a
#    directory
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # The path is handed to the shell inside single quotes, so any single
   # quote in the name itself must be written as '\'' (close the quote,
   # an escaped quote, open the quote again)
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
148
 
12 rodolico 149
# Move one directory, together with its checksum file, out of the
# directory it was found in and into the staging area.
#
# Parameters:
#    $directory - name of the directory, without any path
#    $fullPath  - complete path of the directory where it is now
#    $staging   - staging area to move it into (created if missing)
# Returns:
#    the empty string on success, or a message if something with the same
#    name is already sitting in the staging area
# Dies if either of the two moves fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   my $destination = "$staging/$directory";
   # the checksum file lives beside the directory, named after it
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";

   if ( ! -d $staging ) {
      mkdir $staging;
   }
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }
   unless ( move( $fullPath, $destination ) ) {
      die "Error moving $fullPath to $destination: $!\n";
   }
   # $staging is a directory, so the file keeps its own name inside it
   unless ( move( $checksumFile, $staging ) ) {
      die "Error moving $checksumFile to $staging: $!\n";
   }
   return '';
}
161
 
12 rodolico 162
# Verify that a directory arrived intact on the remote server by comparing
# the checksum calculated locally with one calculated on the server (using
# the same find/md5sum pipeline). If they agree, the directory is moved
# from the remote staging area into its final location on that server.
#
# Parameters:
#    $remoteServer  - server to connect to with ssh
#    $remoteStaging - directory on the server holding the copied directory
#    $remoteTarget  - directory on the server to move it into when valid
#    $directory     - name of the directory, without any path
#    $checksum      - checksum which was calculated locally
# Returns:
#    1 if the checksums agree and the remote move succeeded, 0 otherwise.
#    Failures are recorded in the error file of $directory.
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   $md5sum = '' unless defined $md5sum;
   chomp $md5sum;
   # A missing local checksum must never validate: if ssh also failed,
   # both sides would be empty and would otherwise compare as equal
   unless ( defined $checksum && length $checksum && $checksum eq $md5sum ) {
      logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }
   my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
   return 1 if system( $command ) == 0;
   logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
182
 
183
# Simple little logger which appends time stamped lines to a file kept in
# the local root directory and named "<projectName>.<suffix>".
#
# Parameters:
#    $projectName - name of the directory the messages are about
#    $suffix      - suffix of the file to write to; callers pass the
#                   configured log suffix or error suffix
#    @_           - every remaining argument is written as a line of its
#                   own, prefixed with the current local time and a tab
# Dies if the file can not be opened for appending.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   my $logfile = "$config{'local root dir'}/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded, giving YYYY-MM-DD HH:MM:SS
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # walk the list, so that a message of '0' or '' does not end it early
   foreach my $message ( @_ ) {
      print {$log} "$now\t$message\n";
   }
   close( $log );
}
15 rodolico 196
 
197
# Run one or more commands on a remote server through ssh, one after the
# other, stopping at the first which is reported as failed.
#
# Parameters:
#    $server - server to connect to
#    @_      - the commands; each is passed to ssh as one argument, so it
#              is interpreted by the shell on the server only and needs no
#              quoting for the local machine. Empty commands are skipped.
# Returns a two element list:
#    ( output, error ) of the command which failed, where error is the
#                      number of the signal which ended ssh, or -1 if ssh
#                      could not be started (output is then the reason)
#    ( '', 0 )         if nothing failed
#
# Only the low seven bits of $? (the signal number) are looked at, so a
# command which merely exits with a non-zero status is not a failure.
# NOTE(review): the caller in this file passes "[ ! -d X ] && mkdir -p X",
# which exits non-zero whenever X already exists; those commands have to
# change before exit statuses can be treated as errors here.
sub runRemoteCommand {
   my $server = shift;
   foreach my $command ( @_ ) {
      next unless defined $command && length $command;
      # list form of the pipe open: no local shell is involved
      open( my $ssh, '-|', 'ssh', $server, $command )
         or return ( "could not run ssh: $!", -1 );
      my $output = do { local $/; <$ssh> };
      $output = '' unless defined $output;
      close( $ssh );   # waits for ssh to finish and sets $?
      if ( my $error = $? & 127 ) {
         return ( $output, $error );
      }
   }
   return ( '', 0 );
}
207
 
11 rodolico 208
 
209
# Remove everything from the trash directory, creating the directory
# first if it does not exist. Entries whose name begins with a dot are
# left alone. Problems are reported as warnings; nothing here is fatal.
#
# Parameters:
#    $trashDir - the directory to empty
#    $age      - accepted, but not used: everything is removed no matter
#                how long it has been in the trash.
#                NOTE(review): the configuration describes 'trash cleanup'
#                as how long to leave things in the trash; honouring that
#                is still to be written.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   # File::Path instead of the mkdir and rm commands, so that the path is
   # never interpreted by a shell (spaces, wildcards, quotes, ...)
   require File::Path;
   my $problems;
   unless ( -d $trashDir ) {
      File::Path::make_path( $trashDir, { error => \$problems } );
      warn "could not create $trashDir\n" if $problems && @$problems;
      return;   # just created, so there is nothing to remove
   }
   my $dh;
   unless ( opendir( $dh, $trashDir ) ) {
      warn "Could not open directory $trashDir: $!\n";
      return;
   }
   my @entries = grep { ! /^\./ } readdir( $dh );
   closedir( $dh );
   foreach my $entry ( @entries ) {
      File::Path::remove_tree( "$trashDir/$entry", { error => \$problems } );
      # each problem is a one element hash of file name => message
      foreach my $problem ( @{ $problems || [] } ) {
         my ( $file, $message ) = %$problem;
         warn "could not remove " . ( length $file ? $file : "$trashDir/$entry" ) . ": $message\n";
      }
   }
}
215
 
15 rodolico 216
# Copy a directory to a remote server with "rsync -a". Whatever rsync
# prints is saved in /tmp/lastrsync.log, replacing the previous contents.
#
# Parameters:
#    $path         - local directory which contains the one to copy
#    $dirname      - name of the directory to copy, without any path
#    $remoteServer - server to copy to
#    $remotePath   - directory on the server to copy it into
# Returns:
#    the empty string on success, otherwise a message which includes the
#    exit status of rsync, the signal which ended it, or the reason it
#    could not be started
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   # list form of the pipe open: the arguments reach rsync exactly as
   # given, without a local shell splitting or expanding them
   my @command = ( 'rsync', '-a', "$path/$dirname", "$remoteServer:$remotePath" );
   open( my $rsync, '-|', @command )
      or return "rsync failed with error :$!";
   my @output = <$rsync>;
   close( $rsync );   # waits for rsync to finish and sets $?
   my $status = $?;
   # keeping the output is only a convenience, so failing to is not an error
   if ( open( my $log, '>', '/tmp/lastrsync.log' ) ) {
      print {$log} @output;
      close( $log );
   }
   return '' if $status == 0;
   # low seven bits of the status: signal number; high byte: exit status
   my $reason = ( $status & 127 ) ? 'signal ' . ( $status & 127 ) : $status >> 8;
   return "rsync failed with error :$reason";
}
223
 
224
 
13 rodolico 225
# Make sure the directory the users drop things into is there. When it has
# to be created, it is opened up to everybody (mode 777).
if ( ! -d $config{'local root dir'} ) {
   qx(mkdir -p $config{'local root dir'});
   qx(chmod 777 $config{'local root dir'});
}

# A 'trash cleanup' of zero means the trash is never emptied
if ( $config{'trash cleanup'} ) {
   cleanTrash( $config{'local trash dir'}, $config{'trash cleanup'} );
}

# Find the directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t", join( "\n\t", @DirectoriesToMove ), "\n";

# Move each of them, one at a time, into the local staging area. The log
# and error files of a directory stay behind, beside where it was.
foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath  = "$config{'local root dir'}/$directory";
   my $logFile   = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   if ( $DEBUG > 3 ) {
      print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n";
   }

   # an error file left by an earlier run blocks the directory
   if ( -e $errorFile ) {
      logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }

   logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( $error ) {
      # note it in the log, and put the reason into the error file
      logit( $directory, $config{'log suffix'},  "Error, move aborted" );
      logit( $directory, $config{'error suffix'},  $error );
   } else {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $directory, $config{'log suffix'},  "Successfully moved to $config{'local staging area'}" );
   }
}
257
 
258
# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server. Every directory in the staging
# area has a checksum file beside it, so the checksum files are what we look
# for; the name of the directory is the file name without its suffix.
my $stagingArea = $config{'local staging area'};
system( 'mkdir', '-p', $stagingArea ) unless -d $stagingArea;
opendir( my $dh, $stagingArea ) or die "Could not read $stagingArea: $!\n";
# matches "<directory name>.<md5 suffix>" and captures the directory name
my $md5Suffix = quotemeta( $config{'md5 suffix'} );
my $md5Pattern = qr/^(.+)\.$md5Suffix$/;
my @toMove = grep { $_ =~ $md5Pattern } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";

# create the target directories on the server if they don't exist
runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
   );

# now, process each directory in turn. Log and error files are named after
# the directory, the same as in the first stage.
foreach my $md5File ( @toMove ) {
   my ( $dirname ) = $md5File =~ $md5Pattern;
   # ignore a checksum file which has no directory to go with it
   next unless defined $dirname && -d "$stagingArea/$dirname";

   # the checksum calculated in the first stage; nothing can be verified
   # without it, so there is no point in copying either
   my $md5sum = slurpFile( "$stagingArea/$md5File" );
   chomp $md5sum;
   unless ( $md5sum ) {
      logit( $dirname, $config{'error suffix'}, "No checksum found in $stagingArea/$md5File" );
      next;
   }

   my $error = copyToRemote( $stagingArea, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      logit( $dirname, $config{'error suffix'}, $error );
      next;
   }
   logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );

   # compare checksums and, if they agree, move the remote copy into place
   unless ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
      logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      next;
   }

   # the server now has a verified copy, so the local one and its checksum
   # file go into the trash
   system( 'mkdir', '-p', $config{'local trash dir'} ) unless -d $config{'local trash dir'};
   foreach my $item ( $dirname, $md5File ) {
      move( "$stagingArea/$item", "$config{'local trash dir'}/$item" )
         or logit( $dirname, $config{'error suffix'}, "Could not move $stagingArea/$item to $config{'local trash dir'}: $!" );
   }
   logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
}

1;