Subversion Repositories sysadmin_scripts

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
13 rodolico 3
# archiveDirectories.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
7
 
13 rodolico 8
# Script designed to be run from a cron job, which checks if any directories
12 rodolico 9
# are ready to be archived. A directory is defined as a directory under
13 rodolico 10
# the root of $config{'local root dir'}.
11 rodolico 11
 
12
# If found, all directories are moved into the staging area and 
13
# an md5 checksum is calculated for the entire tree.
13 rodolico 14
# After all directories are moved, a second process looks in the staging
11 rodolico 15
# area and copies the files (using rsync for reliability) into the staging
13 rodolico 16
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 rodolico 17
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 18
# in the first stage and, if it passes, the directory is then moved to the 
13 rodolico 19
# $config{'target final directory'}.
12 rodolico 20
# After the copy and move, the directory and its MD5 sum file are moved
13 rodolico 21
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 rodolico 22
# the script).
23
 
13 rodolico 24
# Script does NOT handle the situation where directories are being moved
11 rodolico 25
# while the script is running, so the script should be run at a time
26
# when there is no other activity on the server.
27
#
28
# Version: 1.0
29
 
30
use warnings;
31
use strict;
32
use Cwd qw();
33
use File::Copy qw(move);
34
use File::Basename;
13 rodolico 35
use File::stat;
11 rodolico 36
 
13 rodolico 37
# Diagnostic verbosity: 0 prints nothing extra, each higher level (up to 5)
# enables progressively more detailed messages on STDOUT.
my $DEBUG = 5;

# Site configuration. The keys are looked up by name throughout the script,
# so they must not be renamed (including the spelling of 'quiesent seconds').
my %config = (
   # ---- local paths ----
   # location where directories are put by end users
   'local root dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/ArchiveProjects',
   # location where directories are moved while processing
   'local work dir'         => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/transfer_area',
   # location where directories are moved when job is completed
   'local trash dir'        => "/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Trash",
   # location where directories are moved while being transferred
   'local staging area'     => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Staging',

   # ---- remote side ----
   # target server name/ip. Must be accessible via ssh with no password
   'target server'          => 'davinci',
   # location on target server where directories are placed while copying
   'target staging area'    => '/home/samba/archives/fromDenver/.Staging/',
   # location on target server where directories are finally put
   'target final directory' => '/home/samba/archives/fromDenver/',

   # ---- file name suffixes (appended after a '.') ----
   # suffix of md5 of directories
   'md5 suffix'             => 'md5sum',
   # suffix of filename to create showing actions
   'log suffix'             => 'log',
   # suffix of error log
   'error suffix'           => 'err',

   # ---- timing, in seconds ----
   # how long a directory must be undisturbed before it is ready to work on
   'quiesent seconds'       => 5 * 60,      # five minutes
   # how long to leave stuff in the trash directory. 0 indicates never do it.
   'trash cleanup'          => 7 * 86400,   # 7 days
);
67
 
68
my @DirectoriesToMove;
69
 
70
# simply read the entire file into a string
71
# Read a whole file and hand it back as one string.
#
# Parameters:
#    $filename - path of the file to read
# Returns:
#    the complete contents of the file, or '' if the file does not exist
# Dies:
#    if the file exists but cannot be opened
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # Three-argument open with a lexical handle: the name is used literally
   # (no mode characters or surrounding whitespace are interpreted) and the
   # handle cannot clash with a handle used elsewhere in the script.
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # no record separator, so a single read returns everything
   my $contents = <$fh>;
   close $fh;
   return defined $contents ? $contents : '';
}
79
 
80
# print a value to a file
81
# Create (or overwrite) a file containing the given values.
#
# Parameters:
#    $filename - path of the file to write; any previous content is replaced
#    @_        - remaining arguments are concatenated, with no separator and
#                no trailing newline, and written to the file
# Returns:
#    a true value on success
# Dies:
#    if the file cannot be opened for writing or the write cannot be completed
sub writeData {
   my $filename = shift;
   # Three-argument open with a lexical handle so the file name is taken
   # literally and no global handle is shared with other subs.
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ );
   # Output is buffered, so a failed write may only be reported by close.
   close( $fh ) or die "could not write to $filename: $!\n";
}
87
 
88
# look in the directories to move directory and see if there is anything 
89
# new in there. If so, check MD5 Sum file (create if necessary) and ensure
90
# we have waited long enough and the sums match
91
# Decide which directories directly under $rootDir are ready to be archived.
#
# For every visible (non-dot) sub-directory the tree checksum is calculated
# and compared with the checksum stored in "<directory>.<md5 suffix>":
#   - no checksum file yet: it is created and the directory waits
#   - checksum file younger than $config{'quiesent seconds'}: directory waits
#   - stored checksum differs: the file is rewritten and the directory waits
#   - stored checksum matches: the directory is ready
#
# Parameters:
#    $rootDir - directory whose immediate sub-directories are examined
# Returns:
#    list of directory names (relative to $rootDir) that are ready
# Dies:
#    if $rootDir cannot be opened, or an existing checksum file cannot be
#    examined, read or rewritten
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;

   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # the checksum file sits beside the directory, not inside it
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";

      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }

      # Find out when the checksum file was last written to. File::stat
      # replaces stat() with a version returning an object; use its
      # documented accessor rather than indexing into the object.
      my $fileInfo = stat( $md5Name ) or die "Could not stat $md5Name: $!\n";
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;

      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }

      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # The checksum has changed, so record the new one. Rewriting the
         # file also resets its modification time, which restarts the wait.
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
132
 
133
# calculate the checksum of a directory by
134
# 1. calculate checksum of each individual file in the entire tree
135
# 2. Grab the first column, which is the checksum
136
# 3. sort the result since Linux will not always return them in the same order
137
# 4. do a checksum of the checksums
138
#
139
# This is highly unlikely to give the same answer if any file changes
140
# in the process of the copy
141
# Parameters:
#    $directory - path of the directory tree to checksum
# Returns:
#    the checksum-of-checksums as printed by md5sum, or -1 if $directory is
#    not a directory
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # The name is placed inside single quotes on a shell command line. Close
   # the quote, add an escaped apostrophe and reopen the quote for every
   # apostrophe in the name, so the shell always sees one literal argument
   # whatever characters the directory name contains.
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
148
 
12 rodolico 149
# moves directory to staging area and puts the md5 sum into a file
11 rodolico 150
# with the same name, but a .md5sum suffix
151
# Parameters:
#    $directory - name of the directory (no path)
#    $fullPath  - current full path of that directory
#    $staging   - staging directory to move it into (created if missing)
# Returns:
#    '' on success, or a message when the directory is already in staging
# Dies:
#    if either the directory or its checksum file cannot be moved
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   my $destination = "$staging/$directory";
   # the checksum file sits beside the directory and travels with it
   my $checksumFile = "$fullPath.$config{'md5 suffix'}";

   if ( ! -d $staging ) {
      mkdir $staging;
   }
   if ( -e $destination ) {
      return 'Directory already exists in staging';
   }

   unless ( move( $fullPath, $destination ) ) {
      die "Error moving $fullPath to $destination: $!\n";
   }
   unless ( move( $checksumFile, $staging ) ) {
      die "Error moving $checksumFile to $staging: $!\n";
   }
   return '';
}
161
 
12 rodolico 162
# verifies the directory is correct on the server by comparing the checksums
11 rodolico 163
# calculated locally and remote server. If valid, moves it into the final
164
# location on the remote server
165
# Parameters:
#    $remoteServer  - host to reach with ssh
#    $remoteStaging - directory on the remote host holding the copied tree
#    $remoteTarget  - directory on the remote host the tree is moved into
#    $directory     - name of the tree (no path)
#    $checksum      - checksum calculated locally before the copy
#    $logFile       - optional; file that failures are logged to. When it is
#                     omitted, failures are reported with warn instead.
# Returns:
#    1 if the checksums matched and the remote move succeeded, otherwise 0
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum, $logFile ) = @_;

   # Report a failure and return 0. logit expects the log file as its first
   # argument, so it is only used when the caller supplied one.
   my $fail = sub {
      my $message = shift;
      if ( defined $logFile && length $logFile ) {
         logit( $logFile, $message );
      } else {
         warn "$message\n";
      }
      return 0;
   };
   # Wrap a value in single quotes for the remote shell, escaping any
   # apostrophes it contains.
   my $shellQuote = sub {
      my $text = shift;
      $text =~ s/'/'\\''/g;
      return "'$text'";
   };

   my $stagedPath = $shellQuote->( "$remoteStaging/$directory" );
   my $sumCommand = "find $stagedPath -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";

   # List-form pipe open runs ssh directly, so only the remote shell ever
   # interprets the command and one level of quoting is enough.
   my $md5sum = '';
   if ( open( my $ssh, '-|', 'ssh', $remoteServer, $sumCommand ) ) {
      my $line = <$ssh>;
      $md5sum = $line if defined $line;
      close $ssh;
   }
   chomp $md5sum;

   # No output from the remote side never counts as a match, even if the
   # expected checksum is empty too.
   unless ( length $md5sum && $checksum eq $md5sum ) {
      return $fail->( "Invalid checksum moving directory $directory" );
   }

   my $moveCommand = "mv $stagedPath " . $shellQuote->( $remoteTarget );
   return 1 if system( 'ssh', $remoteServer, $moveCommand ) == 0;
   return $fail->( "Unable to move $directory to $remoteServer:$remoteTarget" );
}
182
 
183
# reads the checksum file
184
# Read the checksum stored in "<staging>/<directory>.<md5 suffix>".
#
# Parameters:
#    $directory - name of the directory the checksum belongs to (no path)
#    $staging   - directory holding both the tree and its checksum file
#    $logFile   - optional; file that a failure is logged to. When it is
#                 omitted, the failure is reported with warn instead.
# Returns:
#    the first line of the checksum file without its line ending, or '' if
#    the file cannot be opened or is empty
sub getCheckSum {
   my ( $directory, $staging, $logFile ) = @_;
   # The checksum file is written as "<directory>.<suffix>", so the
   # separating dot must be part of the name looked for here.
   my $checksumFile = "$staging/$directory.$config{'md5 suffix'}";

   if ( open( my $fh, '<', $checksumFile ) ) {
      my $cksum = <$fh>;
      close $fh;
      return '' unless defined $cksum;   # empty file
      chomp $cksum;
      return $cksum;
   }

   # build the message first so $! still describes the failed open
   my $message = "Could not open $checksumFile: $!";
   # logit expects the log file as its first argument
   if ( defined $logFile && length $logFile ) {
      logit( $logFile, $message );
   } else {
      warn "$message\n";
   }
   return '';
}
196
 
197
# simple little logger that records some information   
198
# Append time-stamped messages to a log file.
#
# Parameters:
#    $logfile - path of the file to append to (created if it does not exist)
#    @_       - remaining arguments are the messages; each is written on its
#               own line as "YYYY-MM-DD HH:MM:SS<tab>message". Undefined
#               messages are skipped.
# Dies:
#    if the log file cannot be opened or written
sub logit {
   my $logfile = shift;
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded so time stamps have a fixed width
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # Walk the argument list rather than shifting until a false value, so
   # messages such as "0" or "" are written as well.
   foreach my $message ( @_ ) {
      next unless defined $message;
      print {$log} "$now\t$message\n";
   }
   close( $log ) or die "could not write to $logfile: $!\n";
}
208
 
209
# simply remove everything from the trash directory
210
# Parameters:
#    $trashDir - directory to empty; created (with parents) if missing
#    $age      - accepted but not used: every visible entry is removed
#                whatever its age.
#                NOTE(review): the configuration describes this value as how
#                long to keep things in the trash - confirm whether entries
#                younger than $age are meant to survive.
# Returns:
#    nothing
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   # An empty path would otherwise turn the removal below into one aimed at
   # the root of the file system.
   return unless defined $trashDir && length $trashDir;

   # List-form system bypasses the shell, so spaces and other special
   # characters in the path are harmless.
   system( 'mkdir', '-p', '--', $trashDir ) unless -d $trashDir;

   opendir( my $dh, $trashDir ) or return;
   # skip dot entries, exactly as the shell pattern "*" does
   my @entries = map { "$trashDir/$_" } grep { ! /^\./ } readdir( $dh );
   closedir( $dh );

   system( 'rm', '-fR', '--', @entries ) if @entries;
   return;
}
215
 
13 rodolico 216
# Make sure the drop-off directory exists and is open to every user. The
# commands are run without a shell so the configured path is used literally.
unless ( -d $config{'local root dir'} ) {
   system( 'mkdir', '-p', '--', $config{'local root dir'} ) == 0
      or die "Could not create $config{'local root dir'}\n";
   chmod( 0777, $config{'local root dir'} )
      or warn "Could not set permissions on $config{'local root dir'}: $!\n";
}
# clean the trash if $config{ 'trash cleanup' } is non-zero
cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } ) if $config{ 'trash cleanup' };

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";
227
 
228
# Move every ready directory into the local staging area. Each directory has
# its own log file and error file beside it in the root directory; an existing
# error file means an earlier run failed, and the directory is left alone.
for my $directory ( @DirectoriesToMove ) {
   my $verbose     = $DEBUG > 3;
   my $stagingArea = $config{'local staging area'};
   my $fullPath    = "$config{'local root dir'}/$directory";
   my $logFile     = join( '.', $fullPath, $config{'log suffix'} );
   my $errorFile   = join( '.', $fullPath, $config{'error suffix'} );
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $verbose;

   if ( -e $errorFile ) {
      logit( $logFile, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $verbose;
      next;
   }

   logit( $logFile, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $stagingArea );
   if ( $error ) {
      # leave an error file behind so later runs skip this directory
      logit( $logFile, "Error, move aborted" );
      logit( $errorFile, $error );
      next;
   }
   print "\tMoved to $stagingArea\n" if $verbose;
   logit( $logFile, "Successfully moved to $stagingArea" );
}
248
 
249
# done with that, now we need to see if there is anything in the staging area
250
# that needs to be sent to the remote server
13 rodolico 251
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
my @directories;
# Checksum files are named "<directory>.<md5 suffix>"; match the dot and the
# suffix literally, anchored at the end of the name.
my $md5Suffix = quotemeta( $config{'md5 suffix'} );
my @toMove = grep { /\.$md5Suffix\z/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";
# NOTE(review): this bare die stops the script here on every run, so nothing
# below is ever executed. It looks like a development stop left in place while
# the transfer phase is unfinished - remove it once that phase is verified.
die;

# wrap a value in single quotes for the shell, escaping embedded apostrophes
my $shellQuote = sub {
   my $text = shift;
   $text =~ s/'/'\\''/g;
   return "'$text'";
};

foreach my $checksumFile ( @toMove ) {
   # strip ".<md5 suffix>" to get the name of the directory it belongs to
   ( my $directory = $checksumFile ) =~ s/\.$md5Suffix\z//;
   # logit needs a log file as its first argument; use the per-directory log
   # the staging phase writes beside the original location
   my $logFile = "$config{'local root dir'}/$directory.$config{'log suffix'}";

   my $md5sum = getCheckSum( $directory, $config{'local staging area'}, $logFile );
   next unless $md5sum;

   my $rsync = 'rsync -av ' . $shellQuote->( "$config{'local staging area'}/$directory" )
             . " $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   logit( $logFile, $rsync );
   if ( system ( $rsync ) == 0 ) { # we succeeded
      if ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $directory, $md5sum, $logFile ) ) {
         system( 'mkdir', '-p', '--', $config{'local trash dir'} ) unless -d $config{'local trash dir'};
         move( "$config{'local staging area'}/$directory", "$config{'local trash dir'}/$directory" )
            or logit( $logFile, "Could not move $directory to $config{'local trash dir'}: $!" );
         # the checksum file keeps its ".<suffix>" name when it goes to the trash
         move( "$config{'local staging area'}/$checksumFile", "$config{'local trash dir'}/$checksumFile" )
            or logit( $logFile, "Could not move $checksumFile to $config{'local trash dir'}: $!" );
         logit( $logFile, "Successfully moved directory $directory to $config{'target server'}" );
      } else {
         logit( $logFile, "Unable to validate target for $directory" );
      }
   } else {
      logit( $logFile, "Unknown error attempting to rsync $directory" );
   }
}
278
 
279
 
280
1;