11 |
rodolico |
1 |
#! /usr/bin/env perl
|
|
|
2 |
|
13 |
rodolico |
3 |
# archiveDirectories.pl
|
11 |
rodolico |
4 |
# Author: R. W. Rodolico
|
|
|
5 |
# Date: 20180603
|
|
|
6 |
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
|
|
|
7 |
|
13 |
rodolico |
8 |
# Script designed to be run from a cron job, which checks if any directories
|
12 |
rodolico |
9 |
# are ready to be archived. A directory is defined as a directory under
|
13 |
rodolico |
10 |
# the root of $config{'local root dir'}.
|
11 |
rodolico |
11 |
|
|
|
12 |
# If found, all directories are moved into the staging area and
|
|
|
13 |
# an md5 checksum is calculated for the entire tree.
|
13 |
rodolico |
14 |
# After all directories are moved, a second process looks in the staging
|
11 |
rodolico |
15 |
# area and copies the files (using rsync for reliability) into the staging
|
13 |
rodolico |
16 |
# area of $config{'target server'}. When a directory has been copied, a checksum is
|
11 |
rodolico |
17 |
# calculated on the remote copy and compared to the checksum calculated
|
12 |
rodolico |
18 |
# in the first stage and, if it passes, the directory is then moved to the
|
13 |
rodolico |
19 |
# $config{'target final directory'}.
|
12 |
rodolico |
20 |
# After the copy and move, the directory and its MD5 sum file are moved
|
13 |
rodolico |
21 |
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
|
11 |
rodolico |
22 |
# the script).
|
|
|
23 |
|
13 |
rodolico |
24 |
# Script does NOT handle the situation where directories are being moved
|
11 |
rodolico |
25 |
# while the script is running, so the script should be run at a time
|
|
|
26 |
# when there is no other activity on the server.
|
|
|
27 |
#
|
|
|
28 |
# Version: 1.0
|
|
|
29 |
|
|
|
30 |
use warnings;
|
|
|
31 |
use strict;
|
|
|
32 |
use Cwd qw();
|
|
|
33 |
use File::Copy qw(move);
|
|
|
34 |
use File::Basename;
|
13 |
rodolico |
35 |
use File::stat;
|
11 |
rodolico |
36 |
|
13 |
rodolico |
37 |
# Debug verbosity. Diagnostic prints throughout the script are guarded by
# tests ranging from "if $DEBUG" up to "if $DEBUG > 4", so higher values
# print progressively more detail; 5 enables every diagnostic message.
my $DEBUG = 5;
|
11 |
rodolico |
38 |
|
16 |
rodolico |
39 |
# Script configuration, read everywhere by descriptive keys such as
# 'local root dir' and 'md5 suffix'.
# NOTE(review): presumably filled in when loadConfig() evals the .conf file
# (the eval runs in the scope of this lexical) -- confirm against the .conf.
my %config;
|
11 |
rodolico |
40 |
|
16 |
rodolico |
41 |
# Names of the directories that getDirectories() reports as ready to be
# moved; assigned and iterated over in the main section of the script.
my @DirectoriesToMove;
|
13 |
rodolico |
42 |
|
16 |
rodolico |
43 |
# Locate the configuration file that lives beside this script and evaluate
# it as Perl code.
#
# The file name is the script's own path with a trailing ".pl" replaced by
# ".conf" (or with ".conf" appended when there is no ".pl" extension).
# Dies when the file is missing or when evaluating it raises an error.
sub loadConfig {
   use FindBin;

   my $configFileName = join( '/', $FindBin::Bin, $FindBin::Script );
   # swap the extension; if there was nothing to swap, just append
   $configFileName =~ s/\.pl$/\.conf/ or $configFileName .= '.conf';

   die "Could not locate config file $configFileName\n" unless -e $configFileName;

   # NOTE(review): this is a string eval, so the configuration file is
   # executed as arbitrary Perl with this sub's lexical scope visible to it.
   # Only trusted users should be able to write to that file.
   my $configFileContents = &slurpFile( $configFileName );
   eval( $configFileContents );
   die "Error interpreting $configFileName: $@\n" if $@;
} #loadConfig
|
13 |
rodolico |
58 |
|
|
|
59 |
# Read an entire file into a single string.
#
# Parameters:
#    $filename - path of the file to read
# Returns:
#    the complete contents of the file, or '' when the file does not exist
# Dies when the file exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # Three-argument open with a lexical handle: the file name is taken
   # literally (no mode characters, no trimming of surrounding whitespace)
   # and no global bareword handle is shared with other subs.
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;                       # no record separator: read it all at once
   my $contents = <$fh>;
   close( $fh );
   return defined $contents ? $contents : '';
}
|
|
|
68 |
|
|
|
69 |
# Write one or more values to a file, replacing any previous contents.
#
# Parameters:
#    $filename - path of the file to (over)write
#    @_        - remaining arguments are concatenated, with no separator and
#                no trailing newline, and written to the file
# Returns a true value on success.
# Dies when the file cannot be opened or the data cannot be flushed to it.
sub writeData {
   my $filename = shift;
   # Three-argument open: the name is used exactly as given, so whitespace
   # or mode characters in it cannot redirect the write elsewhere.
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ );
   # buffered write errors (e.g. disk full) only surface at close
   close( $fh ) or die "could not write to $filename: $!\n";
   return 1;
}
|
|
|
76 |
|
|
|
77 |
# Look through the immediate subdirectories of $rootDir and decide which of
# them are ready to be moved.
#
# For every subdirectory whose name does not start with a dot, a checksum of
# the tree is calculated (calcMD5) and compared with the checksum stored in
# the sibling file "<directory>.<md5 suffix>":
#   - no checksum file yet      => create it, directory is not ready
#   - file younger than $config{'quiesent seconds'} => skip for now
#   - stored checksum differs   => store the new one, directory is not ready
#   - stored checksum identical => directory is ready
#
# Parameters:
#    $rootDir - directory whose subdirectories are examined
# Returns:
#    list of subdirectory names (not full paths) that are ready to move
# Dies when $rootDir cannot be opened or the checksum file cannot be stat'ed.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;

   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";

      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }

      # find out when it was last written to; use the File::stat accessor
      # rather than reaching into the object's internal array layout
      my $fileInfo = stat( $md5Name ) or die "Could not stat $md5Name: $!\n";
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }

      my $oldMD5 = slurpFile( $md5Name );
      # tolerate a trailing newline/whitespace, e.g. from a hand-edited file;
      # without this such a directory would never be considered unchanged
      $oldMD5 =~ s/\s+$//;
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # the checksum has changed, so record the new one; rewriting the
         # file also restarts the quiet period measured above
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
|
|
|
121 |
|
|
|
122 |
# Calculate the checksum of a directory by
#    1. calculating the md5 checksum of each individual file in the entire tree
#    2. grabbing the first column of that output, which is the checksum
#    3. sorting the result, since find does not always return files in the
#       same order
#    4. doing a checksum of the sorted checksums
#
# Only file contents take part in the result; file names are dropped in
# step 2. This is highly unlikely to give the same answer if any file
# changes in the process of the copy.
#
# Parameters:
#    $directory - path of the directory tree to checksum
# Returns:
#    the md5 checksum as a hex string, or -1 when $directory is not a directory
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # The path is placed inside single quotes for the shell, so any single
   # quote within it has to be written as '\'' or the command line breaks.
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
|
|
|
137 |
|
12 |
rodolico |
138 |
# Moves a directory into the staging area, together with the file holding
# its md5 sum (same name as the directory plus ".<md5 suffix>").
#
# Parameters:
#    $directory - name of the directory (no path)
#    $fullPath  - current full path of the directory
#    $staging   - staging area the directory is moved into
# Returns:
#    '' on success, or an error message when a directory of the same name is
#    already present in the staging area (nothing is moved in that case)
# Dies when the staging area cannot be created or either move fails.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   # and let's get the md5 file name also
   my $md5File = $fullPath . ".$config{'md5 suffix'}";
   # Report a failed mkdir here, with the real cause, instead of letting it
   # show up later as a misleading "Error moving" message.
   unless ( -d $staging ) {
      mkdir( $staging ) or die "Error creating staging directory $staging: $!\n";
   }
   return 'Directory already exists in staging' if -e "$staging/$directory";
   move( $fullPath, "$staging/$directory" ) or die "Error moving $fullPath to $staging/$directory: $!\n";
   # the destination is a directory, so the file keeps its own name there
   move( $md5File, $staging ) or die "Error moving $md5File to $staging: $!\n";
   return '';
}
|
16 |
rodolico |
150 |
|
|
|
151 |
# Fetch the checksum recorded for a project in the local staging area.
#
# Parameters:
#    $project - name of the staged directory
# Returns:
#    the contents of "<local staging area>/<project>.<md5 suffix>", or ''
#    when that file does not exist
sub getCheckSum {
   my $project = shift;
   my $checkSumFile = join( '/', $config{'local staging area'}, join( '.', $project, $config{'md5 suffix'} ) );
   return ( -e $checkSumFile ) ? &slurpFile( $checkSumFile ) : '';
}
|
|
|
159 |
|
12 |
rodolico |
160 |
# Verifies the directory is correct on the server by comparing the checksum
# calculated locally with one calculated on the remote server. If they
# match, moves the directory into its final location on the remote server.
#
# Parameters:
#    $remoteServer  - host passed to ssh
#    $remoteStaging - staging directory on the remote server
#    $remoteTarget  - final directory on the remote server
#    $directory     - name of the directory being validated
#    $checksum      - checksum calculated locally for the directory
# Returns:
#    1 when the checksums match and the remote move succeeded, 0 otherwise
#    (the reason is written to the directory's error log)
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   # same pipeline the local checksum uses, run on the remote copy
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   chomp $md5sum;

   # logit expects ( project name, suffix, messages ... ); called with only
   # a message it would use the message as a file name and log nothing
   unless ( $checksum eq $md5sum ) {
      logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }

   my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
   return 1 if system( $command ) == 0;

   logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
|
|
|
180 |
|
|
|
181 |
# Simple little logger that appends time-stamped messages to a per-project
# file in the local root directory.
#
# Parameters:
#    $projectName - name of the project (directory) the messages belong to
#    $suffix      - file suffix; the log file is
#                   "<local root dir>/<projectName>.<suffix>"
#    @_           - remaining arguments are the messages, one line each
# Undefined or empty messages are skipped; every other message is written as
# "YYYY-MM-DD HH:MM:SS<tab>message".
# Dies when the log file cannot be opened for appending.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded (minutes used to be space padded)
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # walk the whole list so that a false-but-valid message such as "0" does
   # not end the loop and silently drop the messages after it
   foreach my $message ( @_ ) {
      next unless defined $message && length $message;
      print {$log} "$now\t$message\n";
   }
   close( $log );
}
|
15 |
rodolico |
194 |
|
|
|
195 |
# Run one or more shell commands on a remote server through ssh.
#
# Parameters:
#    $server - host passed to ssh
#    @_      - remaining arguments are the commands, run in order; each one
#              is wrapped in single quotes on the ssh command line
# Returns:
#    ( $output, $error ) for the first command whose status has any of the
#    low seven bits set, otherwise ( '', 0 ) once all commands have run.
# Processing stops at the first empty (false) command.
#
# NOTE(review): "$? & 127" is the signal number that ended ssh, not its exit
# code, so a command that merely exits non-zero is not reported. The call in
# the main section uses "[ ! -d X ] && mkdir ..." commands, which exit
# non-zero when X already exists -- confirm intent before tightening this.
sub runRemoteCommand {
   my ( $server, @commands ) = @_;
   foreach my $command ( @commands ) {
      last unless $command;
      my $output = qx/ssh $server '$command'/;
      my $error = $? & 127;
      return ( $output, $error ) if $error;
   }
   return ( '', 0 );
}
|
|
|
205 |
|
11 |
rodolico |
206 |
|
|
|
207 |
# Empty the trash directory, creating it first if it does not exist.
#
# Parameters:
#    $trashDir - directory to empty
#    $age      - accepted for the caller's benefit but currently not used
# Entries whose names start with a dot are left alone, exactly as the shell
# glob "$trashDir/*" used previously would have left them.
# Dies when $trashDir is undefined, blank or consists only of slashes, or
# when the directory cannot be created or read.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   require File::Path;

   # An empty path used to expand to "rm -fR /*"; refuse to go anywhere
   # near that.
   die "cleanTrash: no usable trash directory given, refusing to delete anything\n"
      unless defined $trashDir && $trashDir =~ m{[^/\s]};

   # Pure Perl instead of shell commands, so spaces or shell metacharacters
   # in the path cannot change what gets created or removed.
   File::Path::make_path( $trashDir ) unless -d $trashDir;

   opendir( my $dh, $trashDir ) or die "Could not open trash directory $trashDir: $!\n";
   my @entries = grep { ! /^\./ } readdir( $dh );
   closedir( $dh );

   File::Path::remove_tree( map { "$trashDir/$_" } @entries ) if @entries;
   return;
}
|
|
|
213 |
|
15 |
rodolico |
214 |
# Copy a directory to the remote server with rsync.
#
# Parameters:
#    $path         - local directory containing the directory to copy
#    $dirname      - name of the directory to copy
#    $remoteServer - host rsync connects to
#    $remotePath   - directory on the remote server to copy into
# Returns:
#    '' on success, otherwise a message describing the failure
# rsync's standard output is written to /tmp/lastrsync.log.
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   # quote the local path for the shell so names containing spaces survive;
   # an embedded single quote has to be written as '\''
   ( my $source = "$path/$dirname" ) =~ s/'/'\\''/g;
   qx"rsync -a '$source' $remoteServer:$remotePath > /tmp/lastrsync.log";

   return '' if $? == 0;
   return "rsync could not be run: $!" if $? == -1;
   # The parentheses matter: "." binds tighter than "&", so without them the
   # whole message was and-ed with 127, giving 0 -- i.e. "no error".
   return 'rsync failed with error :' . ( $? & 127 ) if $? & 127;   # ended by a signal
   return 'rsync failed with error :' . ( $? >> 8 );                # rsync's exit code
}
|
|
|
223 |
|
16 |
rodolico |
224 |
###############################################################################
# Main
###############################################################################
#
# Stage 1: find the directories under the local root that are ready and move
#          each one, with its md5 file, into the local staging area.
# Stage 2: for every md5 file found in the staging area, copy the matching
#          directory to the target server, validate it there and, when that
#          succeeds, move the local copy and its md5 file to the trash.

loadConfig();
#use Data::Dumper;
#print Dumper( \%config );
#die;

unless ( -d $config{'local root dir'} ) {
   `mkdir -p $config{'local root dir'}`;
   `chmod 777 $config{'local root dir'}`;
}
# clean the trash if $config{ 'trash cleanup' } is non-zero
cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } ) if $config{ 'trash cleanup' };

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";

foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = $config{'local root dir'} . "/$directory";
   my $logFile = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;
   # an error file left over from an earlier run blocks the directory
   if ( -e $errorFile ) {
      logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }
   logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( ! $error ) {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $directory, $config{'log suffix'}, "Successfully moved to $config{'local staging area'}" );
   } else {
      logit( $directory, $config{'log suffix'}, "Error, move aborted" );
      logit( $directory, $config{'error suffix'}, $error );
   }
}

# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server
`mkdir -p $config{'local staging area'}` unless -d $config{'local staging area'};
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# get all the .md5 files; the suffix is matched literally and must follow a
# non-empty name and a dot, the captured name being the staged directory
my $md5Pattern = qr/^(.+)\.\Q$config{'md5 suffix'}\E$/;
my @toMove = grep { /$md5Pattern/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";
# create the target directories on the server if they don't exist
runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
);

# now, process each directory in turn
foreach my $md5FileName ( @toMove ) {
   print "Processing $md5FileName\n";
   # strip the suffix to get the name of the staged directory
   my ( $dirname ) = $md5FileName =~ $md5Pattern;
   next unless defined $dirname;

   my $error = copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      logit( $dirname, $config{'error suffix'}, $error );
      next;
   }
   logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );

   my $md5sum = getCheckSum( $dirname );
   next unless $md5sum;

   # NOTE(review): this repeats the copy copyToRemote just made; kept as is,
   # since a second rsync pass only transfers whatever is still different.
   my $rsync = "rsync -av '$config{'local staging area'}/$dirname' $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   logit( $dirname, $config{'log suffix'}, $rsync );
   if ( system ( $rsync ) != 0 ) {
      logit( $dirname, $config{'error suffix'}, "Unknown error attempting to rsync $dirname" );
      next;
   }

   unless ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
      logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      next;
   }

   # safely on the server, so retire the local copy and its md5 file
   `mkdir -p $config{'local trash dir'}` unless -d $config{'local trash dir'};
   move( "$config{'local staging area'}/$dirname", "$config{'local trash dir'}/$dirname" )
      or logit( $dirname, $config{'error suffix'}, "Unable to move $dirname to $config{'local trash dir'}: $!" );
   # the md5 file is "<name>.<suffix>"; leaving out the dot meant it was
   # never found, stayed in staging and was picked up again on the next run
   move( "$config{'local staging area'}/$md5FileName", "$config{'local trash dir'}/$md5FileName" )
      or logit( $dirname, $config{'error suffix'}, "Unable to move $md5FileName to $config{'local trash dir'}: $!" );
   logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
}

1;
|