11 |
rodolico |
1 |
#! /usr/bin/env perl
2 |
13 |
rodolico |
3 |
# archiveDirectories.pl
11 |
rodolico |
4 |
# Author: R. W. Rodolico
5 |
# Date: 20180603
6 |
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
7 |
13 |
rodolico |
8 |
# Script designed to be run from a cron job, which checks if any directories
12 |
rodolico |
9 |
# are ready to be archived. A directory is defined as a directory under
13 |
rodolico |
10 |
# the root of $config{'local root dir'}.
11 |
rodolico |
11 |
12 |
# If found, all directories are moved into the staging area and
13 |
# an md5 checksum is calculated for the entire tree.
13 |
rodolico |
14 |
# After all directories are moved, a second process looks in the staging
11 |
rodolico |
15 |
# area and copies the files (using rsync for reliability) into the staging
13 |
rodolico |
16 |
# area of $config{'target server'}. When a directory has been copied, a checksum is
11 |
rodolico |
17 |
# calculated on the remote copy and compared to the checksum calculated
12 |
rodolico |
18 |
# in the first stage and, if it passes, the directory is then moved to the
13 |
rodolico |
19 |
# $config{'target final directory'}.
12 |
rodolico |
20 |
# After the copy and move, the directory and its MD5 sum file are moved
13 |
rodolico |
21 |
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
11 |
rodolico |
22 |
# the script).
23 |
13 |
rodolico |
24 |
# Script does NOT handle the situation where directories are being moved
11 |
rodolico |
25 |
# while the script is running, so the script should be run at a time
26 |
# when there is no other activity on the server.
27 |
28 |
# Version: 1.0
29 |
30 |
use warnings;
31 |
use strict;
32 |
use Cwd qw();
33 |
use File::Copy qw(move);
34 |
use File::Basename;
13 |
rodolico |
35 |
use File::stat;
11 |
rodolico |
36 |
13 |
rodolico |
37 |
my $DEBUG = 5;
11 |
rodolico |
38 |
16 |
rodolico |
39 |
my %config;
11 |
rodolico |
40 |
16 |
rodolico |
41 |
my @DirectoriesToMove;
13 |
rodolico |
42 |
16 |
rodolico |
43 |
# Find the configuration file that lives beside this script and execute it.
# The file name is the script's own path with a trailing .pl replaced by
# .conf (or with .conf appended when there is no .pl to replace).
# Dies if the file does not exist or if evaluating it raises an error.
sub loadConfig {
   use FindBin;
   my $configFileName = "$FindBin::Bin/$FindBin::Script";
   # try the substitution first; only append when nothing was replaced
   $configFileName .= '.conf' unless $configFileName =~ s/\.pl$/\.conf/;

   die "Could not locate config file $configFileName\n" unless -e $configFileName;

   # NOTE(review): the file is run as Perl code via string eval, so it must
   # only ever be writable by trusted users. Presumably it fills in %config;
   # confirm against the shipped .conf file.
   eval( slurpFile( $configFileName ) );
   die "Error interpreting $configFileName: $@\n" if $@;
} #loadConfig
13 |
rodolico |
58 |
59 |
# simply read the entire file into a string
60 |
# Read a whole file and return its contents as a single string.
#   $filename - path of the file to read
# Returns '' when the file does not exist (or no name was given).
# Dies if the file exists but cannot be opened.
sub slurpFile {
   my $filename = shift;
   return '' unless defined $filename && -e $filename;
   # three-argument open so the name is never parsed for mode characters
   # and leading/trailing whitespace in it is preserved
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   local $/;   # slurp mode: one read returns everything
   my $contents = <$fh>;
   close( $fh );
   return defined $contents ? $contents : '';
}
68 |
69 |
# print a value to a file
70 |
# Write a value to a file, replacing whatever the file held before.
#   $filename - path of the file to (over)write
#   @_        - remaining arguments are concatenated and written as-is
#               (no separator and no trailing newline are added)
# Dies if the file cannot be opened, written or closed.
sub writeData {
   my $filename = shift;
   # three-argument open: the name is taken literally
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print {$fh} join( '', @_ ) or die "could not write to $filename: $!\n";
   # buffered write errors only surface at close, so check it too
   close( $fh ) or die "could not write to $filename: $!\n";
}
76 |
77 |
# look in the directories to move directory and see if there is anything
78 |
# new in there. If so, check MD5 Sum file (create if necessary) and ensure
79 |
# we have waited long enough and the sums match
80 |
# Scan $rootDir for project directories that are ready to be archived.
#   $rootDir - directory whose immediate, non-hidden subdirectories are checked
# For every subdirectory a checksum is calculated (calcMD5) and compared with
# the one stored in "<dir>.<md5 suffix>":
#   - no checksum file yet    -> create it and wait for a later run
#   - file younger than $config{'quiesent seconds'} -> wait
#   - stored sum differs      -> store the new sum (restarting the wait)
#   - stored sum is identical -> the directory is ready
# Returns the list of ready directory names (relative to $rootDir).
# Dies if $rootDir cannot be read.
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir ( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }
      # find out when it was last written to; stat() is File::stat's
      # object-returning version, so ask it for the mtime by name
      my $fileInfo = stat( $md5Name );
      next unless $fileInfo;   # vanished between the -e test and stat
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }
      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # the sum is already known to differ, so just overwrite it; the
         # fresh modification time restarts the quiet period
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
121 |
122 |
# calculate the checksum of a directory by
123 |
# 1. calculate checksum of each individual file in the entire tree
124 |
# 2. Grab the first column, which is the checksum
125 |
# 3. sort the result since Linux will not always return them in the same order
126 |
# 4. do a checksum of the checksums
127 |
128 |
# This is highly unlikely to give the same answer if any file changes
129 |
# in the process of the copy
130 |
# Calculate one checksum for an entire directory tree.
#   $directory - path of the tree to checksum
# Runs md5sum on every regular file below $directory, keeps only the
# checksum column, sorts those lines and checksums the sorted list.
# Returns the resulting hex digest, or -1 if $directory is not a directory.
# File names are not part of the result; only file contents are.
sub calcMD5 {
   my $directory = shift;
   return -1 unless defined $directory && -d $directory;
   # the path is placed inside single quotes for the shell, so any single
   # quote in it has to be closed, escaped and reopened
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
137 |
12 |
rodolico |
138 |
# moves directory to staging area and puts the md5 sum into a file
11 |
rodolico |
139 |
# with the same name, but a .md5sum suffix
140 |
# Move a project directory and its checksum file into the staging area.
#   $directory - name of the directory (no path)
#   $fullPath  - current full path of that directory
#   $staging   - staging directory; created if it does not exist
# Returns '' on success or a message describing why nothing was moved.
# Dies if either move fails once it has been attempted.
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   # and let's get the md5 file name also
   my $md5File = $fullPath . ".$config{'md5 suffix'}";
   unless ( -d $staging ) {
      # report a failed mkdir through the normal error channel instead of
      # letting the move below die with a less helpful message
      mkdir( $staging ) or return "Could not create staging area $staging: $!";
   }
   return 'Directory already exists in staging' if -e "$staging/$directory";
   move( $fullPath, "$staging/$directory" ) or die "Error moving $fullPath to $staging/$directory: $!\n";
   move( $md5File, $staging ) or die "Error moving $md5File to $staging: $!\n";
   return '';
}
16 |
rodolico |
150 |
151 |
# Fetch the checksum recorded for a project sitting in the local staging area.
#   $project - directory name of the project
# Reads "<local staging area>/<project>.<md5 suffix>" and returns its
# contents, or '' when that file does not exist.
sub getCheckSum {
   my $project = shift;
   my $checkSumFile = "$config{'local staging area'}/$project.$config{'md5 suffix'}";
   return '' unless -e $checkSumFile;
   return slurpFile( $checkSumFile );
}
159 |
12 |
rodolico |
160 |
# verifies the directory is correct on the server by comparing the checksums
11 |
rodolico |
161 |
# calculated locally and remote server. If valid, moves it into the final
162 |
# location on the remote server
163 |
# Verify a directory copied to the remote staging area and, if it is intact,
# move it to its final location on the remote server.
#   $remoteServer  - host name handed to ssh
#   $remoteStaging - staging directory on the remote server
#   $remoteTarget  - final directory on the remote server
#   $directory     - name of the directory that was copied
#   $checksum      - checksum calculated locally before the copy
# The remote checksum is produced by the same find/md5sum pipeline that
# calcMD5 runs locally. Returns 1 if the checksums match and the remote mv
# succeeded, 0 otherwise; failures are recorded in the project's error log.
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;
   # never treat "no checksum" as something that can match
   unless ( defined $checksum && length $checksum ) {
      logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }
   my $md5sum = `ssh $remoteServer "find '$remoteStaging/$directory' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1"`;
   chomp $md5sum;
   if ( $checksum eq $md5sum ) {
      my $command = "ssh $remoteServer \"mv '$remoteStaging/$directory' '$remoteTarget'\"";
      if ( system( $command ) == 0 ) {
         return 1;
      } else {
         # logit expects ( project, suffix, messages... )
         logit( $directory, $config{'error suffix'}, "Unable to move $directory to $remoteServer:$remoteTarget" );
         return 0;
      }
   } else {
      logit( $directory, $config{'error suffix'}, "Invalid checksum moving directory $directory" );
      return 0;
   }
}
180 |
181 |
# simple little logger that records some information
182 |
# Simple little logger that appends time-stamped lines to a project's log.
#   $projectName - project (directory) name; first part of the log file name
#   $suffix      - log file suffix, e.g. the configured log or error suffix
#   @_           - remaining arguments are the messages, one line each
# The file written is "<local root dir>/<projectName>.<suffix>".
# Dies if the log file cannot be opened for appending.
sub logit {
   my $projectName = shift;
   my $suffix = shift;
   my $logfile = $config{'local root dir'} . "/$projectName.$suffix";
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded so the stamps line up and sort correctly
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # walk the list instead of testing each message for truth, so a message
   # of '0' or '' does not silently end the logging
   foreach my $message ( @_ ) {
      next unless defined $message;
      print {$log} "$now\t$message\n";
   }
   close( $log );
}
15 |
rodolico |
194 |
195 |
# Run one or more shell commands on a remote server through ssh.
#   $server - host name handed to ssh
#   @_      - remaining arguments are the commands, run in order; each is
#             wrapped in single quotes on the ssh command line
# Processing stops at the first empty command. After each command the low
# seven bits of $? (the terminating signal number, per perlvar) are
# examined; if they are non-zero the command's output and that value are
# returned at once. Otherwise returns ( '', 0 ).
# NOTE(review): the remote command's exit code (the high byte of $?) is not
# examined here, so a command that merely exits non-zero is not reported.
sub runRemoteCommand {
   my ( $server, @commands ) = @_;
   foreach my $command ( @commands ) {
      last unless $command;
      my $output = `ssh $server '$command'`;
      my $error = $? & 127;
      return ( $output, $error ) if $error;
   }
   return ( '', 0 );
}
205 |
11 |
rodolico |
206 |
207 |
# simply remove everything from the trash directory
208 |
# Remove everything from the trash directory, creating it first if needed.
#   $trashDir - directory to empty
#   $age      - accepted for the caller's benefit but not used here
# Entries whose names start with a dot are left alone, matching what the
# shell pattern "$trashDir/*" selects. Problems are reported with warn and
# never abort the run.
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   require File::Path;
   # an empty path would otherwise mean "everything under /"
   unless ( defined $trashDir && length $trashDir ) {
      warn "cleanTrash: no trash directory given, nothing removed\n";
      return;
   }
   unless ( -d $trashDir ) {
      File::Path::make_path( $trashDir, { error => \my $problems } );
      if ( @$problems ) {
         warn "cleanTrash: could not create $trashDir\n";
         return;
      }
   }
   my $dh;
   unless ( opendir( $dh, $trashDir ) ) {
      warn "cleanTrash: could not read $trashDir: $!\n";
      return;
   }
   my @entries = grep { ! /^\./ } readdir( $dh );
   closedir( $dh );
   foreach my $entry ( @entries ) {
      # remove_tree handles plain files as well as whole trees
      File::Path::remove_tree( "$trashDir/$entry", { error => \my $problems } );
      warn "cleanTrash: could not remove $trashDir/$entry\n" if @$problems;
   }
   return;
}
213 |
15 |
rodolico |
214 |
# Copy a directory from the local machine to the remote server with rsync.
#   $path         - local directory that contains $dirname
#   $dirname      - name of the directory to copy
#   $remoteServer - host name of the target server
#   $remotePath   - directory on the target server to copy into
# rsync is started directly (no local shell), so names containing spaces or
# shell characters are passed through untouched. Whatever rsync prints is
# saved in /tmp/lastrsync.log.
# Returns '' on success, or a message starting with
# 'rsync failed with error :' followed by rsync's exit code (or the signal
# that ended it).
sub copyToRemote {
   my ( $path, $dirname, $remoteServer, $remotePath ) = @_;
   my @command = ( 'rsync', '-a', "$path/$dirname", "$remoteServer:$remotePath" );
   my $rsync;
   unless ( open( $rsync, '-|', @command ) ) {
      return "rsync failed with error :could not start rsync: $!";
   }
   # keep the last run's output where it has always been kept; not being
   # able to write the log is not a reason to fail the copy
   my $log;
   undef $log unless open( $log, '>', '/tmp/lastrsync.log' );
   while ( my $line = <$rsync> ) {
      print {$log} $line if $log;
   }
   close( $log ) if $log;
   close( $rsync );   # waits for rsync and sets $?
   my $status = $?;
   return '' if $status == 0;
   # low seven bits: terminating signal; high byte: rsync's own exit code
   my $signal = $status & 127;
   return 'rsync failed with error :' . ( $signal ? "signal $signal" : $status >> 8 );
}
223 |
16 |
rodolico |
224 |
225 |
# Main
226 |
15 |
rodolico |
227 |
16 |
rodolico |
228 |
229 |
#use Data::Dumper;
230 |
#print Dumper( \%config );
231 |
232 |
13 |
rodolico |
233 |
# %config is only ever filled in by loadConfig, so make sure that has
# happened before anything below reads it
loadConfig() unless %config;

unless ( -d $config{'local root dir'} ) {
   `mkdir -p $config{'local root dir'}`;
   `chmod 777 $config{'local root dir'}`;
}
# clean the trash if $config{ 'trash cleanup' } is non-zero
cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } ) if $config{ 'trash cleanup' };

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";

# Stage 1: move every ready directory (and its checksum file) into the
# local staging area
foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = $config{'local root dir'} . "/$directory";
   my $logFile = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;
   if ( -e $errorFile ) {
      # an error file left by an earlier run blocks this project until
      # someone has looked at it and removed the file
      logit( $directory, $config{'log suffix'}, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }
   logit( $directory, $config{'log suffix'}, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( ! $error ) {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $directory, $config{'log suffix'}, "Successfully moved to $config{'local staging area'}" );
   } else {
      logit( $directory, $config{'log suffix'}, "Error, move aborted" );
      logit( $directory, $config{'error suffix'}, $error );
   }
}

# done with that, now we need to see if there is anything in the staging area
# that needs to be sent to the remote server
`mkdir -p $config{'local staging area'}` unless -d $config{'local staging area'};
opendir( my $dh, $config{'local staging area'} ) or die "Could not read $config{'local staging area'}: $!\n";
# get all the .md5 files; the suffix is matched literally and must follow a dot
my $md5Pattern = qr/\.\Q$config{'md5 suffix'}\E$/;
my @toMove = grep { /$md5Pattern/ } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";
# create the target directory on the server if it doesn't exist
runRemoteCommand( $config{'target server'},
   "[ ! -d $config{'target staging area'} ] && mkdir -p $config{'target staging area'}",
   "[ ! -d $config{'target final directory'} ] && mkdir -p $config{'target final directory'}"
);

# Stage 2: now, process each staged directory in turn
foreach my $md5File ( @toMove ) {
   print "Processing $md5File\n";
   # the project directory has the same name as its checksum file, minus
   # the ".<md5 suffix>" ending
   ( my $dirname = $md5File ) =~ s/$md5Pattern//;
   my $error = copyToRemote( $config{'local staging area'}, $dirname, $config{'target server'}, $config{'target staging area'} );
   if ( $error ) {
      logit( $dirname, $config{'error suffix'}, $error );
      next;
   } else {
      logit( $dirname, $config{'log suffix'}, "Copied to $config{'target server'}:$config{'target staging area'}" );
   }

   my $md5sum = getCheckSum( $dirname );
   next unless $md5sum;
   # second rsync pass over the same directory before validating
   my $rsync = "rsync -av '$config{'local staging area'}/$dirname' $config{'target server'}:$config{'target staging area'}/ > /tmp/lastrsync.log";
   logit( $dirname, $config{'log suffix'}, $rsync );
   if ( system ( $rsync ) == 0 ) { # we succeeded
      if ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $dirname, $md5sum ) ) {
         `mkdir -p $config{'local trash dir'}` unless -d $config{'local trash dir'};
         # the remote copy is verified, so the local copy and its
         # checksum file can go to the trash
         move( "$config{'local staging area'}/$dirname", "$config{'local trash dir'}/$dirname" )
            or logit( $dirname, $config{'error suffix'}, "Could not move $dirname to $config{'local trash dir'}: $!" );
         move( "$config{'local staging area'}/$md5File", "$config{'local trash dir'}/$md5File" )
            or logit( $dirname, $config{'error suffix'}, "Could not move $md5File to $config{'local trash dir'}: $!" );
         logit( $dirname, $config{'log suffix'}, "Successfully moved directory $dirname to $config{'target server'}" );
      } else {
         logit( $dirname, $config{'error suffix'}, "Unable to validate target for $dirname" );
      }
   } else {
      logit( $dirname, $config{'error suffix'}, "Unknown error attempting to rsync $dirname" );
   }
}
314 |
315 |
316 |