11 |
rodolico |
1 |
#! /usr/bin/env perl
|
|
|
2 |
|
13 |
rodolico |
3 |
# archiveDirectories.pl
|
11 |
rodolico |
4 |
# Author: R. W. Rodolico
|
|
|
5 |
# Date: 20180603
|
|
|
6 |
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
|
|
|
7 |
|
13 |
rodolico |
8 |
# Script designed to be run from a cron job, which checks if any directories
|
12 |
rodolico |
9 |
# are ready to be archived. A directory is defined as a directory under
|
13 |
rodolico |
10 |
# the root of $config{'local root dir'}.
|
11 |
rodolico |
11 |
|
|
|
12 |
# If found, all directories are moved into the staging area and
|
|
|
13 |
# an md5 checksum is calculated for the entire tree.
|
13 |
rodolico |
14 |
# After all directories are moved, a second process looks in the staging
|
11 |
rodolico |
15 |
# area and copies the files (using rsync for reliability) into the staging
|
13 |
rodolico |
16 |
# area of $config{'target server'}. When a directory has been copied, a checksum is
|
11 |
rodolico |
17 |
# calculated on the remote copy and compared to the checksum calculated
|
12 |
rodolico |
18 |
# in the first stage and, if it passes, the directory is then moved to the
|
13 |
rodolico |
19 |
# $config{'target final directory'}.
|
12 |
rodolico |
20 |
# After the copy and move, the directory and its MD5 sum file are moved
|
13 |
rodolico |
21 |
# to the $config{'local trash dir'} (which is cleaned on the next invocation of
|
11 |
rodolico |
22 |
# the script).
|
|
|
23 |
|
13 |
rodolico |
24 |
# Script does NOT handle the situation where directories are being moved
|
11 |
rodolico |
25 |
# while the script is running, so the script should be run at a time
|
|
|
26 |
# when there is no other activity on the server.
|
|
|
27 |
#
|
|
|
28 |
# Version: 1.0
|
|
|
29 |
|
|
|
30 |
use warnings;
|
|
|
31 |
use strict;
|
|
|
32 |
use Cwd qw();
|
|
|
33 |
use File::Copy qw(move);
|
|
|
34 |
use File::Basename;
|
13 |
rodolico |
35 |
use File::stat;
|
11 |
rodolico |
36 |
|
13 |
rodolico |
37 |
# Verbosity of the diagnostic output printed to STDOUT. The print statements
# in this file are guarded by "if $DEBUG", "if $DEBUG > 1" ... "if $DEBUG > 4",
# so 0 silences them and 5 enables all of them.
my $DEBUG = 5;

# Script settings. Every value is looked up by its key string, so the keys
# (including the spelling 'quiesent seconds') must match the lookups exactly.
my %config = (
   # location where directories are put by end users
   'local root dir' => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/ArchiveProjects',
   # location where directories are moved while processing
   # NOTE(review): this key is not read by any code visible in this file
   'local work dir' => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/transfer_area',
   # location where directories are moved when job is completed
   'local trash dir' => "/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Trash",
   # location where directories are moved while being transferred
   'local staging area' => '/home/rodolico/scripts/sysadmin_scripts/archiveProjects/temp/Staging',

   # target server name/ip. Must be accessible via ssh with no password
   'target server' => 'davinci',
   # location on target server where directories are placed while copying
   'target staging area' => '/home/samba/archives/fromDenver/.Staging/',
   # location on target server where directories are finally put
   'target final directory' => '/home/samba/archives/fromDenver/',

   # suffix of the file holding a directory's md5 checksum (joined with a '.')
   'md5 suffix' => 'md5sum',
   # suffix of filename to create showing actions
   'log suffix' => 'log',
   # suffix of error log; an existing error file makes the script skip that directory
   'error suffix' => 'err',
   # how long the checksum file must be unmodified (in seconds) before a
   # directory is considered ready to work on
   'quiesent seconds' => 60*5, # five minutes
   # how long to leave stuff in the trash directory, in seconds.
   # 0 indicates never clean it.
   'trash cleanup' => 86400*7, # 7 days
);

# names (relative to 'local root dir') of the directories found ready to archive
my @DirectoriesToMove;
|
|
|
69 |
|
|
|
70 |
# Read an entire file into a single string.
#
# Parameters: $filename - path of the file to read
# Returns:    the complete contents of the file, or '' if the file does not exist
# Dies:       if the file exists but cannot be opened
sub slurpFile {
   my $filename = shift;
   return '' unless -e $filename;
   # three-argument open with a lexical handle: the name is used literally, so
   # whitespace or mode characters ('<', '>', '|') in it are never interpreted
   open( my $fh, '<', $filename ) or die "could not read $filename: $!\n";
   my @contents = <$fh>;
   close $fh;
   return join( '', @contents );
}
|
|
|
79 |
|
|
|
80 |
# Write the given values to a file, replacing any previous contents.
#
# Parameters: $filename - path of the file to (over)write
#             @values   - remaining arguments, concatenated with no separator
# Returns:    1 on success
# Dies:       if the file cannot be opened or the data cannot be flushed
sub writeData {
   my $filename = shift;
   # three-argument open with a lexical handle: the name is used literally
   open( my $fh, '>', $filename ) or die "could not write to $filename: $!\n";
   print $fh join( '', @_ );
   # buffered write errors (disk full, quota) only show up at close
   close( $fh ) or die "could not write to $filename: $!\n";
   return 1;
}
|
|
|
87 |
|
|
|
88 |
# Look in the drop-off directory and decide which sub-directories are ready
# to be archived.
#
# For every non-hidden sub-directory a checksum of its tree is calculated and
# compared with the one stored in "<dir>.<md5 suffix>" next to it:
#   - no checksum file yet        -> create it and wait for a later run
#   - checksum file too recent    -> wait (younger than 'quiesent seconds')
#   - stored checksum differs     -> store the new one and wait
#   - stored checksum identical   -> the directory is ready
#
# Parameters: $rootDir - directory whose sub-directories are examined
# Returns:    list of directory names (relative to $rootDir) that are ready
# Dies:       if $rootDir cannot be opened
sub getDirectories {
   my $rootDir = shift;
   print "In getDirectories with dir of $rootDir\n" if $DEBUG;
   opendir( my $dh, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @dirs = grep { ! /^\./ && -d "$rootDir/$_" } readdir( $dh );
   closedir( $dh );
   print "Directories Found\n" . join( "\n", @dirs ) . "\n" if $DEBUG > 1;
   my @dirsToMove;
   foreach my $thisDir ( @dirs ) {
      my $fullyQualified = "$rootDir/$thisDir";
      my $md5 = calcMD5( $fullyQualified );
      print "\tFound Dir $fullyQualified with MD5 of $md5\n" if $DEBUG > 2;
      # an empty result (command failed) or -1 (directory vanished) is not a
      # checksum; recording it would let the directory be moved unverified
      if ( $md5 eq '' || $md5 eq '-1' ) {
         print "\t\tSkipping, could not calculate a checksum\n" if $DEBUG > 4;
         next;
      }
      # let's look for the md5 checksum file and compare if it exists
      my $md5Name = "$fullyQualified.$config{'md5 suffix'}";
      unless ( -e $md5Name ) { # doesn't exist, so create it
         print "\t\tCreating MD5 File $md5Name with value $md5\n" if $DEBUG > 4;
         writeData( $md5Name, $md5 );
         next;
      }
      # find out when it was last written to; stat comes from File::stat and
      # returns an object, or nothing if the file disappeared in the meantime
      my $fileInfo = stat( $md5Name );
      next unless $fileInfo;
      my $lastModification = $fileInfo->mtime;
      my $howOld = time - $lastModification;
      print "\tFound existing MD5 file $md5Name written to at $lastModification, or $howOld seconds ago\n" if $DEBUG > 3;
      # and blow it off if it is too recent
      if ( $howOld < $config{'quiesent seconds'} ) {
         print "\t\tBlowing it off because $howOld is less than $config{'quiesent seconds'}\n" if $DEBUG > 4;
         next;
      }
      my $oldMD5 = slurpFile( $md5Name );
      if ( $md5 eq $oldMD5 ) {
         print "\t\tAdding, md5 not changed, $md5 same as $oldMD5\n" if $DEBUG > 4;
         push @dirsToMove, $thisDir;
      } else {
         print "\t\tWaiting, md5 changed, $md5 and $oldMD5\n" if $DEBUG > 4;
         # the checksum has changed, so overwrite the stored one; this also
         # refreshes the file's modification time and restarts the wait
         writeData( $md5Name, $md5 );
      }
   } # foreach
   return @dirsToMove;
}
|
|
|
132 |
|
|
|
133 |
# Calculate the checksum of a directory by
#    1. calculating the md5 checksum of each individual file in the entire tree
#    2. grabbing the first column, which is the checksum
#    3. sorting the result, since find does not always return files in the same order
#    4. doing a checksum of the checksums
#
# Only file contents take part in the result; file names do not.
# This is highly unlikely to give the same answer if any file changes
# in the process of the copy.
#
# Parameters: $directory - path of the directory tree to checksum
# Returns:    the checksum as printed by md5sum, -1 if $directory is not a
#             directory, or '' if the command produced no output
sub calcMD5 {
   my $directory = shift;
   return -1 unless -d $directory;
   # directory names are chosen by end users: close the single-quoted shell
   # word around every embedded quote so the name cannot break the command
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $command = "find '$quoted' -type f -exec md5sum \\{\\} \\; "
               . "| cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";
   my $md5 = `$command`;
   $md5 = '' unless defined $md5;
   chomp $md5;
   return $md5;
}
|
|
|
148 |
|
12 |
rodolico |
149 |
# Moves a directory, and the checksum file that sits next to it
# ("<fullPath>.<md5 suffix>"), into the staging area.
#
# Parameters: $directory - name of the directory (no path)
#             $fullPath  - current full path of the directory
#             $staging   - staging area; created if it does not exist
# Returns:    '' on success, or a message if a directory of that name is
#             already in the staging area (nothing is moved in that case)
# Dies:       if the staging area cannot be created or either move fails
sub moveToStaging {
   my ( $directory, $fullPath, $staging ) = @_;
   # and let's get the md5 file name also
   my $md5File = $fullPath . ".$config{'md5 suffix'}";
   unless ( -d $staging ) {
      # report the real cause here instead of a confusing failure in move()
      mkdir( $staging ) or die "Error creating $staging: $!\n";
   }
   return 'Directory already exists in staging' if -e "$staging/$directory";
   move( $fullPath, "$staging/$directory" ) or die "Error moving $fullPath to $staging/$directory: $!\n";
   move( $md5File, $staging ) or die "Error moving $md5File to $staging: $!\n";
   return '';
}
|
|
|
161 |
|
12 |
rodolico |
162 |
# Verifies the directory is correct on the server by comparing the checksum
# calculated locally with one calculated on the remote server (same recipe as
# calcMD5: md5sum of every file, first column, sorted, md5sum of the result).
# If valid, moves it into the final location on the remote server.
#
# Parameters: $remoteServer  - host to ssh to
#             $remoteStaging - directory on the server holding the copy
#             $remoteTarget  - directory on the server to move the copy into
#             $directory     - name of the directory being validated
#             $checksum      - checksum calculated locally
#             $logFile       - optional; file that failures are logged to.
#                              Without it failures are reported on STDERR.
# Returns:    1 if the checksums match and the remote move succeeded, else 0
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum, $logFile ) = @_;

   # logit() takes the log file as its first argument; calling it with only a
   # message would create a file named after the message and log nothing
   my $report = sub {
      my $message = shift;
      if ( defined $logFile && length $logFile ) {
         logit( $logFile, $message );
      } else {
         warn "$message\n";
      }
   };
   # quote a value as one word for the remote shell
   my $shellQuote = sub {
      ( my $text = shift ) =~ s/'/'\\''/g;
      return "'$text'";
   };

   my $remoteSource = $shellQuote->( "$remoteStaging/$directory" );
   my $remoteCommand = "find $remoteSource -type f -exec md5sum \\{\\} \\; "
                     . "| cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";

   # list form: ssh is started directly, so only the remote shell parses the command
   my $md5sum = '';
   if ( open( my $ssh, '-|', 'ssh', $remoteServer, $remoteCommand ) ) {
      my @output = <$ssh>;
      close $ssh;
      $md5sum = join( '', @output );
   }
   chomp $md5sum;

   # an empty checksum means a calculation failed; never treat that as a match
   unless ( defined $checksum && length $checksum && $checksum eq $md5sum ) {
      $report->( "Invalid checksum moving directory $directory" );
      return 0;
   }

   my $remoteMove = "mv $remoteSource " . $shellQuote->( $remoteTarget );
   if ( system( 'ssh', $remoteServer, $remoteMove ) == 0 ) {
      return 1;
   }
   $report->( "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
|
|
|
182 |
|
|
|
183 |
# Reads the checksum stored for a directory in the staging area, i.e. the
# first line of "<staging>/<directory>.<md5 suffix>".
#
# Parameters: $directory - name of the directory (no path, no suffix)
#             $staging   - staging area holding the checksum file
#             $logFile   - optional; file that a failure is logged to.
#                          Without it the failure is reported on STDERR.
# Returns:    the checksum, or '' if the file is empty or cannot be opened
sub getCheckSum {
   my ( $directory, $staging, $logFile ) = @_;
   # the checksum file is named with a '.' between directory name and suffix
   my $md5File = "$staging/$directory.$config{'md5 suffix'}";
   if ( open( my $fh, '<', $md5File ) ) {
      my $cksum = <$fh>;
      close $fh;
      $cksum = '' unless defined $cksum; # empty file
      chomp $cksum;
      return $cksum;
   }
   my $message = "Could not open $md5File: $!";
   # logit() needs the log file as its first argument
   if ( defined $logFile && length $logFile ) {
      logit( $logFile, $message );
   } else {
      warn "$message\n";
   }
   return '';
}
|
|
|
196 |
|
|
|
197 |
# Simple little logger: appends each message to a log file as one line of
# the form "YYYY-MM-DD HH:MM:SS<tab>message", using local time.
#
# Parameters: $logfile  - file to append to (created if necessary)
#             @messages - remaining arguments, one line each; undefined
#                         values are skipped
# Returns:    1 on success
# Dies:       if the log file cannot be opened or written
sub logit {
   my $logfile = shift;
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded, so the stamps have a fixed width
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $mon+1, $mday, $hour, $min, $sec );
   open( my $log, '>>', $logfile ) or die "could not write to $logfile: $!\n";
   # iterate over the list itself; testing each message for truth would stop
   # at a message of '0' or ''
   foreach my $message ( @_ ) {
      next unless defined $message;
      print $log "$now\t$message\n";
   }
   close( $log ) or die "could not write to $logfile: $!\n";
   return 1;
}
|
|
|
208 |
|
|
|
209 |
# Removes expired entries from the trash directory, creating the directory
# first if it does not exist.
#
# Parameters: $trashDir - the trash directory
#             $age      - retention in seconds. An entry is removed only if
#                         both its modification and inode change times are at
#                         least this old. If omitted or 0, every entry is
#                         removed.
# Entries whose names start with a '.' are never removed.
# Returns:    nothing
sub cleanTrash {
   my ( $trashDir, $age ) = @_;
   # an empty path would turn "<dir>/<entry>" into a path under /
   return unless defined $trashDir && length $trashDir;
   unless ( -d $trashDir ) {
      # list form: no shell involved, so the path needs no quoting
      system( 'mkdir', '-p', '--', $trashDir );
      return; # just created (or could not be), nothing to clean
   }
   my $dh;
   unless ( opendir( $dh, $trashDir ) ) {
      warn "Could not read $trashDir: $!\n";
      return;
   }
   my @entries = grep { ! /^\./ } readdir( $dh );
   closedir( $dh );

   my $now = time;
   foreach my $entry ( @entries ) {
      my $path = "$trashDir/$entry";
      if ( $age ) {
         # CORE::lstat returns the plain list even where File::stat is loaded
         my @info = CORE::lstat( $path );
         next unless @info;
         # [9] is mtime, [10] is ctime; use whichever is more recent
         my $newest = $info[9] > $info[10] ? $info[9] : $info[10];
         next if $now - $newest < $age;
      }
      system( 'rm', '-rf', '--', $path );
   }
   return;
}
|
|
|
215 |
|
13 |
rodolico |
216 |
# make sure the drop-off directory exists and is writable by everybody
unless ( -d $config{'local root dir'} ) {
   # list form of system: no shell, so the path needs no quoting
   system( 'mkdir', '-p', '--', $config{'local root dir'} ) == 0
      or die "Could not create $config{'local root dir'}\n";
   chmod( 0777, $config{'local root dir'} )
      or warn "Could not set permissions on $config{'local root dir'}: $!\n";
}
# clean the trash if $config{ 'trash cleanup' } is non-zero
cleanTrash( $config{'local trash dir'}, $config{ 'trash cleanup' } ) if $config{ 'trash cleanup' };

# Check if we have any directories which are ready to be moved.
@DirectoriesToMove = getDirectories( $config{'local root dir'} );

print "Processing\n\t" . join( "\n\t", @DirectoriesToMove ) . "\n";

# Stage one: move every ready directory (and its checksum file) into the
# local staging area. A log file and, on failure, an error file are written
# next to where the directory was dropped off.
foreach my $directory ( @DirectoriesToMove ) {
   my $fullPath = $config{'local root dir'} . "/$directory";
   my $logFile = "$fullPath.$config{'log suffix'}";
   my $errorFile = "$fullPath.$config{'error suffix'}";
   print "Path for $directory is $fullPath\n\tLog File is $logFile\n\tError file is $errorFile\n" if $DEBUG > 3;
   # an error file left by an earlier run blocks the directory until it is removed
   if ( -e $errorFile ) {
      logit( $logFile, "Aborting because we have a pre-existing error" );
      print "\tAborting because we have a pre-existing error\n" if $DEBUG > 3;
      next;
   }
   logit( $logFile, "Processing $directory" );
   my $error = moveToStaging( $directory, $fullPath, $config{'local staging area'} );
   if ( $error ) {
      logit( $logFile, "Error, move aborted" );
      logit( $errorFile, $error );
   } else {
      print "\tMoved to $config{'local staging area'}\n" if $DEBUG > 3;
      logit( $logFile, "Successfully moved to $config{'local staging area'}" );
   }
}
|
|
|
248 |
|
|
|
249 |
# Stage two: see if there is anything in the staging area that needs to be
# sent to the remote server. Every "<name>.<md5 suffix>" file found there
# stands for a directory <name> waiting to be transferred. Each one is
# copied with rsync, verified on the server and then moved to the trash.
# Progress is appended to "<local root dir>/<name>.<log suffix>", the same
# log file stage one writes to.
#
# NOTE(review): a bare "die;" used to follow the print below and made this
# whole loop unreachable. It has been removed so the transfer runs.
my $stagingArea = $config{'local staging area'};
opendir( my $dh, $stagingArea ) or die "Could not read $stagingArea: $!\n";
# match the configured suffix literally, with its leading dot, at the very end
my $md5Pattern = qr/\.\Q$config{'md5 suffix'}\E\z/;
my @toMove = grep { $_ =~ $md5Pattern } readdir( $dh );
closedir( $dh );
my $targetPath = "$config{'target server'}:$config{'target staging area'}/";
print "Copying the following to $targetPath\n\t" . join ("\n\t", @toMove ) . "\n";

foreach my $md5File ( @toMove ) {
   # directory name is the checksum file name without the suffix
   ( my $directory = $md5File ) =~ s/$md5Pattern//;
   next unless length $directory;
   my $logFile = "$config{'local root dir'}/$directory.$config{'log suffix'}";

   unless ( -d "$stagingArea/$directory" ) {
      logit( $logFile, "Found $md5File in $stagingArea without a directory $directory, skipping" );
      next;
   }
   my $md5sum = getCheckSum( $directory, $stagingArea, $logFile );
   next unless $md5sum;

   # the command goes through the shell for the redirect, so protect any
   # single quote in the user supplied directory name
   ( my $quotedSource = "$stagingArea/$directory" ) =~ s/'/'\\''/g;
   my $rsync = "rsync -av '$quotedSource' $targetPath > /tmp/lastrsync.log";
   logit( $logFile, $rsync );
   if ( system( $rsync ) != 0 ) {
      logit( $logFile, "Unknown error attempting to rsync $directory" );
      next;
   }
   unless ( validateTarget( $config{'target server'}, $config{'target staging area'}, $config{'target final directory'}, $directory, $md5sum, $logFile ) ) {
      logit( $logFile, "Unable to validate target for $directory" );
      next;
   }

   # safely on the server: retire the local copy and its checksum file
   my $trashDir = $config{'local trash dir'};
   system( 'mkdir', '-p', '--', $trashDir ) unless -d $trashDir;
   if (    move( "$stagingArea/$directory", "$trashDir/$directory" )
        && move( "$stagingArea/$md5File", "$trashDir/$md5File" ) ) {
      logit( $logFile, "Successfully moved directory $directory to $config{'target server'}" );
   } else {
      logit( $logFile, "Copied $directory to $config{'target server'} but could not move the local copy to $trashDir: $!" );
   }
}


1;
|