11 |
rodolico |
1 |
#! /usr/bin/env perl
|
|
|
2 |
|
12 |
rodolico |
3 |
# archiveDirectorys.pl
|
11 |
rodolico |
4 |
# Author: R. W. Rodolico
|
|
|
5 |
# Date: 20180603
|
|
|
6 |
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
|
|
|
7 |
|
12 |
rodolico |
8 |
# Script designed to be run from a cron job, which checks if any directories
|
|
|
9 |
# are ready to be archived. A directory is defined as a directory under
|
|
|
10 |
# the root of $localDirectoryDirectory.
|
11 |
rodolico |
11 |
|
|
|
12 |
# If found, all directories are moved into the staging area and
|
|
|
13 |
# an md5 checksum is calculated for the entire tree.
|
12 |
rodolico |
14 |
# After all directories are moved, a second process looks in the staging
|
11 |
rodolico |
15 |
# area and copies the files (using rsync for reliability) into the staging
|
12 |
rodolico |
16 |
# area of $targetServer. When a directory has been copied, a checksum is
|
11 |
rodolico |
17 |
# calculated on the remote copy and compared to the checksum calculated
|
12 |
rodolico |
18 |
# in the first stage and, if it passes, the directory is then moved to the
|
|
|
19 |
# $targetDirectoryDirectory.
|
|
|
20 |
# After the copy and move, the directory and its MD5 sum file are moved
|
11 |
rodolico |
21 |
# to the $trashDirectory (which is cleaned on the next invocation of
|
|
|
22 |
# the script).
|
|
|
23 |
|
12 |
rodolico |
24 |
# Script does NOT handle the situation where directories are being moved
|
11 |
rodolico |
25 |
# while the script is running, so the script should be run at a time
|
|
|
26 |
# when there is no other activity on the server.
|
|
|
27 |
#
|
|
|
28 |
# Version: 1.0
|
|
|
29 |
|
|
|
30 |
use warnings;
|
|
|
31 |
use strict;
|
|
|
32 |
use Cwd qw();
|
|
|
33 |
use File::Copy qw(move);
|
|
|
34 |
use File::Basename;
|
|
|
35 |
|
12 |
rodolico |
36 |
# Drop-off location: end users place the directories to be archived here.
my $localDirectoryDirectory = "/home/samba/transfers/denver_to_dallas/Archive_to_DaVinci";

# Location where directories are moved while processing.
my $rootWorkDirectory = "/home/transfer_work_area";

# Location where directories are moved once the job has completed.
my $trashDirectory = $localDirectoryDirectory . '/.Trash';

# Location where directories are held while they are being transferred.
my $stagingArea = $localDirectoryDirectory . '/.Staging';

# Target server name/ip.  Must be reachable via ssh without a password.
my $targetServer = "davinci";

# Location on the target server where directories land while copying.
my $targetStagingArea = "/home/samba/archives/fromDenver/.Staging/";

# Location on the target server where directories are finally placed.
my $targetDirectoryDirectory = "/home/samba/archives/fromDenver/";

# Suffix of the file that stores the md5 checksum of a directory.
my $md5suffix = ".md5sum";

# Names of the directories found in the drop-off location on this run.
my @DirectorysToMove = ();
|
11 |
rodolico |
54 |
|
12 |
rodolico |
55 |
# List the sub-directories of $rootDir that are candidates for archiving.
#
# Entries whose name starts with a dot (which covers '.', '..' and the hidden
# work directories) and entries that are not directories are skipped.
# Returns the bare entry names (no path), in readdir order.
# Dies if $rootDir cannot be opened.
sub getDirectorys {
    my ( $rootDir ) = @_;

    opendir( my $handle, $rootDir )
        or die "Could not open directory $rootDir: $!\n";

    my @found;
    while ( defined( my $entry = readdir $handle ) ) {
        next if $entry =~ /^\./;
        next unless -d "$rootDir/$entry";
        push @found, $entry;
    }
    return @found;
}
|
|
|
63 |
|
|
|
64 |
# Calculate a checksum for a whole directory tree by
#   1. calculating the md5sum of each regular file in the tree
#   2. keeping only the first column of the output, which is the checksum
#   3. sorting the result, since find does not always return files in the
#      same order
#   4. taking the md5sum of that sorted list of checksums
#
# It is highly unlikely to give the same answer if any file changes during
# the copy.  File names and tree layout are not part of the checksum.
#
# Parameter: $directory - path of the directory to checksum
# Returns:   the checksum as a hex string, -1 if $directory is not a
#            directory, or '' if the command produced no output at all.
sub calcMD5 {
    my $directory = shift;
    return -1 unless -d $directory;

    # Directory names come from end users, so a name may contain a single
    # quote.  Close the quote, emit an escaped quote and reopen it ('\'') so
    # the name can neither break the command nor inject shell code.
    ( my $quoted = $directory ) =~ s/'/'\\''/g;

    my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
    return '' unless defined $md5;
    chomp $md5;
    return $md5;
}
|
|
|
79 |
|
12 |
rodolico |
80 |
# Move a directory from the drop-off location into the staging area and
# record its checksum in a file with the same name plus the md5 suffix.
#
# Parameters:
#   $directory   - name of the directory (relative to $localDirectoryDirectory)
#   $stagingArea - staging directory; created if it does not exist
#   $md5         - checksum to store for the directory
#
# If the directory cannot be moved, the failure is logged and no checksum
# file is written, so the transfer stage never sees a checksum without a
# matching directory.  Dies if the staging area or the checksum file cannot
# be created.  Returns nothing.
sub moveToStaging {
    my ( $directory, $stagingArea, $md5 ) = @_;

    unless ( -d $stagingArea ) {
        mkdir( $stagingArea )
            or die "Could not create staging area [$stagingArea]: $!\n";
    }

    my $source = "$localDirectoryDirectory/$directory";
    my $target = "$stagingArea/$directory";
    unless ( move( $source, $target ) ) {
        logit( "Could not move $source to $target: $!" );
        return;
    }

    # Lexical handle and three-argument open: the bareword DATA is Perl's
    # built-in __DATA__ handle, and two-argument open misreads odd names.
    my $md5File = $target . $md5suffix;
    open( my $out, '>', $md5File )
        or die "Could not create md5sum file [$md5File]: $!\n";
    print {$out} "$md5\n";
    # Buffered write errors only surface at close time.
    close( $out )
        or die "Could not write md5sum file [$md5File]: $!\n";
    return;
}
|
|
|
92 |
|
12 |
rodolico |
93 |
# Verify that a directory arrived intact on the remote server by comparing
# the checksum calculated locally with one calculated on the remote copy.
# If they match, move the directory from the remote staging area into its
# final location on the remote server.
#
# Parameters:
#   $remoteServer  - host to ssh to
#   $remoteStaging - staging directory on the remote server
#   $remoteTarget  - final directory on the remote server
#   $directory     - name of the directory inside $remoteStaging
#   $checksum      - checksum calculated locally before the copy
#
# Returns 1 when the checksum matches and the remote move succeeds, 0
# otherwise (the reason is logged).
sub validateTarget {
    my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;

    # Quote a value for the remote shell; embedded single quotes become '\''.
    my $shellQuote = sub {
        ( my $text = shift ) =~ s/'/'\\''/g;
        return "'$text'";
    };
    my $remoteSource = $shellQuote->( "$remoteStaging/$directory" );

    # ssh is started without a local shell (list form), so only the remote
    # shell parses the command and a single level of quoting is enough.
    my $findCommand = "find $remoteSource -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";
    my $md5sum = '';
    if ( open( my $ssh, '-|', 'ssh', $remoteServer, $findCommand ) ) {
        $md5sum = join( '', <$ssh> );
        close( $ssh );
        chomp $md5sum;
    }

    # An empty result means the remote command did not run; never accept it.
    if ( $md5sum eq '' || $checksum ne $md5sum ) {
        logit( "Invalid checksum moving directory $directory" );
        return 0;
    }

    my $moveCommand = "mv $remoteSource " . $shellQuote->( $remoteTarget );
    if ( system( 'ssh', $remoteServer, $moveCommand ) != 0 ) {
        logit( "Unable to move $directory to $remoteServer:$remoteTarget" );
        return 0;
    }
    return 1;
}
|
|
|
113 |
|
|
|
114 |
# Read the checksum stored for a staged directory.
#
# Parameters:
#   $directory - name of the staged directory (without the md5 suffix)
#   $staging   - staging directory that holds the checksum file
#
# Returns the first line of "$staging/$directory$md5suffix" without its
# newline, or '' (after logging the reason) if the file cannot be opened or
# is empty.
sub getCheckSum {
    my ( $directory, $staging ) = @_;
    my $md5File = "$staging/$directory$md5suffix";

    # Lexical handle and three-argument open: the bareword DATA is Perl's
    # built-in __DATA__ handle, and two-argument open misreads odd names.
    my $in;
    unless ( open( $in, '<', $md5File ) ) {
        logit( "Could not open $md5File: $!" );
        return '';
    }
    my $cksum = <$in>;
    close( $in );

    # An empty file yields undef; report it rather than chomp-ing undef.
    unless ( defined $cksum ) {
        logit( "Checksum file $md5File is empty" );
        return '';
    }
    chomp $cksum;
    return $cksum;
}
|
|
|
127 |
|
|
|
128 |
# Simple logger: appends one line to /tmp/archiveDirectorys.log consisting of
# a "YYYY-MM-DD HH:MM:SS" local timestamp, a tab and the message.
#
# Parameter: $message - text to record
# Dies if the log file cannot be opened for appending.  Returns nothing.
sub logit {
    my $message = shift;
    my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );

    # Every field is zero padded; the minutes field used to be "%-2d", which
    # printed e.g. "12:5 :03" instead of "12:05:03".
    my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec );

    open( my $log, '>>', '/tmp/archiveDirectorys.log' )
        or die "could not write to archiveDirectorys.log: $!\n";
    print {$log} "$now\t$message\n";
    close( $log );
    return;
}
|
|
|
137 |
|
|
|
138 |
# Remove everything from the trash directory.
#
# Parameter: $trashDir - directory whose contents are deleted
#
# Entries whose name starts with a dot are left alone, exactly as the shell
# glob "$trashDir/*" would leave them.  The directory itself is kept.
# Does nothing if $trashDir is empty, undefined or not a directory: an empty
# value used to expand to "rm -fR /*".  Returns nothing.
sub cleanTrash {
    my $trashDir = shift;
    return unless defined $trashDir && length $trashDir;
    return unless -d $trashDir;

    opendir( my $dh, $trashDir ) or return;
    my @victims = map { "$trashDir/$_" } grep { !/^\./ } readdir( $dh );
    closedir( $dh );
    return unless @victims;

    # List form: no shell is involved, so spaces and quotes in names are safe.
    system( 'rm', '-fR', '--', @victims );
    return;
}
|
|
|
143 |
|
|
|
144 |
|
12 |
rodolico |
145 |
# Emptying the trash left by the previous run is currently disabled.  (The
# sub that lists sub-directories is getDirectorys.)
#cleanTrash( $trashDirectory ) if getDirectorys( $trashDirectory );

# Stage 1: checksum every directory found in the drop-off location and move
# it, together with its checksum file, into the staging area.
@DirectorysToMove = getDirectorys( $localDirectoryDirectory );

foreach my $directory ( @DirectorysToMove ) {
    my $md5 = calcMD5( "$localDirectoryDirectory/$directory" );

    # calcMD5 returns -1 or an empty string when it could not do its job.
    # Staging such a directory would store a checksum that can never match.
    unless ( defined $md5 && $md5 =~ /\A[0-9a-f]{32}\z/ ) {
        logit( "Could not calculate checksum for $directory, skipping" );
        next;
    }
    logit( "New Directory $md5\t$directory" );
    moveToStaging( $directory, $stagingArea, $md5 );
}

# Stage 2: everything in the staging area that has a checksum file is sent to
# the remote server, validated there, and then moved to the trash.
opendir( my $dh, $stagingArea ) or die "Could not read $stagingArea: $!\n";
# \Q...\E: the dot in the suffix must match a literal dot only.
my @toMove = grep { /\Q$md5suffix\E\z/ } readdir( $dh );
closedir( $dh );

foreach my $md5File ( @toMove ) {
    # Strip the suffix to get the directory name.  A file named just
    # ".md5sum" has no directory; using it would rsync the whole staging area.
    my $directory = substr( $md5File, 0, length( $md5File ) - length( $md5suffix ) );
    next if $directory eq '';

    my $md5sum = getCheckSum( $directory, $stagingArea );
    next unless $md5sum;

    # Names come from end users; escape single quotes for the shell.
    ( my $source = "$stagingArea/$directory" ) =~ s/'/'\\''/g;
    my $rsync = "rsync -av '$source' $targetServer:$targetStagingArea/ > /tmp/lastrsync.log";
    logit( $rsync );
    if ( system( $rsync ) != 0 ) {
        logit( "Unknown error attempting to rsync $directory" );
        next;
    }

    unless ( validateTarget( $targetServer, $targetStagingArea, $targetDirectoryDirectory, $directory, $md5sum ) ) {
        logit( "Unable to validate target for $directory" );
        next;
    }

    unless ( -d $trashDirectory || system( 'mkdir', '-p', $trashDirectory ) == 0 ) {
        logit( "Could not create $trashDirectory" );
        next;
    }

    # The remote copy is verified; retire the local copy and its checksum.
    # Failures are logged because anything left in staging is sent again on
    # the next run.
    unless ( move( "$stagingArea/$directory", "$trashDirectory/$directory" ) ) {
        logit( "Could not move $stagingArea/$directory to $trashDirectory: $!" );
        next;
    }
    unless ( move( "$stagingArea/$md5File", "$trashDirectory/$md5File" ) ) {
        logit( "Could not move $stagingArea/$md5File to $trashDirectory: $!" );
        next;
    }
    logit( "Successfully moved directory $directory to $targetServer" );
}

1;
|