Subversion Repositories sysadmin_scripts

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 rodolico 1
#! /usr/bin/env perl
2
 
12 rodolico 3
# archiveDirectorys.pl
11 rodolico 4
# Author: R. W. Rodolico
5
# Date: 20180603
6
# Copyright: 2018, Vanduzen Enterprises, Dallas TX
7
 
12 rodolico 8
# Script designed to be run from a cron job, which checks if any directories
9
# are ready to be archived. A directory is defined as a directory under
10
# the root of $localDirectoryDirectory.
11 rodolico 11
 
12
# If found, all directories are moved into the staging area and 
13
# an md5 checksum is calculated for the entire tree.
12 rodolico 14
# After all directories are moved, a second process looks in the staging
11 rodolico 15
# area and copies the files (using rsync for reliability) into the staging
12 rodolico 16
# area of $targetServer. When a directory has been copied, a checksum is
11 rodolico 17
# calculated on the remote copy and compared to the checksum calculated
12 rodolico 18
# in the first stage and, if it passes, the directory is then moved to the 
19
# $targetDirectoryDirectory.
20
# After the copy and move, the directory and its MD5 sum file are moved
11 rodolico 21
# to the $trashDirectory (which is cleaned on the next invocation of
22
# the script).
23
 
12 rodolico 24
# Script does NOT handle the situation where directories are being moved
11 rodolico 25
# while the script is running, so the script should be run at a time
26
# when there is no other activity on the server.
27
#
28
# Version: 1.0
29
 
30
use warnings;
31
use strict;
32
use Cwd qw();
33
use File::Copy qw(move);
34
use File::Basename;
35
 
12 rodolico 36
# location where directories are put by end users; every non-hidden
# subdirectory found here is picked up for archiving
37
my $localDirectoryDirectory = '/home/samba/transfers/denver_to_dallas/Archive_to_DaVinci';
38
# location where directories are moved while processing
# NOTE(review): not referenced anywhere else in the visible script - confirm it is still needed
39
my $rootWorkDirectory = '/home/transfer_work_area';
40
# location where directories (and their checksum files) are moved when job is completed
41
my $trashDirectory = "$localDirectoryDirectory/.Trash";
42
# location where directories are moved while being transferred; the leading
# dot keeps it out of the list of directories waiting to be archived
43
my $stagingArea = "$localDirectoryDirectory/.Staging";
44
# target server name/ip. Must be accessible via ssh with no password
45
my $targetServer = 'davinci';
46
# location on target server where directories are placed while copying
11 rodolico 47
my $targetStagingArea = '/home/samba/archives/fromDenver/.Staging/';
12 rodolico 48
# location on target server where directories are finally put
49
my $targetDirectoryDirectory = '/home/samba/archives/fromDenver/';
50
# suffix of the file holding the md5 checksum of a staged directory
11 rodolico 51
my $md5suffix = '.md5sum';
52
 
12 rodolico 53
# names of the directories found waiting in $localDirectoryDirectory
my @DirectorysToMove;
11 rodolico 54
 
12 rodolico 55
# Return the names (not the full paths) of every non-hidden subdirectory
# found directly beneath the directory passed in.
#
# Parameter: $rootDir - directory to scan
# Returns:   list of subdirectory names, in whatever order readdir gives them
# Dies if $rootDir cannot be opened.
sub getDirectorys {
   my ( $rootDir ) = @_;
   opendir( my $handle, $rootDir ) or die "Could not open directory $rootDir: $!\n";
   my @found;
   while ( defined( my $entry = readdir( $handle ) ) ) {
      next if $entry =~ /^\./;            # skips ., .. and dot-directories
      next unless -d "$rootDir/$entry";   # plain files are not candidates
      push @found, $entry;
   }
   return @found;
}
63
 
64
# Calculate a single checksum for an entire directory tree:
#   1. md5sum every regular file in the tree
#   2. keep only the first column, which is the checksum
#   3. sort the checksums, since find does not always return files in the
#      same order
#   4. md5sum the sorted list of checksums
#
# Only file contents feed the result; file names and paths do not.
# It is highly unlikely to give the same answer if any file changes
# in the process of the copy.
#
# Parameter: $directory - path of the tree to checksum
# Returns:   the checksum as a hex string, or -1 if $directory is not a
#            directory
sub calcMD5 {
   my $directory = shift;
   return -1 unless defined $directory && -d $directory;
   # Directory names are chosen by end users, so make the path safe inside
   # the single quotes below: close the quote, emit an escaped quote, reopen.
   ( my $quoted = $directory ) =~ s/'/'\\''/g;
   my $md5 = `find '$quoted' -type f -exec md5sum \\{\\} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1`;
   chomp $md5;
   return $md5;
}
79
 
12 rodolico 80
# Move a directory from $localDirectoryDirectory into the staging area and
# record its checksum in a file with the same name plus the $md5suffix
# suffix.
#
# Parameters:
#   $directory   - name of the directory, relative to $localDirectoryDirectory
#   $stagingArea - staging directory; created if it does not exist
#   $md5         - checksum to store in the checksum file
# Returns nothing.
# Dies if the staging area or the checksum file cannot be created. A failed
# move is logged and no checksum file is written, so the transfer stage does
# not pick up a directory that never arrived in staging.
sub moveToStaging {
   my ( $directory, $stagingArea, $md5 ) = @_;
   unless ( -d $stagingArea ) {
      mkdir $stagingArea or die "Could not create staging area [$stagingArea]: $!\n";
   }
   unless ( move( "$localDirectoryDirectory/$directory", "$stagingArea/$directory" ) ) {
      logit( "Could not move $directory to $stagingArea: $!" );
      return;
   }
   my $md5File = "$stagingArea/$directory" . $md5suffix;
   open( my $out, '>', $md5File ) or die "Could not create md5sum file [$md5File]: $!\n";
   print {$out} "$md5\n";
   # buffered write errors only surface at close
   close( $out ) or die "Could not write md5sum file [$md5File]: $!\n";
   return;
}
92
 
12 rodolico 93
# Verify a directory arrived intact on the remote server by recomputing its
# checksum there (same find/md5sum pipeline used locally) and comparing it
# with the checksum calculated before the copy. If they match, the directory
# is moved from the remote staging area into its final location.
#
# Parameters:
#   $remoteServer  - host to ssh to
#   $remoteStaging - staging directory on the remote host
#   $remoteTarget  - final directory on the remote host
#   $directory     - name of the directory being validated
#   $checksum      - checksum calculated locally
# Returns 1 when the checksum matched and the remote move succeeded,
# 0 otherwise (the reason is written to the log).
sub validateTarget {
   my ( $remoteServer, $remoteStaging, $remoteTarget, $directory, $checksum ) = @_;

   # Directory names come from end users. Quote them for the remote shell and
   # hand the command to ssh as a single argument (list form), so the local
   # shell never gets a chance to expand $, backticks or quotes in the name.
   my $quote = sub { ( my $text = shift ) =~ s/'/'\\''/g; return "'$text'"; };
   my $remoteSource = $quote->( "$remoteStaging/$directory" );
   my $sumCommand = "find $remoteSource -type f -exec md5sum {} \\; | cut -d' ' -f1 | sort | md5sum | cut -d' ' -f1";

   my $ssh;
   unless ( open( $ssh, '-|', 'ssh', $remoteServer, $sumCommand ) ) {
      logit( "Unable to run ssh to $remoteServer: $!" );
      return 0;
   }
   my $md5sum = join( '', <$ssh> );
   close( $ssh );
   chomp $md5sum;

   # an empty result means the remote command produced nothing; never treat
   # that as a match
   unless ( $md5sum ne '' && $checksum eq $md5sum ) {
      logit( "Invalid checksum moving directory $directory" );
      return 0;
   }

   my $moveCommand = "mv $remoteSource " . $quote->( $remoteTarget );
   if ( system( 'ssh', $remoteServer, $moveCommand ) == 0 ) {
      return 1;
   }
   logit( "Unable to move $directory to $remoteServer:$remoteTarget" );
   return 0;
}
113
 
114
# Read the checksum stored for a staged directory.
#
# Parameters:
#   $directory - name of the staged directory (without the checksum suffix)
#   $staging   - staging area holding the directory and its checksum file
# Returns the first line of <$staging>/<$directory><$md5suffix> without its
# newline, or '' if the file cannot be opened or is empty (both are logged).
sub getCheckSum {
   my ( $directory, $staging ) = @_;
   my $md5File = "$staging/$directory$md5suffix";
   my $in;
   # three-argument open: the name is user supplied and may contain leading
   # or trailing spaces or mode characters
   unless ( open( $in, '<', $md5File ) ) {
      logit( "Could not open $md5File: $!" );
      return '';
   }
   my $cksum = <$in>;
   close( $in );
   unless ( defined $cksum ) {
      logit( "Checksum file $md5File is empty" );
      return '';
   }
   chomp $cksum;
   return $cksum;
}
127
 
128
# Simple little logger: appends one line to /tmp/archiveDirectorys.log in the
# form "YYYY-MM-DD HH:MM:SS<tab>message".
#
# Parameter: $message - text to record
# Dies if the log file cannot be opened for appending.
sub logit {
   my $message = shift;
   my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime( time );
   # every field is zero padded so the timestamp has a fixed width
   my $now = sprintf( "%04d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $mday, $hour, $min, $sec );
   open( my $log, '>>', '/tmp/archiveDirectorys.log' ) or die "could not write to archiveDirectorys.log: $!\n";
   print {$log} "$now\t$message\n";
   close( $log );
}
137
 
138
# Remove everything (except dot-entries, matching the shell's "*") from the
# trash directory. The directory itself is kept.
#
# Parameter: $trashDir - directory to empty
# Returns nothing. Does nothing if $trashDir is undefined, empty, consists
# only of slashes, or cannot be opened.
sub cleanTrash {
   my $trashDir = shift;
   # never let an empty or root path turn this into "rm -fR /*"
   return unless defined $trashDir && $trashDir =~ m{[^/]};
   opendir( my $handle, $trashDir ) or return;
   my @victims = map { "$trashDir/$_" } grep { ! /^\./ } readdir( $handle );
   closedir( $handle );
   return unless @victims;
   # list form: no shell involved, so spaces and quotes in names are safe
   system( 'rm', '-fR', '--', @victims );
   return;
}
143
 
144
 
12 rodolico 145
#&cleanTrash( $trashDirectory ) if &getDirectorysToMove( $trashDirectory );
11 rodolico 146
 
147
 
12 rodolico 148
# Stage 1: check whether any directories are waiting to be moved. Each one
# gets a checksum, is logged, and is moved into the staging area.
@DirectorysToMove = getDirectorys( $localDirectoryDirectory );

foreach my $directory ( @DirectorysToMove ) {
   my $md5 = calcMD5( "$localDirectoryDirectory/$directory" );
   logit( "New Directory $md5\t$directory" );
   moveToStaging( $directory, $stagingArea, $md5 );
}

# Stage 2: see if there is anything in the staging area that needs to be
# sent to the remote server. Work is driven by the checksum files. The
# staging area is only created once something has been staged, so a missing
# directory simply means there is nothing to do.
my @toMove;
if ( -d $stagingArea ) {
   opendir( my $dh, $stagingArea ) or die "Could not read $stagingArea: $!\n";
   # \Q..\E so the dot in the suffix matches only a literal dot
   @toMove = grep { /\Q$md5suffix\E\z/ } readdir( $dh );
   closedir( $dh );
}

# quote a string so the shell treats it as exactly one word
my $shellQuote = sub { ( my $text = shift ) =~ s/'/'\\''/g; return "'$text'"; };

foreach my $md5File ( @toMove ) {
   # the directory name is the checksum file name without its suffix
   ( my $directory = $md5File ) =~ s/\Q$md5suffix\E\z//;
   next if $directory eq '';
   my $md5sum = getCheckSum( $directory, $stagingArea );
   next unless $md5sum;
   # the shell is still needed for the redirect, so quote every path
   my $rsync = 'rsync -av ' . $shellQuote->( "$stagingArea/$directory" ) . ' '
             . $shellQuote->( "$targetServer:$targetStagingArea/" )
             . ' > /tmp/lastrsync.log';
   logit( $rsync );
   if ( system( $rsync ) == 0 ) { # we succeeded
      if ( validateTarget( $targetServer, $targetStagingArea, $targetDirectoryDirectory, $directory, $md5sum ) ) {
         system( 'mkdir', '-p', $trashDirectory ) unless -d $trashDirectory;
         # anything left behind in staging would be sent again on the next
         # run, so failures here must be visible in the log
         move( "$stagingArea/$directory", "$trashDirectory/$directory" )
            or logit( "Could not move $stagingArea/$directory to $trashDirectory: $!" );
         move( "$stagingArea/$md5File", "$trashDirectory/$md5File" )
            or logit( "Could not move $stagingArea/$md5File to $trashDirectory: $!" );
         logit( "Successfully moved directory $directory to $targetServer" );
      } else {
         logit( "Unable to validate target for $directory" );
      }
   } else {
      logit( "Unknown error attempting to rsync $directory" );
   }
}

1;