Subversion Repositories sysadmin_scripts

Rev

Rev 40 | Rev 50 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 rodolico 1
#! /usr/bin/perl -w
2
 
3
#    archiveIMAP: moves old messages from one IMAP account to another
4
#    maintaining hierarchy.
5
#    see http://wiki.linuxservertech.com for additional information
6
#    Copyright (C) 2014  R. W. Rodolico
7
#
8
#    version 1.0, 20140818
9
#       Initial Release
10
#
11
#    version 1.0.1 20140819
12
#        Removed dependancy on Email::Simple
13
#        Allowed 'separator' as an element in either source or target
14
#
15
#    This program is free software: you can redistribute it and/or modify
16
#    it under the terms of the GNU General Public License as published by
17
#    the Free Software Foundation, either version 3 of the License, or
18
#    (at your option) any later version.
19
#
20
#    This program is distributed in the hope that it will be useful,
21
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
22
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
#    GNU General Public License for more details.
24
#
25
#    You should have received a copy of the GNU General Public License
26
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
40 rodolico 27
#
28
#    for required libraries
42 rodolico 29
#    apt-get -y install libnet-imap-simple-ssl-perl libyaml-tiny-perl libhash-merge-simple-perl libclone-perl libdate-manip-perl libemail-simple-perl
14 rodolico 30
 
31
use strict;
32
use warnings;
33
use Net::IMAP::Simple; # libnet-imap-simple-ssl-perl
34
use POSIX; # to get floor and ceil
40 rodolico 35
use YAML::Tiny; # apt-get libyaml-tiny-perl under debian
36
use Clone 'clone'; # libclone-perl
37
use Hash::Merge::Simple qw/ merge clone_merge /; # libhash-merge-simple-perl
42 rodolico 38
use Date::Manip; # libdate-manip-perl
39
use Email::Simple; # libemail-simple-perl
40
use Date::Parse;
14 rodolico 41
 
42 rodolico 42
 
40 rodolico 43
use Data::Dumper;
44
 
14 rodolico 45
# globals
40 rodolico 46
my $CONFIG_FILE_NAME = 'archiveIMAP.yaml';
14 rodolico 47
 
40 rodolico 48
# the default values for everything. These are overridden
49
# by default values in the conf file (.yaml) or by individual
50
# accounts entries.
51
my $config = {
52
   'default' => {
53
      # where the mail is going to
54
      'target' => {
55
                  # these have no defaults. They should be in the configuration file
56
                  # 'password' => 'password',
57
                  # 'username' => 'username',
58
                  # hierarchy can be any combination of <path>, <month> and <year>
59
                  'hierarchy' => '<path>',
60
                  # default target server
61
                  'server' => 'localhost',
62
                 },
63
      # where the mail is coming from
64
      'source' => {
65
                  # these have no defaults. They should be in the configuration file
66
                  # 'password' => 'password',
67
                  # 'username' => 'username',
68
                  # Anything older than this is archived
69
                  # number of days unless followed by 'M' or 'Y', in which case it is
70
                  # multiplied by the number of days in a year (365.2425) or days in a month (30.5)
71
                  # may be a float, ie 1.25Y is the same as 15M
72
                  'age' => '1Y',
73
                  # if set to 1, any folders emptied out will be deleted EXCEPT system folders
74
                  'deleteEmptyFolders' => 0,
75
                  # default source server
76
                  'server' => 'localhost',
77
                  # these folders are considered system folders and never deleted, case insensitive
78
                  'system' => [
79
                                 'Outbox',
80
                                 'Sent Items',
81
                                 'INBOX'
82
                              ],
83
                  # these folders are ignored, ie not processed at all. Case insensitive
84
                  'ignore' => [
85
                                 'Deleted Messages',
86
                                 'Drafts',
87
                                 'Junk E-mail',
88
                                 'Junk',
89
                                 'Trash'
90
                               ],
91
                  # if 1, after successful copy to target, remove from source
92
                  'deleteOnSuccess' => 0
93
               },
94
      # if 1, does a dry run showing what would have happened
95
      'testing' => 0,
96
      # if 0, will not be processed
97
      'enabled' => 1,
98
   }
42 rodolico 99
};
40 rodolico 100
 
101
 
14 rodolico 102
#
103
# find where the script is actually located as cfg should be there
104
#
105
sub getScriptLocation {
106
   use strict;
107
   use File::Spec::Functions qw(rel2abs);
108
   use File::Basename;
109
   return dirname(rel2abs($0));
110
}
111
 
112
#
113
# Read the configuration file from current location 
114
# and return it as a string
115
#
116
sub readConfig {
117
   my $scriptLocation = &getScriptLocation();
118
   if ( -e "$scriptLocation/$CONFIG_FILE_NAME" ) {
40 rodolico 119
      my $yaml = YAML::Tiny->read( "$scriptLocation/$CONFIG_FILE_NAME" );
120
      # use clone_merge to merge conf file into $config
121
      # overwrites anything in $config if it exists in the config file
122
      $config = clone_merge( $config, $yaml->[0] );
123
      return 1;
14 rodolico 124
   }
40 rodolico 125
   return 0;
14 rodolico 126
}
127
 
40 rodolico 128
# merges default into current account, overwriting anything not defined in account with
129
# value from default EXCEPT arrays labeled in @tags, which will be merged together.
130
sub fixupAccount {
131
   my ( $default, $account ) = @_;
132
 
133
   # these arrays, part of source, will be appended together instead of being overwritten
134
   my @tags = ( 'ignore', 'system' );
135
   # merge the tags in question. NOTE: they can only be in source
136
   foreach my $tag ( @tags) {
137
      if ( $default->{'source'}->{$tag} && $account->{'source'}->{$tag} ) {
138
         my @j = ( @{$default->{'source'}->{$tag}}, @{$account->{'source'}->{$tag}} );
139
         $account->{'source'}->{$tag} = \@j;
140
      }
141
   }
142
   # now, merge account and default, with account taking precedence.
143
   return  clone_merge(  $default, $account );
144
   #my $c = clone_merge(  $default, $account );
145
   #return $c;
146
}
147
 
14 rodolico 148
#
149
# Open an IMAP connection
150
#
151
sub openIMAPConnection {
152
   my ( $server, $username, $password ) = @_;
153
   my $imap = Net::IMAP::Simple->new( $server ) ||
154
    die "Unable to connect to IMAP: $Net::IMAP::Simple::errstr\n";
155
   # Log on
156
   if(!$imap->login( $username, $password )){
157
     die "Login failed: " . $imap->errstr . "\n";
158
   }
159
   return $imap;
160
}
161
 
162
#
163
# returns a string in proper format for RFC which is $age days ago
40 rodolico 164
# $age is a float, possibly followed by a single character modifier
14 rodolico 165
#
166
sub getDate {
167
   my $age = shift;
40 rodolico 168
   # allow modifier to age which contains 'Y' (years) or 'M' (months)
169
   # Simply set multiplier to the correct value, then multiply the value
170
   $age = lc( $age );
171
   if ( $age =~ m/([0-9.]+)([a-z])/ ) {
42 rodolico 172
      # ~0 is the maximum integer which can be stored. Shifting right one gives max unsigned integer
173
      my $multiplier = ($2 eq 'y' ? 365.2425 : ( $2 eq 'm' ? 30.5 : ~0 >> 1) );
40 rodolico 174
      $age = floor( $1 * $multiplier);
175
   }
14 rodolico 176
   my @months = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
177
   my @now = localtime(time - 24 * 60 * 60 * $age);
178
   $now[4] = @months[$now[4]];
179
   $now[5] += 1900;
180
   my $date = sprintf( "%d-%s-%d", $now[3],$now[4],$now[5] ) ; # '1-Jan-2014';
181
   return $date;
182
}
183
 
42 rodolico 184
 
185
# calculateTargetFolder
186
# we are passed the target and the message
187
# pattern is carot (^) separated and may contain
188
# special placeholders <path>, <year>, <month>
189
# anything else is inserted directly
190
sub calculateTargetFolder {
191
   my ( $message, $source, $target ) = @_;
192
   # we may be sorting by date
193
   my $email = Email::Simple->new( join( '', @$message ) );
194
   my $msgDate = $email->header('Date');
195
   my @t = strptime( $msgDate );
196
   my $month = $t[4]+1;
197
   $month = '0' . $month if $month < 10;
198
   my $year = $t[5]+1900;
199
   # now, build the path on the new machine
200
   my $sourceFolder = join ( $target->{'separator'}, @{ $source->{'source folder list'} } );
201
   my $targetPattern = join( $target->{'separator'}, @{ $target->{'hierachy pattern'} } );
202
 
203
   $targetPattern =~ s/<path>/$sourceFolder/gi;
204
   $targetPattern =~ s/<month>/$month/gi;
205
   $targetPattern =~ s/<year>/$year/gi;
206
   # return the string we created, separated by
207
   # the delimiters for the target
208
   return $targetPattern;
209
}
210
 
211
 
212
# If folder has messages that match the criteria, move them to target
213
# creates the target folder if necessary
14 rodolico 214
#
42 rodolico 215
sub processFolder {
216
   my ( $source, $target, $folder, $TESTING ) = @_;
217
   #$dateBefore, $sourceDelimiter, $targetDelimiter, $deleteOnSuccess
218
   print "\n=== Processing $folder\n";
219
   my $sourceAccount = $source->{'connection'};
220
   my $targetAccount = $target->{'connection'};
221
   my $numMessages = 0;
222
   $sourceAccount->expunge_mailbox( $folder ); # clean it up so we don't copy deleted messages
223
   $sourceAccount->select( $folder ) or die "Could not connect to folder $folder\n"; # move into the correct folder for the source
224
   my @ids = $sourceAccount->search_sent_before( $source->{'before date'} ); # use sent_before to get the sent date from message
225
#   print join( "\n\t\t\t", @ids ) . "\n";
226
   return 0 unless @ids; # we have nothing to copy, so exit
227
   # make life easier by precalculating some paths as array pointers
228
   my @sourceFolders = split( '\\' . $source->{'separator'}, $folder );
229
   my @pattern = split '\\^', $target->{'hierarchy'};
230
   $source->{'source folder list'} = \@sourceFolders;
231
   $target->{'hierachy pattern'} = \@pattern;
232
#   print "\n\nDumping information\n";
233
#   print "Source Folder $folder\n";
234
#   print "Source Separator $source->{'separator'}\n";
235
#   print Dumper( $source->{'source folder list'} ) . "\n";
236
#   print "Target Separator $target->{'separator'}\n";
237
#   print "Target Hierarchy $target->{'hierarchy'}\n";
238
#   print Dumper( $target->{'hierachy pattern'} ) . "\n";
239
 
240
#   print 'Source => ' . join( "\n", @sourceFolders ) . "\n";
241
#   print 'Pattern => ' . join( "\n", @pattern ) . "\n";
242
 
243
 
244
   # process each message to be done
245
   foreach my $id ( @ids ) {
246
      # get the flags
247
      my @flags = $sourceAccount->msg_flags( $id );
248
      # get the message
249
      my $message = $sourceAccount->get( $id ) or die $sourceAccount->errstr;
250
      # calculate where we are going to move this to
251
      my $targetFolder = &calculateTargetFolder( $message, $source, $target );
252
      if ( $TESTING ) {
253
         print "Would have moved message to $targetFolder\n";
254
         next;
255
      }
256
      if ( $target->{'connection'}->select( $folder ) || &makeFolder( $target->{'connection'}, $folder, $target->{'separator'} ) ) {
257
         if ( $target->{'connection'}->put( $folder, $message, @flags ) ) {
258
            $source->{'connection'}->delete( $id ) if ( $source->{'deleteOnSuccess'} ) ;
259
            $numMessages++;
260
         } else {
261
            die "Could not write to target, aborting\n$target->{'connection'}->errstr\n";
262
         }
263
      } else {
264
         warn "\t\t$folder not found in target and could not create it\n";
265
      }
266
 
267
   }
268
   return $numMessages;
269
}
270
 
271
 
272
 
273
#
14 rodolico 274
# Get a list of all folders to be processed
275
# currently, it just weeds out items in the ignore list
276
#
277
sub getFolders {
278
   my ($imap, $ignore, $separator) = @_;
279
   $separator = '\\' . $separator;
280
   # build a regex that will be used to filter the input
281
   # assuming Trash, Drafts and Junk are in the ignore list
282
   # and a period is the separator, the generated regex is
283
   # (^|(\.))((Trash)|(Drafts)|(Junk))((\.)|$)
284
   # which basically says ignore those folders, but not substrings of them
285
   # ie, Junk02 would not be filtered but Junk would
286
   my $ignoreRegex = "(^|($separator))((" . join( ")\|(", @$ignore ) . "))(($separator)|\$)";
287
   # read all mailboxes and filter them with above regex into @boxes
42 rodolico 288
   my @boxes = grep{ ! /$ignoreRegex/i } $imap->mailboxes;
14 rodolico 289
   return \@boxes;
290
}
291
 
292
#
293
# make a folder on the IMAP account. The folder is assumed to be the
294
# fully qualified path with the correct delimiters
295
#
296
sub makeFolder {
297
   my ($imap, $folder, $delimiter) = @_;
298
 
299
   print "\n\t\tCreating folder $folder";
300
   # you must create the parent folder before creating the children
301
   my $escapedDelimiter = '\\' . $delimiter;
302
   my @folders = split( $escapedDelimiter, $folder );
303
   $folder = '';
304
   # take them from the left and, if they don't exist, create it
305
   while ( my $subdir = shift @folders ) {
306
      $folder .= $delimiter if $folder;
307
      $folder .= $subdir;
308
      next if $imap->select( $folder ); # already created, so look deeper in hierachy
309
      print "\n\t\t\tCreating subfolder $folder";
310
      $imap->create_mailbox( $folder ) || warn $imap->errstr();
311
      $imap->folder_subscribe( $folder ) || die $imap->errstr();
312
      unless ( $imap->select( $folder ) ) { # verify it was created
313
         warn "Unable to create $folder on target account\n";
314
         return 0;
315
      } # unless
316
   } # while
317
   return $folder;
318
}
319
 
320
#
321
# Delete an IMAP folder
322
#
323
sub deleteAFolder {
42 rodolico 324
   my ($source, $folder, $TESTING ) = @_;
325
   my $sourceAccount = $source->{'connection'};
326
   my $separator = $source->{'separator'};
14 rodolico 327
   return 1 if $folder eq 'INBOX'; # do NOT mess with INBOX
328
   return 2 if $sourceAccount->select($folder) > 0; # do not mess with it if it still has messages in it
329
   return 3 if $sourceAccount->mailboxes( $folder . $separator . '*' ); # do not mess with it if it has subfolders
42 rodolico 330
   return 4 if $source->{'system folders'}->{lc $folder}; # do not mess with system folders
331
   print "\n\t\tDeleting empty folder $folder" . ( $TESTING ? ' Dry Run' : '' );
332
   return if $TESTING;
14 rodolico 333
   $sourceAccount->folder_unsubscribe($folder);
334
   $sourceAccount->delete_mailbox( $folder );
335
}
336
 
337
 
40 rodolico 338
# main process loop to handle one account
14 rodolico 339
#
340
sub processAccount {
40 rodolico 341
   my $account = shift;
342
 
42 rodolico 343
   next unless $account->{'enabled'}; # blow it off if it is not enabled
344
   my $TESTING = $account->{'testing'}; # create mini global if we should test this account
14 rodolico 345
 
346
   # open and log into both source and target, and get the separator used
42 rodolico 347
   foreach my $acct ( 'target','source' ) {
348
      $account->{$acct}->{'connection'} = &openIMAPConnection( $account->{$acct}->{'server'}, $account->{$acct}->{'username'}, $account->{$acct}->{'password'} );
349
      unless ( $account->{$acct}->{'connection'} ) {
350
         warn "Unable to open $acct for $account->{$acct}->{username}, aborting move: $!\n";
351
         return -1;
352
      }
353
      $account->{$acct}->{'separator'} = $account->{$acct}->{'connection'}->separator unless $account->{$acct}->{'separator'};
354
   }
14 rodolico 355
 
42 rodolico 356
   # just being set up for convenience and readability
357
   my $source = $account->{'source'};
358
   my $target = $account->{'target'};
359
 
360
   my %temp = map{ lc($_) => 1 } @{$source->{'system'}};
361
   $source->{'system folders'} = \%temp;
362
 
363
   $source->{'before date'} = &getDate( $account->{'age'} );
364
   print "\t" . ( $source->{'deleteOnSuccess'} ? 'Moving' : 'Copying' ) . " all messages before $source->{'before date'}\n";
365
 
14 rodolico 366
   # get a list of all folders to be processed on the source
42 rodolico 367
   $source->{'folders'} = &getFolders( $source->{'connection'}, $source->{'ignore'}, $source->{'separator'} );
368
 
369
 
370
   my $folderList = $source->{'folders'};
14 rodolico 371
   my $count = 0; # count the number of messages processed
372
   my $processedCount = 0; # count the number of folders processed
373
   foreach my $folder ( @$folderList ) {
38 rodolico 374
      my $messages;
42 rodolico 375
      $messages = &processFolder( $source, $target, $folder, $TESTING ); #, $date, $$source{'separator'}, $$target{'separator'}, $deleteOnSuccess );
376
 
377
      $TESTING ? print "Would expunge $folder\n" : $source->{'connection'}->expunge_mailbox( $folder );
14 rodolico 378
      # delete folder if empty and client has requested it.
42 rodolico 379
      ( $TESTING ? print "Would delete $folder\n" : &deleteAFolder( $source, $folder, $TESTING ) ) if $account->{'source'}->{'deleteEmptyFolders'};
14 rodolico 380
      print "\n\t\t$messages processed\n";
381
      $count += $messages;
382
      $processedCount++;
383
      # next line used only for testing. Dies after 5 folders on first account
42 rodolico 384
      last if $processedCount > 5;
14 rodolico 385
   }
42 rodolico 386
 
387
   $source->{'connection'}->quit;
388
   $target->{'connection'}->quit;
14 rodolico 389
   return $count;
390
}
391
 
40 rodolico 392
 
393
 
14 rodolico 394
#######################################################################
395
#                   Main                                              #
396
#######################################################################
397
 
398
# read and evaluate configuration file
40 rodolico 399
&readConfig() || die "could not load config file\n";
400
#print Dumper( $config ); die;
401
foreach my $account ( keys %{$config->{'accounts'}} ) {
402
    $config->{'accounts'}->{$account} = &fixupAccount( $config->{'default'}, $config->{'accounts'}->{$account} );
14 rodolico 403
}
404
 
40 rodolico 405
#print Dumper( $config ) ; die;
14 rodolico 406
 
40 rodolico 407
# just a place to gather some stats
14 rodolico 408
my %processed;
409
$processed{'Accounts'} = 0;
410
$processed{'Messages'} = 0;
411
 
40 rodolico 412
# grab only the accounts for simplicity
413
my $accounts = $config->{'accounts'};
42 rodolico 414
 
415
print Dumper( $config ) ; die;
416
 
40 rodolico 417
# now, process each in turn
418
foreach my $account ( keys %$accounts ) {
14 rodolico 419
   # talk to user
420
   print "Processing account $account\n";
40 rodolico 421
   # do the account. This is the main worker bee
422
   $accounts->{$account}->{'processed'} = &processAccount( $accounts->{$account} );
423
   print "Done, $accounts->{$account}->{'processed'} messages copied\n";
14 rodolico 424
   $processed{'Accounts'}++;
40 rodolico 425
   $processed{'Messages'} += $accounts->{$account}->{'processed'};
42 rodolico 426
   # free up space we allocated since we stored a bunch of stuff in there, and we don't need it anymore
427
   $accounts->{$account} = undef; 
14 rodolico 428
} # foreach loop
429
 
430
print "$processed{'Accounts'} accounts processed, $processed{'Messages'} messages\n";
431
 
40 rodolico 432
 
14 rodolico 433
1;
434