Subversion Repositories sysadmin_scripts

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 rodolico 1
#! /usr/bin/perl -w
2
 
3
#    archiveIMAP: moves old messages from one IMAP account to another
4
#    maintaining hierarchy.
5
#    see http://wiki.linuxservertech.com for additional information
6
#    Copyright (C) 2014  R. W. Rodolico
7
#
8
#    version 1.0, 20140818
9
#       Initial Release
10
#
11
#    version 1.0.1 20140819
12
#        Removed dependancy on Email::Simple
13
#        Allowed 'separator' as an element in either source or target
14
#
50 rodolico 15
#    version 2.0.0 20190817 RWR
16
#        Major revision.
17
#           Config is now YAML
18
#           Default section which will fill in the blanks for anything not filled in on an account, so creating a lot of accounts
19
#              with common values is easier to set up an maintain
20
#           Target folder is configurable on a per account basis, using tags <folder>, <year>, <month> (called hierachy)
21
#
54 rodolico 22
#    version 2.1.0 20190822 RWR
23
#           Added sleeptime parameter to target which makes process sleep a number of seconds between each mail transfer
24
#              We use HiRes, so this can be a decimal number (ie, 0.5 for half a second).
25
#
108 rodolico 26
#    version 2.2.0 20220512 RWR
27
#           Added ability to utilized Dovecot admin account, which can process any account using an admin user. No knowledge of
28
#           client credentials needed. See https://doc.dovecot.org/configuration_manual/authentication/master_users/
54 rodolico 29
#
14 rodolico 30
#    This program is free software: you can redistribute it and/or modify
31
#    it under the terms of the GNU General Public License as published by
32
#    the Free Software Foundation, either version 3 of the License, or
33
#    (at your option) any later version.
34
#
35
#    This program is distributed in the hope that it will be useful,
36
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
37
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
38
#    GNU General Public License for more details.
39
#
40
#    You should have received a copy of the GNU General Public License
41
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
40 rodolico 42
#
43
#    for required libraries
108 rodolico 44
#    apt -y install libnet-imap-simple-ssl-perl libyaml-tiny-perl libhash-merge-simple-perl libclone-perl libdate-manip-perl libemail-simple-perl
14 rodolico 45
 
46
use strict;
47
use warnings;
48
use Net::IMAP::Simple; # libnet-imap-simple-ssl-perl
49
use POSIX; # to get floor and ceil
40 rodolico 50
use YAML::Tiny; # apt-get libyaml-tiny-perl under debian
51
use Clone 'clone'; # libclone-perl
52
use Hash::Merge::Simple qw/ merge clone_merge /; # libhash-merge-simple-perl
42 rodolico 53
use Date::Manip; # libdate-manip-perl
54
use Email::Simple; # libemail-simple-perl
55
use Date::Parse;
54 rodolico 56
use Time::HiRes;
14 rodolico 57
 
42 rodolico 58
 
40 rodolico 59
use Data::Dumper;
60
 
14 rodolico 61
# globals
40 rodolico 62
my $CONFIG_FILE_NAME = 'archiveIMAP.yaml';
14 rodolico 63
 
40 rodolico 64
# the default values for everything. These are overridden
65
# by default values in the conf file (.yaml) or by individual
66
# accounts entries.
67
my $config = {
68
   'default' => {
58 rodolico 69
      # if set, most log stuff is sent to this file
70
      'logfile' => '',
71
      # defines the amount of crap which ends up in the logfile
72
      'loglevel' => 4,
40 rodolico 73
      # where the mail is going to
74
      'target' => {
75
                  # these have no defaults. They should be in the configuration file
76
                  # 'password' => 'password',
77
                  # 'username' => 'username',
78
                  # hierarchy can be any combination of <path>, <month> and <year>
79
                  'hierarchy' => '<path>',
80
                  # default target server
81
                  'server' => 'localhost',
54 rodolico 82
                  # amount of time to sleep between messages, in seconds (float)
83
                  'sleeptime' => 0.5,
40 rodolico 84
                 },
85
      # where the mail is coming from
86
      'source' => {
87
                  # these have no defaults. They should be in the configuration file
88
                  # 'password' => 'password',
89
                  # 'username' => 'username',
90
                  # Anything older than this is archived
91
                  # number of days unless followed by 'M' or 'Y', in which case it is
92
                  # multiplied by the number of days in a year (365.2425) or days in a month (30.5)
93
                  # may be a float, ie 1.25Y is the same as 15M
94
                  'age' => '1Y',
95
                  # if set to 1, any folders emptied out will be deleted EXCEPT system folders
96
                  'deleteEmptyFolders' => 0,
97
                  # default source server
98
                  'server' => 'localhost',
99
                  # these folders are considered system folders and never deleted, case insensitive
100
                  'system' => [
101
                                 'Outbox',
102
                                 'Sent Items',
103
                                 'INBOX'
104
                              ],
105
                  # these folders are ignored, ie not processed at all. Case insensitive
106
                  'ignore' => [
107
                                 'Deleted Messages',
108
                                 'Drafts',
109
                                 'Junk E-mail',
110
                                 'Junk',
111
                                 'Trash'
112
                               ],
113
                  # if 1, after successful copy to target, remove from source
114
                  'deleteOnSuccess' => 0
115
               },
116
      # if 1, does a dry run showing what would have happened
117
      'testing' => 0,
118
      # if 0, will not be processed
119
      'enabled' => 1,
120
   }
42 rodolico 121
};
40 rodolico 122
 
58 rodolico 123
# prints to a log file, if defined. Otherwise, prints to STDOUT
124
sub logit {
125
   my $priority = shift;
126
   return unless $priority <= $config->{'default'}->{'loglevel'};
127
   if ( $config->{'default'}->{'logfile'} ) {
128
      open( my $logfile, '>>', $config->{'default'}->{'logfile'} ) || die "Could not write to $config->{default}->{logfile}: $!\n";
129
      while ( my $message = shift ) {
130
         print $logfile "$message\n";
131
      }
132
      close( $logfile );
133
   } else {
134
      print STDOUT join( "\n", @_ ) . "\n";
135
   }
136
   return;
137
} # logit
40 rodolico 138
 
58 rodolico 139
 
14 rodolico 140
#
141
# find where the script is actually located as cfg should be there
142
#
143
sub getScriptLocation {
144
   use strict;
145
   use File::Spec::Functions qw(rel2abs);
146
   use File::Basename;
147
   return dirname(rel2abs($0));
148
}
149
 
150
#
151
# Read the configuration file from current location 
152
# and return it as a string
153
#
154
sub readConfig {
155
   my $scriptLocation = &getScriptLocation();
156
   if ( -e "$scriptLocation/$CONFIG_FILE_NAME" ) {
40 rodolico 157
      my $yaml = YAML::Tiny->read( "$scriptLocation/$CONFIG_FILE_NAME" );
158
      # use clone_merge to merge conf file into $config
159
      # overwrites anything in $config if it exists in the config file
160
      $config = clone_merge( $config, $yaml->[0] );
161
      return 1;
14 rodolico 162
   }
40 rodolico 163
   return 0;
14 rodolico 164
}
165
 
40 rodolico 166
# merges default into current account, overwriting anything not defined in account with
167
# value from default EXCEPT arrays labeled in @tags, which will be merged together.
168
sub fixupAccount {
169
   my ( $default, $account ) = @_;
170
 
171
   # these arrays, part of source, will be appended together instead of being overwritten
172
   my @tags = ( 'ignore', 'system' );
173
   # merge the tags in question. NOTE: they can only be in source
174
   foreach my $tag ( @tags) {
175
      if ( $default->{'source'}->{$tag} && $account->{'source'}->{$tag} ) {
176
         my @j = ( @{$default->{'source'}->{$tag}}, @{$account->{'source'}->{$tag}} );
177
         $account->{'source'}->{$tag} = \@j;
178
      }
179
   }
180
   # now, merge account and default, with account taking precedence.
181
   return  clone_merge(  $default, $account );
182
   #my $c = clone_merge(  $default, $account );
183
   #return $c;
184
}
185
 
14 rodolico 186
#
187
# Open an IMAP connection
188
#
189
sub openIMAPConnection {
190
   my ( $server, $username, $password ) = @_;
191
   my $imap = Net::IMAP::Simple->new( $server ) ||
192
    die "Unable to connect to IMAP: $Net::IMAP::Simple::errstr\n";
193
   # Log on
194
   if(!$imap->login( $username, $password )){
195
     die "Login failed: " . $imap->errstr . "\n";
196
   }
197
   return $imap;
198
}
199
 
200
#
201
# returns a string in proper format for RFC which is $age days ago
40 rodolico 202
# $age is a float, possibly followed by a single character modifier
14 rodolico 203
#
204
sub getDate {
205
   my $age = shift;
40 rodolico 206
   # allow modifier to age which contains 'Y' (years) or 'M' (months)
207
   # Simply set multiplier to the correct value, then multiply the value
208
   $age = lc( $age );
209
   if ( $age =~ m/([0-9.]+)([a-z])/ ) {
42 rodolico 210
      # ~0 is the maximum integer which can be stored. Shifting right one gives max unsigned integer
211
      my $multiplier = ($2 eq 'y' ? 365.2425 : ( $2 eq 'm' ? 30.5 : ~0 >> 1) );
40 rodolico 212
      $age = floor( $1 * $multiplier);
213
   }
14 rodolico 214
   my @months = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
215
   my @now = localtime(time - 24 * 60 * 60 * $age);
216
   $now[4] = @months[$now[4]];
217
   $now[5] += 1900;
218
   my $date = sprintf( "%d-%s-%d", $now[3],$now[4],$now[5] ) ; # '1-Jan-2014';
219
   return $date;
220
}
221
 
42 rodolico 222
 
223
# calculateTargetFolder
224
# we are passed the target and the message
225
# pattern is carot (^) separated and may contain
226
# special placeholders <path>, <year>, <month>
227
# anything else is inserted directly
228
sub calculateTargetFolder {
229
   my ( $message, $source, $target ) = @_;
230
   # we may be sorting by date
231
   my $email = Email::Simple->new( join( '', @$message ) );
232
   my $msgDate = $email->header('Date');
233
   my @t = strptime( $msgDate );
234
   my $month = $t[4]+1;
235
   $month = '0' . $month if $month < 10;
236
   my $year = $t[5]+1900;
50 rodolico 237
   # also, may need the source hierarchy
238
   my $sourceFolder = join ( $target->{'separator'}, @{ $source->{'source folder list'} } );
42 rodolico 239
   # now, build the path on the new machine
240
   my $targetPattern = join( $target->{'separator'}, @{ $target->{'hierachy pattern'} } );
241
   $targetPattern =~ s/<path>/$sourceFolder/gi;
242
   $targetPattern =~ s/<month>/$month/gi;
243
   $targetPattern =~ s/<year>/$year/gi;
244
   # return the string we created, separated by
245
   # the delimiters for the target
246
   return $targetPattern;
247
}
248
 
249
 
250
# If folder has messages that match the criteria, move them to target
251
# creates the target folder if necessary
14 rodolico 252
#
42 rodolico 253
sub processFolder {
254
   my ( $source, $target, $folder, $TESTING ) = @_;
58 rodolico 255
   &logit( 1, '', "=== Processing $folder" );
42 rodolico 256
   my $sourceAccount = $source->{'connection'};
257
   my $targetAccount = $target->{'connection'};
258
   my $numMessages = 0;
259
   $sourceAccount->expunge_mailbox( $folder ); # clean it up so we don't copy deleted messages
260
   $sourceAccount->select( $folder ) or die "Could not connect to folder $folder\n"; # move into the correct folder for the source
261
   my @ids = $sourceAccount->search_sent_before( $source->{'before date'} ); # use sent_before to get the sent date from message
262
   return 0 unless @ids; # we have nothing to copy, so exit
58 rodolico 263
   &logit( 3, "Found " . scalar( @ids ) . " messages to process" );
42 rodolico 264
   # make life easier by precalculating some paths as array pointers
265
   my @sourceFolders = split( '\\' . $source->{'separator'}, $folder );
266
   my @pattern = split '\\^', $target->{'hierarchy'};
267
   $source->{'source folder list'} = \@sourceFolders;
268
   $target->{'hierachy pattern'} = \@pattern;
269
 
270
   # process each message to be done
56 rodolico 271
   while ( my $id = shift ( @ids ) ) {
42 rodolico 272
      # get the flags
273
      my @flags = $sourceAccount->msg_flags( $id );
274
      # get the message
86 rodolico 275
      my $message = $sourceAccount->get( $id ) or warn "Error getting message ID $id: $sourceAccount->errstr\n";
42 rodolico 276
      # calculate where we are going to move this to
277
      my $targetFolder = &calculateTargetFolder( $message, $source, $target );
278
      if ( $TESTING ) {
58 rodolico 279
         &logit( 0, "Would have " . ( $source->{'deleteOnSuccess'} ? 'moved' : 'copied' )  . " message to $targetFolder" );
42 rodolico 280
         next;
281
      }
50 rodolico 282
      if ( $target->{'connection'}->select( $targetFolder ) || &makeFolder( $target->{'connection'}, $targetFolder, $target->{'separator'} ) ) {
283
         if ( $target->{'connection'}->put( $targetFolder, $message, @flags ) ) {
42 rodolico 284
            $source->{'connection'}->delete( $id ) if ( $source->{'deleteOnSuccess'} ) ;
54 rodolico 285
            Time::HiRes::sleep( $target->{'sleeptime'} ) if $target->{'sleeptime'};
42 rodolico 286
            $numMessages++;
287
         } else {
50 rodolico 288
            die "Could not write to target, aborting\n$targetFolder->{'connection'}->errstr\n";
42 rodolico 289
         }
290
      } else {
50 rodolico 291
         warn "\t\t$targetFolder not found in target and could not create it\n";
42 rodolico 292
      }
293
 
294
   }
295
   return $numMessages;
296
}
297
 
298
 
299
 
300
#
14 rodolico 301
# Get a list of all folders to be processed
302
# currently, it just weeds out items in the ignore list
303
#
304
sub getFolders {
305
   my ($imap, $ignore, $separator) = @_;
306
   $separator = '\\' . $separator;
307
   # build a regex that will be used to filter the input
308
   # assuming Trash, Drafts and Junk are in the ignore list
309
   # and a period is the separator, the generated regex is
310
   # (^|(\.))((Trash)|(Drafts)|(Junk))((\.)|$)
311
   # which basically says ignore those folders, but not substrings of them
312
   # ie, Junk02 would not be filtered but Junk would
313
   my $ignoreRegex = "(^|($separator))((" . join( ")\|(", @$ignore ) . "))(($separator)|\$)";
314
   # read all mailboxes and filter them with above regex into @boxes
42 rodolico 315
   my @boxes = grep{ ! /$ignoreRegex/i } $imap->mailboxes;
14 rodolico 316
   return \@boxes;
317
}
318
 
319
#
320
# make a folder on the IMAP account. The folder is assumed to be the
321
# fully qualified path with the correct delimiters
322
#
323
sub makeFolder {
324
   my ($imap, $folder, $delimiter) = @_;
325
 
58 rodolico 326
   &logit( 3,  "\n\t\tCreating folder $folder" );
14 rodolico 327
   # you must create the parent folder before creating the children
328
   my $escapedDelimiter = '\\' . $delimiter;
329
   my @folders = split( $escapedDelimiter, $folder );
330
   $folder = '';
331
   # take them from the left and, if they don't exist, create it
332
   while ( my $subdir = shift @folders ) {
333
      $folder .= $delimiter if $folder;
334
      $folder .= $subdir;
335
      next if $imap->select( $folder ); # already created, so look deeper in hierachy
58 rodolico 336
      &logit( 1, "\n\t\t\tCreating subfolder $folder" );
14 rodolico 337
      $imap->create_mailbox( $folder ) || warn $imap->errstr();
338
      $imap->folder_subscribe( $folder ) || die $imap->errstr();
339
      unless ( $imap->select( $folder ) ) { # verify it was created
340
         warn "Unable to create $folder on target account\n";
341
         return 0;
342
      } # unless
343
   } # while
344
   return $folder;
345
}
346
 
347
#
348
# Delete an IMAP folder
349
#
350
sub deleteAFolder {
42 rodolico 351
   my ($source, $folder, $TESTING ) = @_;
352
   my $sourceAccount = $source->{'connection'};
353
   my $separator = $source->{'separator'};
14 rodolico 354
   return 1 if $folder eq 'INBOX'; # do NOT mess with INBOX
355
   return 2 if $sourceAccount->select($folder) > 0; # do not mess with it if it still has messages in it
356
   return 3 if $sourceAccount->mailboxes( $folder . $separator . '*' ); # do not mess with it if it has subfolders
42 rodolico 357
   return 4 if $source->{'system folders'}->{lc $folder}; # do not mess with system folders
58 rodolico 358
   &logit( 1, "\n\t\tDeleting empty folder $folder" . ( $TESTING ? ' Dry Run' : '' ) );
359
   return 0 if $TESTING;
360
   # select something other than the folder to be deleted
361
   $sourceAccount->select( 'INBOX' );
362
   if ( $sourceAccount->folder_unsubscribe($folder) ) {
363
      if ( $sourceAccount->delete_mailbox( $folder ) ) {
364
         return 0;
365
      } else {
366
         warn "Error trying to delete mailbox $folder: " . $sourceAccount->errstr . "\n";
367
      }
368
   } else {
369
      warn "Error trying to unsubscribe from $folder: " . $sourceAccount->errstr . "\n";
370
   }
371
   return 0;
14 rodolico 372
}
373
 
374
 
40 rodolico 375
# main process loop to handle one account
14 rodolico 376
#
377
sub processAccount {
40 rodolico 378
   my $account = shift;
379
 
53 rodolico 380
   return 0 unless $account->{'enabled'}; # blow it off if it is not enabled
42 rodolico 381
   my $TESTING = $account->{'testing'}; # create mini global if we should test this account
50 rodolico 382
 
58 rodolico 383
   &logit( 0, "========= Test Mode ========\n" ) if $TESTING;
14 rodolico 384
 
385
   # open and log into both source and target, and get the separator used
42 rodolico 386
   foreach my $acct ( 'target','source' ) {
108 rodolico 387
      # check if we should use admin info instead of the actual account
388
      # If they do not set a password, but there is an admin account, set that up
389
      if ( ! $account->{$acct}->{'password'} && $account->{$acct}->{'adminUsername'} && $account->{$acct}->{'adminPassword'} ) {
390
         $account->{$acct}->{'username'} = $account->{$acct}->{'adminUsername'} . $account->{$acct}->{'adminSeparator'} . $account->{$acct}->{'username'};
391
         $account->{$acct}->{'password'} = $account->{$acct}->{'adminPassword'};
392
      }
42 rodolico 393
      $account->{$acct}->{'connection'} = &openIMAPConnection( $account->{$acct}->{'server'}, $account->{$acct}->{'username'}, $account->{$acct}->{'password'} );
394
      unless ( $account->{$acct}->{'connection'} ) {
395
         warn "Unable to open $acct for $account->{$acct}->{username}, aborting move: $!\n";
396
         return -1;
397
      }
398
      $account->{$acct}->{'separator'} = $account->{$acct}->{'connection'}->separator unless $account->{$acct}->{'separator'};
399
   }
14 rodolico 400
 
42 rodolico 401
   # just being set up for convenience and readability
402
   my $source = $account->{'source'};
403
   my $target = $account->{'target'};
404
 
405
   my %temp = map{ lc($_) => 1 } @{$source->{'system'}};
406
   $source->{'system folders'} = \%temp;
407
 
50 rodolico 408
   $source->{'before date'} = &getDate( $source->{'age'} );
58 rodolico 409
   &logit( 1, "\t" . ( $source->{'deleteOnSuccess'} ? 'Moving' : 'Copying' ) . " all messages before $source->{'before date'}" );
42 rodolico 410
 
14 rodolico 411
   # get a list of all folders to be processed on the source
42 rodolico 412
   $source->{'folders'} = &getFolders( $source->{'connection'}, $source->{'ignore'}, $source->{'separator'} );
413
 
50 rodolico 414
   if ( $TESTING ) {
415
      print Dumper( $source );
416
      print "Source above, press enter to continue: "; my $j = <STDIN>;
417
      print Dumper( $target );
418
      print "Target above, Press enter to continue: "; $j = <STDIN>;
419
   }
42 rodolico 420
 
421
   my $folderList = $source->{'folders'};
14 rodolico 422
   my $count = 0; # count the number of messages processed
423
   my $processedCount = 0; # count the number of folders processed
424
   foreach my $folder ( @$folderList ) {
38 rodolico 425
      my $messages;
42 rodolico 426
      $messages = &processFolder( $source, $target, $folder, $TESTING ); #, $date, $$source{'separator'}, $$target{'separator'}, $deleteOnSuccess );
427
 
428
      $TESTING ? print "Would expunge $folder\n" : $source->{'connection'}->expunge_mailbox( $folder );
14 rodolico 429
      # delete folder if empty and client has requested it.
58 rodolico 430
      if ( $account->{'source'}->{'deleteEmptyFolders'} ) {
431
         my $returnCode = &deleteAFolder( $source, $folder, $TESTING );
432
         &logit( 3, "Return code from deleteAFolder is $returnCode" );
433
      }
434
      &logit( 1,  "$messages processed" ); 
14 rodolico 435
      $count += $messages;
436
      $processedCount++;
437
      # next line used only for testing. Dies after 5 folders on first account
50 rodolico 438
      last if $processedCount > 5 and $TESTING;
14 rodolico 439
   }
42 rodolico 440
 
441
   $source->{'connection'}->quit;
442
   $target->{'connection'}->quit;
14 rodolico 443
   return $count;
444
}
445
 
40 rodolico 446
 
447
 
14 rodolico 448
#######################################################################
449
#                   Main                                              #
450
#######################################################################
451
 
452
# read and evaluate configuration file
40 rodolico 453
&readConfig() || die "could not load config file\n";
58 rodolico 454
 
40 rodolico 455
#print Dumper( $config ); die;
456
foreach my $account ( keys %{$config->{'accounts'}} ) {
457
    $config->{'accounts'}->{$account} = &fixupAccount( $config->{'default'}, $config->{'accounts'}->{$account} );
14 rodolico 458
}
459
 
108 rodolico 460
print Dumper( $config ) ; die;
14 rodolico 461
 
40 rodolico 462
# just a place to gather some stats
14 rodolico 463
my %processed;
464
$processed{'Accounts'} = 0;
465
$processed{'Messages'} = 0;
466
 
40 rodolico 467
# grab only the accounts for simplicity
468
my $accounts = $config->{'accounts'};
42 rodolico 469
 
50 rodolico 470
#die Dumper( $accounts );
42 rodolico 471
 
40 rodolico 472
# now, process each in turn
473
foreach my $account ( keys %$accounts ) {
14 rodolico 474
   # talk to user
58 rodolico 475
   &logit( 1,  "Processing account $account" );
40 rodolico 476
   # do the account. This is the main worker bee
477
   $accounts->{$account}->{'processed'} = &processAccount( $accounts->{$account} );
58 rodolico 478
   &logit( 1,  "Done, $accounts->{$account}->{'processed'} messages copied" );
14 rodolico 479
   $processed{'Accounts'}++;
40 rodolico 480
   $processed{'Messages'} += $accounts->{$account}->{'processed'};
42 rodolico 481
   # free up space we allocated since we stored a bunch of stuff in there, and we don't need it anymore
482
   $accounts->{$account} = undef; 
14 rodolico 483
} # foreach loop
484
 
58 rodolico 485
&logit( 1, "$processed{Accounts} accounts processed, $processed{Messages} messages" );
14 rodolico 486
 
40 rodolico 487
 
14 rodolico 488
1;
489