Subversion Repositories sysadmin_scripts

Rev

Rev 38 | Rev 42 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 rodolico 1
#! /usr/bin/perl -w
2
 
3
#    archiveIMAP: moves old messages from one IMAP account to another
4
#    maintaining hierarchy.
5
#    see http://wiki.linuxservertech.com for additional information
6
#    Copyright (C) 2014  R. W. Rodolico
7
#
8
#    version 1.0, 20140818
9
#       Initial Release
10
#
11
#    version 1.0.1 20140819
12
#        Removed dependancy on Email::Simple
13
#        Allowed 'separator' as an element in either source or target
14
#
15
#    This program is free software: you can redistribute it and/or modify
16
#    it under the terms of the GNU General Public License as published by
17
#    the Free Software Foundation, either version 3 of the License, or
18
#    (at your option) any later version.
19
#
20
#    This program is distributed in the hope that it will be useful,
21
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
22
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
#    GNU General Public License for more details.
24
#
25
#    You should have received a copy of the GNU General Public License
26
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
40 rodolico 27
#
28
#    for required libraries
29
#    apt-get -y install libnet-imap-simple-ssl-perl libyaml-tiny-perl libhash-merge-simple-perl libclone-perl
14 rodolico 30
 
31
use strict;
32
use warnings;
33
use Net::IMAP::Simple; # libnet-imap-simple-ssl-perl
34
use POSIX; # to get floor and ceil
40 rodolico 35
use YAML::Tiny; # apt-get libyaml-tiny-perl under debian
36
use Clone 'clone'; # libclone-perl
37
use Hash::Merge::Simple qw/ merge clone_merge /; # libhash-merge-simple-perl
14 rodolico 38
 
40 rodolico 39
use Data::Dumper;
40
 
14 rodolico 41
# globals
40 rodolico 42
my $CONFIG_FILE_NAME = 'archiveIMAP.yaml';
14 rodolico 43
 
40 rodolico 44
# the default values for everything. These are overridden
45
# by default values in the conf file (.yaml) or by individual
46
# accounts entries.
47
my $config = {
48
   'default' => {
49
      # where the mail is going to
50
      'target' => {
51
                  # these have no defaults. They should be in the configuration file
52
                  # 'password' => 'password',
53
                  # 'username' => 'username',
54
                  # hierarchy can be any combination of <path>, <month> and <year>
55
                  'hierarchy' => '<path>',
56
                  # default target server
57
                  'server' => 'localhost',
58
                 },
59
      # where the mail is coming from
60
      'source' => {
61
                  # these have no defaults. They should be in the configuration file
62
                  # 'password' => 'password',
63
                  # 'username' => 'username',
64
                  # Anything older than this is archived
65
                  # number of days unless followed by 'M' or 'Y', in which case it is
66
                  # multiplied by the number of days in a year (365.2425) or days in a month (30.5)
67
                  # may be a float, ie 1.25Y is the same as 15M
68
                  'age' => '1Y',
69
                  # if set to 1, any folders emptied out will be deleted EXCEPT system folders
70
                  'deleteEmptyFolders' => 0,
71
                  # default source server
72
                  'server' => 'localhost',
73
                  # these folders are considered system folders and never deleted, case insensitive
74
                  'system' => [
75
                                 'Outbox',
76
                                 'Sent Items',
77
                                 'INBOX'
78
                              ],
79
                  # these folders are ignored, ie not processed at all. Case insensitive
80
                  'ignore' => [
81
                                 'Deleted Messages',
82
                                 'Drafts',
83
                                 'Junk E-mail',
84
                                 'Junk',
85
                                 'Trash'
86
                               ],
87
                  # if 1, after successful copy to target, remove from source
88
                  'deleteOnSuccess' => 0
89
               },
90
 
91
      # if 1, does a dry run showing what would have happened
92
      'testing' => 0,
93
      # if 0, will not be processed
94
      'enabled' => 1,
95
   }
96
   };
97
 
98
 
14 rodolico 99
#
100
# find where the script is actually located as cfg should be there
101
#
102
sub getScriptLocation {
103
   use strict;
104
   use File::Spec::Functions qw(rel2abs);
105
   use File::Basename;
106
   return dirname(rel2abs($0));
107
}
108
 
109
#
110
# Read the configuration file from current location 
111
# and return it as a string
112
#
113
sub readConfig {
114
   my $scriptLocation = &getScriptLocation();
115
   if ( -e "$scriptLocation/$CONFIG_FILE_NAME" ) {
40 rodolico 116
      my $yaml = YAML::Tiny->read( "$scriptLocation/$CONFIG_FILE_NAME" );
117
      # use clone_merge to merge conf file into $config
118
      # overwrites anything in $config if it exists in the config file
119
      $config = clone_merge( $config, $yaml->[0] );
120
      return 1;
14 rodolico 121
   }
40 rodolico 122
   return 0;
14 rodolico 123
}
124
 
40 rodolico 125
# merges default into current account, overwriting anything not defined in account with
126
# value from default EXCEPT arrays labeled in @tags, which will be merged together.
127
sub fixupAccount {
128
   my ( $default, $account ) = @_;
129
 
130
   # these arrays, part of source, will be appended together instead of being overwritten
131
   my @tags = ( 'ignore', 'system' );
132
   # merge the tags in question. NOTE: they can only be in source
133
   foreach my $tag ( @tags) {
134
      if ( $default->{'source'}->{$tag} && $account->{'source'}->{$tag} ) {
135
         my @j = ( @{$default->{'source'}->{$tag}}, @{$account->{'source'}->{$tag}} );
136
         $account->{'source'}->{$tag} = \@j;
137
      }
138
   }
139
   # now, merge account and default, with account taking precedence.
140
   return  clone_merge(  $default, $account );
141
   #my $c = clone_merge(  $default, $account );
142
   #return $c;
143
}
144
 
14 rodolico 145
#
146
# Open an IMAP connection
147
#
148
sub openIMAPConnection {
149
   my ( $server, $username, $password ) = @_;
150
   my $imap = Net::IMAP::Simple->new( $server ) ||
151
    die "Unable to connect to IMAP: $Net::IMAP::Simple::errstr\n";
152
   # Log on
153
   if(!$imap->login( $username, $password )){
154
     die "Login failed: " . $imap->errstr . "\n";
155
   }
156
   return $imap;
157
}
158
 
159
#
160
# returns a string in proper format for RFC which is $age days ago
40 rodolico 161
# $age is a float, possibly followed by a single character modifier
14 rodolico 162
#
163
sub getDate {
164
   my $age = shift;
40 rodolico 165
   # allow modifier to age which contains 'Y' (years) or 'M' (months)
166
   # Simply set multiplier to the correct value, then multiply the value
167
   $age = lc( $age );
168
   if ( $age =~ m/([0-9.]+)([a-z])/ ) {
169
      my $multiplier = ($2 == 'y' ? 365.2425 : ( $2 == 'm' ? 30.5 : MAXINT) );
170
      $age = floor( $1 * $multiplier);
171
   }
14 rodolico 172
   my @months = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
173
   my @now = localtime(time - 24 * 60 * 60 * $age);
174
   $now[4] = @months[$now[4]];
175
   $now[5] += 1900;
176
   my $date = sprintf( "%d-%s-%d", $now[3],$now[4],$now[5] ) ; # '1-Jan-2014';
177
   return $date;
178
}
179
 
180
#
181
# Get a list of all folders to be processed
182
# currently, it just weeds out items in the ignore list
183
#
184
sub getFolders {
185
   my ($imap, $ignore, $separator) = @_;
186
   $separator = '\\' . $separator;
187
   # build a regex that will be used to filter the input
188
   # assuming Trash, Drafts and Junk are in the ignore list
189
   # and a period is the separator, the generated regex is
190
   # (^|(\.))((Trash)|(Drafts)|(Junk))((\.)|$)
191
   # which basically says ignore those folders, but not substrings of them
192
   # ie, Junk02 would not be filtered but Junk would
193
   my $ignoreRegex = "(^|($separator))((" . join( ")\|(", @$ignore ) . "))(($separator)|\$)";
194
   # read all mailboxes and filter them with above regex into @boxes
195
   my @boxes = grep{ ! /$ignoreRegex/ } $imap->mailboxes;
196
   return \@boxes;
197
}
198
 
199
#
200
# make a folder on the IMAP account. The folder is assumed to be the
201
# fully qualified path with the correct delimiters
202
#
203
sub makeFolder {
204
   my ($imap, $folder, $delimiter) = @_;
205
 
206
   print "\n\t\tCreating folder $folder";
207
   # you must create the parent folder before creating the children
208
   my $escapedDelimiter = '\\' . $delimiter;
209
   my @folders = split( $escapedDelimiter, $folder );
210
   $folder = '';
211
   # take them from the left and, if they don't exist, create it
212
   while ( my $subdir = shift @folders ) {
213
      $folder .= $delimiter if $folder;
214
      $folder .= $subdir;
215
      next if $imap->select( $folder ); # already created, so look deeper in hierachy
216
      print "\n\t\t\tCreating subfolder $folder";
217
      $imap->create_mailbox( $folder ) || warn $imap->errstr();
218
      $imap->folder_subscribe( $folder ) || die $imap->errstr();
219
      unless ( $imap->select( $folder ) ) { # verify it was created
220
         warn "Unable to create $folder on target account\n";
221
         return 0;
222
      } # unless
223
   } # while
224
   return $folder;
225
}
226
 
227
#
228
# Delete an IMAP folder
229
#
230
sub deleteAFolder {
231
   my ($sourceAccount, $folder, $separator, $systemFolders) = @_;
232
   return 1 if $folder eq 'INBOX'; # do NOT mess with INBOX
233
   return 2 if $sourceAccount->select($folder) > 0; # do not mess with it if it still has messages in it
234
   return 3 if $sourceAccount->mailboxes( $folder . $separator . '*' ); # do not mess with it if it has subfolders
235
   return 4 if ( ref ( $systemFolders ) eq 'ARRAY' ) && ( grep{$_ eq $folder} @$systemFolders );
236
   print "\n\t\tDeleting empty folder $folder";
237
   $sourceAccount->folder_unsubscribe($folder);
238
   $sourceAccount->delete_mailbox( $folder );
239
}
240
 
241
 
40 rodolico 242
# main process loop to handle one account
14 rodolico 243
#
244
sub processAccount {
40 rodolico 245
   my $account = shift;
246
 
247
   my $date = &getDate( $age );
14 rodolico 248
   print "\t" . ( $deleteOnSuccess ? 'Moving' : 'Copying' ) . " all messages before $date\n";
249
 
250
   # open and log into both source and target, and get the separator used
251
   my $sourceAccount = &openIMAPConnection( $$source{'server'}, $$source{'username'}, $$source{'password'} );
252
   my $targetAccount = &openIMAPConnection( $$target{'server'}, $$target{'username'}, $$target{'password'} );
253
   $$source{'separator'} = $sourceAccount->separator unless $$source{'separator'};
254
   $$target{'separator'} = $targetAccount->separator unless $$target{'separator'};
255
 
256
   # get a list of all folders to be processed on the source
257
   $$source{'folders'} = &getFolders( $sourceAccount, $ignore, $$source{'separator'} );
258
   #print Dumper( $targetAccount );
259
   #die;
260
   my $folderList = $$source{'folders'};
261
   my $count = 0; # count the number of messages processed
262
   my $processedCount = 0; # count the number of folders processed
263
   foreach my $folder ( @$folderList ) {
264
      print "\t$folder";
38 rodolico 265
      my $messages;
40 rodolico 266
      $messages = &processFolder( $sourceAccount, $targetAccount, $folder, $date, $$source{'separator'}, $$target{'separator'}, $deleteOnSuccess );
267
      $TESTING ? print "Would expunge $folder\n" : $sourceAccount->expunge_mailbox( $folder );
14 rodolico 268
      # delete folder if empty and client has requested it.
40 rodolico 269
      ( $TESTING ? print "Would delete $folder\n" : &deleteAFolder( $sourceAccount, $folder, $$source{'separator'}, $$source{'systemfolder'} ) ) if ( $deleteEmptyFolders );
14 rodolico 270
      print "\n\t\t$messages processed\n";
271
      $count += $messages;
272
      $processedCount++;
273
      # next line used only for testing. Dies after 5 folders on first account
274
      # last if $processedCount > 5;
275
   }
276
   $sourceAccount->quit;
277
   $targetAccount->quit;
278
   return $count;
279
}
280
 
40 rodolico 281
 
282
 
14 rodolico 283
#######################################################################
284
#                   Main                                              #
285
#######################################################################
286
 
287
# read and evaluate configuration file
40 rodolico 288
&readConfig() || die "could not load config file\n";
289
#print Dumper( $config ); die;
290
foreach my $account ( keys %{$config->{'accounts'}} ) {
291
    $config->{'accounts'}->{$account} = &fixupAccount( $config->{'default'}, $config->{'accounts'}->{$account} );
14 rodolico 292
}
293
 
40 rodolico 294
#print Dumper( $config ) ; die;
14 rodolico 295
 
40 rodolico 296
# just a place to gather some stats
14 rodolico 297
my %processed;
298
$processed{'Accounts'} = 0;
299
$processed{'Messages'} = 0;
300
 
40 rodolico 301
# grab only the accounts for simplicity
302
my $accounts = $config->{'accounts'};
303
# now, process each in turn
304
foreach my $account ( keys %$accounts ) {
14 rodolico 305
   # talk to user
306
   print "Processing account $account\n";
40 rodolico 307
   # do the account. This is the main worker bee
308
   $accounts->{$account}->{'processed'} = &processAccount( $accounts->{$account} );
309
   print "Done, $accounts->{$account}->{'processed'} messages copied\n";
14 rodolico 310
   $processed{'Accounts'}++;
40 rodolico 311
   $processed{'Messages'} += $accounts->{$account}->{'processed'};
14 rodolico 312
} # foreach loop
313
 
314
print "$processed{'Accounts'} accounts processed, $processed{'Messages'} messages\n";
315
 
40 rodolico 316
 
14 rodolico 317
1;
318