Rev 36 | Rev 40 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed
#! /usr/bin/perl -w
# archiveIMAP: moves old messages from one IMAP account to another
# maintaining hierarchy.
# see http://wiki.linuxservertech.com for additional information
# Copyright (C) 2014 R. W. Rodolico
#
# version 1.0, 20140818
# Initial Release
#
# version 1.0.1 20140819
# Removed dependancy on Email::Simple
# Allowed 'separator' as an element in either source or target
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
use warnings;
use Net::IMAP::Simple; # libnet-imap-simple-ssl-perl
use POSIX; # to get floor and ceil
#use Data::Dumper;
# globals
my $CONFIG_FILE_NAME = 'archiveIMAP.cfg';
my $server = 'localhost'; # used if not specified in definition
# Following are added to ignore in definition in account (%accounts)
my @ignore = ( 'Deleted Messages','Drafts','Junk E-mail','Junk','Trash' );
# these are folders we may want to copy, but we never want to delete
my @systemFolders = ('Outbox','Sent Items','INBOX','Inbox');
my $deleteEmptyFolders = 1;
my $deleteOnSuccess = 1;
my %accounts; # this must be configured in archiveIMAP.cfg. See sample.
#
# find where the script is actually located as cfg should be there
#
sub getScriptLocation {
use strict;
use File::Spec::Functions qw(rel2abs);
use File::Basename;
return dirname(rel2abs($0));
}
#
# Read the configuration file from current location
# and return it as a string
#
sub readConfig {
my $scriptLocation = &getScriptLocation();
my $return = '';
if ( -e "$scriptLocation/$CONFIG_FILE_NAME" ) {
open CFG,"<$scriptLocation/$CONFIG_FILE_NAME" or die "Could not read $scriptLocation/$CONFIG_FILE_NAME: $!\n";
$return = join ( '', <CFG> );
close CFG;
}
return $return;
}
#
# Open an IMAP connection
#
sub openIMAPConnection {
my ( $server, $username, $password ) = @_;
my $imap = Net::IMAP::Simple->new( $server ) ||
die "Unable to connect to IMAP: $Net::IMAP::Simple::errstr\n";
# Log on
if(!$imap->login( $username, $password )){
die "Login failed: " . $imap->errstr . "\n";
}
return $imap;
}
#
# returns a string in proper format for RFC which is $age days ago
#
sub getDate {
my $age = shift;
my @months = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
my @now = localtime(time - 24 * 60 * 60 * $age);
$now[4] = @months[$now[4]];
$now[5] += 1900;
my $date = sprintf( "%d-%s-%d", $now[3],$now[4],$now[5] ) ; # '1-Jan-2014';
return $date;
}
#
# Get a list of all folders to be processed
# currently, it just weeds out items in the ignore list
#
sub getFolders {
my ($imap, $ignore, $separator) = @_;
$separator = '\\' . $separator;
# build a regex that will be used to filter the input
# assuming Trash, Drafts and Junk are in the ignore list
# and a period is the separator, the generated regex is
# (^|(\.))((Trash)|(Drafts)|(Junk))((\.)|$)
# which basically says ignore those folders, but not substrings of them
# ie, Junk02 would not be filtered but Junk would
my $ignoreRegex = "(^|($separator))((" . join( ")\|(", @$ignore ) . "))(($separator)|\$)";
# read all mailboxes and filter them with above regex into @boxes
my @boxes = grep{ ! /$ignoreRegex/ } $imap->mailboxes;
return \@boxes;
}
#
# make a folder on the IMAP account. The folder is assumed to be the
# fully qualified path with the correct delimiters
#
sub makeFolder {
my ($imap, $folder, $delimiter) = @_;
print "\n\t\tCreating folder $folder";
# you must create the parent folder before creating the children
my $escapedDelimiter = '\\' . $delimiter;
my @folders = split( $escapedDelimiter, $folder );
$folder = '';
# take them from the left and, if they don't exist, create it
while ( my $subdir = shift @folders ) {
$folder .= $delimiter if $folder;
$folder .= $subdir;
next if $imap->select( $folder ); # already created, so look deeper in hierachy
print "\n\t\t\tCreating subfolder $folder";
$imap->create_mailbox( $folder ) || warn $imap->errstr();
$imap->folder_subscribe( $folder ) || die $imap->errstr();
unless ( $imap->select( $folder ) ) { # verify it was created
warn "Unable to create $folder on target account\n";
return 0;
} # unless
} # while
return $folder;
}
#
# Delete an IMAP folder
#
sub deleteAFolder {
my ($sourceAccount, $folder, $separator, $systemFolders) = @_;
return 1 if $folder eq 'INBOX'; # do NOT mess with INBOX
return 2 if $sourceAccount->select($folder) > 0; # do not mess with it if it still has messages in it
return 3 if $sourceAccount->mailboxes( $folder . $separator . '*' ); # do not mess with it if it has subfolders
return 4 if ( ref ( $systemFolders ) eq 'ARRAY' ) && ( grep{$_ eq $folder} @$systemFolders );
print "\n\t\tDeleting empty folder $folder";
$sourceAccount->folder_unsubscribe($folder);
$sourceAccount->delete_mailbox( $folder );
}
#
# If folder has messages that match the criteria, move them to target
# creates the target folder if necessary
#
sub processFolder {
my ( $sourceAccount, $targetAccount, $folder, $dateBefore, $sourceDelimiter, $targetDelimiter, $deleteOnSuccess ) = @_;
my $numMessages = 0;
$sourceAccount->expunge_mailbox( $folder ); # clean it up so we don't copy deleted messages
$sourceAccount->select( $folder ) or die "Could not connect to folder $folder\n"; # move into the correct folder for the source
my @ids = $sourceAccount->search_sent_before( $dateBefore ); # use sent_before to get the sent date from message
# print join( "\n\t\t\t", @ids ) . "\n";
return 0 unless @ids; # we have nothing to copy, so exit
if ( $sourceDelimiter ne $targetDelimiter ) { # different delimiters, so we have to change them in the folder def
# they may contain meta chars, so escape them. We can use \Q \E in the pattern to match, but the
# second part of the s/// will substitute literal EXCEPT for the delimiter and the dollar sign
# I'm escaping the double quote (what I'm using for the delimiter) "just in case" but I'm not doing
# the dollar sign, so this will fail, fail, fail if there is an issue there.
my $escapedTargetDelimiter = $targetDelimiter eq '"' ? '\\' . $targetDelimiter : $targetDelimiter;
$folder =~ s"\Q$sourceDelimiter\E"$escapedTargetDelimiter"g;
}
if ( $targetAccount->select( $folder ) || &makeFolder( $targetAccount, $folder, $targetDelimiter ) ) {
foreach my $id ( @ids ) {
my @flags = $sourceAccount->msg_flags( $id );
my $message = $sourceAccount->get( $id ) or die $sourceAccount->errstr;
if ( $targetAccount->put( $folder, $message, @flags ) ) {
$sourceAccount->delete( $id ) if ( $deleteOnSuccess ) ;
$numMessages++;
} else {
die "Could not write to target, aborting\n$targetAccount->errstr\n";
}
}
} else {
warn "\t\t$folder not found in target and could not create it\n";
}
return $numMessages;
}
# this is an alternative to maintaining the hierarchy of the original system.
# instead, it will take
sub processFolderByDate {
my ( $sourceAccount, $targetAccount, $folder, $targetFormat, $dateBefore, $sourceDelimiter, $targetDelimiter, $deleteOnSuccess ) = @_;
my $numMessages = 0;
$sourceAccount->expunge_mailbox( $folder ); # clean it up so we don't copy deleted messages
$sourceAccount->select( $folder ) or die "Could not connect to folder $folder\n"; # move into the correct folder for the source
my @ids = $sourceAccount->search_sent_before( $dateBefore ); # use sent_before to get the sent date from message
# print join( "\n\t\t\t", @ids ) . "\n";
return 0 unless @ids; # we have nothing to copy, so exit
if ( $sourceDelimiter ne $targetDelimiter ) { # different delimiters, so we have to change them in the folder def
# they may contain meta chars, so escape them. We can use \Q \E in the pattern to match, but the
# second part of the s/// will substitute literal EXCEPT for the delimiter and the dollar sign
# I'm escaping the double quote (what I'm using for the delimiter) "just in case" but I'm not doing
# the dollar sign, so this will fail, fail, fail if there is an issue there.
my $escapedTargetDelimiter = $targetDelimiter eq '"' ? '\\' . $targetDelimiter : $targetDelimiter;
$folder =~ s"\Q$sourceDelimiter\E"$escapedTargetDelimiter"g;
}
if ( $targetAccount->select( $folder ) || &makeFolder( $targetAccount, $folder, $targetDelimiter ) ) {
foreach my $id ( @ids ) {
my @flags = $sourceAccount->msg_flags( $id );
my $message = $sourceAccount->get( $id ) or die $sourceAccount->errstr;
if ( $targetAccount->put( $folder, $message, @flags ) ) {
$sourceAccount->delete( $id ) if ( $deleteOnSuccess ) ;
$numMessages++;
} else {
die "Could not write to target, aborting\n$targetAccount->errstr\n";
}
}
} else {
warn "\t\t$folder not found in target and could not create it\n";
}
return $numMessages;
}
#
# main processing loop to handle one account
#
sub processAccount {
my ( $source, $target, $age, $ignore, $deleteOnSuccess, $deleteEmptyFolders, $hierarchy ) = @_;
$heirarchy = '' unless $hierarchy;
my $date = getDate( $age );
print "\t" . ( $deleteOnSuccess ? 'Moving' : 'Copying' ) . " all messages before $date\n";
# open and log into both source and target, and get the separator used
my $sourceAccount = &openIMAPConnection( $$source{'server'}, $$source{'username'}, $$source{'password'} );
my $targetAccount = &openIMAPConnection( $$target{'server'}, $$target{'username'}, $$target{'password'} );
$$source{'separator'} = $sourceAccount->separator unless $$source{'separator'};
$$target{'separator'} = $targetAccount->separator unless $$target{'separator'};
# get a list of all folders to be processed on the source
$$source{'folders'} = &getFolders( $sourceAccount, $ignore, $$source{'separator'} );
#print Dumper( $targetAccount );
#die;
my $folderList = $$source{'folders'};
my $count = 0; # count the number of messages processed
my $processedCount = 0; # count the number of folders processed
foreach my $folder ( @$folderList ) {
print "\t$folder";
my $messages;
if ( $heirarchy ) {
$messages = &processFolderByDate( $sourceAccount, $targetAccount, $folder, $hierarchy, $date, $$source{'separator'}, $$target{'separator'}, $deleteOnSuccess );
} else {
$messages = &processFolder( $sourceAccount, $targetAccount, $folder, $date, $$source{'separator'}, $$target{'separator'}, $deleteOnSuccess );
}
$sourceAccount->expunge_mailbox( $folder );
# delete folder if empty and client has requested it.
&deleteAFolder( $sourceAccount, $folder, $$source{'separator'}, $$source{'systemfolder'} ) if ( $deleteEmptyFolders );
print "\n\t\t$messages processed\n";
$count += $messages;
$processedCount++;
# next line used only for testing. Dies after 5 folders on first account
# last if $processedCount > 5;
}
$sourceAccount->quit;
$targetAccount->quit;
return $count;
}
#######################################################################
# Main #
#######################################################################
# read and evaluate configuration file
if ( my $config = readConfig() ) {
eval $config;
die "Error in configuraration file $CONFIG_FILE_NAME\n$@" if $@;
} else {
die "Configurastion file $CONFIG_FILE_NAME must be located in script directory\n";
}
my %processed;
$processed{'Accounts'} = 0;
$processed{'Messages'} = 0;
foreach my $account ( keys %accounts ) {
# talk to user
print "Processing account $account\n";
# Set some defaults from globals if they were not defined
$accounts{$account}{'source'}{'server'} = $server unless exists( $accounts{$account}{'source'}{'server'} );
$accounts{$account}{'target'}{'server'} = $server unless exists( $accounts{$account}{'target'}{'server'} );
$accounts{$account}{'deleteOnSuccess'} = $deleteOnSuccess unless exists $accounts{$account}{'deleteOnSuccess'};
$accounts{$account}{'deleteEmptyFolders'} = $deleteEmptyFolders unless exists $accounts{$account}{'deleteEmptyFolders'};
if ( exists $accounts{$account}{'ignore'} ) {
foreach my $element ( @ignore ) {
push $accounts{$account}{'ignore'}, $element;
}
} else {
$accounts{$account}{'ignore'} = \@ignore;
}
if ( exists $accounts{$account}{'source'}{'systemFolders'} ) {
foreach my $element ( @systemFolders ) {
push $accounts{$account}{'source'}{'systemFolders'}, $element;
}
} else {
$accounts{$account}{'source'}{'systemFolders'} = \@systemFolders;
}
# do the account
$accounts{$account}{'processed'} =
&processAccount(
$accounts{$account}{'source'},
$accounts{$account}{'target'},
$accounts{$account}{'age'},
$accounts{$account}{'ignore'},
$accounts{$account}{'deleteOnSuccess'},
$accounts{$account}{'deleteEmptyFolders'}
);
print "Done, $accounts{$account}{'processed'} messages copied\n";
$processed{'Accounts'}++;
$processed{'Messages'} += $accounts{$account}{'processed'};
} # foreach loop
print "$processed{'Accounts'} accounts processed, $processed{'Messages'} messages\n";
1;
Generated by GNU Enscript 1.6.5.90.