Rev 57 | Rev 108 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed
#! /usr/bin/perl -w
# archiveIMAP: moves old messages from one IMAP account to another
# maintaining hierarchy.
# see http://wiki.linuxservertech.com for additional information
# Copyright (C) 2014 R. W. Rodolico
#
# version 1.0, 20140818
# Initial Release
#
# version 1.0.1 20140819
# Removed dependancy on Email::Simple
# Allowed 'separator' as an element in either source or target
#
# version 2.0.0 20190817 RWR
# Major revision.
# Config is now YAML
# Default section which will fill in the blanks for anything not filled in on an account, so creating a lot of accounts
# with common values is easier to set up an maintain
# Target folder is configurable on a per account basis, using tags <folder>, <year>, <month> (called hierachy)
#
# version 2.1.0 20190822 RWR
# Added sleeptime parameter to target which makes process sleep a number of seconds between each mail transfer
# We use HiRes, so this can be a decimal number (ie, 0.5 for half a second).
#
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# for required libraries
# apt-get -y install libnet-imap-simple-ssl-perl libyaml-tiny-perl libhash-merge-simple-perl libclone-perl libdate-manip-perl libemail-simple-perl
use strict;
use warnings;
use Net::IMAP::Simple; # libnet-imap-simple-ssl-perl
use POSIX; # to get floor and ceil
use YAML::Tiny; # apt-get libyaml-tiny-perl under debian
use Clone 'clone'; # libclone-perl
use Hash::Merge::Simple qw/ merge clone_merge /; # libhash-merge-simple-perl
use Date::Manip; # libdate-manip-perl
use Email::Simple; # libemail-simple-perl
use Date::Parse;
use Time::HiRes;
use Data::Dumper;
# globals
my $CONFIG_FILE_NAME = 'archiveIMAP.yaml';
# the default values for everything. These are overridden
# by default values in the conf file (.yaml) or by individual
# accounts entries.
my $config = {
'default' => {
# if set, most log stuff is sent to this file
'logfile' => '',
# defines the amount of crap which ends up in the logfile
'loglevel' => 4,
# where the mail is going to
'target' => {
# these have no defaults. They should be in the configuration file
# 'password' => 'password',
# 'username' => 'username',
# hierarchy can be any combination of <path>, <month> and <year>
'hierarchy' => '<path>',
# default target server
'server' => 'localhost',
# amount of time to sleep between messages, in seconds (float)
'sleeptime' => 0.5,
},
# where the mail is coming from
'source' => {
# these have no defaults. They should be in the configuration file
# 'password' => 'password',
# 'username' => 'username',
# Anything older than this is archived
# number of days unless followed by 'M' or 'Y', in which case it is
# multiplied by the number of days in a year (365.2425) or days in a month (30.5)
# may be a float, ie 1.25Y is the same as 15M
'age' => '1Y',
# if set to 1, any folders emptied out will be deleted EXCEPT system folders
'deleteEmptyFolders' => 0,
# default source server
'server' => 'localhost',
# these folders are considered system folders and never deleted, case insensitive
'system' => [
'Outbox',
'Sent Items',
'INBOX'
],
# these folders are ignored, ie not processed at all. Case insensitive
'ignore' => [
'Deleted Messages',
'Drafts',
'Junk E-mail',
'Junk',
'Trash'
],
# if 1, after successful copy to target, remove from source
'deleteOnSuccess' => 0
},
# if 1, does a dry run showing what would have happened
'testing' => 0,
# if 0, will not be processed
'enabled' => 1,
}
};
# prints to a log file, if defined. Otherwise, prints to STDOUT
sub logit {
my $priority = shift;
return unless $priority <= $config->{'default'}->{'loglevel'};
if ( $config->{'default'}->{'logfile'} ) {
open( my $logfile, '>>', $config->{'default'}->{'logfile'} ) || die "Could not write to $config->{default}->{logfile}: $!\n";
while ( my $message = shift ) {
print $logfile "$message\n";
}
close( $logfile );
} else {
print STDOUT join( "\n", @_ ) . "\n";
}
return;
} # logit
#
# find where the script is actually located as cfg should be there
#
sub getScriptLocation {
use strict;
use File::Spec::Functions qw(rel2abs);
use File::Basename;
return dirname(rel2abs($0));
}
#
# Read the configuration file from current location
# and return it as a string
#
sub readConfig {
my $scriptLocation = &getScriptLocation();
if ( -e "$scriptLocation/$CONFIG_FILE_NAME" ) {
my $yaml = YAML::Tiny->read( "$scriptLocation/$CONFIG_FILE_NAME" );
# use clone_merge to merge conf file into $config
# overwrites anything in $config if it exists in the config file
$config = clone_merge( $config, $yaml->[0] );
return 1;
}
return 0;
}
# merges default into current account, overwriting anything not defined in account with
# value from default EXCEPT arrays labeled in @tags, which will be merged together.
sub fixupAccount {
my ( $default, $account ) = @_;
# these arrays, part of source, will be appended together instead of being overwritten
my @tags = ( 'ignore', 'system' );
# merge the tags in question. NOTE: they can only be in source
foreach my $tag ( @tags) {
if ( $default->{'source'}->{$tag} && $account->{'source'}->{$tag} ) {
my @j = ( @{$default->{'source'}->{$tag}}, @{$account->{'source'}->{$tag}} );
$account->{'source'}->{$tag} = \@j;
}
}
# now, merge account and default, with account taking precedence.
return clone_merge( $default, $account );
#my $c = clone_merge( $default, $account );
#return $c;
}
#
# Open an IMAP connection
#
sub openIMAPConnection {
my ( $server, $username, $password ) = @_;
my $imap = Net::IMAP::Simple->new( $server ) ||
die "Unable to connect to IMAP: $Net::IMAP::Simple::errstr\n";
# Log on
if(!$imap->login( $username, $password )){
die "Login failed: " . $imap->errstr . "\n";
}
return $imap;
}
#
# returns a string in proper format for RFC which is $age days ago
# $age is a float, possibly followed by a single character modifier
#
sub getDate {
my $age = shift;
# allow modifier to age which contains 'Y' (years) or 'M' (months)
# Simply set multiplier to the correct value, then multiply the value
$age = lc( $age );
if ( $age =~ m/([0-9.]+)([a-z])/ ) {
# ~0 is the maximum integer which can be stored. Shifting right one gives max unsigned integer
my $multiplier = ($2 eq 'y' ? 365.2425 : ( $2 eq 'm' ? 30.5 : ~0 >> 1) );
$age = floor( $1 * $multiplier);
}
my @months = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
my @now = localtime(time - 24 * 60 * 60 * $age);
$now[4] = @months[$now[4]];
$now[5] += 1900;
my $date = sprintf( "%d-%s-%d", $now[3],$now[4],$now[5] ) ; # '1-Jan-2014';
return $date;
}
# calculateTargetFolder
# we are passed the target and the message
# pattern is carot (^) separated and may contain
# special placeholders <path>, <year>, <month>
# anything else is inserted directly
sub calculateTargetFolder {
my ( $message, $source, $target ) = @_;
# we may be sorting by date
my $email = Email::Simple->new( join( '', @$message ) );
my $msgDate = $email->header('Date');
my @t = strptime( $msgDate );
my $month = $t[4]+1;
$month = '0' . $month if $month < 10;
my $year = $t[5]+1900;
# also, may need the source hierarchy
my $sourceFolder = join ( $target->{'separator'}, @{ $source->{'source folder list'} } );
# now, build the path on the new machine
my $targetPattern = join( $target->{'separator'}, @{ $target->{'hierachy pattern'} } );
$targetPattern =~ s/<path>/$sourceFolder/gi;
$targetPattern =~ s/<month>/$month/gi;
$targetPattern =~ s/<year>/$year/gi;
# return the string we created, separated by
# the delimiters for the target
return $targetPattern;
}
# If folder has messages that match the criteria, move them to target
# creates the target folder if necessary
#
sub processFolder {
my ( $source, $target, $folder, $TESTING ) = @_;
&logit( 1, '', "=== Processing $folder" );
my $sourceAccount = $source->{'connection'};
my $targetAccount = $target->{'connection'};
my $numMessages = 0;
$sourceAccount->expunge_mailbox( $folder ); # clean it up so we don't copy deleted messages
$sourceAccount->select( $folder ) or die "Could not connect to folder $folder\n"; # move into the correct folder for the source
my @ids = $sourceAccount->search_sent_before( $source->{'before date'} ); # use sent_before to get the sent date from message
return 0 unless @ids; # we have nothing to copy, so exit
&logit( 3, "Found " . scalar( @ids ) . " messages to process" );
# make life easier by precalculating some paths as array pointers
my @sourceFolders = split( '\\' . $source->{'separator'}, $folder );
my @pattern = split '\\^', $target->{'hierarchy'};
$source->{'source folder list'} = \@sourceFolders;
$target->{'hierachy pattern'} = \@pattern;
# process each message to be done
while ( my $id = shift ( @ids ) ) {
# get the flags
my @flags = $sourceAccount->msg_flags( $id );
# get the message
my $message = $sourceAccount->get( $id ) or die "Error getting message ID $id: $sourceAccount->errstr\n";
# calculate where we are going to move this to
my $targetFolder = &calculateTargetFolder( $message, $source, $target );
if ( $TESTING ) {
&logit( 0, "Would have " . ( $source->{'deleteOnSuccess'} ? 'moved' : 'copied' ) . " message to $targetFolder" );
next;
}
if ( $target->{'connection'}->select( $targetFolder ) || &makeFolder( $target->{'connection'}, $targetFolder, $target->{'separator'} ) ) {
if ( $target->{'connection'}->put( $targetFolder, $message, @flags ) ) {
$source->{'connection'}->delete( $id ) if ( $source->{'deleteOnSuccess'} ) ;
Time::HiRes::sleep( $target->{'sleeptime'} ) if $target->{'sleeptime'};
$numMessages++;
} else {
die "Could not write to target, aborting\n$targetFolder->{'connection'}->errstr\n";
}
} else {
warn "\t\t$targetFolder not found in target and could not create it\n";
}
}
return $numMessages;
}
#
# Get a list of all folders to be processed
# currently, it just weeds out items in the ignore list
#
sub getFolders {
my ($imap, $ignore, $separator) = @_;
$separator = '\\' . $separator;
# build a regex that will be used to filter the input
# assuming Trash, Drafts and Junk are in the ignore list
# and a period is the separator, the generated regex is
# (^|(\.))((Trash)|(Drafts)|(Junk))((\.)|$)
# which basically says ignore those folders, but not substrings of them
# ie, Junk02 would not be filtered but Junk would
my $ignoreRegex = "(^|($separator))((" . join( ")\|(", @$ignore ) . "))(($separator)|\$)";
# read all mailboxes and filter them with above regex into @boxes
my @boxes = grep{ ! /$ignoreRegex/i } $imap->mailboxes;
return \@boxes;
}
#
# make a folder on the IMAP account. The folder is assumed to be the
# fully qualified path with the correct delimiters
#
sub makeFolder {
my ($imap, $folder, $delimiter) = @_;
&logit( 3, "\n\t\tCreating folder $folder" );
# you must create the parent folder before creating the children
my $escapedDelimiter = '\\' . $delimiter;
my @folders = split( $escapedDelimiter, $folder );
$folder = '';
# take them from the left and, if they don't exist, create it
while ( my $subdir = shift @folders ) {
$folder .= $delimiter if $folder;
$folder .= $subdir;
next if $imap->select( $folder ); # already created, so look deeper in hierachy
&logit( 1, "\n\t\t\tCreating subfolder $folder" );
$imap->create_mailbox( $folder ) || warn $imap->errstr();
$imap->folder_subscribe( $folder ) || die $imap->errstr();
unless ( $imap->select( $folder ) ) { # verify it was created
warn "Unable to create $folder on target account\n";
return 0;
} # unless
} # while
return $folder;
}
#
# Delete an IMAP folder
#
sub deleteAFolder {
my ($source, $folder, $TESTING ) = @_;
my $sourceAccount = $source->{'connection'};
my $separator = $source->{'separator'};
return 1 if $folder eq 'INBOX'; # do NOT mess with INBOX
return 2 if $sourceAccount->select($folder) > 0; # do not mess with it if it still has messages in it
return 3 if $sourceAccount->mailboxes( $folder . $separator . '*' ); # do not mess with it if it has subfolders
return 4 if $source->{'system folders'}->{lc $folder}; # do not mess with system folders
&logit( 1, "\n\t\tDeleting empty folder $folder" . ( $TESTING ? ' Dry Run' : '' ) );
return 0 if $TESTING;
# select something other than the folder to be deleted
$sourceAccount->select( 'INBOX' );
if ( $sourceAccount->folder_unsubscribe($folder) ) {
if ( $sourceAccount->delete_mailbox( $folder ) ) {
return 0;
} else {
warn "Error trying to delete mailbox $folder: " . $sourceAccount->errstr . "\n";
}
} else {
warn "Error trying to unsubscribe from $folder: " . $sourceAccount->errstr . "\n";
}
return 0;
}
# main process loop to handle one account
#
sub processAccount {
my $account = shift;
return 0 unless $account->{'enabled'}; # blow it off if it is not enabled
my $TESTING = $account->{'testing'}; # create mini global if we should test this account
&logit( 0, "========= Test Mode ========\n" ) if $TESTING;
# open and log into both source and target, and get the separator used
foreach my $acct ( 'target','source' ) {
$account->{$acct}->{'connection'} = &openIMAPConnection( $account->{$acct}->{'server'}, $account->{$acct}->{'username'}, $account->{$acct}->{'password'} );
unless ( $account->{$acct}->{'connection'} ) {
warn "Unable to open $acct for $account->{$acct}->{username}, aborting move: $!\n";
return -1;
}
$account->{$acct}->{'separator'} = $account->{$acct}->{'connection'}->separator unless $account->{$acct}->{'separator'};
}
# just being set up for convenience and readability
my $source = $account->{'source'};
my $target = $account->{'target'};
my %temp = map{ lc($_) => 1 } @{$source->{'system'}};
$source->{'system folders'} = \%temp;
$source->{'before date'} = &getDate( $source->{'age'} );
&logit( 1, "\t" . ( $source->{'deleteOnSuccess'} ? 'Moving' : 'Copying' ) . " all messages before $source->{'before date'}" );
# get a list of all folders to be processed on the source
$source->{'folders'} = &getFolders( $source->{'connection'}, $source->{'ignore'}, $source->{'separator'} );
if ( $TESTING ) {
print Dumper( $source );
print "Source above, press enter to continue: "; my $j = <STDIN>;
print Dumper( $target );
print "Target above, Press enter to continue: "; $j = <STDIN>;
}
my $folderList = $source->{'folders'};
my $count = 0; # count the number of messages processed
my $processedCount = 0; # count the number of folders processed
foreach my $folder ( @$folderList ) {
my $messages;
$messages = &processFolder( $source, $target, $folder, $TESTING ); #, $date, $$source{'separator'}, $$target{'separator'}, $deleteOnSuccess );
$TESTING ? print "Would expunge $folder\n" : $source->{'connection'}->expunge_mailbox( $folder );
# delete folder if empty and client has requested it.
if ( $account->{'source'}->{'deleteEmptyFolders'} ) {
my $returnCode = &deleteAFolder( $source, $folder, $TESTING );
&logit( 3, "Return code from deleteAFolder is $returnCode" );
}
&logit( 1, "$messages processed" );
$count += $messages;
$processedCount++;
# next line used only for testing. Dies after 5 folders on first account
last if $processedCount > 5 and $TESTING;
}
$source->{'connection'}->quit;
$target->{'connection'}->quit;
return $count;
}
#######################################################################
# Main #
#######################################################################
# read and evaluate configuration file
&readConfig() || die "could not load config file\n";
#print Dumper( $config ); die;
foreach my $account ( keys %{$config->{'accounts'}} ) {
$config->{'accounts'}->{$account} = &fixupAccount( $config->{'default'}, $config->{'accounts'}->{$account} );
}
#print Dumper( $config ) ; die;
# just a place to gather some stats
my %processed;
$processed{'Accounts'} = 0;
$processed{'Messages'} = 0;
# grab only the accounts for simplicity
my $accounts = $config->{'accounts'};
#die Dumper( $accounts );
# now, process each in turn
foreach my $account ( keys %$accounts ) {
# talk to user
&logit( 1, "Processing account $account" );
# do the account. This is the main worker bee
$accounts->{$account}->{'processed'} = &processAccount( $accounts->{$account} );
&logit( 1, "Done, $accounts->{$account}->{'processed'} messages copied" );
$processed{'Accounts'}++;
$processed{'Messages'} += $accounts->{$account}->{'processed'};
# free up space we allocated since we stored a bunch of stuff in there, and we don't need it anymore
$accounts->{$account} = undef;
} # foreach loop
&logit( 1, "$processed{Accounts} accounts processed, $processed{Messages} messages" );
1;