#!/usr/bin/perl -w # # $Id: mail_rotate.pl,v 1.29 2002/09/27 02:18:18 jmates Exp $ # # Copyright (c) 2000-2002, Jeremy A. Mates. This script is free # software; you can redistribute it and/or modify it under the same # terms as Perl itself. # # Run perldoc(1) on this file for additional documentation. # ###################################################################### # # REQUIREMENTS require 5; use strict; ###################################################################### # # MODULES use Carp; # better error reporting use Getopt::Std; # command line option handling use Date::Parse; # stringy dates to array dates use POSIX qw(strftime); # date formatting use Time::Local; # array dates to epoch time use File::Basename; # file path splitting routines use File::Path; # directory tree creation/removal use File::Spec; # cross platform file path ops use Mail::Cclient; # requires UW-IMAP sources to build ###################################################################### # # VARIABLES my $VERSION; ($VERSION = '$Revision: 1.29 $ ') =~ s/[^0-9.]//g; # opts holds command line switches, config a ref to a hash holding # configuration options, and %data used to store what-to-do lists # on folders. $rules ref to array of anon hashes for later expansion my (%opts, $config, $rules, $dir_sep, $preview); $config->{'base_path'} = ($ENV{'IMAPHOME'}) ? $ENV{'IMAPHOME'} : $ENV{'HOME'}; # default ignore_recent is those messages within a week of now $config->{'ignore_recent'} = time - 604800; # is there a better way (e.g. via module?) to determine file path # separator item? (could use if $^O eq 'foo' to support others...) $dir_sep = '/'; # default to unix for fun ###################################################################### # # MAIN # parse command-line options into config hash getopts('h?pb:ar:o:c:', \%opts); help() if exists $opts{'h'} or exists $opts{'?'}; # load options & rules from config file if called for if (exists $opts{'c'}) { ($config, $rules) = load_config($opts{'c'}, $config); } # preview mode flag $preview = 1 if exists $opts{'p'}; # hack base path if called for, and sanity check it $config->{'base_path'} = $opts{'b'} if exists $opts{'b'}; unless (-d $config->{'base_path'}) { die "Error: ", $config->{'base_path'}, " not a directory/does not exist\n"; } $config->{'all_msgs'} = 1 if exists $opts{'a'}; unless ($config->{'all_msgs'}) { # and a ignore-messages-older-than-this if needed, and unset # the default of this month if (exists $opts{'o'}) { $config->{'ignore_older'} = str2time($opts{'o'}); $config->{'ignore_recent'} = undef; } # over-ride first-of-month default if they supplied data $config->{'ignore_recent'} = str2time($opts{'r'}) if exists $opts{'r'}; } # read from STDIN if no args left and not using config file unless (exists $opts{'c'}) { chomp(@ARGV = ) unless @ARGV; # and flag the help text if nothing left on command line at this point help() unless @ARGV; } # # Convert command line rules into array of anon hashes # for (@ARGV) { my ($src, $dest) = split ':', $_, 2; # obligatory sanity checking to spare users File::Spec's complaints unless (defined $src) { warn "Warning: no source folder for $_, skipping\n"; next; } unless (defined $dest) { warn "Warning: no destination for $_, skipping\n"; next; } push @$rules, { 'source' => $src, 'destination' => $dest, }; } # # Loop over rules and attempt to apply them # for my $hash (@$rules) { my ($source, $destination, %data, $mbox); $source = $hash->{'source'}; $destination = $hash->{'destination'}; # figure out whether path is relative to root or not for source # and destination paths, if not, root path under the base path unless ($source =~ m/^$dir_sep/o) { $source = File::Spec->catfile($config->{'base_path'}, $source); } unless ($destination =~ m/^$dir_sep/o) { $destination = File::Spec->catfile($config->{'base_path'}, $destination); } # open up mailbox w/ error checking $mbox = Mail::Cclient->new($source); unless (defined $mbox) { warn "Warning: could not open ", $source, ", skipping\n"; next; } # walk over mailbox, extract dates, move files (better way?) for my $msgno (1 .. $mbox->nmsgs) { my $envelope = $mbox->fetchstructure($msgno); # need epoch format for > < comparisons my $msg_date = str2time($envelope->date); # error checking on the date... unless (defined $msg_date) { warn "Warning: invalid date '", $envelope->date, "' for msg $msgno in $source, skipping\n"; next; } unless ($config->{'all_msgs'}) { if (defined $config->{'ignore_recent'}) { next if $msg_date >= $config->{'ignore_recent'}; } elsif (defined $config->{'ignore_older'}) { next if $msg_date <= $config->{'ignore_older'}; } } # form up hash of destinations & message numbers, converting # date and POSIX string properly push @{$data{strftime $destination, localtime $msg_date}}, $msgno; } # in %data, key is destination folder, value array of message numbers # destined to the said folder for (sort keys %data) { # only do stuff to folders if *not* in preview mode!!! unless ($preview) { # make sure directories leading up to destination exist # might have to eval() this to catch errors properly... my $dirname = dirname($_); unless (-d $dirname) { mkpath($dirname); } $mbox->create($_); # create the mailbox first # if list of ,'s too long, could parse it and replace ranges # of numbers with 1-5, for example... $mbox->move(join (",", @{$data{$_}}), $_); } # simple little report for the time being to give some # indication of progress... print "Moved ", scalar @{$data{$_}}, " messages from ", $source, " to ", $_, "\n\n"; # DBG } # close out connection to this mailbox, saving changes # would be nice if could reuse mbox object across multiple # mailboxes somehow... $mbox->expunge; $mbox->close; } exit; ###################################################################### # # SUBROUTINES # routine to parse out config and rules from text file sub load_config { my $file = shift; # path to file to parse my $config = shift; # hash ref, prior global config defaults my @rules; # array of anon hashes for rules # no locking as access & modification of file will be rare, and # generally not at the same time (we hope) unless (open FILE, $file) { warn "Warning: problem opening ", $file, ": ", $!, "\n"; return; } else { while () { chomp; # skip blank/commented lines next if m/^\#/; next if m/^\s*$/; # split on whitespace runs, only want 2 values out my ($key, $value) = split /\s+/, $_, 2; # either a rule or a preference of some kind... if ($key eq 'rule') { my ($src, $dest) = split ':', $value, 2; # obligatory sanity checking unless (defined $src) { warn "Warning: no source folder for $_, skipping\n"; next; } unless (defined $dest) { warn "Warning: no destination for $_, skipping\n"; next; } push @rules, { 'source' => $src, 'destination' => $dest, }; } else { # treat as pref, add to config hash unless (defined $value) { warn "Invalid preference value for $_, skipping\n"; } # base_path needs tilde expansion done, stolen # from Perl Cookbook, page 231. :) if ($key eq 'base_path') { $value =~ s{ ^ ~ ( [^/]* ) } { $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($>))[7] ) }ex; } $config->{$key} = $value; } } close FILE; } return ($config, \@rules); } # a generic help blarb sub help { print <<"HELP"; Usage: $0 [options] folder:action [folder1:action1 .. fN:aN] Script to rotate mail folders. Options for version $VERSION: -h/-? Display this message -p Preview mode. No changes will be made to mail folders. -c xx Load config and rules from file xx. -b xx Directory to work from (default: $config->{'base_path'}) -a Deal with all messages (default ignore within a week). This option overrides the following two: -r xx Ignore messages newer than the date xx. -o xx Ignore messages older than the date xx. Run perldoc(1) on this script for additional documentation. HELP exit; } __END__ ###################################################################### # # DOCUMENTATION =head1 NAME mail_rotate.pl - a script to rotate mail folders. =head1 SYNOPSIS JAM 2002-09-26 seeing problems with messages getting broken while rotating, but haven't had time to debug what's going on... probably a c-client issue? To rotate the mail folder Sent under your home directory into monthly archives, ignoring messages during the current month: $ mail_rotate.pl Sent:Sent-%Y-%B Or course, some folks alter the default IMAP directory, do not want a cluttered ~/IMAP, and want to operate on all messages in Sent: $ mail_rotate.pl -a -b $HOME/IMAP Sent:Sent-Archives/%Y/%m-%b =head1 DESCRIPTION This script allows an administrator to setup simple mail folder rotation. For instance, a user may want their Sent mail archived off by monthly or yearly segments automatically, without requiring a special mail client. The Mail::Cclient module is used to access mail folders. See L<"CCLIENT"> for more details. The POSIX strftime() routine is used to move the messages to the proper destination; consult strftime(3) for more details. =head1 USAGE $ mail_rotate.pl [options] folder1:action1 [f2:a2 .. fN:aN] Options are detailed in L<"OPTIONS">. Arguments will be read from STDIN if the command line lacks rules. Rules are folder:action pairs: colon-separated entities describing what to parse and where to send messages to. The "folder" is a path to a source folder to read messages from, relative to the base directory setting. The "action" is another folder used as a destination for the message. The action folder name will be evaluated using POSIX strftime() expansion. For example, take a mailbox called "spam" that has unfortunately grown over several years, containing messages from 1998 to 2000. To divide out the messages in that folder based on their year, one would run the following command from the directory containing the mailbox: $ mail_rotate.pl -b `pwd` spam:spam-%Y B: Use the B<-p> preview command line option to see what various rules would do to your mail folders without committing to anything. The useful (and portable) POSIX expansions are: B<%Y> Year with century. B<%y> Year without century. B<%m> Month as decimal (01 .. 12). B<%d> Day as decimal (01 .. 31). B<%B> Full month name. B<%b> Abbreviated month name. B<%A> Full weekday. B<%a> Abbreviated weekday. B<%%> Literal %. =head1 OPTIONS The following options are supported: =over 4 =item B<-h> or B<-?> Displays a brief usage blarb, then quits. =item B<-p> Preview mode. No changes will be made to your mail folders. Good for debugging or testing out new rules. =item B<-c file> Load preferences and rules (folder action pairs) from the specified config file. Command line options take precedence over parameters in the config file; however, rules specified on the command line will be added to any specified in the config file. The config file option is intended to make it easy to setup a cron job to rotate the mail folders every week or two, where all the options and rules are maintained in a text file: 30 1 */7 * * /usr/local/bin/mail_rotate.pl -c ~/.mailrotaterc See L<"CONFIG FILE"> for more details. =item B<-b directory> Base directory to work from. If not specified, the default is the contents of the IMAPHOME environment variable; failing that, the HOME environment variable. All folders specified are assumed to be relative to the base directory, except for ones that begin with the $dir_sep pattern, which defaults to the unix-compatible / character. The script will fail if the base path is not a directory or does not exist. =item B<-a> Include all messages. Overrides the default of ignoring messages dated inside the last week, and takes precedence over the -r or -o options. =item B<-r date> Ignore recent messages. Messages newer than the strptime(3) compatible date supplied with not be rotated. See L for a list of allowed date types and caveats. =item B<-o date> Ignore messages older than the required date. This option allows for fine-grained manipulation of a mail folder, e.g. when combined with -r to extract a narrow range of messages from a folder. If either of -r or -o are specified, the default of ignoring messages dated inside the last week is removed. =back =head1 CONFIG FILE The config file allows preferences and rules to be stored in a file, an easy target for this script running from cron using the -c option. See -c under L<"OPTIONS"> for more details. Command line options B take precendence over config file values, which in turn will override script defaults. If in doubt, do not mix command line options with config file values. The format for the config file is simple, namely key -> value pairs separated by a run of whitespace. Blank lines or lines beginning with a # are ignored. The current list of supported keys are: =over 4 =item B Literal path to location from where all operations should be based. A tilde may be used to represent the home directory for the user the script is running as: base_path ~/IMAP What the base_path is set to changes how all the rules behave, e.g. a base_path of ~/ might leads to rules like: rule Mail/incoming:Mail/incoming-%Y While a base_path of ~/Mail would simpilfy the rule to: rule incoming:incoming-%Y =item B The value should be a folder:action pair, along the lines of: rule Sent:Sent-Archives/%Y/%m-%b As many rules as required may be specified; L<"USAGE"> has more details on rules. =item B True or false value indicating whether all messages should be included when applying rules, instead of the default of ignoring messages dated inside the last week. B overrides the date-related B and B options as well. Default is false. =item B Exclude messages after the specified strptime(3) compatible date from rotation. =item B Exclude messages older than the specified date from rotation. =back =head1 CCLIENT The Mail::Cclient module is used as the means of access to the mail folders specified. This usually requires the UW IMAP source distribution to be around when the module is compiled, so CPAN will not be of any help. To compile Mail::Cclient, obtain the UW-IMAP source code from: http://www.washington.edu/imap/ and follow the documentation to build the server for your system (e.g. C for UW-IMAP 2001a under FreeBSD). As of 2002-03-27, the CPAN shell is only showing an old version (1.1) of Mail::Cclient, though a more recent version (1.5) is available at the following URL: http://www.cpan.org/authors/id/H/HD/HDIAS/ Build Mail::Cclient 1.5 against the local UW-IMAP source directory: $ perl Makefile.PL --cclient_dir=/path/to/imap-2001a/c-client --with-ssl $ make # make install =head1 TODO Better output report. Means to run script safely as root so users can easily (e.g. through a web interface) setup mail rotation? Allowance for relative dates (e.g. 3 months ago) in the ignore-date related options? Means of storing "undo" information so mail moves can be undone? =head1 BUGS =head2 Reporting Bugs Newer versions of this script may be available from: http://sial.org/code/perl/ If the bug is in the latest version, send a report to the author. Patches that fix problems or add new features are welcome. =head2 Known Issues There is little error checking on the folder part of a rule; this script will happily attempt to rotate the same folder two different ways if two rules with the same source folder are supplied. Date::Parse's strptime() combined with the POSIX strftime() conspire to ignore the timezone. This means that when rotating an Incoming mail folder on a daily basis, all mail dated 2000-07-31 will end up in the 2000-07-31 folder, despite some mail being sent on different days relative to the local timezone. Also, the date set by the sending agent is used, so messages from wierd software (e.g. old versions of the Elm mailer) may get rotated into odd years, e.g. 1969. Probably should do sanity checks on the dates... =head1 SEE ALSO perl(1), strftime(3), strptime(3) =head1 AUTHOR Jeremy A. Mates, http://sial.org/contact/ =head1 COPYRIGHT Copyright (c) 2000-2002, Jeremy A. Mates. This script is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 VERSION $Id: mail_rotate.pl,v 1.29 2002/09/27 02:18:18 jmates Exp $ =cut