#!/usr/bin/perl # # $Id: fm,v 1.38 2004/04/13 06:06:43 jmates Exp $ # # The author disclaims all copyrights and releases this script into the # public domain. # # procmail logfile parser and reporter. # # Run perldoc(1) on this file for additional documentation. require 5; use strict; # for the format() and write() reports use FileHandle; # file locking related modules use Fcntl qw(:DEFAULT :flock); use IO::Handle; my ( %opts, $procmailrc, $logfile, $days_of_week, $months_of_year, $backup, @data, %folders, %folder_sizes, $oldest, $newest, $total_messages, $total_size, $machine, $summary, $reverse, $zap, %tags, %tag_counts ); # where to look for the procmailrc... $procmailrc = $ENV{'HOME'} . '/.procmailrc'; # for matching date field in from line $days_of_week = '(Sun|Mon|Tue|Wed|Thu|Fri|Sat)'; $months_of_year = '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'; # prefs for the humanize file size routine my %global_prefs = ( # include decimals in output? (e.g. 25.8 K vs. 26 K) 'decimal' => 0, # include .0 in decmail output? 'decimal_zero' => 0, # what to divide file sizes down by 'factor' => 1024, # percentage above which will be bumped up # (e.g. 999 bytes -> 1 K as within 5% of 1024) # set to undef to turn off 'fudge' => 0.98, # lengths above which decimals will not be included # for better readability 'max_human_length' => 2, # list of suffixes for human readable output 'suffix' => [ 'B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' ], ); use Getopt::Std; getopts( 'h?srzb:m:t:', \%opts ); help() if exists $opts{'h'} or exists $opts{'?'}; # extract backup option, if any $backup = $opts{'b'} if exists $opts{'b'}; # whether to zap logfile $zap = 1 if exists $opts{'z'}; # summary only? (no message log) $summary = 1 if exists $opts{'s'}; # reverse sort message log? $reverse = 1 if exists $opts{'r'}; # "machine" output style required? $machine = $opts{'m'} if exists $opts{'m'}; # special counting patterns "tags" # TODO input validation/checking of some sort? if ( exists $opts{'t'} ) { for my $kv ( split ';', $opts{'t'} ) { my ( $name, $regex ) = split ',', $kv; next unless $name and $regex; $tags{$name} = $regex; } } # first, we find out where the logfile is (command line or automagic) if (@ARGV) { $logfile = shift; } else { $logfile = get_logfile_loc($procmailrc); } # parse through the logfile, feeding various global variables parse_logfile(); # see whether there is any new mail to report about... unless ( keys %folders ) { print "No new e-mail.\n"; exit; } # and print off a pretty little report for fun print_report(); # see if machine-style output format is required print_machine($machine) if defined $machine; # dumps a pretty report to STDOUT... sub print_report { my ( $subject, $from, $folder, $size, $date ); print "E-mail report from $oldest to $newest\n\n"; # use format to easily line up everything the way I want... # disable notion of "pages" in resulting format format_lines_per_page STDOUT 99999999; # perl doesn't like format_formfeed on a per-handle basis, ergo: $^L = ''; format STDOUT_TOP = Count Destination Size ---------------------------------------------------------------------- . format STDOUT = @>>>>> @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>>>>>>> commify($folders{$_}), $_, humanize($folder_sizes{$_}) . for ( sort { $a cmp $b } keys %folders ) { write; } # hack to get a "footer" for the above summary including totals... format_name STDOUT "FOOTER"; format FOOTER = ---------------------------------------------------------------------- @>>>>> @>>>>>>>>>>> commify($total_messages), humanize($total_size) . write; if (keys %tag_counts) { format_name STDOUT "TAGS"; format_top_name STDOUT "TAGS_TOP"; format_lines_left STDOUT 0; format TAGS_TOP = Count Test Name ---------------------------------------------------------------------- . format TAGS = @>>>>> @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $tag_counts{$_}, $_ . for (sort keys %tag_counts) { write; } print "\n" unless $summary; } unless ($summary) { # select new format style format_name STDOUT "MESSAGE"; format_top_name STDOUT "MESSAGE_TOP"; # reset line count to force header format_lines_left STDOUT 0; my $order; if ($reverse) { $order = 'newest-at-top'; } else { $order = 'oldest-at-top'; } format MESSAGE_TOP = Message Summaries by Date (@<<<<<<<<<<<<) Time $order ---------------------------------------------------------------------- . format MESSAGE = Subject: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>>> $subject, $date ~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<... $subject From: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $from ~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<... $from To: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>>> $folder, $size . @data = reverse(@data) if $reverse; for (@data) { # use strict complains if use funky calls, so de-ref 'em... $subject = $_->{'_subject'}; $from = $_->{'_from'}; $folder = $_->{'_folder'}; $size = $_->{'_size'}; $date = $_->{'_date'}; # only show size if above certain amount (should be a constant, # or even better based on standard deviation above average size!) if ( $size > 10000 ) { $size = humanize($size); } else { $size = undef; } # just HH:MM now, as that's all I need to see... my ( $wday, $mon, $day, $time, $year ) = split /\s+/, $date; $time =~ s/:\d+$//; $date = $time; write; } } } # machine-style output format (e.g. easily parsed by a machine) sub print_machine { my $file_loc = shift; # open & aquire write lock on the file_loc open( FD, "> $file_loc" ) or die "Error opening $file_loc: $!\n"; FD->autoflush(1); unless ( flock( FD, LOCK_EX | LOCK_NB ) ) { warn "Waiting for write lock on $file_loc ...\n"; flock( FD, LOCK_EX ); } # delete file just to be sure seek( FD, 0, 0 ) or die "Problem seeking $file_loc: $!\n"; truncate( FD, 0 ) or die "Problem truncating $file_loc: $!\n"; my $oldfh = select(FD); # global info print "global\tstart\t", $oldest, "\n"; print "global\tfinish\t", $newest, "\n"; print "global\tcount\t", $total_messages, "\n"; print "global\tsize\t", $total_size, "\n"; # folder info for ( sort { $a cmp $b } keys %folders ) { print "folder\t", $_, "\t", $folders{$_}, "\t", $folder_sizes{$_}, "\n"; } # message info my $t_count = 0; for (@data) { print "message", $t_count, "\tsubject\t", $_->{'_subject'}, "\n"; print "message", $t_count, "\tfrom\t", $_->{'_from'}, "\n"; print "message", $t_count, "\tfolder\t", $_->{'_folder'}, "\n"; print "message", $t_count, "\tsize\t", $_->{'_size'}, "\n"; print "message", $t_count, "\tdate\t", $_->{'_date'}, "\n"; $t_count++; } select($oldfh); flock( FD, LOCK_UN ); close(FD); } # parse procmail's logfile w/ locking using global variables sub parse_logfile { my $count = -1; # track where we are in array of anon hashes # open & aquire write lock on the logfile open( FD, "+< $logfile" ) or die "Error opening $logfile: $!\n"; FD->autoflush(1); unless ( flock( FD, LOCK_EX | LOCK_NB ) ) { warn "Waiting for write lock on $logfile ...\n"; flock( FD, LOCK_EX ); } # are we backing up the file? if ($backup) { open( BACK, "> $backup" ) or die "Error opening $backup: $!\n"; BACK->autoflush(1); unless ( flock( BACK, LOCK_EX | LOCK_NB ) ) { warn "Waiting for write lock on $logfile ...\n"; flock( BACK, LOCK_EX ); } # zap contents of current backup file (just to be sure) seek( BACK, 0, 0 ) or die "Problem seeking $backup: $!\n"; truncate( BACK, 0 ) or die "Problem truncating $backup: $!\n"; } # parse the file while () { # keep backup if necessary print BACK $_ if $backup; chomp; if ( m/^ From \s+(.*?)\s+ ($days_of_week \s+ $months_of_year \s+ \d{1,2} \s \d\d:\d\d:\d\d \s \d\d\d\d)/ox ) { # new From line matched, increment our position in @data array $count++; # and tack various info into anon hash in @data array... $data[$count]->{'_from'} = $1; $data[$count]->{'_date'} = $2; } if (m/^\s*Subject:\s+(.*)$/) { $data[$count]->{'_subject'} = $1; } if ( my ( $folder, $size ) = m/^\s*Folder:\s+(.+?)\s+(\d+)$/ ) { $folder =~ s:/new.+::; $data[$count]->{'_folder'} = $folder; $data[$count]->{'_size'} = $size; # keep separate track of folder locs for ease # of lookup at cost of more memory $folders{$folder}++; # and also amount of data gone to specific destinations $folder_sizes{$folder} += $size; # and some totals for fun $total_messages++; $total_size += $size; } if (keys %tags) { for my $name (keys %tags) { $tag_counts{$name}++ if m/$tags{$name}/; } } } # this (optionally) erases the logfile if ($zap) { seek( FD, 0, 0 ) or die "Problem seeking $logfile: $!\n"; truncate( FD, 0 ) or die "Problem truncating $logfile: $!\n"; } # clean up all our locks/open files if ($backup) { flock( BACK, LOCK_UN ); close(BACK); } flock( FD, LOCK_UN ); close(FD); # finally, extract out the oldest & newest dates from # the data array using a lot of assumptions $oldest = $data[0]->{'_date'}; $newest = $data[$#data]->{'_date'}; } # automagically gets logfile location from .procmailrc file, # single argument is path to the procmailrc file sub get_logfile_loc { my $procmailrc = shift; my $logfile; open FILE, "< $procmailrc" or die "Error opening $procmailrc: $!\n"; while () { chomp; # not too sure about formatting allowed in .procmailrc... if (m/LOGFILE\s*=\s*(.*)$/) { my $logfile_loc = $1; # convert (potentially) shell-notational logfile loc # to a full pathname (e.g. $HOME/.pm_log -> /home/user/.pm_log) $logfile = `echo $logfile_loc`; } } close FILE; return $logfile; } # little generic add commas to data routine... sub commify { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text; } # Inspired from GNU's df -h output, which fixes 133456345 bytes # to be something human readable. # # takes a number, returns formatted string. Also takes optional # hash containing various defaults that affect output style. sub humanize { my $num = shift; # could also take a array ref or hash ref to parse thru? my %prefs = @_; # inherit global prefs, but give preference to user supplied ones unless ( keys %prefs ) { %prefs = %global_prefs; } else { # benchmarking w/ 5.6.0 on Linux PPC & i386 showed this next # faster than direct merge method (p. 145 Perl Cookbook) while ( my ( $k, $v ) = each(%global_prefs) ) { $prefs{$k} = $v unless exists $prefs{$k}; } } # some local working variables my $count = 0; my $prefix = ''; my $tmp = ''; # handle negatives if ( $num < 0 ) { $num = abs $num; $prefix = '-'; } # reduce number to something readable by factor specified while ( $num > $prefs{'factor'} ) { $num /= $prefs{'factor'}; $count++; } # optionally fudge "near" values up to next higher level if ( defined $prefs{'fudge'} ) { if ( $num > ( $prefs{'fudge'} * $prefs{'factor'} ) ) { $count++; $num /= $prefs{'factor'}; } } # no .[1-9] decimal on longer numbers for easier reading # only show decimal if prefs say so if ( length sprintf( "%.f", $num ) > $prefs{'max_human_length'} || !$prefs{'decimal'} ) { $tmp = sprintf( "%.0f", $num ); } else { $tmp = sprintf( "%.1f", $num ); # optionally hack trailing .0 as is not needed $tmp =~ s/\.0$// unless $prefs{'decimal_zero'}; } # return number with proper style applied return $prefix . $tmp . $prefs{'suffix'}->[$count]; } # a generic help blarb sub help { print <<"HELP"; Usage: $0 [options] [LOGFILE] Procmail logfile parser and reporter. Options: -h/-? Display this message -s Summary mode only, don't print listing of messages. -r Reverse sort newest messages to top of message list. -t xx Count lines matching various named regular expressions. -b xx Make a backup of the LOGFILE, where xx is the (optional) target. -z Truncate logfile after parsing it. -m xx Export "machine" style output to file xx. Run perldoc(1) on this script for additional documentation. HELP exit; } __END__ =head1 NAME fm - procmail logfile parser and reporter. =head1 SYNOPSIS Pretty simple, just run: $ fm Or something fancier to empty (-z) the named logfile (~/.pm_logfile), back it up (-b), and also tally up a count of messages with "SPAM" in the subject (-t): $ fm -zb pm_logfile.old -t 'spam,^\s*Subject:.+SPAM' ~/.pm_logfile =head1 DESCRIPTION =head2 Overview fm is a procmail(1) logfile parser that shows you how many messages of what size went where based on the contents of your procmail logfile. =head2 Normal Usage $ fm [options] [LOGFILE] See L<"OPTIONS"> for details on the command line switches supported. If you omit the logfile, fm will attempt to determine its location from your ~/.procmailrc file. Certain preferences must currently be set inside the script itself, such as the options on the humanization of bytes. This might be constrewed as a bug. =head1 OPTIONS This script currently supports the following command line switches: =over 4 =item B<-h>, B<-?> Prints a brief usage note about the script. =item B<-s> Print only the count and size summary of the logfile. By default, a listing of individual messages follows the leading count summary, which might take up more screen space for quick mail summary checks. =item B<-t> I Specify named regular expressions to count logfile lines with. Format is a name for the expression such as "spam", a comma, and a regular expression to test with. Additional C expressions can be added if separated with a semi-colon. For example, one can count how many Subject lines contained English vowels, and additionally how many contained a number. -t 'vowels,^\s*Subject:.+[aeiou];digits,^\s*Subject:.+\d' =item B<-r> Reverse sort the individual message listings. Setting the reverse mode prints the message listings in a newest-at-top format instead of the default format which has the most recent messages down at the bottom of the file. =item B<-b> I Save a backup of the LOGFILE to location I. By default, no backups of the logfile are made. =item B<-z> Zap the logfile. By default, the logfile is preserved. Set the zap option to empty out the current logfile. =item B<-m> I Save a machine-style output to the file I. =back =head1 EXAMPLES If you want backups and to zero the manually located logfile once done: $ fm -zb ~/.pm_logfile.old From cron, that would be something like: 15 6 * * * /usr/local/bin/fm -zb .pm_logfile.old | mail $USER =head1 BUGS =head2 Reporting Bugs Newer versions of this script may be available from: http://sial.org/code/perl/ If the bug is in the latest version, send a report to the author. Patches that fix problems or add new features are welcome. =head2 Known Issues Various other tweaks/massive rewrite might be prudent at some point, but hey, it gets the job done. =head1 SEE ALSO perl(1), procmail(1), procmailrc(5) =head1 AUTHOR Jeremy Mates, http://sial.org/contact/ =head1 COPYRIGHT The author disclaims all copyrights and releases this script into the public domain. =head1 VERSION $Id: fm,v 1.38 2004/04/13 06:06:43 jmates Exp $ =head1 SCRIPT CATEGORIES Mail =cut