#!/usr/bin/perl
#
# $Id: download_log_parse.pl,v 1.8 2003/01/13 05:28:42 jmates Exp $
#
# Copyright (c) 2000-2002, Jeremy Mates.  This script is free
# software; you can redistribute it and/or modify it under the same
# terms as Perl itself.
#
# Run perldoc(1) on this file for additional documentation.
#
######################################################################
#
# REQUIREMENTS

use 5.006;    # three-argument open, lexical filehandles, warnings pragma

use strict;
use warnings;

######################################################################
#
# MODULES

use Carp;           # better error reporting
use Getopt::Std;    # command line option processing

use Digest::MD5 qw(md5_hex);
use Fcntl qw(:DEFAULT :flock);
use File::Basename;
use File::Spec;
use IO::Handle;
use Mail::Send;

######################################################################
#
# VARIABLES

my $VERSION;
($VERSION = '$Revision: 1.8 $ ') =~ s/[^0-9.]//g;

# These file-scoped lexicals are shared with the subroutines below
# (make_report reads %idx, $filename and the time variables).
my (%opts, %idx, $destdir, $filename, $filedir, $filepath, @lf);

# truncate the logfile once it has been read, unless -p is supplied
my $clear_logs = 1;

# base subject of the email message sent out
my $subject = 'Download report';

# hash holding array positions for the multidimensional logfile array
%idx = (
    'index' => 0,    # line numbers from file, for ease of sorting
    'time'  => 1,
    'ip'    => 2,
    'host'  => 3,
    'file'  => 4,
    'check' => 5,    # MAC checksum of time, ip, host, and file merged
);

######################################################################
#
# MAIN

# extract current time into various variables for later use
my ($sec, $min, $hours, $day, $month, $year) = (localtime)[0 .. 5];
my $month_name =
  (qw:Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec:)[$month];

# fix localtime's notion of "time"
$year += 1900;
$month++;

# parse command-line options; bail out on an unknown option instead of
# carrying on, as a mistyped -p would otherwise still zap the logfile
getopts('h?d:e:pn', \%opts) or help();
help() if exists $opts{'h'} or exists $opts{'?'};

# figure out other command line opts
$destdir    = $opts{'d'} if exists $opts{'d'};
$clear_logs = 0          if exists $opts{'p'};

# file to go poke should be first argument of those remaining
$filepath = $ARGV[0];
help() unless defined $filepath and length $filepath;

# chop up the file given
($filename, $filedir) = fileparse($filepath);
$destdir = $filedir unless defined $destdir;

# open logfile read/write, with error checking & exclusive write lock
open my $log_fh, '+<', $filepath
  or die 'error opening ', $filepath, ': ', $!, "\n";
$log_fh->autoflush(1);

# attempt a non-blocking write lock first so we can say we are waiting
unless (flock $log_fh, LOCK_EX | LOCK_NB) {
    warn 'warning: waiting for write lock on ', $filepath, "\n";
    flock $log_fh, LOCK_EX
      or die 'error locking ', $filepath, ': ', $!, "\n";
}

# read the file into array of arrays: [ line number, tab-split fields ]
while (my $line = <$log_fh>) {
    chomp $line;
    next if $line =~ m/^\s*$/;    # skip blank lines
    push @lf, [ $., split /\t/, $line ];
}

# zap the logfile if called for
# NOTE(review): the log is emptied before the report has been written
# or mailed, so a failure further down loses the entries; confirm
# whether that ordering is intentional.
if ($clear_logs) {
    seek $log_fh, 0, 0
      or die 'error seeking ', $filepath, ': ', $!, "\n";
    truncate $log_fh, 0
      or die 'error truncating ', $filepath, ': ', $!, "\n";
}

# clean up
flock $log_fh, LOCK_UN;
close $log_fh;

# brief error checking
unless (@lf) {
    send_email(
        $opts{'e'},
        'Download report: empty logfile',
        'No report created due to an empty logfile.'
    ) if exists $opts{'e'};
    die "error: no log entries gained from $filepath";
}

# produce report & checksum thereof
my ($r_report, $r_checksum) = make_report(\@lf);

# date stamp shared by the report file name and the email subject
my $datestamp = sprintf '%04d%02d%02d', $year, $month, $day;

# see whether we have to log the report to files on disk
unless (exists $opts{'n'}) {

    # the file name is appended rather than placed in the sprintf
    # format, so a '%' in the logfile name cannot corrupt it
    my $destpath =
      File::Spec->catfile($destdir, $filename . '_' . $datestamp);

    _write_file($destpath,          $$r_report);      # log file
    _write_file($destpath . '.md5', $$r_checksum);    # md5 file
}

# send out some email to the same effect, if required...
if (exists $opts{'e'}) {
    send_email(
        $opts{'e'},
        $subject . ' for ' . $filename . ' (' . $datestamp . ')',
        "MD5 checksum: " . $$r_checksum . "\n\n" . $$r_report
    );
}

# and finally check for STDOUT option... (trailing - on list of args to script)
if ($ARGV[$#ARGV] eq '-') {
    print 'MD5 checksum: ', $$r_checksum, "\n\n";
    print $$r_report;
}

######################################################################
#
# SUBROUTINES

#
# _write_file
#
# Writes a string to the named file, replacing any previous content.
# Failures are reported with warn rather than die so that the email
# and STDOUT output can still go ahead.  Returns true on success and
# nothing on failure.
sub _write_file {
    my ($path, $content) = @_;

    my $fh;
    unless (open $fh, '>', $path) {
        warn 'error writing ', $path, ': ', $!;
        return;
    }

    print {$fh} $content;

    # buffered write errors only surface when the handle is closed
    unless (close $fh) {
        warn 'error closing ', $path, ': ', $!;
        return;
    }

    return 1;
}

#
# make_report
#
# Given an array ref, will attempt to produce a report from the
# array.  Each element is itself an array ref laid out as described by
# %idx.  Returns ref to the report (a string) and a ref to a checksum
# on the report (another string).
sub make_report {
    my $aref = shift;

    my ($report, $md5_sum, $previous_file);

    # begin report header
    $report =
        "Download report for $filename as of $month_name "
      . sprintf("%02d %02d:%02d:%02d %04d", $day, $hours, $min, $sec, $year)
      . "\n";

    # short or malformed lines yield missing fields; treat those as
    # empty strings so sorting and checksumming stay well defined
    my @entries;
    for my $row (@$aref) {
        push @entries,
          [ map { defined $row->[$_] ? $row->[$_] : '' }
              0 .. $idx{'check'} ];
    }

    # now need to sort the array by filename, subsort on order in file
    my @sorted = sort {
        $a->[ $idx{'file'} ] cmp $b->[ $idx{'file'} ]
          || $a->[ $idx{'index'} ] <=> $b->[ $idx{'index'} ]
    } @entries;

    for my $entry (@sorted) {
        my ($timestamp, $ip, $hostname, $file, $checksum) =
          @{$entry}[ @idx{qw(time ip host file check)} ];

        # remake the checksum in same manner as nudl.cgi does
        my $testsum = md5_hex($timestamp, $ip, $hostname, $file);
        my $warning = $testsum ne $checksum;
        if ($warning) {
            warn 'Checksum mismatch for ', $file, ' on ', $timestamp,
              ' from ', $ip, "\n";
        }

        # print a new sub-header for each individual file
        if (!defined $previous_file or $file ne $previous_file) {
            $report .= "\nFile: " . $file . "\n\n";
            $report .=
              'Time' . ' ' x 18 . 'IP Address' . ' ' x 7 . "Hostname\n";
            $previous_file = $file;
        }

        # append annoying ! if checksum failed
        $report .= '!' if $warning;

        # hack redundant weekday off of timestamp to save space; unpack
        # dies on a string shorter than the skip, so guard the length
        my $short_time =
          length($timestamp) >= 4
          ? unpack("x4 A*", $timestamp)
          : $timestamp;

        # pad the address to 17 columns, always keeping one space
        # before the hostname for addresses wider than that
        my $pad = 17 - length($ip);
        $pad = 1 if $pad < 1;

        $report .= $short_time . ' ' . $ip . ' ' x $pad;

        # and the hostname, if there is one
        $report .= ($ip eq $hostname ? 'n/a' : $hostname) . "\n";
    }

    # report should now be complete; run a MD5 vs. it...
    # checksum should equal `md5sum $archive_file` results on command line
    $md5_sum = md5_hex($report);

    return (\$report, \$md5_sum);
}

######################################################################
#
# help
#
# prints out a canned help message, then exits program
sub help {
    print <<"HELP";
Usage: $0 [options] path_to_logfile_to_parse [-]

A logfile parser for the download.pl CGI script.

Options are detailed below.  Specifying a '-' as the last item on the
command line will get the checksum then the report ejected to STDOUT,
perfect for piping to other utilities.

Otherwise, by default, the script attempts to create a logfile_date
report and a logfile_date.md5 checksum of the report in the same
directory as the logfile.

Options:
  -h/-?      Display this message

  -p         Preserve logfile (do not zap it when done)
  -n         Suppress the creation of the report and md5 files
  -d dir     Use dir as destination for resulting files instead of same
             directory as input logfile (ignored if the -n option is set).
  -e emails  Send log message to comma separated list of emails

HELP
    exit;
}

######################################################################
#
# send_email
#
# A generic little routine that sends out email via Mail::Send, part
# of the MailTools module on CPAN.
#
# Arguments, in order: recipient(s), subject, body, and the delivery
# method handed to Mail::Send's open().  Each falls back to a default
# when omitted or false.
#
# You must send your complete email message to this routine; it adds
# nothing to the message.
sub send_email {
    my $to      = shift || 'postmaster';
    my $subject = shift || 'Erronious send_email() subroutine call';
    my $body    = shift || 'As subject!';
    my $proto   = shift || 'sendmail';    # method to use in outgoing email

    # let SMTP figure out who is sending this email...
    # my $from = $from_email;

    my $msg = Mail::Send->new;

    $msg->to($to);

    # $msg->set('From', $from);
    $msg->subject($subject);

    my $fh = $msg->open($proto);
    print {$fh} $body;
    $fh->close;
}

######################################################################
#
# DOCUMENTATION

=head1 NAME

download_log_parse.pl - a download.pl logfile parser.

=head1 SYNOPSIS

From crontab, one might configure the following:

  1 0 * * 1 /www/scripts/download_log_parse.pl \
    -e webmaster@example.org /tmp/dl.log

To send weekly reports to the webmaster from the file /tmp/dl.log.

=head1 DESCRIPTION

download_log_parse.pl parses the logfile download.pl builds up over
time, forming a date-based report file in the same directory as the
logfile.  Additionally, it verifies the MD5 sums on each logfile line,
and creates an MD5 sum for the resulting report.
If MD5 mismatches occur, report lines will be prefixed with an
exclamation point:

  Download report for dl.log as of Aug 22 18:02:13 2000

  File: /www/example.org/html/../download/bar/foo.txt

  Time                  IP Address       Hostname
  !Aug 22 17:49:17 2000 10.0.0.2         client.example.org
  Aug 22 17:52:29 2000 10.0.0.2         client.example.org

  File: /www/example.org/html/../download/foo.txt

  Time                  IP Address       Hostname
  !Aug 22 17:37:47 2000 10.0.0.2         client.example.org

=head1 USAGE

  $ download_log_parse.pl [OPTIONS] /path/to/logfile [-]

Options are detailed below.  Specifying a '-' as the last item on the
command line will get the checksum then the report ejected to STDOUT,
perfect for piping to other utilities.

Otherwise, by default, the script attempts to create a logfile_date
report and a logfile_date.md5 checksum of the report in the same
directory as the logfile.

=head1 OPTIONS

download_log_parse.pl recognizes the following options:

=over 4

=item B<-h>, B<-?>

Display a short help blurb.

=item B<-p>

Preserve the logfile (default is to zap it).

=item B<-n>

Suppress the creation of the report and md5 files.  Use with the
trailing - feature to silently pipe to some other utility.

=item B<-d> I<dir>

Use the specified directory for resulting report and md5 sum file
instead of the default, which is the same directory as the specified
logfile.

=item B<-e> I<email>,[I<email>,..I<email>]

Send log message to each of comma separated list of recipients.

=back

=head1 BUGS

=head2 Reporting Bugs

Newer versions of this script may be available from:

http://sial.org/code/perl/

If the bug is in the latest version, send a report to the author.
Patches that fix problems or add new features are welcome.

=head2 Known Issues

No known bugs.

=head1 SEE ALSO

perl(1).

=head1 AUTHOR

Jeremy Mates, http://sial.org/contact/

=head1 COPYRIGHT

Copyright (c) 2000-2002, Jeremy Mates.  This script is free
software; you can redistribute it and/or modify it under the same
terms as Perl itself.

=head1 VERSION

  $Id: download_log_parse.pl,v 1.8 2003/01/13 05:28:42 jmates Exp $

=cut