#!/usr/bin/perl -wT # # $Id: download.pl,v 1.19 2003/01/13 05:28:42 jmates Exp $ # # Copyright (c) 2000-2002, Jeremy Mates. This script is free # software; you can redistribute it and/or modify it under the same # terms as Perl itself. # # Run perldoc(1) on this file for additional documentation. # ###################################################################### # # REQUIREMENTS require 5; use strict; # clean up env settings for taint mode (man perlsec) sub BEGIN { delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; $ENV{'PATH'} = '/bin:/usr/bin'; } ###################################################################### # # MODULES use CGI; use CGI::Carp; use Digest::MD5; use Fcntl qw(:DEFAULT :flock); use File::Basename; use File::Spec; use HTML::Template; use MIME::Types(); ###################################################################### # # VARIABLES my $VERSION; ($VERSION = '$Revision: 1.19 $ ') =~ s/[^0-9.]//g; # Change the following variables to suit your site # Path to directory containing files to download, relative to where this # script is being run from if leading / ommited: my $source = "$ENV{DOCUMENT_ROOT}/../download"; # Path to optional logfile, to which the webserver needs write access. # Path's without leading / will be relative to where script is located. # logfile has trouble with variables, as Taint mode doesn't trust them. # Set logfile to undef to turn off logging. my $logfile = '/tmp/dl.log'; # where to locate the file used by HTML::Template to produce error # output my $tmpl_file = "$ENV{DOCUMENT_ROOT}/template/download.tmpl"; # the following hash of key/value pairs is passed directly into the # HTML::Template object that spits out the error page down in error() # you should modify your $tmpl_file to match any directive... my %tmpl_err_param = ( TITLE => 'File Download Error', CONTACT => $ENV{'SERVER_ADMIN'}, COPYRIGHT => 1900 + (localtime)[5], ); # CGI parameter tag to match e.g. 'download.pl?file=somefile' Script # now checks PATH_INFO first, so this option is included just to be # complete. Set tag to undef to turn off the CGI interface. my $tag = 'file'; # ** no constants to modify below here ** # However, you may want to review the error messages sent back during # the verify/untaint section below! # A list of 'good' characters, negated with the leading ^ such that # everything else is stripped, for CGI filename input cleansing. # This reduces the script's odds of appearing on BugTraq. my $bad_chars = '[^A-Za-z0-9._/-]'; # some misc variables you shouldn't muck around with my ($filedata, $filepath, $media_type); ###################################################################### # # MAIN # check the more elegant PATH_INFO first; otherwise, resort to # CGI parameter method to obtain the filename data we want if (defined $ENV{'PATH_INFO'}) { $filedata = $ENV{'PATH_INFO'}; } elsif (defined $tag) { # if we have a CGI parameter to read in on, try the CGI method: my $query = CGI->new; $filedata = $query->param($tag); } # check whether we actually got something unless (defined $filedata) { warn 'filedata contains no data'; error('Not enough information given in request to proceed with download.'); } # untaint their input by removing bad characters, quit on problems if ($filedata =~ s/$bad_chars//og) { warn 'bad characters detected in request, now \'', $filedata, '\''; error('Illegal characters specified in request.'); } # also remove runs of dot's, e.g. folks trying to ../../../ escape on us # (not an issue with Apache, which translates ../'s before PATH_INFO gets # set to something!) if ($filedata =~ s/\.{2,}//) { warn 'run of dots hacked from input, now \'', $filedata, '\''; error('Invalid directory path specified.'); } # extract directory/file data from the filename data my ($filename, $directory) = fileparse($filedata); undef $directory if $directory =~ m!^\./$!; # determine MIME type to set on the outgoing document # this is for version 0.02 of the MIME::Types module ($media_type) = MIME::Types::by_suffix($filename); unless (defined $media_type) { warn "no known MIME type for ", $filename; error('Not enough information given in request to proceed with download.'); } # construct a filepath to the file, based in $source directory $filepath = File::Spec->catfile($source, $directory, $filename); $filepath =~ s!/{2,}!/!; # correct Spec bug that leaves // in path's # do a few checks for nefarious file-level hacking; if you're especially # paranoid, you could add in a is-file-owned-by-right user/group check... unless (-f $filepath) { warn 'file ', $filepath, ' not a file, or does not exist'; error('Problem locating the specified file.'); } elsif (-l $filepath) { warn 'file ', $filepath, ' is an evil link'; error('Problem locating the specified file.'); } # optionally log this request. If logging is turned on, and the script # can't log the request, the user won't get the file! do_log($filepath, $logfile) if defined $logfile; # Finally, open a filehandle for the file & dump the bugger unless (open FILE, "<$filepath") { warn 'error opening ', $filepath, ': ', $!; error('Problem locating the specified file.'); } else { # proper HTTP header for our MIME type, # with a suggested name of the filename in question (see RFC 2183) print 'Content-Type: ', $media_type, "\n"; print 'Content-Disposition: inline; filename="', $filename, "\"\n\n"; # and dump the raw file out print ; close FILE; } ###################################################################### # # SUBROUTINES # generic error-handling routine, call with a message string. # dumps out HTML::Template formatted page for the user, containing # the message as one of the parameters, then exits the CGI. sub error { my $msg = shift; # create and populate new template my $tmpl = HTML::Template->new( filename => $tmpl_file, die_on_bad_params => 0 ); # add message to template parameters $tmpl_err_param{'MESSAGE'} = $msg; $tmpl->param(%tmpl_err_param); # dump the template print "Content-Type: text/html\n\n", $tmpl->output; exit; } # routine to write log of this event to logfile sub do_log { my $filepath = shift; my $logfile = shift; my $time = localtime(); # do a MD5 hash on time, IP, hostname, and filename run together # this allows line-by-line verification of data my $d = Digest::MD5->new; $d->add($time, $ENV{'REMOTE_ADDR'}, $ENV{'REMOTE_HOST'}, $filepath); my $md5_hash = $d->hexdigest(); # (attempt to) write the logfile out unless (open LOGFILE, ">> $logfile") { warn 'error logging to ', $logfile, ': ', $!; error(); } else { my $oldfh = select LOGFILE; $| = 1; # attempt a write lock on the file unless (flock LOGFILE, LOCK_EX | LOCK_NB) { warn 'warning: waiting for write lock on ', $logfile, "\n"; flock LOGFILE, LOCK_EX; } print $time, "\t", $ENV{'REMOTE_ADDR'}, "\t", $ENV{'REMOTE_HOST'}, "\t", $filepath, "\t", $md5_hash, "\n"; select $oldfh; flock LOGFILE, LOCK_UN; close LOGFILE; } } ###################################################################### # # DOCUMENTATION =head1 NAME download.pl - CGI for restricted file downloads =head1 DESCRIPTION download.pl allows file downloads from an arbitrary directory through itself. This behaviour is ideal where audit trails for publically available files must be maintained, as files are distributed only after checks and logging. The script reads in the passed filename from the C environment variable, determines a MIME type for the file, then dumps the file to the user's browser, using the previously gained MIME type to set the Content-Type and Content-Disposition HTTP Header fields. See RFCs 1341, 1521, 1522 and 2183 for more info on MIME types. However, be aware that even though certain MIME types are valid, the files may not download correctly due to how the browser handles the file type in question (e.g. Java's jar files). On failure, the script returns a L generated page to the user. A sample template file would look something like this: <TMPL_VAR NAME="TITLE">

There was a problem with your request:

If the problem persists, please contact .


Copyright © . All Rights Reserved.

=head1 CONFIGURATION The script should be located in a C directory under your webserver. You should change the C<$source>, C<$logfile>, and C<$tmpl_file> variables to suit your needs in C in the source. Note that while logging is optional, if logging is turned on, and the webserver can not write to the logfile, the user will not get the file. Example: assuming the script is located in C on the webserver, a request for the file C would look like: http://www.example.org/cgi-bin/download.pl/sample Subdirectories are also allowed inside the C<$source> directory, e.g.: http://www.example.org/cgi-bin/download.pl/bar/sample =head1 PARAMETERS In lieu of C information, the script will attempt to grab the information it needs from the CGI parameter given by the C<$tag> variable. A request using the CGI syntax would look like the following, assuming C<$tag> was set to C: http://www.example.org/cgi-bin/download.pl?file=sample =head1 ENVIRONMENT The following environment variables may be necessary for the script to operate properly: =over 4 =item C used to optionally specify paths to download or log folders. =item C default means of extracting which file the browser wants. =item C default C email address displayed in error output. =item C if exists, included with log output. =item C if exists, included with log output. =back =head1 FILES download.pl requires the presence of a L template file somewhere on disk to output errors to. See the C section of the script for more details. =head1 BUGS =head2 Reporting Bugs Newer versions of this script may be available from: http://sial.org/code/perl/ If the bug is in the latest version, send a report to the author. Patches that fix problems or add new features are welcome. =head2 Known Issues No known bugs. =head1 SEE ALSO CGI, L, L, perl(1). =head1 AUTHOR Jeremy Mates, http://sial.org/contact/ =head1 COPYRIGHT Copyright (c) 2000-2002, Jeremy Mates. This script is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 VERSION $Id: download.pl,v 1.19 2003/01/13 05:28:42 jmates Exp $ =cut