#!/usr/bin/perl -w
#
# $Id: mathu,v 1.12 2005/09/05 20:29:58 jmates Exp $
#
# The author disclaims all copyrights and releases this script into the
# public domain.
#
# Performs various mathematical and statistical operations on
# input data.
#
# Usage: mathu [-l] [-f printf-format] action [action-arguments]
#
# Numbers are read from standard input, one per line; empty lines are
# skipped.  The "percent" and "dist" actions take their arguments from
# the command line.

use strict;
use warnings;

# parse command-line options
use Getopt::Std;

# Dispatch table: action name => handler.
my %ability = (
    sum     => { handle => \&handle_sum, },
    mean    => { handle => \&handle_mean, },
    sdev    => { handle => \&handle_sdev, },
    percent => { handle => \&handle_percent, },
    max     => { handle => \&handle_max, },
    min     => { handle => \&handle_min, },
    basic   => { handle => \&handle_basic, },
    dist    => { handle => \&handle_distribution, },
);

# how to format output numbers with printf
my $format = '%.2f';

# Run only when executed as a script, so the subroutines below can be
# loaded (e.g. by a test) without consuming @ARGV or STDIN.
main() unless caller;

# Parses options, validates the requested action and dispatches to it.
sub main {
    my %opts;
    getopts('lf:', \%opts);

    # lists abilities, good for tab completion systems to query
    if (exists $opts{l}) {
        print join(" ", sort keys %ability), "\n";
        exit 1;    # exit status kept as-is for existing callers
    }

    my $action = shift @ARGV;
    die "error: no action specified\n" unless defined $action;
    die "error: no such action: $action\n" unless exists $ability{$action};

    $format = $opts{f} if exists $opts{f};

    $ability{$action}->{handle}->();
    return;
}

# Returns the next non-empty, chomped line from STDIN, or an empty
# list/undef at end of input.
sub next_value {
    while (defined(my $line = <STDIN>)) {
        chomp $line;
        return $line if $line ne q{};
    }
    return;
}

# Returns every remaining non-empty line of STDIN as a list.
sub read_values {
    my @values;
    while (defined(my $value = next_value())) {
        push @values, $value;
    }
    return @values;
}

# Returns the value run through numbertidy() when it contains a decimal
# point, and unchanged otherwise.
sub tidy_if_fractional {
    my $value = shift;
    return $value =~ /\./ ? numbertidy($value) : $value;
}

# Prints the sum of the input values (0 for empty input).
sub handle_sum {
    my $result = 0;
    while (defined(my $value = next_value())) {
        $result += $value;
    }
    print tidy_if_fractional($result), "\n";
    return;
}

# Prints the arithmetic mean of the input values.
sub handle_mean {
    my @values = read_values();
    die "error: no input data\n" unless @values;

    print tidy_if_fractional(mean(\@values)), "\n";
    return;
}

# Prints the standard deviation of the input values.
sub handle_sdev {
    my @values = read_values();
    die "error: no input data\n" unless @values;

    my $result = standard_deviation(\@values, mean(\@values));
    print tidy_if_fractional($result), "\n";
    return;
}

# Prints the first command-line argument as a percentage of the second,
# formatted with the current output format.
sub handle_percent {
    my ($part, $whole) = @ARGV;
    die "error: percent requires two numbers\n"
      unless defined $part and defined $whole;
    die "error: cannot take a percentage of zero\n" if $whole == 0;

    printf "$format%%\n", $part / $whole * 100;
    return;
}

# Prints the largest input value.
sub handle_max {
    my $result;
    while (defined(my $value = next_value())) {
        $result = $value if !defined $result or $value > $result;
    }
    die "error: no input data\n" unless defined $result;

    print tidy_if_fractional($result), "\n";
    return;
}

# Prints the smallest input value.
sub handle_min {
    my $result;
    while (defined(my $value = next_value())) {
        $result = $value if !defined $result or $value < $result;
    }
    die "error: no input data\n" unless defined $result;

    print tidy_if_fractional($result), "\n";
    return;
}

# Prints count, max, mean, min, sdev and sum of the input values, one
# "name value" pair per line, sorted by name.
sub handle_basic {
    my @values = read_values();
    die "error: no input data\n" unless @values;

    my %results = (
        # Count only the values actually used; $. would also include
        # the empty lines that were skipped.
        count => scalar @values,
        sum   => 0,
        min   => $values[0],
        max   => $values[0],
    );
    for my $value (@values) {
        $results{sum} += $value;
        $results{min} = $value if $value < $results{min};
        $results{max} = $value if $value > $results{max};
    }
    $results{mean} = mean(\@values, $results{sum});
    $results{sdev} = standard_deviation(\@values, $results{mean});

    for my $name (sort keys %results) {
        print $name, " ", tidy_if_fractional($results{$name}), "\n";
    }
    return;
}

# Prints a distribution of the input values as "bucket count" lines.
# The first command-line argument selects how buckets are chosen:
#   b[=N]         N equal-width buckets between min and max (default 2,
#                 capped at one less than the number of values)
#   d[=N[:N...]]  buckets ending at the listed integers that fall within
#                 [min, max], plus max; defaults to the midpoint
#   r             one bucket per integer from min up to max
# The second argument is consumed but not used yet (see TODO in do_dist).
sub handle_distribution {
    my $way    = shift @ARGV;
    my $output = shift @ARGV;
    die "error: dist requires a mode (b[=N], d[=N:N...] or r)\n"
      unless defined $way;

    my @values = read_values();
    die "error: no input data\n" unless @values;

    # Track min/max in input order so ties keep the first spelling seen,
    # which matters for the /\./ test in the "r" mode below.
    my ($min, $max) = ($values[0], $values[0]);
    for my $value (@values) {
        $min = $value if $value < $min;
        $max = $value if $value > $max;
    }

    @values = sort { $a <=> $b } @values;

    my @buckets;
    if ($way =~ /^b=?(\d*)/) {
        my $bucket = $1 || 2;
        $bucket = $#values if $bucket > $#values;

        # Fewer than two buckets cannot be distributed over; bail out
        # before dividing by a zero bucket count.
        die "not enough buckets\n" if $bucket < 2;

        my $interval = ($max - $min) / $bucket;
        @buckets = map { $min + $interval * $_ } 1 .. $bucket;

        # Floating point rounding may leave the top edge just below max.
        $buckets[-1] = $max + 0 if $buckets[-1] < $max;
    }
    elsif ($way =~ /^d=?(\d[\d:]*)?/) {
        my $spec = $1;
        if (defined $spec) {
            my %seen;
            @buckets = sort { $a <=> $b }
              grep { /^\d+$/ && $_ >= $min && $_ <= $max && !$seen{$_}++ }
              split /:/, $spec;
        }
        push @buckets, $min + ($max - $min) / 2 unless @buckets;
        push @buckets, $max unless $buckets[-1] == $max;
    }
    elsif ($way =~ /^r/) {
        # ensure range goes to 1 higher than floating point numbers
        my $top = $max;
        $top = 1 + int($top) if $top =~ /\./;
        @buckets = $min .. $top;
    }
    else {
        die "error: unknown distribution mode: $way\n";
    }

    do_dist(\@values, \@buckets);
    return;
}

# do_dist(\@sorted_values, \@bucket_edges) prints one "edge count" line
# per bucket reached, where count is the number of values that are
# greater than the previous edge and not greater than this one.  Values
# above the last edge are counted in the last bucket.  Dies unless at
# least two edges are given.
sub do_dist {
    my ($values, $bref) = @_;

    die "not enough buckets\n" unless @$bref > 1;

    # TODO support for different output modes (counts, literal
    # values in each bucket, percents of total)

    my $last  = $#{$bref};
    my $i     = 0;
    my $count = 0;

    for my $v (@$values) {
        # Flush this bucket, and any empty ones after it, until $v fits.
        # Never step past the final edge.
        while ($i < $last and $v > $bref->[$i]) {
            dist_print($bref->[$i], $count);
            $count = 0;
            $i++;
        }
        $count++;
    }
    dist_print($bref->[$i], $count);
    return;
}

# Prints "bucket count", with the fractional part of the bucket edge
# removed.
sub dist_print {
    my ($bucket, $count) = @_;

    $bucket =~ s/\.\d+//;
    print "$bucket $count\n";
    return;
}

# numbertidy(@numbers) formats each number with the current output
# format and, for plain decimal results, strips trailing zeros and a
# dangling decimal point ("2.50" becomes "2.5", "3.00" becomes "3").
# Returns the list in list context, the first element in scalar context.
# The arguments themselves are left unmodified.
sub numbertidy {
    my @tidied = map {
        my $text = sprintf $format, $_;
        if ($text =~ /\A[-+]?\d*\.\d+\z/) {
            $text =~ s/0+\z//;
            $text =~ s/\.\z//;
        }
        $text;
    } @_;
    return wantarray ? @tidied : $tidied[0];
}

# $mean = mean(\@array) computes the mean of an array of numbers.  An
# already computed sum may be passed as a second argument to avoid
# adding the numbers up again.  Dies on an empty array.
sub mean {
    my ($arrayref, $sum) = @_;

    die "error: cannot compute mean of empty data set\n" unless @$arrayref;

    unless (defined $sum) {
        $sum = 0;
        $sum += $_ for @$arrayref;
    }
    return $sum / @$arrayref;
}

# $sd = standard_deviation(\@array, $mean) computes the population
# standard deviation of an array of numbers, given their mean.
sub standard_deviation {
    my ($arrayref, $mean) = @_;

    return sqrt(mean([ map { ($_ - $mean)**2 } @$arrayref ]));
}

1;