#!/usr/bin/perl -w
#
# $Revision: 1.7 $ $Date: 2012-07-26 17:11:47-04 $
# $Source: /home/vogelke/bin/RCS/avg,v $
# $Host: sys7.com $
# $UUID: 4b8d3e44-30a3-3ece-8737-78cca6f7b5b5 $
#
#<avg: calculate standard statistics for a list of numbers.
# This includes the maximum, mean, median, minimum, mode, standard
# deviation, and sum.
#
# http://gonze.com/blog/code/stats
#
# Changelog: May 13, 2003: created changelog
# Author: Lucas Gonze <lucas@gonze.com>

use strict;

# Comparison routine for sort(): orders values numerically, ascending.
# sort() compares as strings by default, and the median needs the
# values in numeric order.
sub numeric {
    return $a <=> $b;
}

##########
# input
#
# Read one number per line from the files named on the command line, or
# from standard input when none are given.  Everything from a '#' to the
# end of the line is a comment.  Lines that are empty once comments and
# surrounding whitespace have been removed are skipped.

my @a;
while (<>) {
    chomp;
    s/#.*//;                  # zap comments
    s/\A\s+//;                # trim leading whitespace

    # Trim trailing whitespace.  \s also matches a Windows CR, and using
    # \z instead of a dollar sign at the end of the regex sidesteps the
    # cperl-mode indentation problem without needing a helper variable.
    s/\s+\z//;

    next if length() == 0;    # skip empty lines

    # Adding 0 forces numeric conversion.  Text that is not a number goes
    # through Perl's usual string-to-number rules and warns under -w.
    push(@a, $_ + 0);
}

# Put the values in ascending numeric order and count them.  With nothing
# to summarize, report that and exit with a failure status.
my @numbers = sort numeric @a;
my $cnt     = scalar @numbers;
unless ($cnt) {
    print "No input\n";
    exit 1;
}

##########
# average
#
# Arithmetic mean: the total of all values divided by how many there are.

my $sum = 0;
$sum += $_ for @numbers;
my $average = $sum / $cnt;

##########
# median

# Return the median of a list that is already in ascending numeric order:
# the middle element when the count is odd, or the mean of the two middle
# elements when the count is even.  Returns undef for an empty list.
sub _median {
    my @sorted = @_;
    my $n = scalar @sorted;
    return if $n == 0;

    my $mid = int($n / 2);    # explicit truncation; upper-middle index when $n is even
    return $sorted[$mid] if $n % 2;
    return ($sorted[$mid - 1] + $sorted[$mid]) / 2;
}

my $median = _median(@numbers);

##########
# standard deviation

# Return the sample standard deviation of a list of values, given their
# mean as the first argument: the square root of the sum of squared
# deviations from the mean divided by (count - 1).  With fewer than two
# values the divisor would be zero, so the string "UNDEFINED" is
# returned instead.
sub _sample_stddev {
    my ($mean, @values) = @_;
    my $n = scalar @values;
    return "UNDEFINED" if $n < 2;    # Only 1 number?

    my $squares = 0;
    for my $value (@values) {
        $squares += ($value - $mean)**2;
    }
    return sqrt($squares / ($n - 1));
}

my $stddev = _sample_stddev($average, @numbers);

##########
# mode

# Tally how often each value occurs.  While tallying, keep track of the
# value with the highest running count; another value replaces it only
# when its own count becomes strictly greater, so a tie goes to the value
# that reached that count first.
my $mode = $numbers[0];
my %histo;
for my $value (@numbers) {
    my $seen = ++$histo{$value};
    $mode = $value if $seen > $histo{$mode};
}

# Sanity check: if no value occurs more than once, there is no mode.
my $found = grep { $_ > 1 } values %histo;
$mode = 'none' unless $found;

##########
# min and max
#
# Start both extremes at the first value, then widen them while scanning
# the list.
my ($min, $max) = ($numbers[0]) x 2;
for my $value (@numbers) {
    $min = $value if $value < $min;
    $max = $value if $value > $max;
}

##########
# output
#
# One statistic per line, with the labels right-aligned so the colons
# line up.

print
    "             maximum: $max\n",
    "                mean: $average\n",
    "              median: $median\n",
    "             minimum: $min\n",
    "                mode: $mode\n",
    "  standard deviation: $stddev\n",
    "                 sum: $sum\n";

exit 0;
