-
Notifications
You must be signed in to change notification settings - Fork 66
/
simple_statistics.pl
executable file
·98 lines (76 loc) · 2.14 KB
/
simple_statistics.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env perl
# Copyright 2014 Wei Shen (shenwei356#gmail.com). All rights reserved.
# Use of this source code is governed by a MIT-license
# that can be found in the LICENSE file.
# https://github.com/shenwei356
use strict;
# Usage text printed (via die) when the script is not given exactly two
# command-line arguments.
my $usage = <<USAGE;
usage: simple_statistics <infile> <column>
<infile> is a plain text file. each column should be seperated by TAB(\\t)
<column> is the column number of the table.
USAGE
die $usage unless @ARGV == 2;

my $file   = shift @ARGV;
my $column = shift @ARGV;

# Array reference holding the selected column, one entry per non-empty line.
my $data = get_column_data( $file, $column );

printf "#.\t%d\n", scalar @$data;

# Adding 0 numifies the field text; printing the number with %s avoids the
# truncation toward zero that %d applies to non-integer values (0.95 -> 0),
# while integer, empty and non-numeric inputs still print as they did before.
my $min = min($data);
printf "min.\t%s\n", $min + 0;
my $max = max($data);
printf "max.\t%s\n", $max + 0;

# Mean and standard deviation are reported with two decimals.
my ( $mean, $stdev ) = mean_and_stdev($data);
printf "mean.\t%.2f\n",  $mean;
printf "stdev.\t%.2f\n", $stdev;
# Read one TAB-separated column from a plain text file.
#
# Arguments:
#   $file   - path of the input file.
#   $column - 1-based column number. A value that is not a positive integer
#             triggers a warning and falls back to column 1.
#
# Returns a reference to an array with the field text of that column, one
# entry per non-empty line, in file order. Line endings (LF or CRLF) are
# stripped before splitting.
#
# Dies when the file cannot be opened (the message includes the OS error) or
# when a non-empty line has fewer fields than $column.
sub get_column_data {
    my ( $file, $column ) = @_;

    unless ( $column =~ /^\d+$/ and $column > 0 ) {
        warn
          "column number ($column) should be an integer and greater than 0.\n";
        $column = 1;
    }

    # A lexical handle is closed automatically on every exit path (including
    # the die below) and cannot collide with a package-level IN handle.
    open my $in, "<", $file
      or die "failed to open file: $file: $!\n";

    my @data;

    # A named line variable keeps the caller's global $_ untouched.
    while ( my $line = <$in> ) {
        $line =~ s/\r?\n//;

        # split drops trailing empty fields, so a blank line yields zero
        # fields and is skipped.
        my @fields = split /\t/, $line;
        my $n      = scalar @fields;
        next unless $n > 0;

        if ( $column > $n ) {
            die
"number of columns of this line ($n) is less than given column number ($column)\n";
        }
        push @data, $fields[ $column - 1 ];
    }
    close $in;

    return \@data;
}
# Alternatively, the core List::Util module provides max, min and sum:
# use List::Util qw/max min sum/;
# Return the numerically largest element of a list.
#
# Arguments:
#   $list - reference to an array of numbers.
#
# Returns the maximum value, or undef when the array is empty.
#
# The array is only read, never modified, so the caller can keep using the
# same reference for further calculations.
sub max {
    my ($list) = @_;

    # Seed with the first element by index instead of shift-ing it off, which
    # would remove it from the caller's array.
    my $max = $list->[0];
    for my $value (@$list) {
        $max = $value if $value > $max;
    }
    return $max;
}
# Return the numerically smallest element of a list.
#
# Arguments:
#   $list - reference to an array of numbers.
#
# Returns the minimum value, or undef when the array is empty.
#
# The array is only read, never modified, so the caller can keep using the
# same reference for further calculations.
sub min {
    my ($list) = @_;

    # Seed with the first element by index instead of shift-ing it off, which
    # would remove it from the caller's array.
    my $min = $list->[0];
    for my $value (@$list) {
        $min = $value if $value < $min;
    }
    return $min;
}
# Compute the arithmetic mean and the population standard deviation
# (variance divided by the element count, not count - 1) of a list.
#
# Arguments:
#   $list - reference to an array of numbers; it is not modified.
#
# Returns the two-element list ( $mean, $stdev ), or ( 0, 0 ) when the array
# is empty.
#
# No prototype is declared: a prototype changes how calls are parsed and does
# not validate arguments.
sub mean_and_stdev {
    my ($list) = @_;

    my $count = scalar @$list;
    return ( 0, 0 ) if $count == 0;

    my $sum = 0;
    for my $value (@$list) {
        $sum += $value;
    }
    my $mean = $sum / $count;

    # Two-pass variance: a sum of squared deviations can never be negative.
    # The one-pass form (mean of squares minus squared mean) subtracts two
    # nearly equal numbers, which can round to a tiny negative value and make
    # sqrt die, or lose all precision for data with a large offset.
    my $squared_deviations = 0;
    for my $value (@$list) {
        my $delta = $value - $mean;
        $squared_deviations += $delta * $delta;
    }
    my $variance = $squared_deviations / $count;

    return ( $mean, sqrt $variance );
}