summaryrefslogtreecommitdiff
path: root/t/cookbook_variance.t
blob: cd66144d0efccc84ceaf547fc430c0e54e218014 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/perl

use strict;
BEGIN {
	$|  = 1;
	$^W = 1;
}

use t::lib::Test;
use Test::More;
use Test::NoWarnings;

plan tests => 3 * @CALL_FUNCS * 3 + 1;

# The following snippets are copied from Cookbook.pod by hand.
# Don't forget to update here when the pod is updated.
# Or, use/coin something like Test::Snippets for better synching.

SCOPE: {
  package variance;
  
  sub new { bless [], shift; }
  
  sub step {
      my ( $self, $value ) = @_;
  
      push @$self, $value;
  }
  
  sub finalize {
      my $self = $_[0];
  
      my $n = @$self;
  
      # Variance is NULL unless there is more than one row
      return undef unless $n || $n == 1;
  
      my $mu = 0;
      foreach my $v ( @$self ) {
          $mu += $v;
      }
      $mu /= $n;
  
      my $sigma = 0;
      foreach my $v ( @$self ) {
          $sigma += ($v - $mu)**2;
      }
      $sigma = $sigma / ($n - 1);
  
      return $sigma;
  }
}

SCOPE2: {
  package variance2;
  
  sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }
  
  sub step {
      my ( $self, $value ) = @_;
      my $hash = $self->{hash};
  
      # by truncating and hashing, we can comsume many more data points
      $value = int($value); # change depending on need for precision
                            # use sprintf for arbitrary fp precision
      if (exists $hash->{$value}) {
          $hash->{$value}++;
      } else {
          $hash->{$value} = 1;
      }
      $self->{sum} += $value;
      $self->{count}++;
  }
  
  sub finalize {
      my $self = $_[0];
  
      # Variance is NULL unless there is more than one row
      return undef unless $self->{count} > 1;
  
      # calculate avg
      my $mu = $self->{sum} / $self->{count};
  
      my $sigma = 0;
      while (my ($h, $v) = each %{$self->{hash}}) {
          $sigma += (($h - $mu)**2) * $v;
      }
      $sigma = $sigma / ($self->{count} - 1);
  
      return $sigma;
  }
}

SCOPE3: {
  package variance3;
  
  sub new { bless {mu=>0, count=>0, S=>0}, shift; }
  
  sub step {
      my ( $self, $value ) = @_;
      $self->{count}++;
      my $delta = $value - $self->{mu};
      $self->{mu} += $delta/$self->{count};
      $self->{S} += $delta*($value - $self->{mu});
  }
  
  sub finalize {
      my $self = $_[0];
      return $self->{S} / ($self->{count} - 1);
  }
}

foreach my $variance (qw/variance variance2 variance3/) {
	foreach my $call_func (@CALL_FUNCS) {
		my $dbh = connect_ok( PrintError => 0 );
		$dbh->do('CREATE TABLE results (group_name, score)');
		my $sth = $dbh->prepare('INSERT INTO results VALUES (?,?)');
		$sth->execute('foo', 100);
		$sth->execute('foo', 50);
		$sth->finish;

		$dbh->$call_func($variance, 1, $variance, "create_aggregate");

		my $result = $dbh->selectrow_arrayref(<<"END_SQL");
		    SELECT group_name, ${variance}(score)
		    FROM results
		    GROUP BY group_name;
END_SQL

		is $result->[0] => 'foo';
		is $result->[1] => 1250;
	}
}