|
Packit |
723767 |
=head1 NAME
|
|
Packit |
723767 |
|
|
Packit |
723767 |
DBD::SQLite::Cookbook - The DBD::SQLite Cookbook
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 DESCRIPTION
|
|
Packit |
723767 |
|
|
Packit |
723767 |
This is the L<DBD::SQLite> cookbook.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
It is intended to provide a place to keep a variety of functions and
|
|
Packit |
723767 |
formals for use in callback APIs in L<DBD::SQLite>.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 AGGREGATE FUNCTIONS
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head2 Variance
|
|
Packit |
723767 |
|
|
Packit |
723767 |
This is a simple aggregate function which returns a variance. It is
|
|
Packit |
723767 |
adapted from an example implementation in pysqlite.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
package variance;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub new { bless [], shift; }
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub step {
|
|
Packit |
723767 |
my ( $self, $value ) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
push @$self, $value;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub finalize {
|
|
Packit |
723767 |
my $self = $_[0];
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $n = @$self;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# Variance is NULL unless there is more than one row
|
|
Packit |
723767 |
return undef unless $n || $n == 1;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $mu = 0;
|
|
Packit |
723767 |
foreach my $v ( @$self ) {
|
|
Packit |
723767 |
$mu += $v;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
$mu /= $n;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $sigma = 0;
|
|
Packit |
723767 |
foreach my $v ( @$self ) {
|
|
Packit |
723767 |
$sigma += ($v - $mu)**2;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
$sigma = $sigma / ($n - 1);
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $sigma;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# NOTE: If you use an older DBI (< 1.608),
|
|
Packit |
723767 |
# use $dbh->func(..., "create_aggregate") instead.
|
|
Packit |
723767 |
$dbh->sqlite_create_aggregate( "variance", 1, 'variance' );
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The function can then be used as:
|
|
Packit |
723767 |
|
|
Packit |
723767 |
SELECT group_name, variance(score)
|
|
Packit |
723767 |
FROM results
|
|
Packit |
723767 |
GROUP BY group_name;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head2 Variance (Memory Efficient)
|
|
Packit |
723767 |
|
|
Packit |
723767 |
A more efficient variance function, optimized for memory usage at the
|
|
Packit |
723767 |
expense of precision:
|
|
Packit |
723767 |
|
|
Packit |
723767 |
package variance2;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub step {
|
|
Packit |
723767 |
my ( $self, $value ) = @_;
|
|
Packit |
723767 |
my $hash = $self->{hash};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# by truncating and hashing, we can comsume many more data points
|
|
Packit |
723767 |
$value = int($value); # change depending on need for precision
|
|
Packit |
723767 |
# use sprintf for arbitrary fp precision
|
|
Packit |
723767 |
if (exists $hash->{$value}) {
|
|
Packit |
723767 |
$hash->{$value}++;
|
|
Packit |
723767 |
} else {
|
|
Packit |
723767 |
$hash->{$value} = 1;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
$self->{sum} += $value;
|
|
Packit |
723767 |
$self->{count}++;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub finalize {
|
|
Packit |
723767 |
my $self = $_[0];
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# Variance is NULL unless there is more than one row
|
|
Packit |
723767 |
return undef unless $self->{count} > 1;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# calculate avg
|
|
Packit |
723767 |
my $mu = $self->{sum} / $self->{count};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $sigma = 0;
|
|
Packit |
723767 |
while (my ($h, $v) = each %{$self->{hash}}) {
|
|
Packit |
723767 |
$sigma += (($h - $mu)**2) * $v;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
$sigma = $sigma / ($self->{count} - 1);
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $sigma;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The function can then be used as:
|
|
Packit |
723767 |
|
|
Packit |
723767 |
SELECT group_name, variance2(score)
|
|
Packit |
723767 |
FROM results
|
|
Packit |
723767 |
GROUP BY group_name;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head2 Variance (Highly Scalable)
|
|
Packit |
723767 |
|
|
Packit |
723767 |
A third variable implementation, designed for arbitrarily large data sets:
|
|
Packit |
723767 |
|
|
Packit |
723767 |
package variance3;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub new { bless {mu=>0, count=>0, S=>0}, shift; }
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub step {
|
|
Packit |
723767 |
my ( $self, $value ) = @_;
|
|
Packit |
723767 |
$self->{count}++;
|
|
Packit |
723767 |
my $delta = $value - $self->{mu};
|
|
Packit |
723767 |
$self->{mu} += $delta/$self->{count};
|
|
Packit |
723767 |
$self->{S} += $delta*($value - $self->{mu});
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub finalize {
|
|
Packit |
723767 |
my $self = $_[0];
|
|
Packit |
723767 |
return $self->{S} / ($self->{count} - 1);
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The function can then be used as:
|
|
Packit |
723767 |
|
|
Packit |
723767 |
SELECT group_name, variance3(score)
|
|
Packit |
723767 |
FROM results
|
|
Packit |
723767 |
GROUP BY group_name;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 SUPPORT
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Bugs should be reported via the CPAN bug tracker at
|
|
Packit |
723767 |
|
|
Packit |
723767 |
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=DBD-SQLite>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 TO DO
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=over
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item *
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Add more and varied cookbook recipes, until we have enough to
|
|
Packit |
723767 |
turn them into a separate CPAN distribution.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item *
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Create a series of tests scripts that validate the cookbook recipes.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=back
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 AUTHOR
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 COPYRIGHT
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Copyright 2009 - 2012 Adam Kennedy.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
This program is free software; you can redistribute
|
|
Packit |
723767 |
it and/or modify it under the same terms as Perl itself.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The full text of the license can be found in the
|
|
Packit |
723767 |
LICENSE file included with this module.
|