|
Packit |
723767 |
#======================================================================
|
|
Packit |
723767 |
package DBD::SQLite::VirtualTable::FileContent;
|
|
Packit |
723767 |
#======================================================================
|
|
Packit |
723767 |
use strict;
|
|
Packit |
723767 |
use warnings;
|
|
Packit |
723767 |
use base 'DBD::SQLite::VirtualTable';
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my %option_ok = map {($_ => 1)} qw/source content_col path_col
|
|
Packit |
723767 |
expose root get_content/;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my %defaults = (
|
|
Packit |
723767 |
content_col => "content",
|
|
Packit |
723767 |
path_col => "path",
|
|
Packit |
723767 |
expose => "*",
|
|
Packit |
723767 |
get_content => "DBD::SQLite::VirtualTable::FileContent::get_content",
|
|
Packit |
723767 |
);
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
# object instanciation
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub NEW {
|
|
Packit |
723767 |
my $class = shift;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $self = $class->_PREPARE_SELF(@_);
|
|
Packit |
723767 |
|
|
Packit |
723767 |
local $" = ", "; # for array interpolation in strings
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# initial parameter check
|
|
Packit |
723767 |
!@{$self->{columns}}
|
|
Packit |
723767 |
or die "${class}->NEW(): illegal options: @{$self->{columns}}";
|
|
Packit |
723767 |
$self->{options}{source}
|
|
Packit |
723767 |
or die "${class}->NEW(): missing (source=...)";
|
|
Packit |
723767 |
my @bad_options = grep {!$option_ok{$_}} keys %{$self->{options}};
|
|
Packit |
723767 |
!@bad_options
|
|
Packit |
723767 |
or die "${class}->NEW(): bad options: @bad_options";
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# defaults ... tempted to use //= but we still want to support perl 5.8 :-(
|
|
Packit |
723767 |
foreach my $k (keys %defaults) {
|
|
Packit |
723767 |
defined $self->{options}{$k}
|
|
Packit |
723767 |
or $self->{options}{$k} = $defaults{$k};
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# get list of columns from the source table
|
|
Packit |
723767 |
my $src_table = $self->{options}{source};
|
|
Packit |
723767 |
my $sql = "PRAGMA table_info($src_table)";
|
|
Packit |
723767 |
my $dbh = ${$self->{dbh_ref}}; # can't use method ->dbh, not blessed yet
|
|
Packit |
723767 |
my $src_info = $dbh->selectall_arrayref($sql, {Slice => [1, 2]});
|
|
Packit |
723767 |
@$src_info
|
|
Packit |
723767 |
or die "${class}->NEW(source=$src_table): no such table in database";
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# associate each source colname with its type info or " " (should eval true)
|
|
Packit |
723767 |
my %src_col = map { ($_->[0] => $_->[1] || " ") } @$src_info;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# check / complete the exposed columns
|
|
Packit |
723767 |
my @exposed_cols;
|
|
Packit |
723767 |
if ($self->{options}{expose} eq '*') {
|
|
Packit |
723767 |
@exposed_cols = map {$_->[0]} @$src_info;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
else {
|
|
Packit |
723767 |
@exposed_cols = split /\s*,\s*/, $self->{options}{expose};
|
|
Packit |
723767 |
my @bad_cols = grep { !$src_col{$_} } @exposed_cols;
|
|
Packit |
723767 |
die "table $src_table has no column named @bad_cols" if @bad_cols;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
for (@exposed_cols) {
|
|
Packit |
723767 |
die "$class: $self->{options}{content_col} cannot be both the "
|
|
Packit |
723767 |
. "content_col and an exposed col" if $_ eq $self->{options}{content_col};
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# build the list of columns for this table
|
|
Packit |
723767 |
$self->{columns} = [ "$self->{options}{content_col} TEXT",
|
|
Packit |
723767 |
map {"$_ $src_col{$_}"} @exposed_cols ];
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# acquire a coderef to the get_content() implementation, which
|
|
Packit |
723767 |
# was given as a symbolic reference in %options
|
|
Packit |
723767 |
no strict 'refs';
|
|
Packit |
723767 |
$self->{get_content} = \ &{$self->{options}{get_content}};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
bless $self, $class;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub _build_headers {
|
|
Packit |
723767 |
my $self = shift;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $cols = $self->sqlite_table_info;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# headers : names of columns, without type information
|
|
Packit |
723767 |
$self->{headers} = [ map {$_->{name}} @$cols ];
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
# method for initiating a search
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub BEST_INDEX {
|
|
Packit |
723767 |
my ($self, $constraints, $order_by) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
$self->_build_headers if !$self->{headers};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my @conditions;
|
|
Packit |
723767 |
my $ix = 0;
|
|
Packit |
723767 |
foreach my $constraint (grep {$_->{usable}} @$constraints) {
|
|
Packit |
723767 |
my $col = $constraint->{col};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# if this is the content column, skip because we can't filter on it
|
|
Packit |
723767 |
next if $col == 0;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# for other columns, build a fragment for SQL WHERE on the underlying table
|
|
Packit |
723767 |
my $colname = $col == -1 ? "rowid" : $self->{headers}[$col];
|
|
Packit |
723767 |
push @conditions, "$colname $constraint->{op} ?";
|
|
Packit |
723767 |
$constraint->{argvIndex} = $ix++;
|
|
Packit |
723767 |
$constraint->{omit} = 1; # SQLite doesn't need to re-check the op
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# TODO : exploit $order_by to add ordering clauses within idxStr
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $outputs = {
|
|
Packit |
723767 |
idxNum => 1,
|
|
Packit |
723767 |
idxStr => join(" AND ", @conditions),
|
|
Packit |
723767 |
orderByConsumed => 0,
|
|
Packit |
723767 |
estimatedCost => 1.0,
|
|
Packit |
723767 |
estimatedRows => undef,
|
|
Packit |
723767 |
};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $outputs;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
# method for preventing updates
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub _SQLITE_UPDATE {
|
|
Packit |
723767 |
my ($self, $old_rowid, $new_rowid, @values) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
die "attempt to update a readonly virtual table";
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
# file slurping function (not a method!)
|
|
Packit |
723767 |
#----------------------------------------------------------------------
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub get_content {
|
|
Packit |
723767 |
my ($path, $root) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
$path = "$root/$path" if $root;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $content = "";
|
|
Packit |
723767 |
if (open my $fh, "<", $path) {
|
|
Packit |
723767 |
local $/; # slurp the whole file into a scalar
|
|
Packit |
723767 |
$content = <$fh>;
|
|
Packit |
723767 |
close $fh;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
else {
|
|
Packit |
723767 |
warn "can't open $path";
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $content;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
#======================================================================
|
|
Packit |
723767 |
package DBD::SQLite::VirtualTable::FileContent::Cursor;
|
|
Packit |
723767 |
#======================================================================
|
|
Packit |
723767 |
use strict;
|
|
Packit |
723767 |
use warnings;
|
|
Packit |
723767 |
use base "DBD::SQLite::VirtualTable::Cursor";
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub FILTER {
|
|
Packit |
723767 |
my ($self, $idxNum, $idxStr, @values) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $vtable = $self->{vtable};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# build SQL
|
|
Packit |
723767 |
local $" = ", ";
|
|
Packit |
723767 |
my @cols = @{$vtable->{headers}};
|
|
Packit |
723767 |
$cols[0] = 'rowid'; # replace the content column by the rowid
|
|
Packit |
723767 |
push @cols, $vtable->{options}{path_col}; # path col in last position
|
|
Packit |
723767 |
my $sql = "SELECT @cols FROM $vtable->{options}{source}";
|
|
Packit |
723767 |
$sql .= " WHERE $idxStr" if $idxStr;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
# request on the index table
|
|
Packit |
723767 |
my $dbh = $vtable->dbh;
|
|
Packit |
723767 |
$self->{sth} = $dbh->prepare($sql)
|
|
Packit |
723767 |
or die DBI->errstr;
|
|
Packit |
723767 |
$self->{sth}->execute(@values);
|
|
Packit |
723767 |
$self->{row} = $self->{sth}->fetchrow_arrayref;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub EOF {
|
|
Packit |
723767 |
my ($self) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return !$self->{row};
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub NEXT {
|
|
Packit |
723767 |
my ($self) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
$self->{row} = $self->{sth}->fetchrow_arrayref;
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub COLUMN {
|
|
Packit |
723767 |
my ($self, $idxCol) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $idxCol == 0 ? $self->file_content : $self->{row}[$idxCol];
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub ROWID {
|
|
Packit |
723767 |
my ($self) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $self->{row}[0];
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
sub file_content {
|
|
Packit |
723767 |
my ($self) = @_;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
my $root = $self->{vtable}{options}{root};
|
|
Packit |
723767 |
my $path = $self->{row}[-1];
|
|
Packit |
723767 |
my $get_content_func = $self->{vtable}{get_content};
|
|
Packit |
723767 |
|
|
Packit |
723767 |
return $get_content_func->($path, $root);
|
|
Packit |
723767 |
}
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
1;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
__END__
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 NAME
|
|
Packit |
723767 |
|
|
Packit |
723767 |
DBD::SQLite::VirtualTable::FileContent -- virtual table for viewing file contents
|
|
Packit |
723767 |
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 SYNOPSIS
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Within Perl :
|
|
Packit |
723767 |
|
|
Packit |
723767 |
$dbh->sqlite_create_module(fcontent => "DBD::SQLite::VirtualTable::FileContent");
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Then, within SQL :
|
|
Packit |
723767 |
|
|
Packit |
723767 |
CREATE VIRTUAL TABLE tbl USING fcontent(
|
|
Packit |
723767 |
source = src_table,
|
|
Packit |
723767 |
content_col = content,
|
|
Packit |
723767 |
path_col = path,
|
|
Packit |
723767 |
expose = "path, col1, col2, col3", -- or "*"
|
|
Packit |
723767 |
root = "/foo/bar"
|
|
Packit |
723767 |
get_content = Foo::Bar::read_from_file
|
|
Packit |
723767 |
);
|
|
Packit |
723767 |
|
|
Packit |
723767 |
SELECT col1, path, content FROM tbl WHERE ...;
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 DESCRIPTION
|
|
Packit |
723767 |
|
|
Packit |
723767 |
A "FileContent" virtual table is bound to some underlying I
|
|
Packit |
723767 |
table>, which has a column containing paths to files. The virtual
|
|
Packit |
723767 |
table behaves like a database view on the source table, with an added
|
|
Packit |
723767 |
column which exposes the content from those files.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
This is especially useful as an "external content" to some
|
|
Packit |
723767 |
fulltext table (see L<DBD::SQLite::Fulltext_search>) : the index
|
|
Packit |
723767 |
table stores some metadata about files, and then the fulltext engine
|
|
Packit |
723767 |
can index both the metadata and the file contents.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 PARAMETERS
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Parameters for creating a C<FileContent> virtual table are
|
|
Packit |
723767 |
specified within the C<CREATE VIRTUAL TABLE> statement, just
|
|
Packit |
723767 |
like regular column declarations, but with an '=' sign.
|
|
Packit |
723767 |
Authorized parameters are :
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=over
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item C<source>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The name of the I<source table>.
|
|
Packit |
723767 |
This parameter is mandatory. All other parameters are optional.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item C<content_col>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The name of the virtual column exposing file contents.
|
|
Packit |
723767 |
The default is C<content>.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item C<path_col>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
The name of the column in C<source> that contains paths to files.
|
|
Packit |
723767 |
The default is C<path>.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item C<expose>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
A comma-separated list (within double quotes) of source column names
|
|
Packit |
723767 |
to be exposed by the virtual table. The default is C<"*">, which means
|
|
Packit |
723767 |
all source columns.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item C<root>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
An optional root directory that will be prepended to the I<path> column
|
|
Packit |
723767 |
when opening files.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=item C<get_content>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Fully qualified name of a Perl function for reading file contents.
|
|
Packit |
723767 |
The default implementation just slurps the entire file into a string;
|
|
Packit |
723767 |
but this hook can point to more sophisticated implementations, like for
|
|
Packit |
723767 |
example a function that would remove html tags. The hooked function is
|
|
Packit |
723767 |
called like this :
|
|
Packit |
723767 |
|
|
Packit |
723767 |
$file_content = $get_content->($path, $root);
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=back
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 AUTHOR
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Laurent Dami E<lt>dami@cpan.orgE<gt>
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=head1 COPYRIGHT AND LICENSE
|
|
Packit |
723767 |
|
|
Packit |
723767 |
Copyright Laurent Dami, 2014.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
This library is free software; you can redistribute it and/or modify
|
|
Packit |
723767 |
it under the same terms as Perl itself.
|
|
Packit |
723767 |
|
|
Packit |
723767 |
=cut
|