Blame lib/DBD/SQLite/VirtualTable/FileContent.pm

Packit 723767
#======================================================================
Packit 723767
package DBD::SQLite::VirtualTable::FileContent;
Packit 723767
#======================================================================
Packit 723767
use strict;
Packit 723767
use warnings;
Packit 723767
use base 'DBD::SQLite::VirtualTable';
Packit 723767
Packit 723767
my %option_ok = map {($_ => 1)} qw/source content_col path_col
Packit 723767
                                   expose root get_content/;
Packit 723767
Packit 723767
my %defaults = (
Packit 723767
  content_col => "content",
Packit 723767
  path_col    => "path",
Packit 723767
  expose      => "*",
Packit 723767
  get_content => "DBD::SQLite::VirtualTable::FileContent::get_content",
Packit 723767
);
Packit 723767
Packit 723767
Packit 723767
#----------------------------------------------------------------------
Packit 723767
# object instanciation
Packit 723767
#----------------------------------------------------------------------
Packit 723767
Packit 723767
sub NEW {
Packit 723767
  my $class = shift;
Packit 723767
Packit 723767
  my $self  = $class->_PREPARE_SELF(@_);
Packit 723767
Packit 723767
  local $" = ", "; # for array interpolation in strings
Packit 723767
Packit 723767
  # initial parameter check
Packit 723767
  !@{$self->{columns}}
Packit 723767
    or die "${class}->NEW(): illegal options: @{$self->{columns}}";
Packit 723767
  $self->{options}{source}
Packit 723767
    or die "${class}->NEW(): missing (source=...)";
Packit 723767
  my @bad_options = grep {!$option_ok{$_}} keys %{$self->{options}};
Packit 723767
  !@bad_options
Packit 723767
    or die "${class}->NEW(): bad options: @bad_options";
Packit 723767
Packit 723767
  # defaults ... tempted to use //= but we still want to support perl 5.8 :-(
Packit 723767
  foreach my $k (keys %defaults) {
Packit 723767
    defined $self->{options}{$k}
Packit 723767
      or $self->{options}{$k} = $defaults{$k};
Packit 723767
  }
Packit 723767
Packit 723767
  # get list of columns from the source table
Packit 723767
  my $src_table  = $self->{options}{source};
Packit 723767
  my $sql        = "PRAGMA table_info($src_table)";
Packit 723767
  my $dbh        = ${$self->{dbh_ref}}; # can't use method ->dbh, not blessed yet
Packit 723767
  my $src_info   = $dbh->selectall_arrayref($sql, {Slice => [1, 2]});
Packit 723767
  @$src_info
Packit 723767
    or die "${class}->NEW(source=$src_table): no such table in database";
Packit 723767
Packit 723767
  # associate each source colname with its type info or " " (should eval true)
Packit 723767
  my %src_col = map  { ($_->[0] => $_->[1] || " ") } @$src_info;
Packit 723767
Packit 723767
Packit 723767
  # check / complete the exposed columns
Packit 723767
  my @exposed_cols;
Packit 723767
  if ($self->{options}{expose} eq '*') {
Packit 723767
    @exposed_cols = map {$_->[0]} @$src_info;
Packit 723767
  }
Packit 723767
  else {
Packit 723767
    @exposed_cols = split /\s*,\s*/, $self->{options}{expose};
Packit 723767
    my @bad_cols  = grep { !$src_col{$_} } @exposed_cols;
Packit 723767
    die "table $src_table has no column named @bad_cols" if @bad_cols;
Packit 723767
  }
Packit 723767
  for (@exposed_cols) {
Packit 723767
    die "$class: $self->{options}{content_col} cannot be both the "
Packit 723767
      . "content_col and an exposed col" if $_ eq $self->{options}{content_col};
Packit 723767
  }
Packit 723767
Packit 723767
  # build the list of columns for this table
Packit 723767
  $self->{columns} = [ "$self->{options}{content_col} TEXT",
Packit 723767
                       map {"$_ $src_col{$_}"} @exposed_cols ];
Packit 723767
Packit 723767
  # acquire a coderef to the get_content() implementation, which
Packit 723767
  # was given as a symbolic reference in %options
Packit 723767
  no strict 'refs';
Packit 723767
  $self->{get_content} = \ &{$self->{options}{get_content}};
Packit 723767
Packit 723767
  bless $self, $class;
Packit 723767
}
Packit 723767
Packit 723767
sub _build_headers {
Packit 723767
  my $self = shift;
Packit 723767
Packit 723767
  my $cols = $self->sqlite_table_info;
Packit 723767
Packit 723767
  # headers : names of columns, without type information
Packit 723767
  $self->{headers} = [ map {$_->{name}} @$cols ];
Packit 723767
}
Packit 723767
Packit 723767
Packit 723767
#----------------------------------------------------------------------
Packit 723767
# method for initiating a search
Packit 723767
#----------------------------------------------------------------------
Packit 723767
Packit 723767
sub BEST_INDEX {
Packit 723767
  my ($self, $constraints, $order_by) = @_;
Packit 723767
Packit 723767
  $self->_build_headers if !$self->{headers};
Packit 723767
Packit 723767
  my @conditions;
Packit 723767
  my $ix = 0;
Packit 723767
  foreach my $constraint (grep {$_->{usable}} @$constraints) {
Packit 723767
    my $col     = $constraint->{col};
Packit 723767
Packit 723767
    # if this is the content column, skip because we can't filter on it
Packit 723767
    next if $col == 0;
Packit 723767
Packit 723767
    # for other columns, build a fragment for SQL WHERE on the underlying table
Packit 723767
    my $colname = $col == -1 ? "rowid" : $self->{headers}[$col];
Packit 723767
    push @conditions, "$colname $constraint->{op} ?";
Packit 723767
    $constraint->{argvIndex} = $ix++;
Packit 723767
    $constraint->{omit}      = 1;     # SQLite doesn't need to re-check the op
Packit 723767
  }
Packit 723767
Packit 723767
  # TODO : exploit $order_by to add ordering clauses within idxStr
Packit 723767
Packit 723767
  my $outputs = {
Packit 723767
    idxNum           => 1,
Packit 723767
    idxStr           => join(" AND ", @conditions),
Packit 723767
    orderByConsumed  => 0,
Packit 723767
    estimatedCost    => 1.0,
Packit 723767
    estimatedRows    => undef,
Packit 723767
   };
Packit 723767
Packit 723767
  return $outputs;
Packit 723767
}
Packit 723767
Packit 723767
Packit 723767
#----------------------------------------------------------------------
Packit 723767
# method for preventing updates
Packit 723767
#----------------------------------------------------------------------
Packit 723767
Packit 723767
sub _SQLITE_UPDATE {
Packit 723767
  my ($self, $old_rowid, $new_rowid, @values) = @_;
Packit 723767
Packit 723767
  die "attempt to update a readonly virtual table";
Packit 723767
}
Packit 723767
Packit 723767
Packit 723767
#----------------------------------------------------------------------
Packit 723767
# file slurping function (not a method!)
Packit 723767
#----------------------------------------------------------------------
Packit 723767
Packit 723767
sub get_content {
Packit 723767
  my ($path, $root) = @_;
Packit 723767
Packit 723767
  $path = "$root/$path" if $root;
Packit 723767
Packit 723767
  my $content = "";
Packit 723767
  if (open my $fh, "<", $path) {
Packit 723767
    local $/;          # slurp the whole file into a scalar
Packit 723767
    $content = <$fh>;
Packit 723767
    close $fh;
Packit 723767
  }
Packit 723767
  else {
Packit 723767
    warn "can't open $path";
Packit 723767
  }
Packit 723767
Packit 723767
  return $content;
Packit 723767
}
Packit 723767
Packit 723767
Packit 723767
Packit 723767
#======================================================================
Packit 723767
package DBD::SQLite::VirtualTable::FileContent::Cursor;
Packit 723767
#======================================================================
Packit 723767
use strict;
Packit 723767
use warnings;
Packit 723767
use base "DBD::SQLite::VirtualTable::Cursor";
Packit 723767
Packit 723767
Packit 723767
sub FILTER {
Packit 723767
  my ($self, $idxNum, $idxStr, @values) = @_;
Packit 723767
Packit 723767
  my $vtable = $self->{vtable};
Packit 723767
Packit 723767
  # build SQL
Packit 723767
  local $" = ", ";
Packit 723767
  my @cols = @{$vtable->{headers}};
Packit 723767
  $cols[0] = 'rowid';                 # replace the content column by the rowid
Packit 723767
  push @cols, $vtable->{options}{path_col}; # path col in last position
Packit 723767
  my $sql  = "SELECT @cols FROM $vtable->{options}{source}";
Packit 723767
  $sql .= " WHERE $idxStr" if $idxStr;
Packit 723767
Packit 723767
  # request on the index table
Packit 723767
  my $dbh = $vtable->dbh;
Packit 723767
  $self->{sth} = $dbh->prepare($sql)
Packit 723767
    or die DBI->errstr;
Packit 723767
  $self->{sth}->execute(@values);
Packit 723767
  $self->{row} = $self->{sth}->fetchrow_arrayref;
Packit 723767
Packit 723767
  return;
Packit 723767
}
Packit 723767
Packit 723767
Packit 723767
sub EOF {
Packit 723767
  my ($self) = @_;
Packit 723767
Packit 723767
  return !$self->{row};
Packit 723767
}
Packit 723767
Packit 723767
sub NEXT {
Packit 723767
  my ($self) = @_;
Packit 723767
Packit 723767
  $self->{row} = $self->{sth}->fetchrow_arrayref;
Packit 723767
}
Packit 723767
Packit 723767
sub COLUMN {
Packit 723767
  my ($self, $idxCol) = @_;
Packit 723767
Packit 723767
  return $idxCol == 0 ? $self->file_content : $self->{row}[$idxCol];
Packit 723767
}
Packit 723767
Packit 723767
sub ROWID {
Packit 723767
  my ($self) = @_;
Packit 723767
Packit 723767
  return $self->{row}[0];
Packit 723767
}
Packit 723767
Packit 723767
sub file_content {
Packit 723767
  my ($self) = @_;
Packit 723767
Packit 723767
  my $root = $self->{vtable}{options}{root};
Packit 723767
  my $path = $self->{row}[-1];
Packit 723767
  my $get_content_func = $self->{vtable}{get_content};
Packit 723767
Packit 723767
  return $get_content_func->($path, $root);
Packit 723767
}
Packit 723767
Packit 723767
Packit 723767
1;
Packit 723767
Packit 723767
__END__
Packit 723767
Packit 723767
Packit 723767
=head1 NAME
Packit 723767
Packit 723767
DBD::SQLite::VirtualTable::FileContent -- virtual table for viewing file contents
Packit 723767
Packit 723767
Packit 723767
=head1 SYNOPSIS
Packit 723767
Packit 723767
Within Perl :
Packit 723767
Packit 723767
  $dbh->sqlite_create_module(fcontent => "DBD::SQLite::VirtualTable::FileContent");
Packit 723767
Packit 723767
Then, within SQL :
Packit 723767
Packit 723767
  CREATE VIRTUAL TABLE tbl USING fcontent(
Packit 723767
     source      = src_table,
Packit 723767
     content_col = content,
Packit 723767
     path_col    = path,
Packit 723767
     expose      = "path, col1, col2, col3", -- or "*"
Packit 723767
     root        = "/foo/bar"
Packit 723767
     get_content = Foo::Bar::read_from_file
Packit 723767
    );
Packit 723767
Packit 723767
  SELECT col1, path, content FROM tbl WHERE ...;
Packit 723767
Packit 723767
=head1 DESCRIPTION
Packit 723767
Packit 723767
A "FileContent" virtual table is bound to some underlying I
Packit 723767
table>, which has a column containing paths to files.  The virtual
Packit 723767
table behaves like a database view on the source table, with an added
Packit 723767
column which exposes the content from those files.
Packit 723767
Packit 723767
This is especially useful as an "external content" to some
Packit 723767
fulltext table (see L<DBD::SQLite::Fulltext_search>) : the index
Packit 723767
table stores some metadata about files, and then the fulltext engine
Packit 723767
can index both the metadata and the file contents.
Packit 723767
Packit 723767
=head1 PARAMETERS
Packit 723767
Packit 723767
Parameters for creating a C<FileContent> virtual table are
Packit 723767
specified within the C<CREATE VIRTUAL TABLE> statement, just
Packit 723767
like regular column declarations, but with an '=' sign.
Packit 723767
Authorized parameters are :
Packit 723767
Packit 723767
=over
Packit 723767
Packit 723767
=item C<source>
Packit 723767
Packit 723767
The name of the I<source table>.
Packit 723767
This parameter is mandatory. All other parameters are optional.
Packit 723767
Packit 723767
=item C<content_col>
Packit 723767
Packit 723767
The name of the virtual column exposing file contents.
Packit 723767
The default is C<content>.
Packit 723767
Packit 723767
=item C<path_col>
Packit 723767
Packit 723767
The name of the column in C<source> that contains paths to files.
Packit 723767
The default is C<path>.
Packit 723767
Packit 723767
=item C<expose>
Packit 723767
Packit 723767
A comma-separated list (within double quotes) of source column names
Packit 723767
to be exposed by the virtual table. The default is C<"*">, which means
Packit 723767
all source columns.
Packit 723767
Packit 723767
=item C<root>
Packit 723767
Packit 723767
An optional root directory that will be prepended to the I<path> column
Packit 723767
when opening files.
Packit 723767
Packit 723767
=item C<get_content>
Packit 723767
Packit 723767
Fully qualified name of a Perl function for reading file contents.
Packit 723767
The default implementation just slurps the entire file into a string;
Packit 723767
but this hook can point to more sophisticated implementations, like for
Packit 723767
example a function that would remove html tags. The hooked function is
Packit 723767
called like this :
Packit 723767
Packit 723767
  $file_content = $get_content->($path, $root);
Packit 723767
Packit 723767
=back
Packit 723767
Packit 723767
=head1 AUTHOR
Packit 723767
Packit 723767
Laurent Dami E<lt>dami@cpan.orgE<gt>
Packit 723767
Packit 723767
=head1 COPYRIGHT AND LICENSE
Packit 723767
Packit 723767
Copyright Laurent Dami, 2014.
Packit 723767
Packit 723767
This library is free software; you can redistribute it and/or modify
Packit 723767
it under the same terms as Perl itself.
Packit 723767
Packit 723767
=cut