# Copyright (c) 2008-2012 Zmanda, Inc.  All Rights Reserved.
# Copyright (c) 2013-2016 Carbonite, Inc.  All Rights Reserved.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
# Contact information: Zmanda Inc, 505 N Mathlida Ave, Suite 120
# Sunnyvale, CA 94085, or:

package Amanda::DB::Catalog;

=head1 NAME

Amanda::DB::Catalog - access to the Amanda catalog: where is that dump?


=head1 SYNOPSIS

  use Amanda::DB::Catalog;

  # get all write timestamps on record
  my @timestamps = Amanda::DB::Catalog::get_write_timestamps();

  # loop over those timestamps, printing part info for each one
  for my $timestamp (@timestamps) {
      my @parts = Amanda::DB::Catalog::get_parts(
          write_timestamp => $timestamp,
          status => 'OK'
      );
      print "$timestamp:\n";
      for my $part (@parts) {
          my $dump = $part->{dump};
          print " ", $dump->{hostname}, ":", $dump->{diskname},
                " level ", $dump->{level}, "\n";
      }
  }

=head1 MODEL

The Amanda catalog is modeled as a set of dumps comprised of parts.  A dump is
a complete bytestream received from an application, and is uniquely identified
by the combination of C<hostname>, C<diskname>, C<dump_timestamp>, C<level>,
and C<write_timestamp>.  A dump may be partial, or even a complete failure.

A part corresponds to a single file on a volume, containing a portion of the
data for a dump.  A part, then, is completely specified by a volume label and a
file number (C<filenum>).  Each part has, among other things, a part number
(C<partnum>) which gives its relative position within the dump.  The bytestream
for a dump is recovered by concatenating all of the successful (C<status> = OK)
parts matching the dump.

Files in the holding disk are considered part of the catalog, and are
represented as single-part dumps (holding-disk chunking is ignored, as it is
distinct from split parts).

=head2 DUMPS

The dump table contains one row per dump.  It has the following columns:


=item dump_timestamp

(string) -- timestamp of the run in which the dump was created

=item write_timestamp

(string) -- timestamp of the run in which the part was written to this volume,
or C<"00000000000000"> for dumps in the holding disk.

=item hostname

(string) -- dump hostname

=item diskname

(string) -- dump diskname

=item level

(integer) -- dump level

=item status

(string) -- The status of the dump - "OK", "PARTIAL", or "FAIL".  If a disk
failed to dump at all, then it is not part of the catalog and thus will not
have an associated dump row.

=item message

(string) -- reason for PARTIAL or FAIL status

=item nparts

(integer) -- number of successful parts in this dump

=item bytes

(integer) -- size (in bytes) of the dump on disk, 0 if the size is not known.

=item kb

(integer) -- size (in kb) of the dump on disk

=item orig_kb

(integer) -- size (in kb) of the complete dump (before compression or encryption); undef
if not available

=item native_crc

(string) -- CRC of the complete dump (before compression or encryption); undef
if not available

=item client_crc

(string) -- CRC of the complete dump (after client compression or client encryption); undef
if not available

=item server_crc

(string) -- CRC of the complete dump (after server compression or server encryption); undef
if not available

=item sec

(integer) -- time (in seconds) spent writing this dump

=item parts

(arrayref) -- array of parts, indexed by partnum (so C<< $parts->[0] >> is
always C<undef>).  When multiple partial parts are available, the choice of the
partial that is included in this array is undefined.


A dump is represented as a hashref with these keys.

The C<write_timestamp> gives the time of the amanda run in which the part was
written to this volume.  The C<write_timestamp> may differ from the
C<dump_timestamp> if, for example, I<amflush> wrote the part to tape after the
initial dump.

=head2 PARTS

The parts table contains one row per part, and has the following columns:


=item label

(string) -- volume label (not present for holding files)

=item filenum

(integer) -- file on that volume (not present for holding files)

=item holding_file

(string) -- fully-qualified pathname of the holding file (not present for
on-media dumps)

=item dump

(object ref) -- a reference to the dump containing this part

=item status

(string) -- The status of the part - "OK", "PARTIAL", or "FAILED".

=item partnum

(integer) -- part number of a split part (1-based)

=item kb

(integer) -- size (in kb) of this part

=item sec

(integer) -- time (in seconds) spent writing this part


A part is represented as a hashref with these keys.  The C<label> and
C<filenum> serve as a primary key. 

Note that parts' C<dump> and dumps' C<parts> create a reference loop.  This is
broken by making the C<dump> reference of each part weak in C<get_dumps>, and
the contents of each dump's C<parts> array weak references in C<get_parts>.

=head2 NOTES

All timestamps used in this module are full-length, in the format
C<YYYYMMDDHHMMSS>.  If the underlying data contains only datestamps, they are
zero-extended into timestamps: C<YYYYMMDD000000>.  A C<dump_timestamp> always
corresponds to the initiation of the I<original> dump run, while
C<write_timestamp> gives the time the file was written to the volume.  When
parts are migrated from volume to volume (e.g., by I<amvault>), the
C<dump_timestamp> does not change.  

In Amanda, the tuple (C<hostname>, C<diskname>, C<level>, C<dump_timestamp>)
serves as a unique identifier for a dump bytestream, but because the bytestream
may appear several times in the catalog (due to vaulting) the additional
C<write_timestamp> is required to identify a particular on-storage instance of
a dump.  Note that the part sizes may differ between instances, so it is not
valid to concatenate parts from different dump instances.



=head1 INTERFACE

=head2 SUMMARY DATA

The following functions provide summary data based on the contents of the
catalog.


=item get_write_timestamps()

Get a list of all write timestamps, sorted in chronological order.

=item get_latest_write_timestamp()

Return the most recent write timestamp.

=item get_latest_write_timestamp(type => 'amvault')
=item get_latest_write_timestamp(types => [ 'amvault', .. ])

Return the timestamp of the most recent dump of the given type or types.  The
available types are given below for C<get_run_type>.

=item get_labels_written_at_timestamp($ts)

Return a list of labels for volumes written at the given timestamp.

=item get_run_type($ts)

Return the type of run made at the given timestamp.  The result is one of
C<amvault>, C<amdump>, C<amflush>, or the default, C<unknown>.


=head2 PARTS


=item get_parts(%parameters)

This function returns a sequence of parts.  Values in C<%parameters> restrict
the set of parts that are returned.  The hash can have any of the following
keys:


=item write_timestamp

restrict to parts written at this timestamp

=item write_timestamps

(arrayref) restrict to parts written at any of these timestamps (note that
holding-disk files have no C<write_timestamp>, so this option and the previous
will omit them)

=item dump_timestamp

restrict to parts with exactly this timestamp

=item dump_timestamps

(arrayref) restrict to parts with any of these timestamps

=item dump_timestamp_match

restrict to parts with timestamps matching this expression

=item holding

if true, only return dumps on holding disk.  If false, omit dumps on holding
disk.

=item hostname

restrict to parts with exactly this hostname

=item hostnames

(arrayref) restrict to parts with any of these hostnames

=item hostname_match

restrict to parts with hostnames matching this expression

=item diskname

restrict to parts with exactly this diskname

=item disknames

(arrayref) restrict to parts with any of these disknames

=item diskname_match

restrict to parts with disknames matching this expression

=item label

restrict to parts with exactly this label

=item labels

(arrayref) restrict to parts with any of these labels

=item level

restrict to parts with exactly this level

=item levels

(arrayref) restrict to parts with any of these levels

=item status

restrict to parts with this status

=item labelstr

restrict to parts on volume matching the labelstr.

=item dumpspecs

(arrayref of dumpspecs) restrict to parts matching one or more of these dumpspecs


Match expressions are described in the amanda(8) manual page.

=item sort_parts([ $key1, $key2, .. ], @parts)

Given a list of parts, this function sorts that list by the requested keys.
The following keys are available:


=item hostname

=item diskname

=item write_timestamp

=item dump_timestamp

=item level

=item filenum

=item label

Note that this sorts labels I<lexically>, not necessarily in the order they were used!

=item partnum

=item nparts


Keys are processed from left to right: if two dumps have the same value for
C<$key1>, then C<$key2> is examined, and so on.  Key names may be prefixed by a
dash (C<->) to reverse the order.

Note that some of these keys are dump keys; the function will automatically
access those values via the C<dump> attribute.


=head2 DUMPS


=item get_dumps(%parameters)

This function returns a sequence of dumps.  Values in C<%parameters> restrict
the set of dumps that are returned.  The same keys as are used for C<get_parts>
are available here, with the exception of C<label> and C<labels>.  In this
case, the C<status> parameter applies to the dump status, not the status of its
constituent parts.

=item sort_dumps([ $key1, $key2 ], @dumps)

Like C<sort_parts>, this sorts a sequence of dumps generated by C<get_dumps>.
The same keys are available, with the exception of C<label>, C<filenum>, and
C<partnum>.




=item add_part($part)

Add the given part to the database.  In terms of logfiles, this will either
create a new logfile (if the part's C<write_timestamp> has not been seen
before) or append to an existing logfile.  Note that a new logfile will require
a corresponding new entry in the tapelist.

Note that no locking is performed: multiple simultaneous calls to this function
can result in a corrupted or incorrect logfile.

TODO: add_dump

=cut



use Amanda::Logfile qw( :constants );
use Amanda::Tapelist;
use Amanda::Config qw( :init :getconf config_dir_relative );
use Amanda::Util qw( quote_string weaken_ref match_disk match_host match_datestamp match_level match_labelstr_expr);
use File::Glob qw( :glob );
use warnings;
use strict;

# Module-wide tapelist cache: populated by _load_tapelist() and reset to
# undef by _clear_cache().
my $tapelist;

# Utility function: extend an 8-digit datestamp (YYYYMMDD) into a full
# 14-digit timestamp (YYYYMMDDHHMMSS) by appending zeros.
#
# Any other value is returned unchanged.  An undefined value is passed
# through as undef (without an "uninitialized" warning), so callers such as
# add_part can test the result with defined().
sub zeropad {
    my ($timestamp) = @_;

    return $timestamp unless defined $timestamp;

    if (length($timestamp) == 8) {
        return $timestamp."000000";
    }
    return $timestamp;
}

# Return every write timestamp for which a logfile exists, zero-padded to
# 14 digits and sorted.  Because all values are fixed-width digit strings
# after padding, the default string sort is also chronological.
sub get_write_timestamps {
    my @rv;

    # find_log assumes that the tapelist has been loaded, so load it now
    _load_tapelist();

    for my $logfile (Amanda::Logfile::find_log()) {
        # skip anything that is not named like log.<stamp>[.<n>|.amflush]
        next unless (my ($timestamp) = $logfile =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
        push @rv, zeropad($timestamp);
    }

    return sort @rv;
}

# Return the most recent write timestamp, optionally restricted to runs of
# particular types.
#
# Parameters (named):
#   type  => $t         restrict to runs of this type
#   types => [ $t, .. ] restrict to runs of any of these types
# Run types are those returned by get_run_type.
#
# Returns the timestamp, or undef if there is no matching run.
sub get_latest_write_timestamp {
    my %params = @_;

    # Build a private list of wanted types; the caller's 'types' array is
    # copied rather than pushed onto, so it is never modified.
    my $filter_by_type = 0;
    my @types;
    if (defined $params{'types'}) {
        @types = @{$params{'types'}};
        $filter_by_type = 1;
    }
    if ($params{'type'}) {
        push @types, $params{'type'};
        $filter_by_type = 1;
    }

    # get all of the timestamps, oldest first
    my @timestamps = get_write_timestamps();
    return undef unless @timestamps;

    # if we're not looking for a particular type, then this is easy
    return $timestamps[-1] unless $filter_by_type;

    # otherwise we need to search backward until we find a logfile of
    # the right type
    while (@timestamps) {
        my $ts = pop @timestamps;
        my $typ = get_run_type($ts);
        if (grep { $_ eq $typ } @types) {
            return $ts;
        }
    }

    return undef;
}

# Determine what kind of run produced the logfile(s) for a write timestamp.
#
# Scans the matching logfiles in the configured log directory for a START
# line.  Returns 'amflush' or 'amvault' when that program announces itself,
# 'amdump' when a planner START line is seen, and 'unknown' otherwise
# (including when no logfile can be opened).
sub get_run_type {
    my ($write_timestamp) = @_;

    # find all of the logfiles with that name
    my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
    my @matches = File::Glob::bsd_glob("$logdir/log.$write_timestamp.*", GLOB_NOSORT);

    # a zero-extended timestamp may correspond to a logfile that is named
    # with only the 8-digit datestamp, so look for that as well
    if ($write_timestamp =~ /000000$/) {
        my $write_datestamp = substr($write_timestamp, 0, 8);
        push @matches, File::Glob::bsd_glob("$logdir/log.$write_datestamp.*", GLOB_NOSORT);
    }

    for my $lf (@matches) {
        open(my $fh, "<", $lf) or next;
        # read into a lexical rather than $_, so that a caller's $_ (which
        # may alias one of its own variables) is never overwritten
        while (my $line = <$fh>) {
            # amflush and amvault put their own names in
            if ($line =~ /^START (amflush|amvault)/) {
                my $type = $1;
                close($fh);
                return $type;
            }
            # but for amdump we see planner
            if ($line =~ /^START planner/) {
                close($fh);
                return 'amdump';
            }
        }
        close($fh);
    }

    return "unknown";
}

# This generic function implements the loop of scanning logfiles to find
# the requested data; get_parts and get_dumps then adjust the results to
# match what the user expects.
#
# Arguments:
#   $get_what  "parts" or "dumps"; selects which direction of the
#              part <-> dump reference loop is weakened, and whether the
#              'status' parameter is applied to part or dump status
#   %params    the filter parameters accepted by get_parts
#
# Returns two arrayrefs: ([ dumps ], [ parts ]).
#
# Dies if a selected logfile cannot be opened, or if a taper line does not
# contain the expected "[sec .. kb|bytes ..]" statistics.
sub get_parts_and_dumps {
    my $get_what = shift; # "parts" or "dumps"
    my %params = @_;
    my $logfile_dir = config_dir_relative(getconf($CNF_LOGDIR));
    my $logname_re = qr/^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/;

    # find_log assumes that the tapelist has been loaded, so load it now
    _load_tapelist();

    # %params is only a shallow copy, so take private copies of the "plural"
    # arrays before appending to them; otherwise the caller's arrays grow
    for my $plural (qw(write_timestamps dump_timestamps hostnames disknames
                       levels storages labels)) {
        $params{$plural} = [ @{$params{$plural}} ]
            if defined($params{$plural});
    }

    # pre-process params by appending all of the "singular" parameters to the "plurals"
    push @{$params{'write_timestamps'}}, zeropad($params{'write_timestamp'})
        if defined($params{'write_timestamp'});
    push @{$params{'dump_timestamps'}}, zeropad($params{'dump_timestamp'})
        if defined($params{'dump_timestamp'});
    push @{$params{'hostnames'}}, $params{'hostname'}
        if defined($params{'hostname'});
    push @{$params{'disknames'}}, $params{'diskname'}
        if defined($params{'diskname'});
    push @{$params{'levels'}}, $params{'level'}
        if defined($params{'level'});
    push @{$params{'storages'}}, $params{'storage'}
        if defined($params{'storage'});
    if ($get_what eq 'parts') {
        push @{$params{'labels'}}, $params{'label'}
            if defined($params{'label'});
    } else {
        # labels only make sense for parts
        delete $params{'labels'};
    }

    # specifying write_timestamps implies we won't check holding files
    if ($params{'write_timestamps'}) {
        if (defined $params{'holding'} and $params{'holding'}) {
            return [], []; # well, that's easy..
        }
        $params{'holding'} = 0;
    }
    # specifying labelstr implies we won't check holding files
    if ($params{'labelstr'}) {
        if (defined $params{'holding'} and $params{'holding'}) {
            return [], []; # well, that's easy..
        }
        $params{'holding'} = 0;
    }

    # Since we're working from logfiles, we have to pick the logfiles we'll use first.
    # Then we can use search_logfile.
    my @logfiles;
    if ($params{'holding'}) {
        @logfiles = ( 'holding', );
    } elsif (defined($params{'write_timestamps'})) {
        # if we have specific write_timestamps, the job is pretty easy.
        my %timestamps_hash = map { ($_, undef) } @{$params{'write_timestamps'}};
        for my $logfile (Amanda::Logfile::find_log()) {
            next unless (my ($timestamp) = $logfile =~ $logname_re);
            next unless (exists($timestamps_hash{zeropad($timestamp)}));
            push @logfiles, $logfile;
        }
    } elsif (defined($params{'dump_timestamps'})) {
        # otherwise, we need only look in logfiles at or after the earliest dump timestamp
        my @sorted_timestamps = sort @{$params{'dump_timestamps'}};
        my $earliest_timestamp = $sorted_timestamps[0];
        for my $logfile (Amanda::Logfile::find_log()) {
            next unless (my ($timestamp) = $logfile =~ $logname_re);
            next unless (zeropad($timestamp) ge $earliest_timestamp);
            push @logfiles, $logfile;
        }
    } else {
        # oh well -- it looks like we'll have to read all existing logfiles.
        @logfiles = Amanda::Logfile::find_log();
    }

    # Set up some hash tables for speedy lookups of various attributes
    my (%dump_timestamps_hash, %hostnames_hash, %disknames_hash, %levels_hash, %storages_hash, %labels_hash);
    %dump_timestamps_hash = map { ($_, undef) } @{$params{'dump_timestamps'}}
        if (defined($params{'dump_timestamps'}));
    %hostnames_hash = map { ($_, undef) } @{$params{'hostnames'}}
        if (defined($params{'hostnames'}));
    %disknames_hash = map { ($_, undef) } @{$params{'disknames'}}
        if (defined($params{'disknames'}));
    %levels_hash = map { ($_, undef) } @{$params{'levels'}}
        if (defined($params{'levels'}));
    %storages_hash = map { ($_, undef) } @{$params{'storages'}}
        if (defined($params{'storages'}));
    %labels_hash = map { ($_, undef) } @{$params{'labels'}}
        if (defined($params{'labels'}));

    my %dumps;
    my @parts;

    # *also* scan holding if the holding param wasn't specified
    if (!defined $params{'holding'}) {
        push @logfiles, 'holding';
    }

    # now loop over those logfiles and use search_logfile to load the dumpfiles
    # from them, then process each entry from the logfile
    for my $logfile (@logfiles) {
        my (@find_results, $write_timestamp);

        # get the raw contents from search_logfile, or from the holding disk
        # if $logfile is the pseudo-logfile 'holding'
        if ($logfile ne 'holding') {
            @find_results = Amanda::Logfile::search_logfile(undef, undef,
                                                        "$logfile_dir/$logfile", 1, 1);
            # convert to dumpfile hashes, including the write_timestamp from the logfile name
            my ($timestamp) = $logfile =~ $logname_re;
            $write_timestamp = zeropad($timestamp);

        } else {
            @find_results = Amanda::Logfile::search_holding_disk(1);
            $write_timestamp = '00000000000000';
        }

        # filter against *_match with dumps_match; no level restriction, and
        # do not restrict to successful results
        @find_results = Amanda::Logfile::dumps_match([@find_results],
            $params{'hostname_match'},
            $params{'diskname_match'},
            $params{'dump_timestamp_match'},
            undef,
            0);

        # loop over each entry in the logfile.
        for my $find_result (@find_results) {

            # filter out the non-dump error messages that find.c produces
            next unless (defined $find_result->{'label'});

            # bail out on this result early, if possible
            next if (%dump_timestamps_hash
                and !exists($dump_timestamps_hash{zeropad($find_result->{'timestamp'})}));
            next if (%hostnames_hash
                and !exists($hostnames_hash{$find_result->{'hostname'}}));
            next if (%disknames_hash
                and !exists($disknames_hash{$find_result->{'diskname'}}));
            next if (%levels_hash
                and !exists($levels_hash{$find_result->{'level'}}));
            next if (%storages_hash
                and !exists($storages_hash{$find_result->{'storage'}}));
            next if (%labels_hash
                and !exists($labels_hash{$find_result->{'label'}}));
            next if (defined $params{'labelstr'}
                and !match_labelstr_expr($params{'labelstr'},$find_result->{'label'}));
            if ($get_what eq 'parts') {
                next if (exists($params{'status'}) and defined $params{'status'}
                    and defined $find_result->{'status'}
                    and $find_result->{'status'} ne $params{'status'});
            } elsif ($get_what eq 'dumps') {
                next if (    exists($params{'status'}) and defined $params{'status'}
                         and defined $find_result->{'dump_status'}
                         and $find_result->{'dump_status'} ne $params{'status'});
            }

            # filter each result against dumpspecs, to avoid dumps_match_dumpspecs'
            # tendency to produce duplicate results
            next if ($params{'dumpspecs'}
                and !Amanda::Logfile::dumps_match_dumpspecs([$find_result],
                                                    $params{'dumpspecs'}, 0));

            my $dump_timestamp = zeropad($find_result->{'timestamp'});

            # The key must be built exactly as it is in the taper-line pass
            # below (hostname, diskname, storage, write_timestamp, level,
            # dump_timestamp), or the dump-level statistics from the DONE /
            # PARTIAL / FAIL lines never reach the dump created here.  A
            # missing storage falls back to the configuration name, which is
            # the same default the taper-line pass applies.
            my $key_storage = defined $find_result->{'storage'}
                ? $find_result->{'storage'}
                : Amanda::Config::get_config_name();
            my $dumpkey = join("\0", $find_result->{'hostname'}, $find_result->{'diskname'},
                                     $key_storage, $write_timestamp,
                                     $find_result->{'level'}, $dump_timestamp);
            my $dump = $dumps{$dumpkey};
            if (!defined $dump) {
                $dump = $dumps{$dumpkey} = {
                    dump_timestamp => $dump_timestamp,
                    write_timestamp => $write_timestamp,
                    hostname => $find_result->{'hostname'},
                    diskname => $find_result->{'diskname'},
                    storage  => $find_result->{'storage'},
                    pool => $find_result->{'pool'},
                    level => $find_result->{'level'}+0,
                    orig_kb => $find_result->{'orig_kb'},
                    native_crc => $find_result->{'native_crc'},
                    client_crc => $find_result->{'client_crc'},
                    server_crc => $find_result->{'server_crc'},
                    status => $find_result->{'dump_status'},
                    message => $find_result->{'message'},
                    # the rest of these params are unknown until we see a taper
                    # DONE, PARTIAL, or FAIL line, although we count nparts
                    # manually instead of relying on the logfile
                    nparts => 0, # $find_result->{'totalparts'}
                    bytes => -1, # $find_result->{'bytes'}
                    kb => -1,    # $find_result->{'kb'}
                    sec => -1,   # $find_result->{'sec'}
                };
            } elsif (defined $find_result->{'partnum'} and $find_result->{'partnum'} == 1) {
                # part 1 seen again for a known dump: reset the dump-level
                # fields and counters from this result
                $dump->{'orig_kb'} = $find_result->{'orig_kb'};
                $dump->{'native_crc'} = $find_result->{'native_crc'};
                $dump->{'client_crc'} = $find_result->{'client_crc'};
                $dump->{'server_crc'} = $find_result->{'server_crc'};
                $dump->{'status'} = $find_result->{'dump_status'};
                $dump->{'message'} = $find_result->{'message'};
                $dump->{'nparts'} = 0;
                $dump->{'bytes'} = -1;
                $dump->{'kb'} = -1;
                $dump->{'sec'} = -1;
            }

            # start setting up a part hash for this result
            my %part;
            if ($logfile ne 'holding') {
                # on-media dump
                %part = (
                    label => $find_result->{'label'},
                    filenum => $find_result->{'filenum'},
                    dump => $dump,
                    status => $find_result->{'status'} || 'FAILED',
                    sec => $find_result->{'sec'},
                    kb => $find_result->{'kb'},
                    orig_kb => $find_result->{'orig_kb'},
                    native_crc => $find_result->{'native_crc'},
                    client_crc => $find_result->{'client_crc'},
                    server_crc => $find_result->{'server_crc'},
                    partnum => $find_result->{'partnum'},
                );
            } else {
                # holding disk
                %part = (
                    holding_file => $find_result->{'label'},
                    dump => $dump,
                    status => $find_result->{'status'} || 'FAILED',
                    sec => 0.0,
                    kb => $find_result->{'kb'},
                    orig_kb => $find_result->{'orig_kb'},
                    native_crc => $find_result->{'native_crc'},
                    client_crc => $find_result->{'client_crc'},
                    server_crc => $find_result->{'server_crc'},
                    partnum => 1,
                );
                # and fix up the dump, too
                $dump->{'status'} = $find_result->{'dump_status'} || 'FAILED';
                $dump->{'bytes'} = $find_result->{'bytes'};
                $dump->{'kb'} = $find_result->{'kb'};
                $dump->{'sec'} = $find_result->{'sec'};
            }

            # weaken the dump ref if we're returning dumps
            weaken_ref($part{'dump'})
                if ($get_what eq 'dumps');

            # count the number of successful parts in the dump
            $dump->{'nparts'}++ if $part{'status'} eq 'OK';

            # and add a ref to the array of parts; if we're getting
            # parts, then this is a weak ref
            $dump->{'parts'}[$part{'partnum'}] = \%part;
            weaken_ref($dump->{'parts'}[$part{'partnum'}])
                if ($get_what eq 'parts');

            push @parts, \%part;
        }

        # if these dumps were on the holding disk, then we're done
        next if $logfile eq 'holding';

        # re-read the logfile to extract dump-level info that's not captured by
        # search_logfile
        my $logh = Amanda::Logfile::open_logfile("$logfile_dir/$logfile");
        die "logfile '$logfile' not found" unless $logh;
        while (my ($type, $prog, $str) = Amanda::Logfile::get_logline($logh)) {
            next unless $prog == $P_TAPER;
            my $status;
            if ($type == $L_DONE) {
                $status = 'OK';
            } elsif ($type == $L_PARTIAL) {
                $status = 'PARTIAL';
            } elsif ($type == $L_FAIL) {
                $status = 'FAIL';
            } elsif ($type == $L_SUCCESS) {
                $status = "OK";
            } else {
                # not a dump-level taper line
                next;
            }

            # now extract the appropriate info; luckily these log lines have the same
            # format, more or less
            my ($storage, $pool, $hostname, $diskname, $dump_timestamp, $nparts, $level, $secs, $kb, $bytes, $message);
            ($storage, $str) = Amanda::Util::skip_quoted_string($str);
            $storage = Amanda::Util::unquote_string($storage);
            if ($storage =~ /^ST:/) {
                $storage =~ s/^ST://;
                ($pool, $str) = Amanda::Util::skip_quoted_string($str);
                $pool = Amanda::Util::unquote_string($pool);
                if ($pool =~ /^POOL/) {
                    $pool =~ s/^POOL//;
                    ($hostname, $str) = Amanda::Util::skip_quoted_string($str);
                } else {
                    # no pool field: what we just read was the hostname
                    $hostname = $pool;
                    $pool = Amanda::Config::get_config_name();
                }
            } else {
                # no storage field: what we just read was the hostname
                $hostname = $storage;
                $storage = Amanda::Config::get_config_name();
                $pool = $storage;
            }
            ($diskname, $str) = Amanda::Util::skip_quoted_string($str);
            ($dump_timestamp, $str) = Amanda::Util::skip_quoted_string($str);
            if ($status ne 'FAIL' and $type != $L_SUCCESS) { # nparts is not in SUCCESS lines
                ($nparts, my $str1) = Amanda::Util::skip_quoted_string($str);
                if (substr($str1, 0,1) ne '[') {
                    $str = $str1;
                } else { # nparts is not in all PARTIAL lines
                    $nparts = 0;
                }

            } else {
                $nparts = 0;
            }
            ($level, $str) = Amanda::Util::skip_quoted_string($str);
            if ($status ne 'FAIL') {
                # skip up to three optional fields (named crc1..crc3 in the
                # original code) that may precede the [sec ...] statistics
                if ($str !~ /^\[sec/) {
                    (my $crc1, $str) = Amanda::Util::skip_quoted_string($str);
                }
                if ($str !~ /^\[sec/) {
                    (my $crc2, $str) = Amanda::Util::skip_quoted_string($str);
                }
                if ($str !~ /^\[sec/) {
                    (my $crc3, $str) = Amanda::Util::skip_quoted_string($str);
                }
                my $s = $str;
                my $b_unit;
                ($secs, $b_unit, $kb, $str) = ($str =~ /^\[sec ([-0-9.]+) (kb|bytes) ([-0-9]+).*\] ?(.*)$/)
                    or die("'$s'");
                if ($b_unit eq 'bytes') {
                    # the size was logged in bytes; derive kb from it
                    $bytes = $kb;
                    $kb /= 1024;
                } else {
                    $bytes = 0;
                }
                $secs = 0.1 if ($secs <= 0);
            }
            if ($status ne 'OK') {
                $message = $str;
            } else {
                $message = '';
            }

            $hostname = Amanda::Util::unquote_string($hostname);
            $diskname = Amanda::Util::unquote_string($diskname);
            $message = Amanda::Util::unquote_string($message) if $message;

            # filter against dump criteria
            next if ($params{'dump_timestamp_match'}
                and !match_datestamp($params{'dump_timestamp_match'}, zeropad($dump_timestamp)));
            next if (%dump_timestamps_hash
                and !exists($dump_timestamps_hash{zeropad($dump_timestamp)}));

            next if ($params{'hostname_match'}
                and !match_host($params{'hostname_match'}, $hostname));
            next if (%hostnames_hash
                and !exists($hostnames_hash{$hostname}));

            next if ($params{'diskname_match'}
                and !match_disk($params{'diskname_match'}, $diskname));
            next if (%disknames_hash
                and !exists($disknames_hash{$diskname}));

            next if (%levels_hash
                and !exists($levels_hash{$level}));

            next if (%storages_hash
                and !exists($storages_hash{$storage}));

            # get_dumps filters on status

            if ($params{'dumpspecs'}) {
                my $ok = 0;
                for my $ds (@{$params{'dumpspecs'}}) {
                    # (the "". are for SWIG's benefit - SWIGged functions don't like
                    # strings generated by SWIG.  Long story.)
                    next if (defined $ds->{'host'}
                            and !match_host("".$ds->{'host'}, $hostname));
                    next if (defined $ds->{'disk'}
                            and !match_disk("".$ds->{'disk'}, $diskname));
                    next if (defined $ds->{'datestamp'}
                            and !match_datestamp("".$ds->{'datestamp'}, $dump_timestamp));
                    next if (defined $ds->{'level'}
                            and !match_level("".$ds->{'level'}, $level));
                    next if (defined $ds->{'write_timestamp'}
                             and !match_datestamp("".$ds->{'write_timestamp'}, $write_timestamp));
                    $ok = 1;
                }
                next unless $ok;
            }

            my $dumpkey = join("\0", $hostname, $diskname, $storage, $write_timestamp,
                                     $level, zeropad($dump_timestamp));
            my $dump = $dumps{$dumpkey};
            if (!defined $dump) {
                # this will happen when a dump has no parts - a FAILed dump.
                $dump = $dumps{$dumpkey} = {
                    dump_timestamp => zeropad($dump_timestamp),
                    write_timestamp => $write_timestamp,
                    hostname => $hostname,
                    diskname => $diskname,
                    storage  => $storage,
                    level => $level+0,
                    orig_kb => undef,
                    native_crc => undef,
                    client_crc => undef,
                    server_crc => undef,
                    status => "FAILED",
                    # message set below
                    nparts => $nparts, # hopefully 0?
                    # kb set below
                    # sec set below
                };
            }

            $dump->{'message'} = $message;
            if ($status eq 'FAIL') {
                $dump->{'bytes'} = 0;
                $dump->{'kb'} = 0;
                $dump->{'sec'} = 0.0;
            } else {
                $dump->{'bytes'} = $bytes+0;
                $dump->{'kb'} = $kb+0;
                $dump->{'sec'} = $secs+0.0;
            }
        }
        Amanda::Logfile::close_logfile($logh);
    }

    return [ values %dumps], \@parts;
}

# Return the list of parts matching %parameters (see the POD for the
# accepted keys).  The dumps gathered along the way are discarded; each
# part's 'dump' reference keeps its dump alive.
sub get_parts {
    my ($dumps, $parts) = get_parts_and_dumps("parts", @_);
    return @$parts;
}

# Return the list of dumps matching %parameters.  The 'status' parameter,
# when given, is compared against each dump's final 'status' here, after
# get_parts_and_dumps has finished assembling the dumps.
sub get_dumps {
    my %params = @_;
    my ($dumps, $parts) = get_parts_and_dumps("dumps", @_);
    my @dumps = @$dumps;

    if (exists $params{'status'} and defined $params{'status'}) {
        @dumps = grep { $_->{'status'} eq $params{'status'} } @dumps;
    }

    return @dumps;
}

# Sort a list of parts by the given keys.
#
# Arguments:
#   $keys   arrayref of key names, most significant first; a leading '-'
#           reverses the order for that key
#   @parts  part hashrefs, each with a 'dump' hashref
#
# partnum and filenum are compared numerically on the part; nparts and
# level numerically on the part's dump; hostname, diskname, write_timestamp
# and dump_timestamp as strings on the dump; any other key (e.g. label) as
# a string on the part.  Returns the sorted list.
sub sort_parts {
    my ($keys, @parts) = @_;

    # Work out once, up front, how each key is to be compared, rather than
    # re-parsing the key names on every comparison.
    my @criteria = map {
        my ($rev, $k) = /^(-?)(.*)$/;
        my $numeric = ($k =~ /^(?:partnum|filenum|nparts|level)$/) ? 1 : 0;
        my $on_dump = ($k =~ /^(?:nparts|level|hostname|diskname|write_timestamp|dump_timestamp)$/) ? 1 : 0;
        [ $k, $numeric, $on_dump, ($rev eq '-') ? 1 : 0 ];
    } @$keys;

    return sort {
        for my $criterion (@criteria) {
            my ($k, $numeric, $on_dump, $reverse) = @$criterion;
            my ($left, $right) = $on_dump
                ? ($a->{'dump'}->{$k}, $b->{'dump'}->{$k})
                : ($a->{$k}, $b->{$k});
            my $res = $numeric ? ($left <=> $right) : ($left cmp $right);
            # equal on this key: fall through to the next one
            next unless $res;
            return $reverse ? -$res : $res;
        }
        return 0;
    } @parts;
}

# Sort a list of dumps by the given keys.
#
# Arguments:
#   $keys   arrayref of key names, most significant first; a leading '-'
#           reverses the order for that key
#   @dumps  dump hashrefs
#
# nparts, level and filenum are compared numerically; every other key
# (hostname, diskname, write_timestamp, dump_timestamp, ...) is compared
# as a string.  Returns the sorted list.
sub sort_dumps {
    my ($keys, @dumps) = @_;

    # Work out once, up front, how each key is to be compared, rather than
    # re-parsing the key names on every comparison.
    my @criteria = map {
        my ($rev, $k) = /^(-?)(.*)$/;
        my $numeric = ($k =~ /^(?:nparts|level|filenum)$/) ? 1 : 0;
        [ $k, $numeric, ($rev eq '-') ? 1 : 0 ];
    } @$keys;

    return sort {
        for my $criterion (@criteria) {
            my ($k, $numeric, $reverse) = @$criterion;
            my $res = $numeric
                ? ($a->{$k} <=> $b->{$k})
                : ($a->{$k} cmp $b->{$k});
            # equal on this key: fall through to the next one
            next unless $res;
            return $reverse ? -$res : $res;
        }
        return 0;
    } @dumps;
}

# One-element cache used by add_part(): the label and write_timestamp of
# the most recent lookup, and the logfile name that lookup resolved to, so
# that consecutive parts for the same volume do not rescan the logfiles.
my ($add_part_last_label,
    $add_part_last_write_timestamp,
    $add_part_last_logfile);

# Append a part to the catalog by writing a PART line (and, for the final
# part of a dump, a DONE line) to the appropriate logfile.
#
# Argument: a flat hashref with the keys label, filenum, hostname,
# diskname, dump_timestamp, write_timestamp, partnum, nparts, level, sec
# and kb.
#
# If a logfile for this write_timestamp already mentions the label, the
# lines are appended to it.  Otherwise a new logfile is created and, if
# necessary, the label is added to the tapelist.
#
# Dies if the write_timestamp is missing or the logfile cannot be opened
# or closed.  No locking is performed.
sub add_part {
    my ($part) = @_;
    my $logfh;
    my $logfile;
    my $find_result;
    my $logdir = config_dir_relative(getconf($CNF_LOGDIR));
    my ($last_filenum, $last_secs, $last_kbs);

    # first order of business is to find out whether we need to make a new
    # dumpfile for this.
    my $write_timestamp = $part->{'write_timestamp'};
    die "dump has no 'write_timestamp'" unless defined $write_timestamp;
    $write_timestamp = zeropad($write_timestamp);

    # consult our one-element cache for this label and write_timestamp
    if (!defined $add_part_last_label
        or $add_part_last_label ne $part->{'label'}
        or $add_part_last_write_timestamp ne $part->{'write_timestamp'}) {

        # update the cache
        $add_part_last_logfile = undef;
        LOGFILE:
        for my $lf (Amanda::Logfile::find_log()) {
            next unless (my ($log_timestamp) = $lf =~ /^log\.([0-9]+)(?:\.[0-9]+|\.amflush)?$/);
            next unless (zeropad($log_timestamp) eq $write_timestamp);

            # write timestamp matches; now check the label
            for $find_result (Amanda::Logfile::search_logfile(undef, undef,
                                        "$logdir/$lf", 1, 1)) {
                next unless (defined $find_result->{'label'});

                if ($find_result->{'label'} eq $part->{'label'}) {
                    $add_part_last_label = $part->{'label'};
                    $add_part_last_write_timestamp = $part->{'write_timestamp'};
                    $add_part_last_logfile = $lf;
                    last LOGFILE;
                }
            }
        }
    }
    $logfile = $add_part_last_logfile;

    # truncate the write_timestamp if we're not using timestamps
    if (!getconf($CNF_USETIMESTAMPS)) {
        $write_timestamp = substr($write_timestamp, 0, 8);
    }

    # get the information on the last dump and part in this logfile, or create
    # a new logfile if none exists, then open the logfile for writing.
    if (defined $logfile) {
        $last_filenum = -1;

        # NOTE: this depends on an implementation detail of search_logfile: it
        # returns the results in the reverse order of appearance in the logfile.
        # Since we're concerned with the last elements of this logfile that we
        # will be appending to shortly, we simply reverse this list.  As this
        # package is rewritten to parse logfiles on its own (or access a relational
        # database), this implementation detail will no longer be relevant.
        my @find_results = reverse Amanda::Logfile::search_logfile(undef, undef,
                                                    "$logdir/$logfile", 1, 1);
        for $find_result (@find_results) {
            # filter out the non-dump error messages that find.c produces
            next unless (defined $find_result->{'label'});

            $last_filenum = $find_result->{'filenum'};

            # if this is part number 1, reset our secs and kbs counters on the
            # assumption that this is the beginning of a new dump
            if ($find_result->{'partnum'} == 1) {
                $last_secs = $last_kbs = 0;
            }
            $last_secs += $find_result->{'sec'};
            $last_kbs += $find_result->{'kb'};
        }

        open($logfh, ">>", "$logdir/$logfile")
            or die("Could not append to '$logdir/$logfile': $!");
    } else {
        $last_filenum = -1;
        $last_secs = 0;
        $last_kbs = 0;

        # pick an unused log filename
        my $i = 0;
        while (1) {
            $logfile = "log.$write_timestamp.$i";
            last unless -f "$logdir/$logfile";
            $i++;
        }

        open($logfh, ">", "$logdir/$logfile")
            or die("Could not write '$logdir/$logfile': $!");

        print $logfh
            "INFO taper This logfile was generated by Amanda::DB::Catalog\n";

        print $logfh
            "START taper datestamp $write_timestamp label $part->{label} tape $i\n";

        if (!defined $tapelist) {
            _load_tapelist();
        } else {
            # reload the tapelist immediately, in case it's been modified
            $tapelist->reload();
        }

        # see if we need to add an entry to the tapelist for this dump
        if (!grep { $_->{'label'} eq $part->{'label'}
                    and zeropad($_->{'datestamp'}) eq zeropad($part->{'write_timestamp'})
                } @{$tapelist->{tles}}) {
            # NOTE(review): reload(1) is understood to re-read the tapelist
            # under a write lock that write() then releases -- confirm
            # against Amanda::Tapelist.
            $tapelist->reload(1);
            $tapelist->add_tapelabel($write_timestamp, $part->{'label'}, undef, 1,undef,undef,undef,undef,undef, Amanda::Config::get_config_name());
            $tapelist->write();
        }
    }

    if ($last_filenum >= 0 && $last_filenum+1 != $part->{'filenum'}) {
        warn "Discontinuity in filenums in $logfile: " .
             "from $last_filenum to $part->{filenum}";
    }

    my $kps = $part->{'sec'}? (($part->{'kb'} + 0.0) / $part->{'sec'}) : 0.0;

    my $part_line = "PART taper ";
    $part_line .= "$part->{label} ";
    $part_line .= "$part->{filenum} ";
    $part_line .= quote_string($part->{hostname}) . " ";
    $part_line .= quote_string($part->{diskname}) . " ";
    $part_line .= "$part->{dump_timestamp} ";
    $part_line .= "$part->{partnum}/$part->{nparts} ";
    $part_line .= "$part->{level} ";
    $part_line .= "[sec $part->{sec} kb $part->{kb} kps $kps]";
    print $logfh "$part_line\n";

    # TODO: we don't always know nparts when writing a part, so
    # this is not always an effective way to detect a complete dump.
    # However, it works for purposes of data vaulting.
    if ($part->{'partnum'} == $part->{'nparts'}) {
        # totals for the whole dump: the parts already in the logfile plus
        # the one just written
        my $secs = $last_secs + $part->{'sec'};
        my $kbs = $last_kbs + $part->{'kb'};
        $kps = $secs? ($kbs + 0.0) / $secs : 0.0;

        my $done_line = "DONE taper ";
        $done_line .= quote_string($part->{hostname}) ." ";
        $done_line .= quote_string($part->{diskname}) ." ";
        $done_line .= "$part->{dump_timestamp} ";
        $done_line .= "$part->{nparts} ";
        $done_line .= "$part->{level} ";
        $done_line .= "[sec $secs kb $kbs kps $kps]";
        print $logfh "$done_line\n";
    }

    # buffered write errors only surface at close
    close($logfh)
        or die("Could not close '$logdir/$logfile': $!");
}

# (Re)create the module's tapelist object from the configured tapelist
# file.  This is done on every call, even when a tapelist is already
# cached.  The second value returned by Amanda::Tapelist->new is not
# examined.
sub _load_tapelist {
    my $tapelist_filename = config_dir_relative(getconf($CNF_TAPELIST));
    ($tapelist, my $message) = Amanda::Tapelist->new($tapelist_filename);
}

# Drop the cached tapelist so that the next user reloads it
# (used by installcheck).
sub _clear_cache {
    $tapelist = undef;
}

# a module must end with a true value for use/require to succeed
1;