Blame gfs2/fsck/metawalk.c

Packit 6ef888
#include "clusterautoconfig.h"
Packit 6ef888
Packit 6ef888
#include <inttypes.h>
Packit 6ef888
#include <stdio.h>
Packit 6ef888
#include <stdlib.h>
Packit 6ef888
#include <string.h>
Packit 6ef888
#include <sys/types.h>
Packit 6ef888
#include <sys/stat.h>
Packit 6ef888
#include <unistd.h>
Packit 6ef888
#include <libintl.h>
Packit 6ef888
#include <ctype.h>
Packit 6ef888
#include <fcntl.h>
Packit 6ef888
#define _(String) gettext(String)
Packit 6ef888
Packit 6ef888
#include <logging.h>
Packit 6ef888
#include "libgfs2.h"
Packit 6ef888
#include "link.h"
Packit 6ef888
#include "osi_tree.h"
Packit 6ef888
#include "fsck.h"
Packit 6ef888
#include "util.h"
Packit 6ef888
#include "metawalk.h"
Packit 6ef888
#include "inode_hash.h"
Packit 6ef888
Packit 6ef888
#define COMFORTABLE_BLKS 5242880 /* 20GB in 4K blocks */
Packit 6ef888
Packit 6ef888
/* There are two bitmaps: (1) The "blockmap" that fsck uses to keep track of
Packit 6ef888
   what block type has been discovered, and (2) The rgrp bitmap.  Function
Packit 6ef888
   gfs2_blockmap_set is used to set the former and gfs2_set_bitmap
Packit 6ef888
   is used to set the latter.  The two must be kept in sync, otherwise
Packit 6ef888
   you'll get bitmap mismatches.  This function checks the status of the
Packit 6ef888
   bitmap whenever the blockmap changes, and fixes it accordingly. */
Packit 6ef888
int check_n_fix_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
Packit 6ef888
		       uint64_t blk, int error_on_dinode, int new_state)
Packit 6ef888
{
Packit 6ef888
	int old_state;
Packit 6ef888
	int treat_as_inode = 0;
Packit 6ef888
	int rewrite_rgrp = 0;
Packit 6ef888
	struct gfs_rgrp *gfs1rg;
Packit 6ef888
	const char *allocdesc[2][5] = { /* gfs2 descriptions */
Packit 6ef888
		{"free", "data", "unlinked", "inode", "reserved"},
Packit 6ef888
		/* gfs1 descriptions: */
Packit 6ef888
		{"free", "data", "free meta", "metadata", "reserved"}};
Packit 6ef888
	static struct rgrp_tree *prevrgd = NULL;
Packit 6ef888
Packit 6ef888
	if (prevrgd && rgrp_contains_block(prevrgd, blk)) {
Packit 6ef888
		rgd = prevrgd;
Packit 6ef888
	} else if (rgd == NULL || !rgrp_contains_block(rgd, blk)) {
Packit 6ef888
		rgd = gfs2_blk2rgrpd(sdp, blk);
Packit 6ef888
		prevrgd = rgd;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	gfs1rg = (struct gfs_rgrp *)&rgd->rg;
Packit 6ef888
Packit 6ef888
	old_state = lgfs2_get_bitmap(sdp, blk, rgd);
Packit 6ef888
	if (old_state < 0) {
Packit 6ef888
		log_err( _("Block %llu (0x%llx) is not represented in the "
Packit 6ef888
			   "system bitmap; part of an rgrp or superblock.\n"),
Packit 6ef888
			 (unsigned long long)blk, (unsigned long long)blk);
Packit 6ef888
		return -1;
Packit 6ef888
	}
Packit 6ef888
	if (old_state == new_state)
Packit 6ef888
		return 0;
Packit 6ef888
Packit 6ef888
	if (error_on_dinode && old_state == GFS2_BLKST_DINODE &&
Packit 6ef888
	    new_state != GFS2_BLKST_FREE) {
Packit 6ef888
		log_debug(_("Reference as '%s' to block %llu (0x%llx) which "
Packit 6ef888
			    "was marked as dinode. Needs further "
Packit 6ef888
			    "investigation.\n"),
Packit 6ef888
			  allocdesc[sdp->gfs1][new_state],
Packit 6ef888
			  (unsigned long long)blk, (unsigned long long)blk);
Packit 6ef888
		return 1;
Packit 6ef888
	}
Packit 6ef888
	/* Keep these messages as short as possible, or the output gets to be
Packit 6ef888
	   huge and unmanageable. */
Packit 6ef888
	log_err( _("Block %llu (0x%llx) was '%s', should be %s.\n"),
Packit 6ef888
		 (unsigned long long)blk, (unsigned long long)blk,
Packit 6ef888
		 allocdesc[sdp->gfs1][old_state],
Packit 6ef888
		 allocdesc[sdp->gfs1][new_state]);
Packit 6ef888
	if (!query( _("Fix the bitmap? (y/n)"))) {
Packit 6ef888
		log_err( _("The bitmap inconsistency was ignored.\n"));
Packit 6ef888
		return 0;
Packit 6ef888
	}
Packit 6ef888
	/* If the new bitmap state is free (and therefore the old state was
Packit 6ef888
	   not) we have to add to the free space in the rgrp. If the old
Packit 6ef888
	   bitmap state was free (and therefore it no longer is) we have to
Packit 6ef888
	   subtract to the free space.  If the type changed from dinode to 
Packit 6ef888
	   data or data to dinode, no change in free space. */
Packit 6ef888
	gfs2_set_bitmap(rgd, blk, new_state);
Packit 6ef888
	if (new_state == GFS2_BLKST_FREE) {
Packit 6ef888
		rgd->rg.rg_free++;
Packit 6ef888
		rewrite_rgrp = 1;
Packit 6ef888
	} else if (old_state == GFS2_BLKST_FREE) {
Packit 6ef888
		rgd->rg.rg_free--;
Packit 6ef888
		rewrite_rgrp = 1;
Packit 6ef888
	}
Packit 6ef888
	/* If we're freeing a dinode, get rid of the data structs for it. */
Packit 6ef888
	if (old_state == GFS2_BLKST_DINODE ||
Packit 6ef888
	    old_state == GFS2_BLKST_UNLINKED) {
Packit 6ef888
		struct dir_info *dt;
Packit 6ef888
		struct inode_info *ii;
Packit 6ef888
Packit 6ef888
		dt = dirtree_find(blk);
Packit 6ef888
		if (dt) {
Packit 6ef888
			dirtree_delete(dt);
Packit 6ef888
			treat_as_inode = 1;
Packit 6ef888
		}
Packit 6ef888
		ii = inodetree_find(blk);
Packit 6ef888
		if (ii) {
Packit 6ef888
			inodetree_delete(ii);
Packit 6ef888
			treat_as_inode = 1;
Packit 6ef888
		} else if (!sdp->gfs1) {
Packit 6ef888
			treat_as_inode = 1;
Packit 6ef888
		} else if (link1_type(&nlink1map, blk) == 1) {
Packit 6ef888
			/* This is a GFS1 fs (so all metadata is marked inode).
Packit 6ef888
			   We need to verify it is an inode before we can decr
Packit 6ef888
			   the rgrp inode count. */
Packit 6ef888
			treat_as_inode = 1;
Packit 6ef888
		}
Packit 6ef888
		if (old_state == GFS2_BLKST_DINODE) {
Packit 6ef888
			if (treat_as_inode && rgd->rg.rg_dinodes > 0)
Packit 6ef888
				rgd->rg.rg_dinodes--;
Packit 6ef888
			else if (sdp->gfs1 && gfs1rg->rg_usedmeta > 0)
Packit 6ef888
				gfs1rg->rg_usedmeta--;
Packit 6ef888
			rewrite_rgrp = 1;
Packit 6ef888
		}
Packit 6ef888
		link1_set(&nlink1map, blk, 0);
Packit 6ef888
	} else if (new_state == GFS2_BLKST_DINODE) {
Packit 6ef888
		if (!sdp->gfs1) {
Packit 6ef888
			treat_as_inode = 1;
Packit 6ef888
		} else {
Packit 6ef888
			/* This is GFS1 (so all metadata is marked inode). We
Packit 6ef888
			   need to verify it is an inode before we can decr
Packit 6ef888
			   the rgrp inode count. */
Packit 6ef888
			if (link1_type(&nlink1map, blk) == 1)
Packit 6ef888
				treat_as_inode = 1;
Packit 6ef888
			else {
Packit 6ef888
				struct dir_info *dt;
Packit 6ef888
				struct inode_info *ii;
Packit 6ef888
Packit 6ef888
				dt = dirtree_find(blk);
Packit 6ef888
				if (dt)
Packit 6ef888
					treat_as_inode = 1;
Packit 6ef888
				else {
Packit 6ef888
					ii = inodetree_find(blk);
Packit 6ef888
					if (ii)
Packit 6ef888
						treat_as_inode = 1;
Packit 6ef888
				}
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
		if (treat_as_inode)
Packit 6ef888
			rgd->rg.rg_dinodes++;
Packit 6ef888
		else if (sdp->gfs1)
Packit 6ef888
			gfs1rg->rg_usedmeta++;
Packit 6ef888
		rewrite_rgrp = 1;
Packit 6ef888
	}
Packit 6ef888
	if (rewrite_rgrp) {
Packit 6ef888
		if (sdp->gfs1)
Packit 6ef888
			gfs_rgrp_out((struct gfs_rgrp *)&rgd->rg, rgd->bits[0].bi_bh);
Packit 6ef888
		else
Packit 6ef888
			gfs2_rgrp_out(&rgd->rg, rgd->bits[0].bi_bh->b_data);
Packit 6ef888
		bmodified(rgd->bits[0].bi_bh);
Packit 6ef888
	}
Packit 6ef888
	log_err( _("The bitmap was fixed.\n"));
Packit 6ef888
	return 0;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/*
Packit 6ef888
 * _fsck_bitmap_set - Mark a block in the bitmap, and adjust free space.
Packit 6ef888
 */
Packit 6ef888
int _fsck_bitmap_set(struct gfs2_inode *ip, uint64_t bblock,
Packit 6ef888
		     const char *btype, int mark,
Packit 6ef888
		     int error_on_dinode, const char *caller, int fline)
Packit 6ef888
{
Packit 6ef888
	int error;
Packit 6ef888
	static int prev_ino_addr = 0;
Packit 6ef888
	static int prev_mark = 0;
Packit 6ef888
	static int prevcount = 0;
Packit 6ef888
	static const char *prev_caller = NULL;
Packit 6ef888
Packit 6ef888
	if (print_level >= MSG_DEBUG) {
Packit 6ef888
		if ((ip->i_di.di_num.no_addr == prev_ino_addr) &&
Packit 6ef888
		    (mark == prev_mark) && caller == prev_caller) {
Packit 6ef888
			log_info("(0x%llx) ", (unsigned long long)bblock);
Packit 6ef888
			prevcount++;
Packit 6ef888
			if (prevcount > 10) {
Packit 6ef888
				log_info("\n");
Packit 6ef888
				prevcount = 0;
Packit 6ef888
			}
Packit 6ef888
		/* I'm circumventing the log levels here on purpose to make the
Packit 6ef888
		   output easier to debug. */
Packit 6ef888
		} else if (ip->i_di.di_num.no_addr == bblock) {
Packit 6ef888
			if (prevcount) {
Packit 6ef888
				log_info("\n");
Packit 6ef888
				prevcount = 0;
Packit 6ef888
			}
Packit 6ef888
			printf( _("(%s:%d) %s inode found at block "
Packit 6ef888
				  "(0x%llx): marking as '%s'\n"), caller, fline,
Packit 6ef888
			       btype,
Packit 6ef888
			       (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			       block_type_string(mark));
Packit 6ef888
Packit 6ef888
		} else {
Packit 6ef888
			if (prevcount) {
Packit 6ef888
				log_info("\n");
Packit 6ef888
				prevcount = 0;
Packit 6ef888
			}
Packit 6ef888
			printf( _("(%s:%d) inode (0x%llx) references %s block"
Packit 6ef888
				  " (0x%llx): marking as '%s'\n"),
Packit 6ef888
			       caller, fline,
Packit 6ef888
			       (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			       btype, (unsigned long long)bblock,
Packit 6ef888
			       block_type_string(mark));
Packit 6ef888
		}
Packit 6ef888
		prev_ino_addr = ip->i_di.di_num.no_addr;
Packit 6ef888
		prev_mark = mark;
Packit 6ef888
		prev_caller = caller;
Packit 6ef888
	}
Packit 6ef888
	error = check_n_fix_bitmap(ip->i_sbd, ip->i_rgd, bblock,
Packit 6ef888
				   error_on_dinode, mark);
Packit 6ef888
	if (error < 0)
Packit 6ef888
		log_err(_("This block is not represented in the bitmap.\n"));
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
struct duptree *dupfind(uint64_t block)
Packit 6ef888
{
Packit 6ef888
	struct osi_node *node = dup_blocks.osi_node;
Packit 6ef888
Packit 6ef888
	while (node) {
Packit 6ef888
		struct duptree *dt = (struct duptree *)node;
Packit 6ef888
Packit 6ef888
		if (block < dt->block)
Packit 6ef888
			node = node->osi_left;
Packit 6ef888
		else if (block > dt->block)
Packit 6ef888
			node = node->osi_right;
Packit 6ef888
		else
Packit 6ef888
			return dt;
Packit 6ef888
	}
Packit 6ef888
	return NULL;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp, uint64_t block)
Packit 6ef888
{
Packit 6ef888
	int j;
Packit 6ef888
Packit 6ef888
	if (lf_dip && lf_dip->i_di.di_num.no_addr == block)
Packit 6ef888
		return lf_dip;
Packit 6ef888
	if (!sdp->gfs1)
Packit 6ef888
		return is_system_inode(sdp, block);
Packit 6ef888
Packit 6ef888
	if (sdp->md.statfs && block == sdp->md.statfs->i_di.di_num.no_addr)
Packit 6ef888
		return sdp->md.statfs;
Packit 6ef888
	if (sdp->md.jiinode && block == sdp->md.jiinode->i_di.di_num.no_addr)
Packit 6ef888
		return sdp->md.jiinode;
Packit 6ef888
	if (sdp->md.riinode && block == sdp->md.riinode->i_di.di_num.no_addr)
Packit 6ef888
		return sdp->md.riinode;
Packit 6ef888
	if (sdp->md.qinode && block == sdp->md.qinode->i_di.di_num.no_addr)
Packit 6ef888
		return sdp->md.qinode;
Packit 6ef888
	if (sdp->md.rooti && block == sdp->md.rooti->i_di.di_num.no_addr)
Packit 6ef888
		return sdp->md.rooti;
Packit 6ef888
	for (j = 0; j < sdp->md.journals; j++)
Packit 6ef888
		if (sdp->md.journal && sdp->md.journal[j] &&
Packit 6ef888
		    block == sdp->md.journal[j]->i_di.di_num.no_addr)
Packit 6ef888
			return sdp->md.journal[j];
Packit 6ef888
	return NULL;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/* fsck_load_inode - same as gfs2_load_inode() in libgfs2 but system inodes
Packit 6ef888
   get special treatment. */
Packit 6ef888
struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sdp, uint64_t block)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_inode *ip = NULL;
Packit 6ef888
Packit 6ef888
	ip = fsck_system_inode(sdp, block);
Packit 6ef888
	if (ip)
Packit 6ef888
		return ip;
Packit 6ef888
	if (sdp->gfs1)
Packit 6ef888
		return lgfs2_gfs_inode_read(sdp, block);
Packit 6ef888
	return lgfs2_inode_read(sdp, block);
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/* fsck_inode_get - same as inode_get() in libgfs2 but system inodes
Packit 6ef888
   get special treatment. */
Packit 6ef888
struct gfs2_inode *fsck_inode_get(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
Packit 6ef888
				  struct gfs2_buffer_head *bh)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_inode *sysip;
Packit 6ef888
	struct gfs2_inode *ip;
Packit 6ef888
Packit 6ef888
	sysip = fsck_system_inode(sdp, bh->b_blocknr);
Packit 6ef888
	if (sysip)
Packit 6ef888
		return sysip;
Packit 6ef888
Packit 6ef888
	if (sdp->gfs1)
Packit 6ef888
		ip = lgfs2_gfs_inode_get(sdp, bh);
Packit 6ef888
	else
Packit 6ef888
		ip = lgfs2_inode_get(sdp, bh);
Packit 6ef888
	if (ip)
Packit 6ef888
		ip->i_rgd = rgd;
Packit 6ef888
	return ip;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/* fsck_inode_put - same as inode_put() in libgfs2 but system inodes
Packit 6ef888
   get special treatment. */
Packit 6ef888
void fsck_inode_put(struct gfs2_inode **ip_in)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_inode *ip = *ip_in;
Packit 6ef888
	struct gfs2_inode *sysip;
Packit 6ef888
Packit 6ef888
	sysip = fsck_system_inode(ip->i_sbd, ip->i_di.di_num.no_addr);
Packit 6ef888
	if (!sysip)
Packit 6ef888
		inode_put(ip_in);
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * dirent_repair - attempt to repair a corrupt directory entry.
Packit 6ef888
 * @bh - The buffer header that contains the bad dirent
Packit 6ef888
 * @de - The directory entry in native format
Packit 6ef888
 * @dent - The directory entry in on-disk format
Packit 6ef888
 * @type - Type of directory (DIR_LINEAR or DIR_EXHASH)
Packit 6ef888
 * @first - TRUE if this is the first dirent in the buffer
Packit 6ef888
 *
Packit 6ef888
 * This function tries to repair a corrupt directory entry.  All we
Packit 6ef888
 * know at this point is that the length field is wrong.
Packit 6ef888
 */
Packit 6ef888
static int dirent_repair(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
Packit 6ef888
		  struct gfs2_dirent *de, struct gfs2_dirent *dent,
Packit 6ef888
		  int type, int first)
Packit 6ef888
{
Packit 6ef888
	char *bh_end, *p;
Packit 6ef888
	int calc_de_name_len = 0;
Packit 6ef888
	
Packit 6ef888
	/* If this is a sentinel, just fix the length and move on */
Packit 6ef888
	if (first && !de->de_inum.no_formal_ino) { /* Is it a sentinel? */
Packit 6ef888
		if (type == DIR_LINEAR)
Packit 6ef888
			de->de_rec_len = ip->i_sbd->bsize -
Packit 6ef888
				sizeof(struct gfs2_dinode);
Packit 6ef888
		else
Packit 6ef888
			de->de_rec_len = ip->i_sbd->bsize -
Packit 6ef888
				sizeof(struct gfs2_leaf);
Packit 6ef888
	} else {
Packit 6ef888
		bh_end = bh->b_data + ip->i_sbd->bsize;
Packit 6ef888
		/* first, figure out a probable name length */
Packit 6ef888
		p = (char *)dent + sizeof(struct gfs2_dirent);
Packit 6ef888
		while (*p &&         /* while there's a non-zero char and */
Packit 6ef888
		       isprint(*p) && /* a printable character and */
Packit 6ef888
		       p < bh_end) { /* not past end of buffer */
Packit 6ef888
			calc_de_name_len++;
Packit 6ef888
			p++;
Packit 6ef888
		}
Packit 6ef888
		if (!calc_de_name_len)
Packit 6ef888
			return 1;
Packit 6ef888
		/* There can often be noise at the end, so only          */
Packit 6ef888
		/* Trust the shorter of the two in case we have too much */
Packit 6ef888
		/* Or rather, only trust ours if it's shorter.           */
Packit 6ef888
		if (!de->de_name_len || de->de_name_len > NAME_MAX ||
Packit 6ef888
		    calc_de_name_len < de->de_name_len) /* if dent is hosed */
Packit 6ef888
			de->de_name_len = calc_de_name_len; /* use ours */
Packit 6ef888
		de->de_rec_len = GFS2_DIRENT_SIZE(de->de_name_len);
Packit 6ef888
	}
Packit 6ef888
	gfs2_dirent_out(de, (char *)dent);
Packit 6ef888
	bmodified(bh);
Packit 6ef888
	return 0;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * dirblk_truncate - truncate a directory block
Packit 6ef888
 */
Packit 6ef888
static void dirblk_truncate(struct gfs2_inode *ip, struct gfs2_dirent *fixb,
Packit 6ef888
			    struct gfs2_buffer_head *bh)
Packit 6ef888
{
Packit 6ef888
	char *bh_end;
Packit 6ef888
	struct gfs2_dirent de;
Packit 6ef888
Packit 6ef888
	bh_end = bh->b_data + ip->i_sbd->sd_sb.sb_bsize;
Packit 6ef888
	/* truncate the block to save the most dentries.  To do this we
Packit 6ef888
	   have to patch the previous dent. */
Packit 6ef888
	gfs2_dirent_in(&de, (char *)fixb);
Packit 6ef888
	de.de_rec_len = bh_end - (char *)fixb;
Packit 6ef888
	gfs2_dirent_out(&de, (char *)fixb);
Packit 6ef888
	bmodified(bh);
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/*
 * check_entries - check directory entries for a given block
 *
 * @ip - dinode associated with this leaf block
 * @bh - buffer for the leaf block
 * @type - type of block this is (DIR_LINEAR or DIR_EXHASH); selects whether
 *         the first dirent follows a dinode header or a leaf header
 * @count - entry counter; used in messages and passed through to
 *          pass->check_dentry (presumably incremented there - confirm)
 * @lindex - index value whose address is passed to pass->check_dentry
 *           (presumably the hash table index of this leaf - confirm)
 * @pass - structure pointing to pass-specific functions
 *
 * Nothing is checked unless pass->check_dentry is set.  A corrupt entry is
 * repaired or truncated away (or ignored) depending on the user's answer,
 * and in the truncate/ignore cases the walk stops with a return of 0.
 *
 * returns: 0 - good block or it was repaired to be good (also returned,
 *              as FSCK_OK, when the pass is skipped or fsck is aborted)
 *         <0 - error returned by pass->check_dentry
 */
Packit 6ef888
static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
Packit 6ef888
			 int type, uint32_t *count, int lindex,
Packit 6ef888
			 struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_dirent *dent;
Packit 6ef888
	struct gfs2_dirent de, *prev;
Packit 6ef888
	int error = 0;
Packit 6ef888
	char *bh_end;
Packit 6ef888
	char *filename;
Packit 6ef888
	int first = 1;
Packit 6ef888
Packit 6ef888
	bh_end = bh->b_data + ip->i_sbd->bsize;
Packit 6ef888
Packit 6ef888
	/* Locate the first dirent, which follows the block's header. */
	if (type == DIR_LINEAR) {
Packit 6ef888
		dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode));
Packit 6ef888
	} else {
Packit 6ef888
		dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf));
Packit 6ef888
		log_debug( _("Checking leaf %llu (0x%llx)\n"),
Packit 6ef888
			  (unsigned long long)bh->b_blocknr,
Packit 6ef888
			  (unsigned long long)bh->b_blocknr);
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	prev = NULL;
Packit 6ef888
	if (!pass->check_dentry)
Packit 6ef888
		return 0;
Packit 6ef888
Packit 6ef888
	/* Walk the dirents; each one's de_rec_len gives the offset of the
	   next, and the walk ends when that reaches the end of the block. */
	while (1) {
Packit 6ef888
		if (skip_this_pass || fsck_abort)
Packit 6ef888
			return FSCK_OK;
Packit 6ef888
		memset(&de, 0, sizeof(struct gfs2_dirent));
Packit 6ef888
		gfs2_dirent_in(&de, (char *)dent);
Packit 6ef888
		filename = (char *)dent + sizeof(struct gfs2_dirent);
Packit 6ef888
Packit 6ef888
		/* Corrupt if the record is too short to hold its own header
		   plus name, or if a non-first entry with an inode number
		   has no name. */
		if (de.de_rec_len < sizeof(struct gfs2_dirent) +
Packit 6ef888
		    de.de_name_len ||
Packit 6ef888
		    (de.de_inum.no_formal_ino && !de.de_name_len && !first)) {
Packit 6ef888
			log_err( _("Directory block %llu (0x%llx"
Packit 6ef888
				"), entry %d of directory %llu "
Packit 6ef888
				"(0x%llx) is corrupt.\n"),
Packit 6ef888
				(unsigned long long)bh->b_blocknr,
Packit 6ef888
				(unsigned long long)bh->b_blocknr,
Packit 6ef888
				(*count) + 1,
Packit 6ef888
				(unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				(unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
			if (query( _("Attempt to repair it? (y/n) "))) {
Packit 6ef888
				/* dirent_repair updates de in place on
				   success; a non-zero return means it gave
				   up, so cut the block off here instead. */
				if (dirent_repair(ip, bh, &de, dent, type,
Packit 6ef888
						  first)) {
Packit 6ef888
					if (first) /* make a new sentinel */
Packit 6ef888
						dirblk_truncate(ip, dent, bh);
Packit 6ef888
					else
Packit 6ef888
						dirblk_truncate(ip, prev, bh);
Packit 6ef888
					log_err( _("Unable to repair corrupt "
Packit 6ef888
						   "directory entry; the "
Packit 6ef888
						   "entry was removed "
Packit 6ef888
						   "instead.\n"));
Packit 6ef888
					return 0;
Packit 6ef888
				} else {
Packit 6ef888
					log_err( _("Corrupt directory entry "
Packit 6ef888
						   "repaired.\n"));
Packit 6ef888
					/* keep looping through dentries */
Packit 6ef888
				}
Packit 6ef888
			} else {
Packit 6ef888
				log_err( _("Corrupt directory entry ignored, "
Packit 6ef888
					"stopped after checking %d entries.\n"),
Packit 6ef888
					*count);
Packit 6ef888
				return 0;
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
		/* A zero formal inode number is only acceptable on the
		   first entry, where it marks a sentinel. */
		if (!de.de_inum.no_formal_ino){
Packit 6ef888
			if (first){
Packit 6ef888
				log_debug( _("First dirent is a sentinel (place holder).\n"));
Packit 6ef888
				first = 0;
Packit 6ef888
			} else {
Packit 6ef888
				log_err( _("Directory entry with inode number of "
Packit 6ef888
					"zero in leaf %llu (0x%llx) of "
Packit 6ef888
					"directory %llu (0x%llx)!\n"),
Packit 6ef888
					(unsigned long long)bh->b_blocknr,
Packit 6ef888
					(unsigned long long)bh->b_blocknr,
Packit 6ef888
					(unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
					(unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
				if (query(_("Attempt to remove it? (y/n) "))) {
Packit 6ef888
					dirblk_truncate(ip, prev, bh);
Packit 6ef888
					log_err(_("The corrupt directory "
Packit 6ef888
						  "entry was removed.\n"));
Packit 6ef888
				} else {
Packit 6ef888
					log_err( _("Corrupt directory entry "
Packit 6ef888
						   "ignored, stopped after "
Packit 6ef888
						   "checking %d entries.\n"),
Packit 6ef888
						 *count);
Packit 6ef888
				}
Packit 6ef888
				return 0;
Packit 6ef888
			}
Packit 6ef888
		} else {
Packit 6ef888
			if (!de.de_inum.no_addr && first) { /* reverse sentinel */
Packit 6ef888
				log_debug( _("First dirent is a Sentinel (place holder).\n"));
Packit 6ef888
				/* Swap the two to silently make it a proper sentinel */
Packit 6ef888
				de.de_inum.no_addr = de.de_inum.no_formal_ino;
Packit 6ef888
				de.de_inum.no_formal_ino = 0;
Packit 6ef888
				gfs2_dirent_out(&de, (char *)dent);
Packit 6ef888
				bmodified(bh);
Packit 6ef888
				/* Mark dirent buffer as modified */
Packit 6ef888
				first = 0;
Packit 6ef888
			} else {
Packit 6ef888
				/* A real entry: hand it to the pass-specific
				   checker.  Only negative results abort. */
				error = pass->check_dentry(ip, dent, prev, bh,
Packit 6ef888
							   filename, count,
Packit 6ef888
							   &lindex,
Packit 6ef888
							   pass->private);
Packit 6ef888
				if (error < 0) {
Packit 6ef888
					stack;
Packit 6ef888
					return error;
Packit 6ef888
				}
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
Packit 6ef888
		if ((char *)dent + de.de_rec_len >= bh_end){
Packit 6ef888
			log_debug( _("Last entry processed for %lld->%lld "
Packit 6ef888
				     "(0x%llx->0x%llx), di_blocks=%llu.\n"),
Packit 6ef888
				   (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				   (unsigned long long)bh->b_blocknr,
Packit 6ef888
				   (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				   (unsigned long long)bh->b_blocknr,
Packit 6ef888
				   (unsigned long long)ip->i_di.di_blocks);
Packit 6ef888
			break;
Packit 6ef888
		}
Packit 6ef888
Packit 6ef888
		/* If we didn't clear the dentry, or if we did, but it
Packit 6ef888
		 * was the first dentry, set prev  */
Packit 6ef888
		if (!error || first)
Packit 6ef888
			prev = dent;
Packit 6ef888
		first = 0;
Packit 6ef888
		dent = (struct gfs2_dirent *)((char *)dent + de.de_rec_len);
Packit 6ef888
	}
Packit 6ef888
	return 0;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
 * check_leaf - check a leaf block for errors
 * @ip: directory inode that references the leaf
 * @lindex: index at which the leaf was referenced; used in messages and
 *          passed on to check_entries and pass->repair_leaf
 * @pass: pass-specific callbacks; check_leaf_depth, check_leaf,
 *        check_dentry and repair_leaf are each used only if set
 * @leaf_no: block number of the leaf; passed by pointer to
 *           pass->repair_leaf (presumably so it can be replaced - confirm)
 * @leaf: filled in from the on-disk leaf via gfs2_leaf_in()
 * @ref_count: number of references to this leaf; shifted left if the
 *             directory depth grew while the leaf was being processed
 *
 * Reads in the leaf block, validates it and runs the pass callbacks on it.
 * The leaf buffer is released before returning on every path, so the
 * caller has nothing to brelse.
 *
 * Returns: 0 if the leaf was processed, 1 if the leaf was bad (after any
 *          repair attempt), or a negative error code - including -EEXIST
 *          from pass->check_leaf and failures from pass->repair_leaf.
 */
Packit 6ef888
int check_leaf(struct gfs2_inode *ip, int lindex, struct metawalk_fxns *pass,
Packit 6ef888
	       uint64_t *leaf_no, struct gfs2_leaf *leaf, int *ref_count)
Packit 6ef888
{
Packit 6ef888
	int error = 0, fix;
Packit 6ef888
	struct gfs2_buffer_head *lbh = NULL;
Packit 6ef888
	uint32_t count = 0;
Packit 6ef888
	struct gfs2_sbd *sdp = ip->i_sbd;
Packit 6ef888
	const char *msg;
Packit 6ef888
	/* Remember the depth on entry so a change made by a callback can be
	   detected before returning. */
	int di_depth = ip->i_di.di_depth;
Packit 6ef888
Packit 6ef888
	/* Make sure the block number is in range. */
Packit 6ef888
	if (!valid_block_ip(ip, *leaf_no)) {
Packit 6ef888
		log_err( _("Leaf block #%llu (0x%llx) is out of range for "
Packit 6ef888
			   "directory #%llu (0x%llx) at index %d (0x%x).\n"),
Packit 6ef888
			 (unsigned long long)*leaf_no,
Packit 6ef888
			 (unsigned long long)*leaf_no,
Packit 6ef888
			 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			 lindex, lindex);
Packit 6ef888
		msg = _("that is out of range");
Packit 6ef888
		goto bad_leaf;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	/* Try to read in the leaf block. */
Packit 6ef888
	lbh = bread(sdp, *leaf_no);
Packit 6ef888
	/* Make sure it's really a valid leaf block. */
Packit 6ef888
	if (gfs2_check_meta(lbh, GFS2_METATYPE_LF)) {
Packit 6ef888
		msg = _("that is not really a leaf");
Packit 6ef888
		goto bad_leaf;
Packit 6ef888
	}
Packit 6ef888
	if (pass->check_leaf_depth)
Packit 6ef888
		error = pass->check_leaf_depth(ip, *leaf_no, *ref_count, lbh);
Packit 6ef888
Packit 6ef888
	if (error >= 0 && pass->check_leaf) {
Packit 6ef888
		error = pass->check_leaf(ip, *leaf_no, pass->private);
Packit 6ef888
		if (error == -EEXIST) {
Packit 6ef888
			log_info(_("Previous reference to leaf %lld (0x%llx) "
Packit 6ef888
				   "has already checked it; skipping.\n"),
Packit 6ef888
				 (unsigned long long)*leaf_no,
Packit 6ef888
				 (unsigned long long)*leaf_no);
Packit 6ef888
			brelse(lbh);
Packit 6ef888
			return error;
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
	/* Early versions of GFS2 had an endianess bug in the kernel that set
Packit 6ef888
	   lf_dirent_format to cpu_to_be16(GFS2_FORMAT_DE).  This was fixed
Packit 6ef888
	   to use cpu_to_be32(), but we should check for incorrect values and
Packit 6ef888
	   replace them with the correct value. */
Packit 6ef888
Packit 6ef888
	gfs2_leaf_in(leaf, lbh->b_data);
Packit 6ef888
	if (leaf->lf_dirent_format == (GFS2_FORMAT_DE << 16)) {
Packit 6ef888
		log_debug( _("incorrect lf_dirent_format at leaf #%" PRIu64
Packit 6ef888
			     "\n"), *leaf_no);
Packit 6ef888
		leaf->lf_dirent_format = GFS2_FORMAT_DE;
Packit 6ef888
		gfs2_leaf_out(leaf, lbh->b_data);
Packit 6ef888
		bmodified(lbh);
Packit 6ef888
		log_debug( _("Fixing lf_dirent_format.\n"));
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	/* Make sure it's really a leaf. */
Packit 6ef888
	if (leaf->lf_header.mh_type != GFS2_METATYPE_LF) {
Packit 6ef888
		log_err( _("Inode %llu (0x%llx) points to bad leaf %llu"
Packit 6ef888
			   " (0x%llx).\n"),
Packit 6ef888
			 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			 (unsigned long long)*leaf_no,
Packit 6ef888
			 (unsigned long long)*leaf_no);
Packit 6ef888
		msg = _("that is not a leaf");
Packit 6ef888
		goto bad_leaf;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	/* Walk the leaf's dirents, then reconcile the number found with the
	   entry count recorded in the leaf header. */
	if (pass->check_dentry && is_dir(&ip->i_di, sdp->gfs1)) {
Packit 6ef888
		error = check_entries(ip, lbh, DIR_EXHASH, &count, lindex,
Packit 6ef888
				      pass);
Packit 6ef888
Packit 6ef888
		if (skip_this_pass || fsck_abort)
Packit 6ef888
			goto out;
Packit 6ef888
Packit 6ef888
		if (error < 0) {
Packit 6ef888
			stack;
Packit 6ef888
			goto out; /* This seems wrong: needs investigation */
Packit 6ef888
		}
Packit 6ef888
Packit 6ef888
		if (count == leaf->lf_entries)
Packit 6ef888
			goto out;
Packit 6ef888
Packit 6ef888
		/* release and re-read the leaf in case check_entries
Packit 6ef888
		   changed it. */
Packit 6ef888
		brelse(lbh);
Packit 6ef888
		lbh = bread(sdp, *leaf_no);
Packit 6ef888
		gfs2_leaf_in(leaf, lbh->b_data);
Packit 6ef888
		if (count != leaf->lf_entries) {
Packit 6ef888
			log_err( _("Leaf %llu (0x%llx) entry count in "
Packit 6ef888
				   "directory %llu (0x%llx) does not match "
Packit 6ef888
				   "number of entries found - is %u, found %u\n"),
Packit 6ef888
				 (unsigned long long)*leaf_no,
Packit 6ef888
				 (unsigned long long)*leaf_no,
Packit 6ef888
				 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				 leaf->lf_entries, count);
Packit 6ef888
			if (query( _("Update leaf entry count? (y/n) "))) {
Packit 6ef888
				leaf->lf_entries = count;
Packit 6ef888
				gfs2_leaf_out(leaf, lbh->b_data);
Packit 6ef888
				bmodified(lbh);
Packit 6ef888
				log_warn( _("Leaf entry count updated\n"));
Packit 6ef888
			} else
Packit 6ef888
				log_err( _("Leaf entry count left in "
Packit 6ef888
					   "inconsistent state\n"));
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
out:
Packit 6ef888
	/* If the directory depth grew during processing, scale the caller's
	   reference count by the same power of two. */
	if (di_depth < ip->i_di.di_depth) {
Packit 6ef888
		log_debug(_("Depth of directory %lld (0x%llx) changed from "
Packit 6ef888
			    "%d to %d; adjusting ref_count from %d to %d\n"),
Packit 6ef888
			  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			  di_depth, ip->i_di.di_depth,
Packit 6ef888
			  *ref_count,
Packit 6ef888
			  (*ref_count) << (ip->i_di.di_depth - di_depth));
Packit 6ef888
		(*ref_count) <<= (ip->i_di.di_depth - di_depth);
Packit 6ef888
	}
Packit 6ef888
	brelse(lbh);
Packit 6ef888
	if (error < 0)
Packit 6ef888
		return error;
Packit 6ef888
	return 0;
Packit 6ef888
Packit 6ef888
bad_leaf:
Packit 6ef888
	/* lbh is still NULL when the block number itself was out of range. */
	if (lbh)
Packit 6ef888
		brelse(lbh);
Packit 6ef888
	if (pass->repair_leaf) {
Packit 6ef888
		/* The leaf we read in is bad so we need to repair it. */
Packit 6ef888
		fix = pass->repair_leaf(ip, leaf_no, lindex, *ref_count, msg);
Packit 6ef888
		if (fix < 0)
Packit 6ef888
			return fix;
Packit 6ef888
Packit 6ef888
	}
Packit 6ef888
	if (di_depth < ip->i_di.di_depth) {
Packit 6ef888
		log_debug(_("Depth of directory %lld (0x%llx) changed from "
Packit 6ef888
			    "%d to %d. Adjusting ref_count from %d to %d\n"),
Packit 6ef888
			  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			  di_depth, ip->i_di.di_depth,
Packit 6ef888
			  *ref_count,
Packit 6ef888
			  (*ref_count) << (ip->i_di.di_depth - di_depth));
Packit 6ef888
		(*ref_count) <<= (ip->i_di.di_depth - di_depth);
Packit 6ef888
	}
Packit 6ef888
	return 1;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/* u64cmp - qsort() comparison callback for arrays of uint64_t.
   Returns 1 if *p1 > *p2, -1 if *p1 < *p2 and 0 if they are equal.  The
   operands are read through const-qualified pointers so the qualifier of
   the callback's parameters is not cast away. */
static int u64cmp(const void *p1, const void *p2)
{
	const uint64_t a = *(const uint64_t *)p1;
	const uint64_t b = *(const uint64_t *)p2;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1
	   without any risk of overflow from subtracting the values. */
	return (a > b) - (a < b);
}
Packit 6ef888
Packit 6ef888
static void dir_leaf_reada(struct gfs2_inode *ip, uint64_t *tbl, unsigned hsize)
Packit 6ef888
{
Packit 6ef888
	uint64_t *t = alloca(hsize * sizeof(uint64_t));
Packit 6ef888
	uint64_t leaf_no;
Packit 6ef888
	struct gfs2_sbd *sdp = ip->i_sbd;
Packit 6ef888
	unsigned n = 0;
Packit 6ef888
	unsigned i;
Packit 6ef888
Packit 6ef888
	for (i = 0; i < hsize; i++) {
Packit 6ef888
		leaf_no = be64_to_cpu(tbl[i]);
Packit 6ef888
		if (valid_block_ip(ip, leaf_no))
Packit 6ef888
			t[n++] = leaf_no * sdp->bsize;
Packit 6ef888
	}
Packit 6ef888
	qsort(t, n, sizeof(uint64_t), u64cmp);
Packit 6ef888
	for (i = 0; i < n; i++)
Packit 6ef888
		posix_fadvise(sdp->device_fd, t[i], sdp->bsize, POSIX_FADV_WILLNEED);
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/* Checks exhash directory entries */
Packit 6ef888
int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	int error = 0;
Packit 6ef888
	unsigned hsize = (1 << ip->i_di.di_depth);
Packit 6ef888
	uint64_t leaf_no, leaf_next;
Packit 6ef888
	uint64_t first_ok_leaf, orig_di_blocks;
Packit 6ef888
	struct gfs2_buffer_head *lbh;
Packit 6ef888
	int lindex;
Packit 6ef888
	struct gfs2_sbd *sdp = ip->i_sbd;
Packit 6ef888
	int ref_count, orig_ref_count, orig_di_depth, orig_di_height;
Packit 6ef888
	uint64_t *tbl;
Packit 6ef888
	int chained_leaf, tbl_valid;
Packit 6ef888
Packit 6ef888
	tbl = get_dir_hash(ip);
Packit 6ef888
	if (tbl == NULL) {
Packit 6ef888
		perror("get_dir_hash");
Packit 6ef888
		return -1;
Packit 6ef888
	}
Packit 6ef888
	tbl_valid = 1;
Packit 6ef888
	orig_di_depth = ip->i_di.di_depth;
Packit 6ef888
	orig_di_height = ip->i_di.di_height;
Packit 6ef888
	orig_di_blocks = ip->i_di.di_blocks;
Packit 6ef888
Packit 6ef888
	/* Turn off system readahead */
Packit 6ef888
	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
Packit 6ef888
Packit 6ef888
	/* Readahead */
Packit 6ef888
	dir_leaf_reada(ip, tbl, hsize);
Packit 6ef888
Packit 6ef888
	if (pass->check_hash_tbl) {
Packit 6ef888
		error = pass->check_hash_tbl(ip, tbl, hsize, pass->private);
Packit 6ef888
		if (error < 0) {
Packit 6ef888
			free(tbl);
Packit 6ef888
			posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
Packit 6ef888
			return error;
Packit 6ef888
		}
Packit 6ef888
		/* If hash table changes were made, read it in again. */
Packit 6ef888
		if (error) {
Packit 6ef888
			free(tbl);
Packit 6ef888
			tbl = get_dir_hash(ip);
Packit 6ef888
			if (tbl == NULL) {
Packit 6ef888
				perror("get_dir_hash");
Packit 6ef888
				return -1;
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	/* Find the first valid leaf pointer in range and use it as our "old"
Packit 6ef888
	   leaf. That way, bad blocks at the beginning will be overwritten
Packit 6ef888
	   with the first valid leaf. */
Packit 6ef888
	first_ok_leaf = leaf_no = -1;
Packit 6ef888
	for (lindex = 0; lindex < hsize; lindex++) {
Packit 6ef888
		leaf_no = be64_to_cpu(tbl[lindex]);
Packit 6ef888
		if (valid_block_ip(ip, leaf_no)) {
Packit 6ef888
			lbh = bread(sdp, leaf_no);
Packit 6ef888
			/* Make sure it's really a valid leaf block. */
Packit 6ef888
			if (gfs2_check_meta(lbh, GFS2_METATYPE_LF) == 0) {
Packit 6ef888
				brelse(lbh);
Packit 6ef888
				first_ok_leaf = leaf_no;
Packit 6ef888
				break;
Packit 6ef888
			}
Packit 6ef888
			brelse(lbh);
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
	if (first_ok_leaf == -1) { /* no valid leaf found */
Packit 6ef888
		log_err( _("Directory #%llu (0x%llx) has no valid leaf "
Packit 6ef888
			   "blocks\n"),
Packit 6ef888
			 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			 (unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
		free(tbl);
Packit 6ef888
		posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
Packit 6ef888
		return 1;
Packit 6ef888
	}
Packit 6ef888
	lindex = 0;
Packit 6ef888
	leaf_next = -1;
Packit 6ef888
	while (lindex < hsize) {
Packit 6ef888
		int l;
Packit 6ef888
Packit 6ef888
		if (fsck_abort)
Packit 6ef888
			break;
Packit 6ef888
Packit 6ef888
		if (!tbl_valid) {
Packit 6ef888
			free(tbl);
Packit 6ef888
			log_debug(_("Re-reading 0x%llx hash table.\n"),
Packit 6ef888
				  (unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
			tbl = get_dir_hash(ip);
Packit 6ef888
			if (tbl == NULL) {
Packit 6ef888
				perror("get_dir_hash");
Packit 6ef888
				return -1;
Packit 6ef888
			}
Packit 6ef888
			tbl_valid = 1;
Packit 6ef888
			orig_di_depth = ip->i_di.di_depth;
Packit 6ef888
			orig_di_height = ip->i_di.di_height;
Packit 6ef888
			orig_di_blocks = ip->i_di.di_blocks;
Packit 6ef888
		}
Packit 6ef888
		leaf_no = be64_to_cpu(tbl[lindex]);
Packit 6ef888
Packit 6ef888
		/* count the number of block pointers to this leaf. We don't
Packit 6ef888
		   need to count the current lindex, because we already know
Packit 6ef888
		   it's a reference */
Packit 6ef888
		ref_count = 1;
Packit 6ef888
Packit 6ef888
		for (l = lindex + 1; l < hsize; l++) {
Packit 6ef888
			leaf_next = be64_to_cpu(tbl[l]);
Packit 6ef888
			if (leaf_next != leaf_no)
Packit 6ef888
				break;
Packit 6ef888
			ref_count++;
Packit 6ef888
		}
Packit 6ef888
		orig_ref_count = ref_count;
Packit 6ef888
Packit 6ef888
		chained_leaf = 0;
Packit 6ef888
		do {
Packit 6ef888
			struct gfs2_leaf leaf;
Packit 6ef888
			if (fsck_abort) {
Packit 6ef888
				free(tbl);
Packit 6ef888
				posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
Packit 6ef888
				return 0;
Packit 6ef888
			}
Packit 6ef888
			error = check_leaf(ip, lindex, pass, &leaf_no, &leaf,
Packit 6ef888
					   &ref_count);
Packit 6ef888
			if (ref_count != orig_ref_count) {
Packit 6ef888
				log_debug(_("Ref count of leaf 0x%llx "
Packit 6ef888
					    "changed from %d to %d.\n"),
Packit 6ef888
					  (unsigned long long)leaf_no,
Packit 6ef888
					  orig_ref_count, ref_count);
Packit 6ef888
				tbl_valid = 0;
Packit 6ef888
			}
Packit 6ef888
			if (error < 0) {
Packit 6ef888
				free(tbl);
Packit 6ef888
				return error;
Packit 6ef888
			}
Packit 6ef888
			if (!leaf.lf_next || error)
Packit 6ef888
				break;
Packit 6ef888
			leaf_no = leaf.lf_next;
Packit 6ef888
			chained_leaf++;
Packit 6ef888
			log_debug( _("Leaf chain #%d (0x%llx) detected.\n"),
Packit 6ef888
				   chained_leaf, (unsigned long long)leaf_no);
Packit 6ef888
		} while (1); /* while we have chained leaf blocks */
Packit 6ef888
		if (orig_di_depth != ip->i_di.di_depth) {
Packit 6ef888
			log_debug(_("Depth of 0x%llx changed from %d to %d\n"),
Packit 6ef888
				  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				  orig_di_depth, ip->i_di.di_depth);
Packit 6ef888
			tbl_valid = 0;
Packit 6ef888
			lindex <<= (ip->i_di.di_depth - orig_di_depth);
Packit 6ef888
			hsize = (1 << ip->i_di.di_depth);
Packit 6ef888
		}
Packit 6ef888
		if (orig_di_height != ip->i_di.di_height) {
Packit 6ef888
			log_debug(_("Height of 0x%llx changed from %d to "
Packit 6ef888
				    "%d\n"),
Packit 6ef888
				  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				  orig_di_height, ip->i_di.di_height);
Packit 6ef888
			tbl_valid = 0;
Packit 6ef888
		}
Packit 6ef888
		if (orig_di_blocks != ip->i_di.di_blocks) {
Packit 6ef888
			log_debug(_("Block count of 0x%llx changed from %llu "
Packit 6ef888
				    "to %llu\n"),
Packit 6ef888
				  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
				  (unsigned long long)orig_di_blocks,
Packit 6ef888
				  (unsigned long long)ip->i_di.di_blocks);
Packit 6ef888
			tbl_valid = 0;
Packit 6ef888
		}
Packit 6ef888
		lindex += ref_count;
Packit 6ef888
	} /* for every leaf block */
Packit 6ef888
	free(tbl);
Packit 6ef888
	posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
Packit 6ef888
	return 0;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
static int check_eattr_entries(struct gfs2_inode *ip,
Packit 6ef888
			       struct gfs2_buffer_head *bh,
Packit 6ef888
			       struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_ea_header *ea_hdr, *ea_hdr_prev = NULL;
Packit 6ef888
	uint64_t *ea_data_ptr = NULL;
Packit 6ef888
	int i;
Packit 6ef888
	int error = 0, err;
Packit 6ef888
	uint32_t offset = (uint32_t)sizeof(struct gfs2_meta_header);
Packit 6ef888
Packit 6ef888
	if (!pass->check_eattr_entry)
Packit 6ef888
		return 0;
Packit 6ef888
Packit 6ef888
	ea_hdr = (struct gfs2_ea_header *)(bh->b_data +
Packit 6ef888
					  sizeof(struct gfs2_meta_header));
Packit 6ef888
Packit 6ef888
	while (1){
Packit 6ef888
		if (ea_hdr->ea_type == GFS2_EATYPE_UNUSED)
Packit 6ef888
			error = 0;
Packit 6ef888
		else
Packit 6ef888
			error = pass->check_eattr_entry(ip, bh, ea_hdr,
Packit 6ef888
							ea_hdr_prev,
Packit 6ef888
							pass->private);
Packit 6ef888
		if (error < 0) {
Packit 6ef888
			stack;
Packit 6ef888
			return -1;
Packit 6ef888
		}
Packit 6ef888
		if (error == 0 && pass->check_eattr_extentry &&
Packit 6ef888
		   ea_hdr->ea_num_ptrs) {
Packit 6ef888
			uint32_t tot_ealen = 0;
Packit 6ef888
			struct gfs2_sbd *sdp = ip->i_sbd;
Packit 6ef888
Packit 6ef888
			ea_data_ptr = ((uint64_t *)((char *)ea_hdr +
Packit 6ef888
						    sizeof(struct gfs2_ea_header) +
Packit 6ef888
						    ((ea_hdr->ea_name_len + 7) & ~7)));
Packit 6ef888
Packit 6ef888
			/* It is possible when a EA is shrunk
Packit 6ef888
			** to have ea_num_ptrs be greater than
Packit 6ef888
			** the number required for ** data.
Packit 6ef888
			** In this case, the EA ** code leaves
Packit 6ef888
			** the blocks ** there for **
Packit 6ef888
			** reuse...........  */
Packit 6ef888
Packit 6ef888
			for(i = 0; i < ea_hdr->ea_num_ptrs; i++){
Packit 6ef888
				err = pass->check_eattr_extentry(ip, i,
Packit 6ef888
						ea_data_ptr, bh, tot_ealen,
Packit 6ef888
						ea_hdr, ea_hdr_prev,
Packit 6ef888
						pass->private);
Packit 6ef888
				if (err)
Packit 6ef888
					error = err;
Packit 6ef888
				tot_ealen += sdp->sd_sb.sb_bsize -
Packit 6ef888
					sizeof(struct gfs2_meta_header);
Packit 6ef888
				ea_data_ptr++;
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
		offset += be32_to_cpu(ea_hdr->ea_rec_len);
Packit 6ef888
		if (ea_hdr->ea_flags & GFS2_EAFLAG_LAST ||
Packit 6ef888
		   offset >= ip->i_sbd->sd_sb.sb_bsize || ea_hdr->ea_rec_len == 0){
Packit 6ef888
			break;
Packit 6ef888
		}
Packit 6ef888
		ea_hdr_prev = ea_hdr;
Packit 6ef888
		ea_hdr = (struct gfs2_ea_header *)
Packit 6ef888
			((char *)(ea_hdr) +
Packit 6ef888
			 be32_to_cpu(ea_hdr->ea_rec_len));
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * check_leaf_eattr
Packit 6ef888
 * @ip: the inode the eattr comes from
Packit 6ef888
 * @block: block number of the leaf
Packit 6ef888
 *
Packit 6ef888
 * Returns: 0 on success, 1 if removal is needed, -1 on error
Packit 6ef888
 */
Packit 6ef888
static int check_leaf_eattr(struct gfs2_inode *ip, uint64_t block,
Packit 6ef888
			    uint64_t parent, struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_buffer_head *bh = NULL;
Packit 6ef888
Packit 6ef888
	if (pass->check_eattr_leaf) {
Packit 6ef888
		int error = 0;
Packit 6ef888
Packit 6ef888
		log_debug( _("Checking EA leaf block #%llu (0x%llx) for "
Packit 6ef888
			     "inode #%llu (0x%llx).\n"),
Packit 6ef888
			   (unsigned long long)block,
Packit 6ef888
			   (unsigned long long)block,
Packit 6ef888
			   (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			   (unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
Packit 6ef888
		error = pass->check_eattr_leaf(ip, block, parent, &bh,
Packit 6ef888
					       pass->private);
Packit 6ef888
		if (error < 0) {
Packit 6ef888
			stack;
Packit 6ef888
			return -1;
Packit 6ef888
		}
Packit 6ef888
		if (error > 0) {
Packit 6ef888
			if (bh)
Packit 6ef888
				brelse(bh);
Packit 6ef888
			return 1;
Packit 6ef888
		}
Packit 6ef888
		if (bh) {
Packit 6ef888
			error = check_eattr_entries(ip, bh, pass);
Packit 6ef888
			brelse(bh);
Packit 6ef888
		}
Packit 6ef888
		return error;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	return 0;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * check_indirect_eattr
Packit 6ef888
 * @ip: the inode the eattr comes from
Packit 6ef888
 * @indirect_block
Packit 6ef888
 *
Packit 6ef888
 * Returns: 0 on success -1 on error
Packit 6ef888
 */
Packit 6ef888
static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
Packit 6ef888
				struct gfs2_buffer_head *indirect_buf,
Packit 6ef888
				struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	int error = 0, err;
Packit 6ef888
	uint64_t *ea_leaf_ptr, *end;
Packit 6ef888
	uint64_t block;
Packit 6ef888
	struct gfs2_sbd *sdp = ip->i_sbd;
Packit 6ef888
	int first_ea_is_bad = 0;
Packit 6ef888
	uint64_t di_eattr_save = ip->i_di.di_eattr;
Packit 6ef888
	uint64_t offset = ip->i_sbd->gfs1 ? sizeof(struct gfs_indirect) : sizeof(struct gfs2_meta_header);
Packit 6ef888
	int leaf_pointers = 0, leaf_pointer_errors = 0;
Packit 6ef888
Packit 6ef888
	ea_leaf_ptr = (uint64_t *)(indirect_buf->b_data + offset);
Packit 6ef888
	end = ea_leaf_ptr + ((sdp->sd_sb.sb_bsize - offset) / 8);
Packit 6ef888
Packit 6ef888
	while (*ea_leaf_ptr && (ea_leaf_ptr < end)){
Packit 6ef888
		block = be64_to_cpu(*ea_leaf_ptr);
Packit 6ef888
		leaf_pointers++;
Packit 6ef888
		err = check_leaf_eattr(ip, block, indirect, pass);
Packit 6ef888
		if (err) {
Packit 6ef888
			error = err;
Packit 6ef888
			log_err(_("Error detected in leaf block %lld (0x%llx) "
Packit 6ef888
				  "referenced by indirect block %lld (0x%llx)"
Packit 6ef888
				  ".\n"),
Packit 6ef888
				(unsigned long long)block,
Packit 6ef888
				(unsigned long long)block,
Packit 6ef888
				(unsigned long long)indirect,
Packit 6ef888
				(unsigned long long)indirect);
Packit 6ef888
			log_err(_("Subsequent leaf block pointers should be "
Packit 6ef888
				  "cleared.\n"));
Packit 6ef888
		}
Packit 6ef888
		if (error) { /* leaf blocks following an error must also be
Packit 6ef888
				treated as error blocks and cleared. */
Packit 6ef888
			leaf_pointer_errors++;
Packit 6ef888
			log_err(_("Pointer to EA leaf block %lld (0x%llx) in "
Packit 6ef888
				  "indirect block %lld (0x%llx) should be "
Packit 6ef888
				  "cleared.\n"),
Packit 6ef888
				(unsigned long long)block,
Packit 6ef888
				(unsigned long long)block,
Packit 6ef888
				(unsigned long long)indirect,
Packit 6ef888
				(unsigned long long)indirect);
Packit 6ef888
		}
Packit 6ef888
		/* If the first eattr lead is bad, we can't have a hole, so we
Packit 6ef888
		   have to treat this as an unrecoverable eattr error and
Packit 6ef888
		   delete all eattr info. Calling finish_eattr_indir here
Packit 6ef888
		   causes ip->i_di.di_eattr = 0 and that ensures that
Packit 6ef888
		   subsequent calls to check_leaf_eattr result in the eattr
Packit 6ef888
		   check_leaf_block nuking them all "due to previous errors" */
Packit 6ef888
		if (leaf_pointers == 1 && leaf_pointer_errors == 1) {
Packit 6ef888
			first_ea_is_bad = 1;
Packit 6ef888
			if (pass->finish_eattr_indir)
Packit 6ef888
				pass->finish_eattr_indir(ip, leaf_pointers,
Packit 6ef888
							 leaf_pointer_errors,
Packit 6ef888
							 pass->private);
Packit 6ef888
		} else if (leaf_pointer_errors) {
Packit 6ef888
			/* This is a bit tricky.  We can't have eattr holes.
Packit 6ef888
			   So if we have 4 good eattrs, 1 bad eattr and 5 more
Packit 6ef888
			   good ones: GGGGBGGGGG, we need to tell
Packit 6ef888
			   check_leaf_eattr to delete all eattrs after the bad
Packit 6ef888
			   one. So we want: GGGG when we finish. To do that,
Packit 6ef888
			   we set di_eattr to 0 temporarily. */
Packit 6ef888
			ip->i_di.di_eattr = 0;
Packit 6ef888
			bmodified(ip->i_bh);
Packit 6ef888
		}
Packit 6ef888
		ea_leaf_ptr++;
Packit 6ef888
	}
Packit 6ef888
	/* If we temporarily nuked the ea block to prevent checking past
Packit 6ef888
	   a corrupt ea leaf, we need to restore the saved di_eattr block. */
Packit 6ef888
	if (di_eattr_save != 0)
Packit 6ef888
		ip->i_di.di_eattr = di_eattr_save;
Packit 6ef888
	if (pass->finish_eattr_indir) {
Packit 6ef888
		if (!first_ea_is_bad) {
Packit 6ef888
			pass->finish_eattr_indir(ip, leaf_pointers,
Packit 6ef888
						 leaf_pointer_errors,
Packit 6ef888
						 pass->private);
Packit 6ef888
		}
Packit 6ef888
		if (pass->delete_block && leaf_pointer_errors &&
Packit 6ef888
		    leaf_pointer_errors == leaf_pointers) {
Packit 6ef888
			pass->delete_block(ip, indirect, NULL, "leaf", NULL);
Packit 6ef888
			error = 1;
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * check_inode_eattr - check the EA's for a single inode
Packit 6ef888
 * @ip: the inode whose EA to check
Packit 6ef888
 *
Packit 6ef888
 * Returns: 0 on success, -1 on error
Packit 6ef888
 */
Packit 6ef888
int check_inode_eattr(struct gfs2_inode *ip, struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	int error = 0;
Packit 6ef888
	struct gfs2_buffer_head *indirect_buf = NULL;
Packit 6ef888
Packit 6ef888
	if (!ip->i_di.di_eattr)
Packit 6ef888
		return 0;
Packit 6ef888
Packit 6ef888
	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT){
Packit 6ef888
		if (!pass->check_eattr_indir)
Packit 6ef888
			return 0;
Packit 6ef888
Packit 6ef888
		log_debug( _("Checking EA indirect block #%llu (0x%llx) for "
Packit 6ef888
			     "inode #%llu (0x%llx)..\n"),
Packit 6ef888
			   (unsigned long long)ip->i_di.di_eattr,
Packit 6ef888
			   (unsigned long long)ip->i_di.di_eattr,
Packit 6ef888
			   (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			   (unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
		error = pass->check_eattr_indir(ip, ip->i_di.di_eattr,
Packit 6ef888
						ip->i_di.di_num.no_addr,
Packit 6ef888
						&indirect_buf, pass->private);
Packit 6ef888
		if (!error) {
Packit 6ef888
			error = check_indirect_eattr(ip, ip->i_di.di_eattr,
Packit 6ef888
						     indirect_buf, pass);
Packit 6ef888
			if (error)
Packit 6ef888
				stack;
Packit 6ef888
		}
Packit 6ef888
		if (indirect_buf)
Packit 6ef888
			brelse(indirect_buf);
Packit 6ef888
		return error;
Packit 6ef888
	}
Packit 6ef888
	error = check_leaf_eattr(ip, ip->i_di.di_eattr,
Packit 6ef888
				 ip->i_di.di_num.no_addr, pass);
Packit 6ef888
	if (error)
Packit 6ef888
		stack;
Packit 6ef888
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * free_metalist - free all metadata on a multi-level metadata list
Packit 6ef888
 */
Packit 6ef888
static void free_metalist(struct gfs2_inode *ip, osi_list_t *mlp)
Packit 6ef888
{
Packit 6ef888
	int i;
Packit 6ef888
	struct gfs2_buffer_head *nbh;
Packit 6ef888
Packit 6ef888
	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
Packit 6ef888
		osi_list_t *list;
Packit 6ef888
Packit 6ef888
		list = &mlp[i];
Packit 6ef888
		while (!osi_list_empty(list)) {
Packit 6ef888
			nbh = osi_list_entry(list->next,
Packit 6ef888
					     struct gfs2_buffer_head, b_altlist);
Packit 6ef888
			if (nbh == ip->i_bh)
Packit 6ef888
				osi_list_del_init(&nbh->b_altlist);
Packit 6ef888
			else
Packit 6ef888
				brelse(nbh);
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
static void file_ra(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
Packit 6ef888
		    int head_size, int maxptrs, int h)
Packit 6ef888
{
Packit 6ef888
	struct gfs2_sbd *sdp = ip->i_sbd;
Packit 6ef888
	uint64_t *p, sblock = 0, block;
Packit 6ef888
	int extlen = 0;
Packit 6ef888
Packit 6ef888
	if (h + 2 == ip->i_di.di_height) {
Packit 6ef888
		p = (uint64_t *)(bh->b_data + head_size);
Packit 6ef888
		if (*p && *(p + 1)) {
Packit 6ef888
			sblock = be64_to_cpu(*p);
Packit 6ef888
			p++;
Packit 6ef888
			block = be64_to_cpu(*p);
Packit 6ef888
			extlen = block - sblock;
Packit 6ef888
			if (extlen > 1 && extlen <= maxptrs) {
Packit 6ef888
				posix_fadvise(sdp->device_fd,
Packit 6ef888
					      sblock * sdp->bsize,
Packit 6ef888
					      (extlen + 1) * sdp->bsize,
Packit 6ef888
					      POSIX_FADV_WILLNEED);
Packit 6ef888
				return;
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
		extlen = 0;
Packit 6ef888
	}
Packit 6ef888
	for (p = (uint64_t *)(bh->b_data + head_size);
Packit 6ef888
	     p < (uint64_t *)(bh->b_data + sdp->bsize); p++) {
Packit 6ef888
		if (*p) {
Packit 6ef888
			if (!sblock) {
Packit 6ef888
				sblock = be64_to_cpu(*p);
Packit 6ef888
				extlen = 1;
Packit 6ef888
				continue;
Packit 6ef888
			}
Packit 6ef888
			block = be64_to_cpu(*p);
Packit 6ef888
			if (block == sblock + extlen) {
Packit 6ef888
				extlen++;
Packit 6ef888
				continue;
Packit 6ef888
			}
Packit 6ef888
		}
Packit 6ef888
		if (extlen && sblock) {
Packit 6ef888
			if (extlen > 1)
Packit 6ef888
				extlen--;
Packit 6ef888
			posix_fadvise(sdp->device_fd, sblock * sdp->bsize,
Packit 6ef888
				      extlen * sdp->bsize,
Packit 6ef888
				      POSIX_FADV_WILLNEED);
Packit 6ef888
			extlen = 0;
Packit 6ef888
			p--;
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
	if (extlen)
Packit 6ef888
		posix_fadvise(sdp->device_fd, sblock * sdp->bsize,
Packit 6ef888
			      extlen * sdp->bsize, POSIX_FADV_WILLNEED);
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * build_and_check_metalist - check a bunch of indirect blocks
Packit 6ef888
 *                            This includes hash table blocks for directories
Packit 6ef888
 *                            which are technically "data" in the bitmap.
Packit 6ef888
 *
Packit 6ef888
 * Returns: 0 - all is well, process the blocks this metadata references
Packit 6ef888
 *          1 - something went wrong, but process the sub-blocks anyway
Packit 6ef888
 *         -1 - something went wrong, so don't process the sub-blocks
Packit 6ef888
 * @ip:
Packit 6ef888
 * @mlp:
Packit 6ef888
 */
Packit 6ef888
static int build_and_check_metalist(struct gfs2_inode *ip, osi_list_t *mlp,
Packit 6ef888
				    struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	uint32_t height = ip->i_di.di_height;
Packit 6ef888
	struct gfs2_buffer_head *bh, *nbh, *metabh = ip->i_bh;
Packit 6ef888
	osi_list_t *prev_list, *cur_list, *tmp;
Packit 6ef888
	int h, head_size, iblk_type;
Packit 6ef888
	uint64_t *ptr, block, *undoptr;
Packit 6ef888
	int error, was_duplicate, is_valid;
Packit 6ef888
	int maxptrs;
Packit 6ef888
Packit 6ef888
	osi_list_add(&metabh->b_altlist, &mlp[0]);
Packit 6ef888
Packit 6ef888
	/* Directories are special.  Their 'data' is the hash table, which is
Packit 6ef888
	   basically an indirect block list. Their height is not important
Packit 6ef888
	   because it checks everything through the hash table using
Packit 6ef888
	   "depth" field calculations. However, we still have to check the
Packit 6ef888
	   indirect blocks, even if the height == 1.  */
Packit 6ef888
	if (is_dir(&ip->i_di, ip->i_sbd->gfs1))
Packit 6ef888
		height++;
Packit 6ef888
Packit 6ef888
	/* if (<there are no indirect blocks to check>) */
Packit 6ef888
	if (height < 2)
Packit 6ef888
		return meta_is_good;
Packit 6ef888
	for (h = 1; h < height; h++) {
Packit 6ef888
		if (h > 1) {
Packit 6ef888
			if (is_dir(&ip->i_di, ip->i_sbd->gfs1) &&
Packit 6ef888
			    h == ip->i_di.di_height + 1)
Packit 6ef888
				iblk_type = GFS2_METATYPE_JD;
Packit 6ef888
			else
Packit 6ef888
				iblk_type = GFS2_METATYPE_IN;
Packit 6ef888
			if (ip->i_sbd->gfs1) {
Packit 6ef888
				head_size = sizeof(struct gfs_indirect);
Packit 6ef888
				maxptrs = (ip->i_sbd->bsize - head_size) /
Packit 6ef888
					sizeof(uint64_t);
Packit 6ef888
			} else {
Packit 6ef888
				head_size = sizeof(struct gfs2_meta_header);
Packit 6ef888
				maxptrs = ip->i_sbd->sd_inptrs;
Packit 6ef888
			}
Packit 6ef888
		} else {
Packit 6ef888
			iblk_type = GFS2_METATYPE_DI;
Packit 6ef888
			head_size = sizeof(struct gfs2_dinode);
Packit 6ef888
			maxptrs = ip->i_sbd->sd_diptrs;
Packit 6ef888
		}
Packit 6ef888
		prev_list = &mlp[h - 1];
Packit 6ef888
		cur_list = &mlp[h];
Packit 6ef888
Packit 6ef888
		for (tmp = prev_list->next; tmp != prev_list; tmp = tmp->next){
Packit 6ef888
			bh = osi_list_entry(tmp, struct gfs2_buffer_head,
Packit 6ef888
					    b_altlist);
Packit 6ef888
			if (gfs2_check_meta(bh, iblk_type)) {
Packit 6ef888
				if (pass->invalid_meta_is_fatal)
Packit 6ef888
					return meta_error;
Packit 6ef888
Packit 6ef888
				continue;
Packit 6ef888
			}
Packit 6ef888
Packit 6ef888
			if (pass->readahead)
Packit 6ef888
				file_ra(ip, bh, head_size, maxptrs, h);
Packit 6ef888
			/* Now check the metadata itself */
Packit 6ef888
			for (ptr = (uint64_t *)(bh->b_data + head_size);
Packit 6ef888
			     (char *)ptr < (bh->b_data + ip->i_sbd->bsize);
Packit 6ef888
			     ptr++) {
Packit 6ef888
				if (skip_this_pass || fsck_abort) {
Packit 6ef888
					free_metalist(ip, mlp);
Packit 6ef888
					return meta_is_good;
Packit 6ef888
				}
Packit 6ef888
				nbh = NULL;
Packit 6ef888
Packit 6ef888
				if (!*ptr)
Packit 6ef888
					continue;
Packit 6ef888
Packit 6ef888
				block = be64_to_cpu(*ptr);
Packit 6ef888
				was_duplicate = 0;
Packit 6ef888
				error = pass->check_metalist(ip, block, &nbh,
Packit 6ef888
							     h, &is_valid,
Packit 6ef888
							     &was_duplicate,
Packit 6ef888
							     pass->private);
Packit 6ef888
				/* check_metalist should hold any buffers
Packit 6ef888
				   it gets with "bread". */
Packit 6ef888
				if (error == meta_error) {
Packit 6ef888
					stack;
Packit 6ef888
					log_info(_("\nSerious metadata "
Packit 6ef888
						   "error on block %llu "
Packit 6ef888
						   "(0x%llx).\n"),
Packit 6ef888
						 (unsigned long long)block,
Packit 6ef888
						 (unsigned long long)block);
Packit 6ef888
					goto error_undo;
Packit 6ef888
				}
Packit 6ef888
				if (error == meta_skip_further) {
Packit 6ef888
					log_info(_("\nUnrecoverable metadata "
Packit 6ef888
						   "error on block %llu "
Packit 6ef888
						   "(0x%llx). Further metadata"
Packit 6ef888
						   " will be skipped.\n"),
Packit 6ef888
						 (unsigned long long)block,
Packit 6ef888
						 (unsigned long long)block);
Packit 6ef888
					goto error_undo;
Packit 6ef888
				}
Packit 6ef888
				if (!is_valid) {
Packit 6ef888
					log_debug( _("Skipping rejected block "
Packit 6ef888
						     "%llu (0x%llx)\n"),
Packit 6ef888
						   (unsigned long long)block,
Packit 6ef888
						   (unsigned long long)block);
Packit 6ef888
					if (pass->invalid_meta_is_fatal) {
Packit 6ef888
						error = meta_error;
Packit 6ef888
						goto error_undo;
Packit 6ef888
					}
Packit 6ef888
					continue;
Packit 6ef888
				}
Packit 6ef888
				/* Note that there's a special case in which
Packit 6ef888
				   we need to process the metadata block, even
Packit 6ef888
				   if it was a duplicate. That's for cases
Packit 6ef888
				   where we deleted the last reference as
Packit 6ef888
				   metadata. */
Packit 6ef888
				if (was_duplicate) {
Packit 6ef888
					log_debug( _("Skipping duplicate %llu "
Packit 6ef888
						     "(0x%llx)\n"),
Packit 6ef888
						   (unsigned long long)block,
Packit 6ef888
						   (unsigned long long)block);
Packit 6ef888
					continue;
Packit 6ef888
				}
Packit 6ef888
				if (!valid_block_ip(ip, block)) {
Packit 6ef888
					log_debug( _("Skipping invalid block "
Packit 6ef888
						     "%lld (0x%llx)\n"),
Packit 6ef888
						   (unsigned long long)block,
Packit 6ef888
						   (unsigned long long)block);
Packit 6ef888
					if (pass->invalid_meta_is_fatal) {
Packit 6ef888
						error = meta_error;
Packit 6ef888
						goto error_undo;
Packit 6ef888
					}
Packit 6ef888
					continue;
Packit 6ef888
				}
Packit 6ef888
				if (!nbh)
Packit 6ef888
					nbh = bread(ip->i_sbd, block);
Packit 6ef888
				osi_list_add_prev(&nbh->b_altlist, cur_list);
Packit 6ef888
			} /* for all data on the indirect block */
Packit 6ef888
		} /* for blocks at that height */
Packit 6ef888
	} /* for height */
Packit 6ef888
	return 0;
Packit 6ef888
Packit 6ef888
error_undo: /* undo what we've done so far for this block */
Packit 6ef888
	if (pass->undo_check_meta == NULL)
Packit 6ef888
		return error;
Packit 6ef888
Packit 6ef888
	log_info(_("Undoing the work we did before the error on block %llu "
Packit 6ef888
		   "(0x%llx).\n"), (unsigned long long)bh->b_blocknr,
Packit 6ef888
		 (unsigned long long)bh->b_blocknr);
Packit 6ef888
	for (undoptr = (uint64_t *)(bh->b_data + head_size); undoptr < ptr &&
Packit 6ef888
		     (char *)undoptr < (bh->b_data + ip->i_sbd->bsize);
Packit 6ef888
	     undoptr++) {
Packit 6ef888
		if (!*undoptr)
Packit 6ef888
			continue;
Packit 6ef888
Packit 6ef888
		block = be64_to_cpu(*undoptr);
Packit 6ef888
		pass->undo_check_meta(ip, block, h, pass->private);
Packit 6ef888
	}
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * check_data - check all data pointers for a given buffer
Packit 6ef888
 *              This does not include "data" blocks that are really
Packit 6ef888
 *              hash table blocks for directories.
Packit 6ef888
 *
Packit 6ef888
 * @ip:
Packit 6ef888
 *
Packit 6ef888
 * returns: +ENOENT if there are too many bad pointers
Packit 6ef888
 *          -1 if a more serious error occurred.
Packit 6ef888
 *          0 if no errors occurred
Packit 6ef888
 *          1 if errors were found and corrected
Packit 6ef888
 *          2 (ENOENT) is there were too many bad pointers
Packit 6ef888
 */
Packit 6ef888
static int check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass,
Packit 6ef888
		      struct gfs2_buffer_head *bh, int head_size,
Packit 6ef888
		      uint64_t *blks_checked, struct error_block *error_blk)
Packit 6ef888
{
Packit 6ef888
	int error = 0, rc = 0;
Packit 6ef888
	uint64_t block, *ptr;
Packit 6ef888
	uint64_t *ptr_start = (uint64_t *)(bh->b_data + head_size);
Packit 6ef888
	char *ptr_end = (bh->b_data + ip->i_sbd->bsize);
Packit 6ef888
	uint64_t metablock = bh->b_blocknr;
Packit 6ef888
Packit 6ef888
	/* If there isn't much pointer corruption check the pointers */
Packit 6ef888
	log_debug(_("\nProcessing data blocks for inode 0x%llx, metadata "
Packit 6ef888
		    "block 0x%llx.\n"),
Packit 6ef888
		  (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
		  (unsigned long long)bh->b_blocknr);
Packit 6ef888
	for (ptr = ptr_start ; (char *)ptr < ptr_end && !fsck_abort; ptr++) {
Packit 6ef888
		if (!*ptr)
Packit 6ef888
			continue;
Packit 6ef888
Packit 6ef888
		if (skip_this_pass || fsck_abort)
Packit 6ef888
			return error;
Packit 6ef888
		block =  be64_to_cpu(*ptr);
Packit 6ef888
		/* It's important that we don't call valid_block() and
Packit 6ef888
		   bypass calling check_data on invalid blocks because that
Packit 6ef888
		   would defeat the rangecheck_block related functions in
Packit 6ef888
		   pass1. Therefore the individual check_data functions
Packit 6ef888
		   should do a range check. */
Packit 6ef888
		rc = pass->check_data(ip, metablock, block, pass->private,
Packit 6ef888
				      bh, ptr);
Packit 6ef888
		if (rc && (!error || (rc < error))) {
Packit 6ef888
			log_info("\n");
Packit 6ef888
			if (rc < 0) {
Packit 6ef888
				/* A fatal error trumps a non-fatal one. */
Packit 6ef888
				if ((error_blk->errblk == 0) ||
Packit 6ef888
				    (rc < error)) {
Packit 6ef888
					log_debug(_("Fatal error on metadata "
Packit 6ef888
						    "block 0x%llx, offset "
Packit 6ef888
						    "0x%x, referencing block "
Packit 6ef888
						    "0x%llx preempts non-fatal"
Packit 6ef888
						    " error on block 0x%llx\n"),
Packit 6ef888
						  (unsigned long long)metablock,
Packit 6ef888
						  (int)(ptr - ptr_start),
Packit 6ef888
						  (unsigned long long)block,
Packit 6ef888
						  (unsigned long long)error_blk->errblk);
Packit 6ef888
					error_blk->metablk = metablock;
Packit 6ef888
					error_blk->metaoff = ptr - ptr_start;
Packit 6ef888
					error_blk->errblk = block;
Packit 6ef888
				}
Packit 6ef888
				log_info(_("Unrecoverable "));
Packit 6ef888
			} else { /* nonfatal error */
Packit 6ef888
				if (error_blk->errblk == 0) {
Packit 6ef888
					error_blk->metablk = metablock;
Packit 6ef888
					error_blk->metaoff = ptr - ptr_start;
Packit 6ef888
					error_blk->errblk = block;
Packit 6ef888
				}
Packit 6ef888
			}
Packit 6ef888
			log_info(_("data block error %d on metadata block "
Packit 6ef888
				   "%lld (0x%llx), offset %d (0x%x), "
Packit 6ef888
				   "referencing data block %lld (0x%llx).\n"),
Packit 6ef888
				 rc, (unsigned long long)metablock,
Packit 6ef888
				 (unsigned long long)metablock,
Packit 6ef888
				 (int)(ptr - ptr_start),
Packit 6ef888
				 (int)(ptr - ptr_start),
Packit 6ef888
				 (unsigned long long)block,
Packit 6ef888
				 (unsigned long long)block);
Packit 6ef888
			error = rc;
Packit 6ef888
		}
Packit 6ef888
		if (rc < 0)
Packit 6ef888
			return rc;
Packit 6ef888
		(*blks_checked)++;
Packit 6ef888
	}
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
static int undo_check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass,
Packit 6ef888
			   uint64_t metablock,
Packit 6ef888
			   uint64_t *ptr_start, char *ptr_end,
Packit 6ef888
			   struct error_block *error_blk, int error)
Packit 6ef888
{
Packit 6ef888
	int rc = 0;
Packit 6ef888
	uint64_t block, *ptr;
Packit 6ef888
	int found_error_blk = 0;
Packit 6ef888
Packit 6ef888
	/* If there isn't much pointer corruption check the pointers */
Packit 6ef888
	for (ptr = ptr_start ; (char *)ptr < ptr_end && !fsck_abort; ptr++) {
Packit 6ef888
		if (!*ptr)
Packit 6ef888
			continue;
Packit 6ef888
Packit 6ef888
		if (skip_this_pass || fsck_abort)
Packit 6ef888
			return 1;
Packit 6ef888
		block =  be64_to_cpu(*ptr);
Packit 6ef888
		if (metablock == error_blk->metablk &&
Packit 6ef888
		    (ptr - ptr_start == error_blk->metaoff) &&
Packit 6ef888
		    block == error_blk->errblk) {
Packit 6ef888
			if (error < 0) { /* A fatal error that stopped it? */
Packit 6ef888
				log_debug(_("Stopping the undo process: "
Packit 6ef888
					    "fatal error block 0x%llx was "
Packit 6ef888
					    "found at metadata block 0x%llx,"
Packit 6ef888
					    "offset 0x%x.\n"),
Packit 6ef888
					  (unsigned long long)error_blk->errblk,
Packit 6ef888
					  (unsigned long long)error_blk->metablk,
Packit 6ef888
					  error_blk->metaoff);
Packit 6ef888
				return 1;
Packit 6ef888
			}
Packit 6ef888
			found_error_blk = 1;
Packit 6ef888
			log_debug(_("The non-fatal error block 0x%llx was "
Packit 6ef888
				    "found at metadata block 0x%llx, offset "
Packit 6ef888
				    "0x%d, but undo processing will continue "
Packit 6ef888
				    "until the end of this metadata block.\n"),
Packit 6ef888
				  (unsigned long long)error_blk->errblk,
Packit 6ef888
				  (unsigned long long)error_blk->metablk,
Packit 6ef888
				  error_blk->metaoff);
Packit 6ef888
		}
Packit 6ef888
		rc = pass->undo_check_data(ip, block, pass->private);
Packit 6ef888
		if (rc < 0)
Packit 6ef888
			return rc;
Packit 6ef888
	}
Packit 6ef888
	return found_error_blk;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
static int hdr_size(struct gfs2_buffer_head *bh, int height)
Packit 6ef888
{
Packit 6ef888
	if (height > 1) {
Packit 6ef888
		if (gfs2_check_meta(bh, GFS2_METATYPE_IN))
Packit 6ef888
			return 0;
Packit 6ef888
		if (bh->sdp->gfs1)
Packit 6ef888
			return sizeof(struct gfs_indirect);
Packit 6ef888
		else
Packit 6ef888
			return sizeof(struct gfs2_meta_header);
Packit 6ef888
	}
Packit 6ef888
	/* if this isn't really a dinode, skip it */
Packit 6ef888
	if (gfs2_check_meta(bh, GFS2_METATYPE_DI))
Packit 6ef888
		return 0;
Packit 6ef888
Packit 6ef888
	return sizeof(struct gfs2_dinode);
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/**
Packit 6ef888
 * check_metatree
Packit 6ef888
 * @ip: inode structure in memory
Packit 6ef888
 * @pass: structure passed in from caller to determine the sub-functions
Packit 6ef888
 *
Packit 6ef888
 */
Packit 6ef888
int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	osi_list_t metalist[GFS2_MAX_META_HEIGHT];
Packit 6ef888
	osi_list_t *list, *tmp;
Packit 6ef888
	struct gfs2_buffer_head *bh;
Packit 6ef888
	uint32_t height = ip->i_di.di_height;
Packit 6ef888
	int  i, head_size;
Packit 6ef888
	uint64_t blks_checked = 0;
Packit 6ef888
	int error, rc;
Packit 6ef888
	int metadata_clean = 0;
Packit 6ef888
	struct error_block error_blk = {0, 0, 0};
Packit 6ef888
	int hit_error_blk = 0;
Packit 6ef888
Packit 6ef888
	if (!height && !is_dir(&ip->i_di, ip->i_sbd->gfs1))
Packit 6ef888
		return 0;
Packit 6ef888
Packit 6ef888
	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++)
Packit 6ef888
		osi_list_init(&metalist[i]);
Packit 6ef888
Packit 6ef888
	/* create and check the metadata list for each height */
Packit 6ef888
	error = build_and_check_metalist(ip, &metalist[0], pass);
Packit 6ef888
	if (error) {
Packit 6ef888
		stack;
Packit 6ef888
		goto undo_metalist;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	metadata_clean = 1;
Packit 6ef888
	/* For directories, we've already checked the "data" blocks which
Packit 6ef888
	 * comprise the directory hash table, so we perform the directory
Packit 6ef888
	 * checks and exit. */
Packit 6ef888
        if (is_dir(&ip->i_di, ip->i_sbd->gfs1)) {
Packit 6ef888
		if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH))
Packit 6ef888
			goto out;
Packit 6ef888
		/* check validity of leaf blocks and leaf chains */
Packit 6ef888
		error = check_leaf_blks(ip, pass);
Packit 6ef888
		if (error)
Packit 6ef888
			goto undo_metalist;
Packit 6ef888
		goto out;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	/* check data blocks */
Packit 6ef888
	list = &metalist[height - 1];
Packit 6ef888
	if (ip->i_di.di_blocks > COMFORTABLE_BLKS)
Packit 6ef888
		last_reported_fblock = -10000000;
Packit 6ef888
Packit 6ef888
	for (tmp = list->next; !error && tmp != list; tmp = tmp->next) {
Packit 6ef888
		if (fsck_abort) {
Packit 6ef888
			free_metalist(ip, &metalist[0]);
Packit 6ef888
			return 0;
Packit 6ef888
		}
Packit 6ef888
		bh = osi_list_entry(tmp, struct gfs2_buffer_head, b_altlist);
Packit 6ef888
		head_size = hdr_size(bh, height);
Packit 6ef888
		if (!head_size)
Packit 6ef888
			continue;
Packit 6ef888
Packit 6ef888
		if (pass->check_data)
Packit 6ef888
			error = check_data(ip, pass, bh, head_size,
Packit 6ef888
					   &blks_checked, &error_blk);
Packit 6ef888
		if (pass->big_file_msg && ip->i_di.di_blocks > COMFORTABLE_BLKS)
Packit 6ef888
			pass->big_file_msg(ip, blks_checked);
Packit 6ef888
	}
Packit 6ef888
	if (pass->big_file_msg && ip->i_di.di_blocks > COMFORTABLE_BLKS) {
Packit 6ef888
		log_notice( _("\rLarge file at %lld (0x%llx) - 100 percent "
Packit 6ef888
			      "complete.                                   "
Packit 6ef888
			      "\n"),
Packit 6ef888
			    (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
			    (unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
		fflush(stdout);
Packit 6ef888
	}
Packit 6ef888
undo_metalist:
Packit 6ef888
	if (!error)
Packit 6ef888
		goto out;
Packit 6ef888
	log_err( _("Error: inode %llu (0x%llx) had unrecoverable errors at "
Packit 6ef888
		   "metadata block %lld (0x%llx), offset %d (0x%x), block "
Packit 6ef888
		   "%lld (0x%llx).\n"),
Packit 6ef888
		 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
		 (unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
		 (unsigned long long)error_blk.metablk,
Packit 6ef888
		 (unsigned long long)error_blk.metablk,
Packit 6ef888
		 error_blk.metaoff, error_blk.metaoff,
Packit 6ef888
		 (unsigned long long)error_blk.errblk,
Packit 6ef888
		 (unsigned long long)error_blk.errblk);
Packit 6ef888
	if (!query( _("Remove the invalid inode? (y/n) "))) {
Packit 6ef888
		free_metalist(ip, &metalist[0]);
Packit 6ef888
		log_err(_("Invalid inode not deleted.\n"));
Packit 6ef888
		return error;
Packit 6ef888
	}
Packit 6ef888
	for (i = 0; pass->undo_check_meta && i < height; i++) {
Packit 6ef888
		while (!osi_list_empty(&metalist[i])) {
Packit 6ef888
			list = &metalist[i];
Packit 6ef888
			bh = osi_list_entry(list->next,
Packit 6ef888
					    struct gfs2_buffer_head,
Packit 6ef888
					    b_altlist);
Packit 6ef888
			log_err(_("Undoing metadata work for block %llu "
Packit 6ef888
				  "(0x%llx)\n"),
Packit 6ef888
				(unsigned long long)bh->b_blocknr,
Packit 6ef888
				(unsigned long long)bh->b_blocknr);
Packit 6ef888
			if (i)
Packit 6ef888
				rc = pass->undo_check_meta(ip, bh->b_blocknr,
Packit 6ef888
							   i, pass->private);
Packit 6ef888
			else
Packit 6ef888
				rc = 0;
Packit 6ef888
			if (metadata_clean && rc == 0 && i == height - 1 &&
Packit 6ef888
			    !hit_error_blk) {
Packit 6ef888
				head_size = hdr_size(bh, height);
Packit 6ef888
				if (head_size) {
Packit 6ef888
					rc = undo_check_data(ip, pass,
Packit 6ef888
							     bh->b_blocknr,
Packit 6ef888
							     (uint64_t *)
Packit 6ef888
					      (bh->b_data + head_size),
Packit 6ef888
					      (bh->b_data + ip->i_sbd->bsize),
Packit 6ef888
							     &error_blk,
Packit 6ef888
							     error);
Packit 6ef888
					if (rc > 0) {
Packit 6ef888
						hit_error_blk = 1;
Packit 6ef888
						log_err("Reached the error "
Packit 6ef888
							"block undoing work "
Packit 6ef888
							"for inode %lld "
Packit 6ef888
							"(0x%llx).\n",
Packit 6ef888
							(unsigned long long)ip->i_di.di_num.no_addr,
Packit 6ef888
							(unsigned long long)ip->i_di.di_num.no_addr);
Packit 6ef888
						rc = 0;
Packit 6ef888
					}
Packit 6ef888
				}
Packit 6ef888
			}
Packit 6ef888
			if (bh == ip->i_bh)
Packit 6ef888
				osi_list_del(&bh->b_altlist);
Packit 6ef888
			else
Packit 6ef888
				brelse(bh);
Packit 6ef888
		}
Packit 6ef888
	}
Packit 6ef888
	/* There may be leftover duplicate records, so we need to delete them.
Packit 6ef888
	   For example, if a metadata block was found to be a duplicate, we
Packit 6ef888
	   may not have added it to the metalist, which means it's not there
Packit 6ef888
	   to undo. */
Packit 6ef888
	delete_all_dups(ip);
Packit 6ef888
	/* Set the dinode as "bad" so it gets deleted */
Packit 6ef888
	fsck_bitmap_set(ip, ip->i_di.di_num.no_addr, _("corrupt"),
Packit 6ef888
			GFS2_BLKST_FREE);
Packit 6ef888
	log_err(_("The corrupt inode was invalidated.\n"));
Packit 6ef888
out:
Packit 6ef888
	free_metalist(ip, &metalist[0]);
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
/* Checks stuffed inode directories */
Packit 6ef888
int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
Packit 6ef888
		     struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	int error = 0;
Packit 6ef888
	uint32_t count = 0;
Packit 6ef888
Packit 6ef888
	error = check_entries(ip, bh, DIR_LINEAR, &count, 0, pass);
Packit 6ef888
	if (error < 0) {
Packit 6ef888
		stack;
Packit 6ef888
		return -1;
Packit 6ef888
	}
Packit 6ef888
Packit 6ef888
	return error;
Packit 6ef888
}
Packit 6ef888
Packit 6ef888
int check_dir(struct gfs2_sbd *sdp, struct gfs2_inode *ip, struct metawalk_fxns *pass)
Packit 6ef888
{
Packit 6ef888
	int error = 0;
Packit 6ef888
Packit 6ef888
	if (ip->i_di.di_flags & GFS2_DIF_EXHASH)
Packit 6ef888
		error = check_leaf_blks(ip, pass);
Packit 6ef888
	else
Packit 6ef888
		error = check_linear_dir(ip, ip->i_bh, pass);
Packit 6ef888
Packit 6ef888
	if (error < 0)
Packit 6ef888
		stack;
Packit 6ef888
Packit 6ef888
	return error;
Packit 6ef888
}