diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c index 190715e..ad044d0 100644 --- a/gfs2/libgfs2/rgrp.c +++ b/gfs2/libgfs2/rgrp.c @@ -332,7 +332,7 @@ static uint64_t align_block(const uint64_t base, const uint64_t align) */ uint64_t lgfs2_rgrp_align_addr(const lgfs2_rgrps_t rgs, uint64_t addr) { - return align_block(addr, rgs->align); + return align_block(addr, rgs->align_off); } /** diff --git a/gfs2/libgfs2/rgrp.c.bz1698858-mkfs_gfs2_Improve_alignment_of_first_resource_group b/gfs2/libgfs2/rgrp.c.bz1698858-mkfs_gfs2_Improve_alignment_of_first_resource_group new file mode 100644 index 0000000..190715e --- /dev/null +++ b/gfs2/libgfs2/rgrp.c.bz1698858-mkfs_gfs2_Improve_alignment_of_first_resource_group @@ -0,0 +1,1016 @@ +#include "clusterautoconfig.h" + +#include +#include +#include +#include +#include +#include + +#include "libgfs2.h" +#include "rgrp.h" + +#define RG_SYNC_TOLERANCE 1000 +#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) + +static void compute_bitmaps(lgfs2_rgrp_t rg, const unsigned bsize) +{ + int x; + + rg->bits[0].bi_offset = sizeof(struct gfs2_rgrp); + rg->bits[0].bi_start = 0; + rg->bits[0].bi_len = bsize - sizeof(struct gfs2_rgrp); + + for (x = 1; x < rg->ri.ri_length; x++) { + rg->bits[x].bi_offset = sizeof(struct gfs2_meta_header); + rg->bits[x].bi_start = rg->bits[x - 1].bi_start + rg->bits[x - 1].bi_len; + rg->bits[x].bi_len = bsize - sizeof(struct gfs2_meta_header); + } + x--; + rg->bits[x].bi_len = rg->ri.ri_bitbytes - rg->bits[x].bi_start; +} + +/** + * gfs2_compute_bitstructs - Compute the bitmap sizes + * bsize: Block size + * rgd: The resource group descriptor + * Returns: 0 on success, -1 on error + */ +int gfs2_compute_bitstructs(const uint32_t bsize, struct rgrp_tree *rgd) +{ + uint32_t length = rgd->ri.ri_length; + uint32_t bytes_left; + int ownbits = 0; + + /* Max size of an rg is 2GB. A 2GB RG with (minimum) 512-byte blocks + has 4194304 blocks. We can represent 4 blocks in one bitmap byte. + Therefore, all 4194304 blocks can be represented in 1048576 bytes. + Subtract a metadata header for each 512-byte block and we get + 488 bytes of bitmap per block. Divide 1048576 by 488 and we can + be assured we should never have more than 2149 of them. */ + errno = EINVAL; + if (length > 2149 || length == 0) + return -1; + + if(rgd->bits == NULL) { + rgd->bits = calloc(length, sizeof(struct gfs2_bitmap)); + if(rgd->bits == NULL) + return -1; + ownbits = 1; + } + + compute_bitmaps(rgd, bsize); + bytes_left = rgd->ri.ri_bitbytes - (rgd->bits[rgd->ri.ri_length - 1].bi_start + + rgd->bits[rgd->ri.ri_length - 1].bi_len); + errno = EINVAL; + if(bytes_left) + goto errbits; + + if((rgd->bits[length - 1].bi_start + + rgd->bits[length - 1].bi_len) * GFS2_NBBY != rgd->ri.ri_data) + goto errbits; + + return 0; +errbits: + if (ownbits) { + free(rgd->bits); + rgd->bits = NULL; + } + return -1; +} + + +/** + * blk2rgrpd - Find resource group for a given data block number + * @sdp: The GFS superblock + * @n: The data block number + * + * Returns: Ths resource group, or NULL if not found + */ +struct rgrp_tree *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk) +{ + struct rgrp_tree *rgd = (struct rgrp_tree *)sdp->rgtree.osi_node; + while (rgd) { + if (blk < rgd->ri.ri_addr) + rgd = (struct rgrp_tree *)rgd->node.osi_left; + else if (blk >= rgd->ri.ri_data0 + rgd->ri.ri_data) + rgd = (struct rgrp_tree *)rgd->node.osi_right; + else + return rgd; + } + return NULL; +} + +/** + * Allocate a multi-block buffer for a resource group's bitmaps. This is done + * as one chunk and should be freed using lgfs2_rgrp_bitbuf_free(). + * Returns 0 on success with the bitmap buffer allocated in the resource group, + * or non-zero on failure with errno set. + */ +int lgfs2_rgrp_bitbuf_alloc(lgfs2_rgrp_t rg) +{ + struct gfs2_sbd *sdp = rg->rgrps->sdp; + struct gfs2_buffer_head *bhs; + size_t len = rg->ri.ri_length * sdp->bsize; + unsigned long io_align = sdp->bsize; + unsigned i; + char *bufs; + + if (rg->rgrps->align > 0) { + len = ROUND_UP(len, rg->rgrps->align * sdp->bsize); + io_align = rg->rgrps->align_off * sdp->bsize; + } + bhs = calloc(rg->ri.ri_length, sizeof(struct gfs2_buffer_head)); + if (bhs == NULL) + return 1; + + if (posix_memalign((void **)&bufs, io_align, len) != 0) { + errno = ENOMEM; + free(bhs); + return 1; + } + memset(bufs, 0, len); + + for (i = 0; i < rg->ri.ri_length; i++) { + rg->bits[i].bi_bh = bhs + i; + rg->bits[i].bi_bh->iov.iov_base = bufs + (i * sdp->bsize); + rg->bits[i].bi_bh->iov.iov_len = sdp->bsize; + rg->bits[i].bi_bh->b_blocknr = rg->ri.ri_addr + i; + rg->bits[i].bi_bh->sdp = sdp; + } + return 0; +} + +/** + * Free the multi-block bitmap buffer from a resource group. The buffer should + * have been allocated as a single chunk as in lgfs2_rgrp_bitbuf_alloc(). + * This does not implicitly write the bitmaps to disk. Use lgfs2_rgrp_write() + * for that. + * rg: The resource groups whose bitmap buffer should be freed. + */ +void lgfs2_rgrp_bitbuf_free(lgfs2_rgrp_t rg) +{ + unsigned i; + free(rg->bits[0].bi_bh->iov.iov_base); + free(rg->bits[0].bi_bh); + for (i = 0; i < rg->ri.ri_length; i++) + rg->bits[i].bi_bh = NULL; +} + +/** + * Check a resource group's crc + * Returns 0 on success, non-zero if crc is bad + */ +int lgfs2_rgrp_crc_check(char *buf) +{ + int ret = 0; +#ifdef GFS2_HAS_RG_RI_FIELDS + struct gfs2_rgrp *rg = (struct gfs2_rgrp *)buf; + uint32_t crc = rg->rg_crc; + + if (crc == 0) + return 0; + + rg->rg_crc = 0; + if (be32_to_cpu(crc) != gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp))) + ret = 1; + rg->rg_crc = crc; +#endif + return ret; +} + +/** + * Set the crc of an on-disk resource group + */ +void lgfs2_rgrp_crc_set(char *buf) +{ +#ifdef GFS2_HAS_RG_RI_FIELDS + struct gfs2_rgrp *rg = (struct gfs2_rgrp *)buf; + uint32_t crc; + + rg->rg_crc = 0; + crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp)); + rg->rg_crc = cpu_to_be32(crc); +#endif +} + +/** + * gfs2_rgrp_read - read in the resource group information from disk. + * @rgd - resource group structure + * returns: 0 if no error, otherwise the block number that failed + */ +uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd) +{ + unsigned x, length = rgd->ri.ri_length; + struct gfs2_buffer_head **bhs; + + if (length == 0 || gfs2_check_range(sdp, rgd->ri.ri_addr)) + return -1; + + bhs = calloc(length, sizeof(struct gfs2_buffer_head *)); + if (bhs == NULL) + return -1; + + if (breadm(sdp, bhs, length, rgd->ri.ri_addr)) { + free(bhs); + return -1; + } + + for (x = 0; x < length; x++) { + struct gfs2_bitmap *bi = &rgd->bits[x]; + int mtype = (x ? GFS2_METATYPE_RB : GFS2_METATYPE_RG); + + bi->bi_bh = bhs[x]; + if (gfs2_check_meta(bi->bi_bh, mtype)) { + unsigned err = x; + do { + brelse(rgd->bits[x].bi_bh); + rgd->bits[x].bi_bh = NULL; + } while (x-- != 0); + free(bhs); + return rgd->ri.ri_addr + err; + } + } + if (sdp->gfs1) + gfs_rgrp_in((struct gfs_rgrp *)&rgd->rg, rgd->bits[0].bi_bh); + else { + if (lgfs2_rgrp_crc_check(rgd->bits[0].bi_bh->b_data)) { + free(bhs); + return rgd->ri.ri_addr; + } + gfs2_rgrp_in(&rgd->rg, rgd->bits[0].bi_bh->b_data); + } + free(bhs); + return 0; +} + +void gfs2_rgrp_relse(struct rgrp_tree *rgd) +{ + int x, length = rgd->ri.ri_length; + + if (rgd->bits == NULL) + return; + for (x = 0; x < length; x++) { + if (rgd->bits[x].bi_bh && rgd->bits[x].bi_bh->b_data) { + brelse(rgd->bits[x].bi_bh); + rgd->bits[x].bi_bh = NULL; + } + } +} + +struct rgrp_tree *rgrp_insert(struct osi_root *rgtree, uint64_t rgblock) +{ + struct osi_node **newn = &rgtree->osi_node, *parent = NULL; + struct rgrp_tree *data; + + /* Figure out where to put new node */ + while (*newn) { + struct rgrp_tree *cur = (struct rgrp_tree *)*newn; + + parent = *newn; + if (rgblock < cur->ri.ri_addr) + newn = &((*newn)->osi_left); + else if (rgblock > cur->ri.ri_addr) + newn = &((*newn)->osi_right); + else + return cur; + } + + data = calloc(1, sizeof(struct rgrp_tree)); + if (!data) + return NULL; + /* Add new node and rebalance tree. */ + data->ri.ri_addr = rgblock; + osi_link_node(&data->node, parent, newn); + osi_insert_color(&data->node, rgtree); + + return data; +} + +void gfs2_rgrp_free(struct osi_root *rgrp_tree) +{ + struct rgrp_tree *rgd; + int rgs_since_sync = 0; + struct osi_node *n; + struct gfs2_sbd *sdp = NULL; + + if (OSI_EMPTY_ROOT(rgrp_tree)) + return; + while ((n = osi_first(rgrp_tree))) { + rgd = (struct rgrp_tree *)n; + + if (rgd->bits) { + if (rgd->bits[0].bi_bh) { /* if a buffer exists */ + rgs_since_sync++; + if (rgs_since_sync >= RG_SYNC_TOLERANCE) { + if (!sdp) + sdp = rgd->bits[0].bi_bh->sdp; + fsync(sdp->device_fd); + rgs_since_sync = 0; + } + gfs2_rgrp_relse(rgd); /* free them all. */ + } + free(rgd->bits); + } + osi_erase(&rgd->node, rgrp_tree); + free(rgd); + } +} + +static uint64_t align_block(const uint64_t base, const uint64_t align) +{ + if ((align > 0) && ((base % align) > 0)) + return (base - (base % align)) + align; + return base; +} + +/** + * Calculate the aligned block address of a resource group. + * rgs: The resource groups handle + * base: The base address of the first resource group address, in blocks + * Returns the aligned address of the first resource group. + */ +uint64_t lgfs2_rgrp_align_addr(const lgfs2_rgrps_t rgs, uint64_t addr) +{ + return align_block(addr, rgs->align); +} + +/** + * Calculate the aligned relative address of the next resource group (and thus + * the aligned length of this one). + * rgs: The resource groups handle + * base: The base length of the current resource group, in blocks + * Returns the length of the resource group (the aligned relative address of + * the next one) + */ +uint32_t lgfs2_rgrp_align_len(const lgfs2_rgrps_t rgs, uint32_t len) +{ + return align_block(len, rgs->align) + rgs->align_off; +} + +/** + * Plan the sizes of resource groups for remaining free space, based on a + * target maximum size. In order to make best use of the space while keeping + * the resource groups aligned appropriately we need to either reduce the + * length of every resource group or of a subset of the resource groups, so + * we're left with either one or two resource group sizes. We keep track of + * both of these and the numbers of each size of resource group inside the + * resource groups descriptor. + * rgs: The resource groups descriptor + * space: The number of remaining blocks to be allocated + * tgtsize: The target resource group size in blocks + * Returns the number of resource groups planned to fit in the given space, or + * 0 if the smallest resource group would be smaller than GFS2_MIN_RGSIZE. + */ +uint32_t lgfs2_rgrps_plan(const lgfs2_rgrps_t rgs, uint64_t space, uint32_t tgtsize) +{ + uint32_t maxlen = (GFS2_MAX_RGSIZE << 20) / rgs->sdp->bsize; + uint32_t minlen = (GFS2_MIN_RGSIZE << 20) / rgs->sdp->bsize; + + /* Apps should already have checked that the rg size is <= + GFS2_MAX_RGSIZE but just in case alignment pushes it over we clamp + it back down while calculating the initial rgrp length. */ + do { + rgs->plan[0].len = lgfs2_rgrp_align_len(rgs, tgtsize); + tgtsize -= (rgs->align + 1); + } while (rgs->plan[0].len > maxlen); + + rgs->plan[0].num = space / rgs->plan[0].len; + + if ((space - (rgs->plan[0].num * rgs->plan[0].len)) > rgs->align) { + unsigned adj = (rgs->align > 0) ? rgs->align : 1; + + /* Spread the adjustment required to fit a new rgrp at the end + over all of the rgrps so that we don't end with a single + tiny one. */ + rgs->plan[0].num++; + while (((rgs->plan[0].len - adj) * (uint64_t)rgs->plan[0].num) >= space) + rgs->plan[0].len -= adj; + + /* We've adjusted the size of the rgrps down as far as we can + without leaving a large gap at the end of the device now, + but we still need to reduce the size of some rgrps in order + to make everything fit, so we use the second rgplan to + specify a second length for a subset of the resource groups. + If plan[0].len already divides the space with no remainder, + plan[1].num will stay 0 and it won't be used. */ + rgs->plan[1].len = rgs->plan[0].len - adj; + rgs->plan[1].num = 0; + + while (((rgs->plan[0].len * rgs->plan[0].num) + + (rgs->plan[1].len * rgs->plan[1].num)) >= space) { + /* Total number of rgrps stays constant now. We just + need to shift some weight around */ + rgs->plan[0].num--; + rgs->plan[1].num++; + } + } + + /* Once we've reached this point, + (plan[0].num * plan[0].len) + (plan[1].num * plan[1].len) + will be less than one adjustment smaller than 'space'. */ + if (rgs->plan[0].len < minlen) + return 0; + + return rgs->plan[0].num + rgs->plan[1].num; +} + +/** + * Create and initialise an empty set of resource groups + * bsize: The block size of the fs + * devlen: The length of the device, in fs blocks + * align: The required stripe alignment of the resource groups. Must be a multiple of 'offset'. + * offset: The required stripe offset of the resource groups + * Returns an initialised lgfs2_rgrps_t or NULL if unsuccessful with errno set + */ +lgfs2_rgrps_t lgfs2_rgrps_init(struct gfs2_sbd *sdp, uint64_t align, uint64_t offset) +{ + lgfs2_rgrps_t rgs; + + errno = EINVAL; + if (offset != 0 && (align % offset) != 0) + return NULL; + + rgs = calloc(1, sizeof(*rgs)); + if (rgs == NULL) + return NULL; + + rgs->sdp = sdp; + rgs->align = align; + rgs->align_off = offset; + memset(&rgs->root, 0, sizeof(rgs->root)); + + return rgs; +} + +/** + * Populate a set of resource groups from a gfs2 rindex file. + * fd: An open file descriptor for the rindex file. + * rgs: The set of resource groups. + * Returns the number of resource groups added to the set or 0 on error with + * errno set. + */ +unsigned lgfs2_rindex_read_fd(int fd, lgfs2_rgrps_t rgs) +{ + unsigned count = 0; + char buf[sizeof(struct gfs2_rindex)]; + + errno = EINVAL; + if (fd < 0 || rgs == NULL) + return 0; + + while (1) { + lgfs2_rgrp_t rg; + struct gfs2_rindex ri; + ssize_t ret = read(fd, buf, sizeof(struct gfs2_rindex)); + if (ret == 0) + break; + + if (ret != sizeof(struct gfs2_rindex)) + return 0; + + gfs2_rindex_in(&ri, buf); + rg = lgfs2_rgrps_append(rgs, &ri, 0); + if (rg == NULL) + return 0; + count++; + } + return count; +} + +/** + * Read a rindex entry into a set of resource groups + * rip: The inode of the rindex file + * rgs: The set of resource groups. + * i: The index of the entry to read from the rindex file + * Returns the new rindex entry added to the set or NULL on error with errno + * set. + */ +const struct gfs2_rindex *lgfs2_rindex_read_one(struct gfs2_inode *rip, lgfs2_rgrps_t rgs, unsigned i) +{ + uint64_t off = i * sizeof(struct gfs2_rindex); + char buf[sizeof(struct gfs2_rindex)]; + struct gfs2_rindex ri; + lgfs2_rgrp_t rg; + int ret; + + errno = EINVAL; + if (rip == NULL || rgs == NULL) + return NULL; + + ret = gfs2_readi(rip, buf, off, sizeof(struct gfs2_rindex)); + if (ret != sizeof(struct gfs2_rindex)) + return NULL; + + gfs2_rindex_in(&ri, buf); + rg = lgfs2_rgrps_append(rgs, &ri, 0); + if (rg == NULL) + return NULL; + + return &rg->ri; +} + +/** + * Free a set of resource groups created with lgfs2_rgrps_append() etc. This + * does not write any dirty buffers to disk. See lgfs2_rgrp_write(). + * rgs: A pointer to the set of resource groups to be freed. + */ +void lgfs2_rgrps_free(lgfs2_rgrps_t *rgs) +{ + lgfs2_rgrp_t rg; + struct osi_root *tree = &(*rgs)->root; + + while ((rg = (struct rgrp_tree *)osi_first(tree))) { + int i; + for (i = 0; i < rg->ri.ri_length; i++) { + if (rg->bits[i].bi_bh != NULL) { + free(rg->bits[i].bi_bh); + rg->bits[i].bi_bh = NULL; + } + } + osi_erase(&rg->node, tree); + free(rg); + } + free(*rgs); + *rgs = NULL; +} + +/** + * Calculate the fields for a new entry in the resource group index. + * ri: A pointer to the resource group index entry to be calculated. + * addr: The address at which to place this resource group + * len: The required length of the resource group, in fs blocks. + * If rglen is 0, geometry previously calculated by lgfs2_rgrps_plan() will be used. + * Returns the calculated address of the next resource group or 0 with errno set: + * EINVAL - The entry pointer is NULL + * ENOSPC - This rgrp would extend past the end of the device + */ +uint64_t lgfs2_rindex_entry_new(lgfs2_rgrps_t rgs, struct gfs2_rindex *ri, uint64_t addr, uint32_t len) +{ + int plan = -1; + errno = EINVAL; + if (!ri) + return 0; + + errno = ENOSPC; + if (rgs->plan[0].num > 0) + plan = 0; + else if (rgs->plan[1].num > 0) + plan = 1; + else if (len == 0) + return 0; + + if (plan >= 0 && (len == 0 || len == rgs->plan[plan].len)) { + len = rgs->plan[plan].len; + rgs->plan[plan].num--; + } + + if (addr + len > rgs->sdp->device.length) + return 0; + + ri->ri_addr = addr; + ri->ri_length = rgblocks2bitblocks(rgs->sdp->bsize, len, &ri->ri_data); + ri->__pad = 0; + ri->ri_data0 = ri->ri_addr + ri->ri_length; + ri->ri_bitbytes = ri->ri_data / GFS2_NBBY; + memset(&ri->ri_reserved, 0, sizeof(ri->ri_reserved)); + + return ri->ri_addr + len; +} + +/** + * Return the rindex structure relating to a resource group. + * The return type is const to advise callers that making changes to this + * structure directly isn't wise. libgfs2 functions should be used instead. + */ +const struct gfs2_rindex *lgfs2_rgrp_index(lgfs2_rgrp_t rg) +{ + return &rg->ri; +} + +/** + * Return the rgrp structure relating to a resource group. + * The return type is const to advise callers that making changes to this + * structure directly isn't wise. libgfs2 functions should be used instead. + */ +const struct gfs2_rgrp *lgfs2_rgrp_rgrp(lgfs2_rgrp_t rg) +{ + return &rg->rg; +} + +/** + * Returns the total resource group size, in blocks, required to give blksreq data blocks + */ +unsigned lgfs2_rgsize_for_data(uint64_t blksreq, unsigned bsize) +{ + const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp)); + const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header)); + unsigned bitblocks = 1; + blksreq = (blksreq + 3) & ~3; + if (blksreq > blks_rgrp) + bitblocks += ((blksreq - blks_rgrp) + blks_meta - 1) / blks_meta; + return bitblocks + blksreq; +} + +// Temporary function to aid in API migration +struct osi_node *lgfs2_rgrps_root(lgfs2_rgrps_t rgs) +{ + return rgs->root.osi_node; +} + +/** + * Insert a new resource group after the last resource group in a set. + * rgs: The set of resource groups + * entry: The entry to be added + * rg_skip: The value to be used for this resource group's rg_skip field + * Returns the new resource group on success or NULL on failure with errno set. + */ +lgfs2_rgrp_t lgfs2_rgrps_append(lgfs2_rgrps_t rgs, struct gfs2_rindex *entry, uint32_t rg_skip) +{ + lgfs2_rgrp_t rg; + struct osi_node **link = &rgs->root.osi_node; + struct osi_node *parent = osi_last(&rgs->root); + lgfs2_rgrp_t lastrg = (lgfs2_rgrp_t)parent; + + errno = EINVAL; + if (entry == NULL) + return NULL; + + if (lastrg != NULL) { /* Tree is not empty */ + if (entry->ri_addr <= lastrg->ri.ri_addr) + return NULL; /* Appending with a lower address doesn't make sense */ + link = &lastrg->node.osi_right; + } + + rg = calloc(1, sizeof(*rg) + (entry->ri_length * sizeof(struct gfs2_bitmap))); + if (rg == NULL) + return NULL; + + rg->bits = (struct gfs2_bitmap *)(rg + 1); + + osi_link_node(&rg->node, parent, link); + osi_insert_color(&rg->node, &rgs->root); + + memcpy(&rg->ri, entry, sizeof(struct gfs2_rindex)); + rg->rg.rg_header.mh_magic = GFS2_MAGIC; + rg->rg.rg_header.mh_type = GFS2_METATYPE_RG; + rg->rg.rg_header.mh_format = GFS2_FORMAT_RG; + rg->rg.rg_free = rg->ri.ri_data; +#ifdef GFS2_HAS_RG_SKIP + rg->rg.rg_skip = rg_skip; +#endif +#ifdef GFS2_HAS_RG_RI_FIELDS + rg->rg.rg_data0 = rg->ri.ri_data0; + rg->rg.rg_data = rg->ri.ri_data; + rg->rg.rg_bitbytes = rg->ri.ri_bitbytes; + rg->rg.rg_crc = 0; +#endif + compute_bitmaps(rg, rgs->sdp->bsize); + rg->rgrps = rgs; + return rg; +} + +/** + * Write a resource group to a file descriptor. + * Returns 0 on success or non-zero on failure with errno set + */ +int lgfs2_rgrp_write(int fd, const lgfs2_rgrp_t rg) +{ + struct gfs2_sbd *sdp = rg->rgrps->sdp; + unsigned int i; + const struct gfs2_meta_header bmh = { + .mh_magic = GFS2_MAGIC, + .mh_type = GFS2_METATYPE_RB, + .mh_format = GFS2_FORMAT_RB, + }; + int freebufs = 0; + ssize_t ret; + size_t len; + + if (rg->bits[0].bi_bh == NULL) { + freebufs = 1; + if (lgfs2_rgrp_bitbuf_alloc(rg) != 0) + return -1; + } + gfs2_rgrp_out(&rg->rg, rg->bits[0].bi_bh->b_data); + for (i = 1; i < rg->ri.ri_length; i++) + gfs2_meta_header_out(&bmh, rg->bits[i].bi_bh->b_data); + + len = sdp->bsize * rg->ri.ri_length; + if (rg->rgrps->align > 0) + len = ROUND_UP(len, rg->rgrps->align * sdp->bsize); + + ret = pwrite(sdp->device_fd, rg->bits[0].bi_bh->b_data, len, + rg->bits[0].bi_bh->b_blocknr * sdp->bsize); + + if (freebufs) + lgfs2_rgrp_bitbuf_free(rg); + + return ret == len ? 0 : -1; +} + +lgfs2_rgrp_t lgfs2_rgrp_first(lgfs2_rgrps_t rgs) +{ + return (lgfs2_rgrp_t)osi_first(&rgs->root); +} + +lgfs2_rgrp_t lgfs2_rgrp_next(lgfs2_rgrp_t rg) +{ + return (lgfs2_rgrp_t)osi_next(&rg->node); +} + +lgfs2_rgrp_t lgfs2_rgrp_prev(lgfs2_rgrp_t rg) +{ + return (lgfs2_rgrp_t)osi_prev(&rg->node); +} + +lgfs2_rgrp_t lgfs2_rgrp_last(lgfs2_rgrps_t rgs) +{ + return (lgfs2_rgrp_t)osi_last(&rgs->root); +} + +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or non-zero with errno set + */ +int lgfs2_rbm_from_block(struct lgfs2_rbm *rbm, uint64_t block) +{ + uint64_t rblock = block - rbm->rgd->ri.ri_data0; + struct gfs2_sbd *sdp = rbm_bi(rbm)->bi_bh->sdp; + + if (rblock > UINT_MAX) { + errno = EINVAL; + return 1; + } + if (block >= rbm->rgd->ri.ri_data0 + rbm->rgd->ri.ri_data) { + errno = E2BIG; + return 1; + } + + rbm->bii = 0; + rbm->offset = (uint32_t)(rblock); + /* Check if the block is within the first block */ + if (rbm->offset < (rbm_bi(rbm)->bi_len * GFS2_NBBY)) + return 0; + + /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ + rbm->offset += (sizeof(struct gfs2_rgrp) - + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; + rbm->bii = rbm->offset / sdp->sd_blocks_per_bitmap; + rbm->offset -= rbm->bii * sdp->sd_blocks_per_bitmap; + return 0; +} + +/** + * lgfs2_rbm_incr - increment an rbm structure + * @rbm: The rbm with rgd already set correctly + * + * This function takes an existing rbm structure and increments it to the next + * viable block offset. + * + * Returns: If incrementing the offset would cause the rbm to go past the + * end of the rgrp, true is returned, otherwise false. + * + */ +static int lgfs2_rbm_incr(struct lgfs2_rbm *rbm) +{ + if (rbm->offset + 1 < (rbm_bi(rbm)->bi_len * GFS2_NBBY)) { /* in the same bitmap */ + rbm->offset++; + return 0; + } + if (rbm->bii == rbm->rgd->ri.ri_length - 1) /* at the last bitmap */ + return 1; + + rbm->offset = 0; + rbm->bii++; + return 0; +} + +/** + * lgfs2_testbit - test a bit in the bitmaps + * @rbm: The bit to test + * + * Returns: The two bit block state of the requested bit + */ +static inline uint8_t lgfs2_testbit(const struct lgfs2_rbm *rbm) +{ + struct gfs2_bitmap *bi = rbm_bi(rbm); + const uint8_t *buffer = (uint8_t *)bi->bi_bh->b_data + bi->bi_offset; + const uint8_t *byte; + unsigned int bit; + + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; + + return (*byte >> bit) & GFS2_BIT_MASK; +} + +/** + * lgfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ +static int lgfs2_unaligned_extlen(struct lgfs2_rbm *rbm, uint32_t n_unaligned, uint32_t *len) +{ + uint32_t n; + uint8_t res; + + for (n = 0; n < n_unaligned; n++) { + res = lgfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return 1; + (*len)--; + if (*len == 0) + return 1; + if (lgfs2_rbm_incr(rbm)) + return 1; + } + + return 0; +} + +static uint8_t *check_bytes8(const uint8_t *start, uint8_t value, unsigned bytes) +{ + while (bytes) { + if (*start != value) + return (void *)start; + start++; + bytes--; + } + return NULL; +} + +/** + * lgfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using check_bytes8 when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ +static uint32_t lgfs2_free_extlen(const struct lgfs2_rbm *rrbm, uint32_t len) +{ + struct lgfs2_rbm rbm = *rrbm; + uint32_t n_unaligned = rbm.offset & 3; + uint32_t size = len; + uint32_t bytes; + uint32_t chunk_size; + uint8_t *ptr, *start, *end; + uint64_t block; + struct gfs2_bitmap *bi; + + if (n_unaligned && + lgfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + n_unaligned = len & 3; + /* Start is now byte aligned */ + while (len > 3) { + bi = rbm_bi(&rbm); + start = (uint8_t *)bi->bi_bh->b_data; + end = start + bi->bi_bh->sdp->bsize; + start += bi->bi_offset; + start += (rbm.offset / GFS2_NBBY); + bytes = (len / GFS2_NBBY) < (end - start) ? (len / GFS2_NBBY):(end - start); + ptr = check_bytes8(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + len -= chunk_size; + block = lgfs2_rbm_to_block(&rbm); + if (lgfs2_rbm_from_block(&rbm, block + chunk_size)) { + n_unaligned = 0; + break; + } + if (ptr) { + n_unaligned = 3; + break; + } + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + lgfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + +/** + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @minext: Pointer to the requested extent length (NULL for a single block) + * This is updated to be the actual reservation size. + * + * Returns: 0 on success, non-zero with errno == ENOSPC if there is no block of the requested state + */ +int lgfs2_rbm_find(struct lgfs2_rbm *rbm, uint8_t state, uint32_t *minext) +{ + int initial_bii; + uint32_t offset; + int n = 0; + int iters = rbm->rgd->ri.ri_length; + uint32_t extlen; + + /* If we are not starting at the beginning of a bitmap, then we + * need to add one to the bitmap count to ensure that we search + * the starting bitmap twice. + */ + if (rbm->offset != 0) + iters++; + + for (n = 0; n < iters; n++) { + struct gfs2_bitmap *bi = rbm_bi(rbm); + struct gfs2_buffer_head *bh = bi->bi_bh; + uint8_t *buf = (uint8_t *)bh->b_data + bi->bi_offset; + uint64_t block; + int ret; + + if ((rbm->rgd->rg.rg_free < *minext) && (state == GFS2_BLKST_FREE)) + goto next_bitmap; + + offset = gfs2_bitfit(buf, bi->bi_len, rbm->offset, state); + if (offset == BFITNOENT) + goto next_bitmap; + + rbm->offset = offset; + initial_bii = rbm->bii; + block = lgfs2_rbm_to_block(rbm); + extlen = 1; + + if (*minext != 0) + extlen = lgfs2_free_extlen(rbm, *minext); + + if (extlen >= *minext) + return 0; + + ret = lgfs2_rbm_from_block(rbm, block + extlen); + if (ret == 0) { + n += (rbm->bii - initial_bii); + continue; + } + + if (errno == E2BIG) { + rbm->bii = 0; + rbm->offset = 0; + n += (rbm->bii - initial_bii); + goto res_covered_end_of_rgrp; + } + + return ret; + +next_bitmap: /* Find next bitmap in the rgrp */ + rbm->offset = 0; + rbm->bii++; + if (rbm->bii == rbm->rgd->ri.ri_length) + rbm->bii = 0; + +res_covered_end_of_rgrp: + if (rbm->bii == 0) + break; + } + + errno = ENOSPC; + return 1; +} + +/** + * lgfs2_alloc_extent - allocate an extent from a given bitmap + * @rbm: the resource group information + * @state: The state of the first block, GFS2_BLKST_DINODE or GFS2_BLKST_USED + * @elen: The requested extent length + * Returns the length of the extent allocated. + */ +unsigned lgfs2_alloc_extent(const struct lgfs2_rbm *rbm, int state, const unsigned elen) +{ + struct lgfs2_rbm pos = { .rgd = rbm->rgd, }; + const uint64_t block = lgfs2_rbm_to_block(rbm); + unsigned len; + + gfs2_set_bitmap(rbm->rgd, block, state); + + for (len = 1; len < elen; len++) { + int ret = lgfs2_rbm_from_block(&pos, block + len); + if (ret || lgfs2_testbit(&pos) != GFS2_BLKST_FREE) + break; + gfs2_set_bitmap(pos.rgd, block + len, GFS2_BLKST_USED); + } + return len; +} diff --git a/tests/mkfs.at b/tests/mkfs.at index ee444f5..3e3fb82 100644 --- a/tests/mkfs.at +++ b/tests/mkfs.at @@ -122,6 +122,8 @@ AT_KEYWORDS(mkfs.gfs2 mkfs) AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=0:512:65536:393216:512 $GFS_TGT], 0, [ignore], [ignore]) # Check rgrp alignment to minimum_io_size: 65536 / 4096 == 16 AT_CHECK([gfs2_edit -p rindex $GFS_TGT | grep ri_addr | awk '{print $2, $2 % 16; if ($2 % 16 != 0) { exit 1 }}'], 0, [ignore], [ignore]) +# rhbz#1698858 +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=0:512:131072:6291456:512 $GFS_TGT], 0, [ignore], [ignore]) AT_CLEANUP AT_SETUP([Values of rg_skip]) diff --git a/tests/mkfs.at.bz1698858-mkfs_gfs2_Improve_alignment_of_first_resource_group b/tests/mkfs.at.bz1698858-mkfs_gfs2_Improve_alignment_of_first_resource_group new file mode 100644 index 0000000..ee444f5 --- /dev/null +++ b/tests/mkfs.at.bz1698858-mkfs_gfs2_Improve_alignment_of_first_resource_group @@ -0,0 +1,167 @@ +AT_TESTED([mkfs.gfs2]) +AT_BANNER([mkfs.gfs2 tests]) + +AT_SETUP([Locking protocol validation]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p badprotocol $GFS_TGT], 255, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Resource group size validation]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 31 $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 2049 $GFS_TGT], 255, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Journal size validation]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -J 7 $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -J 1025 $GFS_TGT], 255, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Block count validation]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_TGT_REGEN +AT_CHECK([$GFS_MKFS -p lock_nolock -b 512 $GFS_TGT $(($(gfs_max_blocks 512)+1))], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -b 4096 $GFS_TGT $(($(gfs_max_blocks 4096)+1))], 255, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Quota change file size validation]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -c 0 $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -c 65 $GFS_TGT], 255, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Locking protocols]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock $GFS_TGT]) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_dlm -t foo:bar $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Valid block sizes 512-4096]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -b 512 $GFS_TGT]) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -b 1024 $GFS_TGT]) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -b 2048 $GFS_TGT]) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -b 4096 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Max. blocks, min. block size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -b 512 $GFS_TGT $(gfs_max_blocks 512)]) +AT_CLEANUP + +AT_SETUP([Max. blocks, max. block size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -b 4096 $GFS_TGT $(gfs_max_blocks 4096)]) +AT_CLEANUP + +AT_SETUP([Max. resource group size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -r 2048 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Min. resource group size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -r 32 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Max. resource group size, min. block size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -r 2048 -b 512 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Max. journal size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -J 1024 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Min. journal size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -J 8 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Max. quota change file size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -c 64 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Min. quota change file size]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -c 1 $GFS_TGT]) +AT_CLEANUP + +AT_SETUP([Lock table validation]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -t "" $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -t "123456789012345678901234567890123:12345678" $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -t "12345678901234567:1234567890123456789012345678901" $GFS_TGT], 255, [ignore], [ignore]) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_nolock -t "12345678901234567890123456789012:123456789012345678901234567890" $GFS_TGT]) +AT_CHECK([$GFS_MKFS -p lock_dlm -t "" $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_dlm -t "quite_long_cluster_name_test_here:intec34p" $GFS_TGT], 255, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_dlm -t "financial_cluster:this_time_we_test_fs_naming_len" $GFS_TGT], 255, [ignore], [ignore]) +GFS_FSCK_CHECK([$GFS_MKFS -p lock_dlm -t "a_really_long_named_cluster_here:concurrently_lets_check_fs_len" $GFS_TGT]) +AT_CLEANUP + +# -o test_topology order: +# alignment_offset, +# logical_sector_size, +# minimum_io_size, +# optimal_io_size, +# physical_sector_size + +AT_SETUP([Device i/o limits handling]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=0:0:0:0:0 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=7168:512:0:33553920:512 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=7168:512:8192:33553920:512 $GFS_TGT], 0, [ignore], [Warning: device is not properly aligned. This may harm performance. +]) +AT_CLEANUP + +AT_SETUP([Resource group alignment]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=0:512:65536:393216:512 $GFS_TGT], 0, [ignore], [ignore]) +# Check rgrp alignment to minimum_io_size: 65536 / 4096 == 16 +AT_CHECK([gfs2_edit -p rindex $GFS_TGT | grep ri_addr | awk '{print $2, $2 % 16; if ($2 % 16 != 0) { exit 1 }}'], 0, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Values of rg_skip]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 2048 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgskipcheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 1024 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgskipcheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 512 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgskipcheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 219 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgskipcheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 32 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgskipcheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=0:512:65536:393216:512 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgskipcheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Values of rg_data0, rg_data, rg_bitbytes]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 2048 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgrifieldscheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 1024 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgrifieldscheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 512 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgrifieldscheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 219 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgrifieldscheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -r 32 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgrifieldscheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([$GFS_MKFS -p lock_nolock -o test_topology=0:512:65536:393216:512 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([rgrifieldscheck.sh $GFS_TGT], 0, [ignore], [ignore]) +AT_CLEANUP + +AT_SETUP([Small filesystems]) +AT_KEYWORDS(mkfs.gfs2 mkfs) +GFS_TGT_SIZE(32M) +AT_CHECK([$GFS_MKFS -p lock_nolock $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([fsck.gfs2 -n $GFS_TGT], 0, [ignore], [ignore]) +GFS_TGT_SIZE(64M) +AT_CHECK([$GFS_MKFS -p lock_nolock -j2 $GFS_TGT], 0, [ignore], [ignore]) +AT_CHECK([fsck.gfs2 -n $GFS_TGT], 0, [ignore], [ignore]) +AT_CLEANUP