/*
* Copyright (c) 2020 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/hashLock.c#5 $
*/
/**
* HashLock controls and coordinates writing, index access, and dedupe among
* groups of DataVIOs concurrently writing identical blocks, allowing them to
* deduplicate not only against advice but also against each other. This saves
* on index queries and allows those DataVIOs to concurrently deduplicate
* against a single block instead of being serialized through a PBN read lock.
* Only one index query is needed for each HashLock, instead of one for every
* DataVIO.
*
* A HashLock acts like a state machine perhaps more than as a lock. Other
* than the starting and ending states INITIALIZING and DESTROYING, every
* state represents and is held for the duration of an asynchronous operation.
* All state transitions are performed on the thread of the HashZone
* containing the lock. An asynchronous operation is almost always performed
* upon entering a state, and the callback from that operation triggers
* exiting the state and entering a new state.
*
* In all states except DEDUPING, there is a single DataVIO, called the lock
* agent, performing the asynchronous operations on behalf of the lock. The
* agent will change during the lifetime of the lock if the lock is shared by
* more than one DataVIO. DataVIOs waiting to deduplicate are kept on a wait
* queue. Viewed a different way, the agent holds the lock exclusively until
* the lock enters the DEDUPING state, at which point it becomes a shared lock
* that all the waiters (and any new DataVIOs that arrive) use to share a PBN
* lock. In state DEDUPING, there is no agent. When the last DataVIO in the
* lock calls back in DEDUPING, it becomes the agent and the lock becomes
* exclusive again. New DataVIOs that arrive in the lock will also go on the
* wait queue.
*
* The existence of lock waiters is a key factor controlling which state the
* lock transitions to next. When the lock is new or has waiters, it will
* always try to reach DEDUPING, and when it doesn't, it will try to clean up
* and exit.
*
* Deduping requires holding a PBN lock on a block that is known to contain
* data identical to the DataVIOs in the lock, so the lock will send the
* agent to the duplicate zone to acquire the PBN lock (LOCKING), to the
* kernel I/O threads to read and verify the data (VERIFYING), or to write a
* new copy of the data to a full data block or a slot in a compressed block
* (WRITING).
*
* Cleaning up consists of updating the index when the data location is
* different from the initial index query (UPDATING, triggered by stale
* advice, compression, and rollover), releasing the PBN lock on the duplicate
* block (UNLOCKING), and releasing the HashLock itself back to the hash zone
* (DESTROYING).
*
* The shortest sequence of states is for non-concurrent writes of new data:
* INITIALIZING -> QUERYING -> WRITING -> DESTROYING
* This sequence is short because no PBN read lock or index update is needed.
*
* Non-concurrent, finding valid advice looks like this (endpoints elided):
* -> QUERYING -> LOCKING -> VERIFYING -> DEDUPING -> UNLOCKING ->
* Or with stale advice (endpoints elided):
* -> QUERYING -> LOCKING -> VERIFYING -> UNLOCKING -> WRITING -> UPDATING ->
*
* When there are not enough available reference count increments available on
* a PBN for a DataVIO to deduplicate, a new lock is forked and the excess
* waiters roll over to the new lock (which goes directly to WRITING). The new
* lock takes the place of the old lock in the lock map so new DataVIOs will
* be directed to it. The two locks will proceed independently, but only the
* new lock will have the right to update the index (unless it also forks).
*
* Since rollover happens in a lock instance, once a valid data location has
* been selected, it will not change. QUERYING and WRITING are only performed
* once per lock lifetime. All other non-endpoint states can be re-entered.
*
* XXX still need doc on BYPASSING
*
* The function names in this module follow a convention referencing the
* states and transitions in the state machine diagram for VDOSTORY-190.
* [XXX link or repository path to it?]
* For example, for the LOCKING state, there are startLocking() and
* finishLocking() functions. startLocking() is invoked by the finish function
* of the state (or states) that transition to LOCKING. It performs the actual
* lock state change and must be invoked on the hash zone thread.
* finishLocking() is called by (or continued via callback from) the code
* actually obtaining the lock. It does any bookkeeping or decision-making
* required and invokes the appropriate start function of the state being
* transitioned to after LOCKING.
**/
#include "hashLock.h"
#include "hashLockInternals.h"
#include "logger.h"
#include "permassert.h"
#include "compressionState.h"
#include "constants.h"
#include "dataVIO.h"
#include "hashZone.h"
#include "packer.h"
#include "pbnLock.h"
#include "physicalZone.h"
#include "ringNode.h"
#include "slab.h"
#include "slabDepot.h"
#include "trace.h"
#include "types.h"
#include "vdoInternal.h"
#include "vioWrite.h"
#include "waitQueue.h"
// Human-readable names for each HashLockState, indexed by the enum value.
// Keep in sync with the HashLockState enum; getHashLockStateName() contains
// a STATIC_ASSERT that catches a state added without a name here.
static const char *LOCK_STATE_NAMES[] = {
  [HASH_LOCK_BYPASSING] = "BYPASSING",
  [HASH_LOCK_DEDUPING] = "DEDUPING",
  [HASH_LOCK_DESTROYING] = "DESTROYING",
  [HASH_LOCK_INITIALIZING] = "INITIALIZING",
  [HASH_LOCK_LOCKING] = "LOCKING",
  [HASH_LOCK_QUERYING] = "QUERYING",
  [HASH_LOCK_UNLOCKING] = "UNLOCKING",
  [HASH_LOCK_UPDATING] = "UPDATING",
  [HASH_LOCK_VERIFYING] = "VERIFYING",
  [HASH_LOCK_WRITING] = "WRITING",
};
// There are loops in the state diagram, so some forward decl's are needed.
static void startDeduping(HashLock *lock, DataVIO *agent, bool agentIsDone);
static void startLocking(HashLock *lock, DataVIO *agent);
static void startWriting(HashLock *lock, DataVIO *agent);
static void unlockDuplicatePBN(VDOCompletion *completion);
static void transferAllocationLock(DataVIO *dataVIO);
/**********************************************************************/
PBNLock *getDuplicateLock(DataVIO *dataVIO)
{
  // A DataVIO not sharing a hash lock has no duplicate lock to report.
  HashLock *lock = dataVIO->hashLock;
  return (lock == NULL) ? NULL : lock->duplicateLock;
}
/**********************************************************************/
const char *getHashLockStateName(HashLockState state)
{
  // Catch if a state has been added without updating the name array.
  STATIC_ASSERT((HASH_LOCK_DESTROYING + 1) == COUNT_OF(LOCK_STATE_NAMES));
  if (state >= COUNT_OF(LOCK_STATE_NAMES)) {
    return NULL;
  }
  return LOCK_STATE_NAMES[state];
}
/**
 * Set the current state of a hash lock.
 *
 * @param lock      The lock to update
 * @param newState  The new state
 **/
static void setHashLockState(HashLock *lock, HashLockState newState)
{
  // Disabled debug scaffolding: flip the condition to true to log every
  // state transition of every hash lock. The compiler removes this branch
  // in normal builds.
  if (false) {
    logWarning("XXX %" PRIptr " %s -> %s", (void *) lock,
               getHashLockStateName(lock->state),
               getHashLockStateName(newState));
  }
  lock->state = newState;
}
/**
 * Assert that a DataVIO is the agent of its hash lock, and that this is being
 * called in the hash zone.
 *
 * @param dataVIO  The DataVIO expected to be the lock agent
 * @param where    A string describing the function making the assertion
 **/
static void assertHashLockAgent(DataVIO *dataVIO, const char *where)
{
  // Not safe to access the agent field except from the hash zone.
  assertInHashZone(dataVIO);
  ASSERT_LOG_ONLY(dataVIO == dataVIO->hashLock->agent,
                  "%s must be for the hash lock agent", where);
}
/**
 * Set or clear the lock agent. Trivial now, but kept as a function so agent
 * changes have a single bottleneck (and a place to add tracing).
 *
 * @param lock      The hash lock to update
 * @param newAgent  The new lock agent (may be NULL to clear the agent)
 **/
static void setAgent(HashLock *lock, DataVIO *newAgent)
{
  lock->agent = newAgent;
}
/**
 * Set the duplicate lock held by a hash lock. May only be called in the
 * physical zone of the PBN lock.
 *
 * @param hashLock  The hash lock to update
 * @param pbnLock   The PBN read lock to use as the duplicate lock
 **/
static void setDuplicateLock(HashLock *hashLock, PBNLock *pbnLock)
{
  ASSERT_LOG_ONLY((hashLock->duplicateLock == NULL),
                  "hash lock must not already hold a duplicate lock");

  // The hash lock now shares ownership of the PBN lock; the matching
  // decrement happens when the lock is released via releasePBNLock().
  pbnLock->holderCount += 1;
  hashLock->duplicateLock = pbnLock;
}
/**
 * Convert a pointer to the hashLockNode field in a DataVIO to the enclosing
 * DataVIO (the C container-of idiom, via offsetof).
 *
 * @param lockNode  The RingNode to convert
 *
 * @return A pointer to the DataVIO containing the RingNode
 **/
static inline DataVIO *dataVIOFromLockNode(RingNode *lockNode)
{
  return (DataVIO *) ((byte *) lockNode - offsetof(DataVIO, hashLockNode));
}
/**
 * Remove the first DataVIO from the lock's wait queue and return it.
 *
 * @param lock  The lock containing the wait queue
 *
 * @return The first (oldest) waiter in the queue, or NULL if the queue is
 *         empty
 **/
static inline DataVIO *dequeueLockWaiter(HashLock *lock)
{
  return waiterAsDataVIO(dequeueNextWaiter(&lock->waiters));
}
/**
 * Continue processing a DataVIO that has been waiting for an event, setting
 * the result from the event, and continuing in a specified callback function.
 *
 * @param dataVIO   The DataVIO to continue
 * @param result    The current result (will not mask older errors)
 * @param callback  The function in which to continue processing
 **/
static void continueDataVIOIn(DataVIO *dataVIO,
                              int result,
                              VDOAction *callback)
{
  // Redirect the completion before resuming so the DataVIO continues in the
  // caller-specified function.
  VDOCompletion *completion = dataVIOAsCompletion(dataVIO);
  completion->callback = callback;
  continueDataVIO(dataVIO, result);
}
/**
 * Set, change, or clear the hash lock a DataVIO is using. Updates the hash
 * lock (or locks) to reflect the change in membership.
 *
 * @param dataVIO  The DataVIO to update
 * @param newLock  The hash lock the DataVIO is joining (may be NULL to only
 *                 leave its current lock)
 **/
static void setHashLock(DataVIO *dataVIO, HashLock *newLock)
{
  HashLock *oldLock = dataVIO->hashLock;
  if (oldLock != NULL) {
    // Fixed typo in assertion message ("halding" -> "holding").
    ASSERT_LOG_ONLY(dataVIO->hashZone != NULL,
                    "must have a hash zone when holding a hash lock");
    ASSERT_LOG_ONLY(!isRingEmpty(&dataVIO->hashLockNode),
                    "must be on a hash lock ring when holding a hash lock");
    ASSERT_LOG_ONLY(oldLock->referenceCount > 0,
                    "hash lock reference must be counted");

    if ((oldLock->state != HASH_LOCK_BYPASSING)
        && (oldLock->state != HASH_LOCK_UNLOCKING)) {
      // If the reference count goes to zero in a non-terminal state, we're
      // most likely leaking this lock.
      ASSERT_LOG_ONLY(oldLock->referenceCount > 1,
                      "hash locks should only become unreferenced in"
                      " a terminal state, not state %s",
                      getHashLockStateName(oldLock->state));
    }

    unspliceRingNode(&dataVIO->hashLockNode);
    oldLock->referenceCount -= 1;
    dataVIO->hashLock = NULL;
  }

  if (newLock != NULL) {
    // Keep all DataVIOs sharing the lock on a ring since they can complete in
    // any order and we'll always need a pointer to one to compare data.
    pushRingNode(&newLock->duplicateRing, &dataVIO->hashLockNode);
    newLock->referenceCount += 1;

    // XXX Not needed for VDOSTORY-190, but useful for checking whether a test
    // is getting concurrent dedupe, and how much.
    if (newLock->maxReferences < newLock->referenceCount) {
      newLock->maxReferences = newLock->referenceCount;
    }

    dataVIO->hashLock = newLock;
  }
}
/**
 * Bottleneck for DataVIOs that have written or deduplicated and that are no
 * longer needed to be an agent for the hash lock.
 *
 * @param dataVIO  The DataVIO to complete and send to be cleaned up
 **/
static void exitHashLock(DataVIO *dataVIO)
{
  // XXX trace record?

  // Release the hash lock now, saving a thread transition in cleanup.
  releaseHashLock(dataVIO);

  // Complete the DataVIO and start the clean-up path in vioWrite to release
  // any locks it still holds.
  finishDataVIO(dataVIO, VDO_SUCCESS);
}
/**
 * Retire the active lock agent, replacing it with the first lock waiter, and
 * make the retired agent exit the hash lock.
 *
 * @param lock  The hash lock to update
 *
 * @return The new lock agent (which will be NULL if there was no waiter)
 **/
static DataVIO *retireLockAgent(HashLock *lock)
{
  DataVIO *oldAgent = lock->agent;
  DataVIO *newAgent = dequeueLockWaiter(lock);
  setAgent(lock, newAgent);
  exitHashLock(oldAgent);
  if (newAgent != NULL) {
    // The incoming agent inherits the lock's current duplicate location.
    setDuplicateLocation(newAgent, lock->duplicate);
  }
  return newAgent;
}
/**
 * Callback to call compressData(), putting a DataVIO back on the write path.
 *
 * @param completion  The DataVIO
 **/
static void compressDataCallback(VDOCompletion *completion)
{
  // XXX VDOSTORY-190 need an error check since compressData doesn't have one.
  compressData(asDataVIO(completion));
}
/**
 * Add a DataVIO to the lock's queue of waiters.
 *
 * @param lock     The hash lock on which to wait
 * @param dataVIO  The DataVIO to add to the queue
 **/
static void waitOnHashLock(HashLock *lock, DataVIO *dataVIO)
{
  int result = enqueueDataVIO(&lock->waiters, dataVIO, THIS_LOCATION(NULL));
  if (result != VDO_SUCCESS) {
    // This should be impossible, but if it somehow happens, give up on trying
    // to dedupe the data.
    setHashLock(dataVIO, NULL);
    continueDataVIOIn(dataVIO, result, compressDataCallback);
    return;
  }

  // Make sure the agent doesn't block indefinitely in the packer since it now
  // has at least one other DataVIO waiting on it.
  if ((lock->state == HASH_LOCK_WRITING) && cancelCompression(lock->agent)) {
    /*
     * Even though we're waiting, we also have to send ourselves as a one-way
     * message to the packer to ensure the agent continues executing. This is
     * safe because cancelCompression() guarantees the agent won't continue
     * executing until this message arrives in the packer, and because the
     * wait queue link isn't used for sending the message.
     */
    dataVIO->compression.lockHolder = lock->agent;
    launchPackerCallback(dataVIO, removeLockHolderFromPacker,
                         THIS_LOCATION("$F;cb=removeLockHolderFromPacker"));
  }
}
/**
 * WaiterCallback function that calls compressData on the DataVIO waiter.
 *
 * @param waiter   The DataVIO's waiter link
 * @param context  Not used
 **/
static void compressWaiter(Waiter *waiter,
                           void *context __attribute__((unused)))
{
  // The waiter is abandoning dedupe, so clear its duplicate flag before
  // sending it back down the write/compress path.
  DataVIO *waitingDataVIO = waiterAsDataVIO(waiter);
  waitingDataVIO->isDuplicate = false;
  compressData(waitingDataVIO);
}
/**
 * Handle the result of the agent for the lock releasing a read lock on
 * duplicate candidate due to aborting the hash lock. This continuation is
 * registered in unlockDuplicatePBN().
 *
 * @param completion  The completion of the DataVIO acting as the lock's agent
 **/
static void finishBypassing(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertHashLockAgent(agent, __func__);
  HashLock *lock = agent->hashLock;

  ASSERT_LOG_ONLY(lock->duplicateLock == NULL,
                  "must have released the duplicate lock for the hash lock");
  exitHashLock(agent);
}
/**
 * Stop using the hash lock, resuming the old write path for the lock agent
 * and any DataVIOs waiting on it, and put it in a state where DataVIOs
 * entering the lock will use the old dedupe path instead of waiting.
 *
 * @param lock   The hash lock
 * @param agent  The DataVIO acting as the agent for the lock (may be NULL if
 *               there is no agent and no duplicate lock to release)
 **/
static void startBypassing(HashLock *lock, DataVIO *agent)
{
  setHashLockState(lock, HASH_LOCK_BYPASSING);

  // Ensure we don't attempt to update advice when cleaning up.
  lock->updateAdvice = false;

  ASSERT_LOG_ONLY(((agent != NULL) || !hasWaiters(&lock->waiters)),
                  "should not have waiters without an agent");
  // Send all waiters back to the compression/write path.
  notifyAllWaiters(&lock->waiters, compressWaiter, NULL);

  if (lock->duplicateLock != NULL) {
    if (agent != NULL) {
      // The agent must reference the duplicate zone to launch it.
      agent->duplicate = lock->duplicate;
      launchDuplicateZoneCallback(agent, unlockDuplicatePBN,
                                  THIS_LOCATION(NULL));
      return;
    }
    ASSERT_LOG_ONLY(false, "hash lock holding a PBN lock must have an agent");
  }

  if (agent == NULL) {
    return;
  }

  // No PBN lock to release; the agent itself resumes the old write path.
  setAgent(lock, NULL);
  agent->isDuplicate = false;
  compressData(agent);
}
/**
 * Abort processing on this hash lock when noticing an error. Currently, this
 * moves the hash lock to the BYPASSING state, to release all pending DataVIOs.
 *
 * @param lock     The HashLock
 * @param dataVIO  The DataVIO with the error
 **/
static void abortHashLock(HashLock *lock, DataVIO *dataVIO)
{
  // If we've already aborted the lock, don't try to re-abort it; just exit.
  if (lock->state == HASH_LOCK_BYPASSING) {
    exitHashLock(dataVIO);
    return;
  }

  if (dataVIO != lock->agent) {
    if ((lock->agent != NULL) || (lock->referenceCount > 1)) {
      // Other DataVIOs are still sharing the lock (which should be DEDUPING),
      // so just kick this one out of the lock to report its error.
      ASSERT_LOG_ONLY(lock->agent == NULL,
                      "only active agent should call abortHashLock");
      exitHashLock(dataVIO);
      return;
    }

    // Make the lone DataVIO the lock agent so it can abort and clean up.
    setAgent(lock, dataVIO);
  }

  startBypassing(lock, dataVIO);
}
/**
 * Handle the result of the agent for the lock releasing a read lock on
 * duplicate candidate. This continuation is registered in
 * unlockDuplicatePBN().
 *
 * @param completion  The completion of the DataVIO acting as the lock's agent
 **/
static void finishUnlocking(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertHashLockAgent(agent, __func__);
  HashLock *lock = agent->hashLock;

  ASSERT_LOG_ONLY(lock->duplicateLock == NULL,
                  "must have released the duplicate lock for the hash lock");

  if (completion->result != VDO_SUCCESS) {
    abortHashLock(lock, agent);
    return;
  }

  if (!lock->verified) {
    /*
     * UNLOCKING -> WRITING transition: The lock we released was on an
     * unverified block, so it must have been a lock on advice we were
     * verifying, not on a location that was used for deduplication. Go write
     * (or compress) the block to get a location to dedupe against.
     */
    startWriting(lock, agent);
    return;
  }

  // With the lock released, the verified duplicate block may already have
  // changed and will need to be re-verified if a waiter arrived.
  lock->verified = false;

  if (hasWaiters(&lock->waiters)) {
    /*
     * UNLOCKING -> LOCKING transition: A new DataVIO entered the hash lock
     * while the agent was releasing the PBN lock. The current agent exits and
     * the waiter has to re-lock and re-verify the duplicate location.
     */
    // XXX VDOSTORY-190 If we used the current agent to re-acquire the PBN
    // lock we wouldn't need to re-verify.
    agent = retireLockAgent(lock);
    startLocking(lock, agent);
    return;
  }

  /*
   * UNLOCKING -> DESTROYING transition: The agent is done with the lock
   * and no other DataVIOs reference it, so remove it from the lock map
   * and return it to the pool.
   */
  exitHashLock(agent);
}
/**
 * Release a read lock on the PBN of the block that may or may not have
 * contained duplicate data. This continuation is launched by
 * startUnlocking(), and calls back to finishUnlocking() on the hash zone
 * thread (or to finishBypassing() if the lock was aborted in the meantime).
 *
 * @param completion  The completion of the DataVIO acting as the lock's agent
 **/
static void unlockDuplicatePBN(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertInDuplicateZone(agent);
  HashLock *lock = agent->hashLock;

  ASSERT_LOG_ONLY(lock->duplicateLock != NULL,
                  "must have a duplicate lock to release");

  releasePBNLock(agent->duplicate.zone, agent->duplicate.pbn,
                 &lock->duplicateLock);

  // Continue on the hash zone thread; the continuation depends on whether
  // the lock has been switched to BYPASSING while we were unlocking.
  if (lock->state == HASH_LOCK_BYPASSING) {
    launchHashZoneCallback(agent, finishBypassing, THIS_LOCATION(NULL));
  } else {
    launchHashZoneCallback(agent, finishUnlocking, THIS_LOCATION(NULL));
  }
}
/**
 * Release a read lock on the PBN of the block that may or may not have
 * contained duplicate data. Enters the UNLOCKING state and sends the agent
 * to the duplicate zone to do the release.
 *
 * @param lock   The hash lock
 * @param agent  The DataVIO currently acting as the agent for the lock
 **/
static void startUnlocking(HashLock *lock, DataVIO *agent)
{
  setHashLockState(lock, HASH_LOCK_UNLOCKING);

  /*
   * XXX If we arrange to continue on the duplicate zone thread when
   * verification fails, and don't explicitly change lock states (or use an
   * agent-local state, or an atomic), we can avoid a thread transition here.
   */
  launchDuplicateZoneCallback(agent, unlockDuplicatePBN, THIS_LOCATION(NULL));
}
/**
 * Process the result of a UDS update performed by the agent for the lock.
 * This continuation is registered in startUpdating().
 *
 * @param completion  The completion of the DataVIO that performed the update
 **/
static void finishUpdating(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertHashLockAgent(agent, __func__);
  HashLock *lock = agent->hashLock;

  if (completion->result != VDO_SUCCESS) {
    abortHashLock(lock, agent);
    return;
  }

  // UDS was updated successfully, so don't update again unless the
  // duplicate location changes due to rollover.
  lock->updateAdvice = false;

  if (hasWaiters(&lock->waiters)) {
    /*
     * UPDATING -> DEDUPING transition: A new DataVIO arrived during the UDS
     * update. Send it on the verified dedupe path. The agent is done with the
     * lock, but the lock may still need to use it to clean up after rollover.
     */
    startDeduping(lock, agent, true);
    return;
  }

  if (lock->duplicateLock != NULL) {
    /*
     * UPDATING -> UNLOCKING transition: No one is waiting to dedupe, but we
     * hold a duplicate PBN lock, so go release it.
     */
    startUnlocking(lock, agent);
  } else {
    /*
     * UPDATING -> DESTROYING transition: No one is waiting to dedupe and
     * there's no lock to release.
     */
    // XXX startDestroying(lock, agent);
    startBypassing(lock, NULL);
    exitHashLock(agent);
  }
}
/**
 * Continue deduplication with the last step, updating UDS with the location
 * of the duplicate that should be returned as advice in the future.
 *
 * @param lock   The hash lock
 * @param agent  The DataVIO currently acting as the agent for the lock
 **/
static void startUpdating(HashLock *lock, DataVIO *agent)
{
  setHashLockState(lock, HASH_LOCK_UPDATING);

  ASSERT_LOG_ONLY(lock->verified, "new advice should have been verified");
  ASSERT_LOG_ONLY(lock->updateAdvice, "should only update advice if needed");

  agent->lastAsyncOperation = UPDATE_INDEX;
  // Continue in finishUpdating() on the hash zone thread when the index
  // update completes.
  setHashZoneCallback(agent, finishUpdating, THIS_LOCATION(NULL));
  dataVIOAsCompletion(agent)->layer->updateAlbireo(agent);
}
/**
 * Handle a DataVIO that has finished deduplicating against the block locked
 * by the hash lock. If there are other DataVIOs still sharing the lock, this
 * will just release the DataVIO's share of the lock and finish processing the
 * DataVIO. If this is the last DataVIO holding the lock, this makes the
 * DataVIO the lock agent and uses it to advance the state of the lock so it
 * can eventually be released.
 *
 * @param lock     The hash lock
 * @param dataVIO  The lock holder that has finished deduplicating
 **/
static void finishDeduping(HashLock *lock, DataVIO *dataVIO)
{
  ASSERT_LOG_ONLY(lock->agent == NULL, "shouldn't have an agent in DEDUPING");
  ASSERT_LOG_ONLY(!hasWaiters(&lock->waiters),
                  "shouldn't have any lock waiters in DEDUPING");

  // Just release the lock reference if other DataVIOs are still deduping.
  if (lock->referenceCount > 1) {
    exitHashLock(dataVIO);
    return;
  }

  // The hash lock must have an agent for all other lock states.
  DataVIO *agent = dataVIO;
  setAgent(lock, agent);

  if (lock->updateAdvice) {
    /*
     * DEDUPING -> UPDATING transition: The location of the duplicate block
     * changed since the initial UDS query because of compression, rollover,
     * or because the query agent didn't have an allocation. The UDS update
     * was delayed in case there was another change in location, but with only
     * this DataVIO using the hash lock, it's time to update the advice.
     */
    startUpdating(lock, agent);
  } else {
    /*
     * DEDUPING -> UNLOCKING transition: Release the PBN read lock on the
     * duplicate location so the hash lock itself can be released (contingent
     * on no new DataVIOs arriving in the lock before the agent returns).
     */
    startUnlocking(lock, agent);
  }
}
/**
 * Implements WaiterCallback. Binds the DataVIO that was waiting to a new hash
 * lock and waits on that lock.
 *
 * @param waiter   The waiter link of the DataVIO to transfer
 * @param context  The forked HashLock the waiter should join
 **/
static void enterForkedLock(Waiter *waiter, void *context)
{
  HashLock *forkedLock = context;
  DataVIO *waitingDataVIO = waiterAsDataVIO(waiter);
  setHashLock(waitingDataVIO, forkedLock);
  waitOnHashLock(forkedLock, waitingDataVIO);
}
/**
 * Fork a hash lock because it has run out of increments on the duplicate PBN.
 * Transfers the new agent and any lock waiters to a new hash lock instance
 * which takes the place of the old lock in the lock map. The old lock remains
 * active, but will not update advice.
 *
 * @param oldLock   The hash lock to fork
 * @param newAgent  The DataVIO that will be the agent for the new lock
 **/
static void forkHashLock(HashLock *oldLock, DataVIO *newAgent)
{
  HashLock *newLock;
  int result = acquireHashLockFromZone(newAgent->hashZone,
                                       &newAgent->chunkName,
                                       oldLock, &newLock);
  if (result != VDO_SUCCESS) {
    abortHashLock(oldLock, newAgent);
    return;
  }

  // Only one of the two locks should update UDS. The old lock is out of
  // references, so it would be poor dedupe advice in the short term.
  oldLock->updateAdvice = false;
  newLock->updateAdvice = true;

  setHashLock(newAgent, newLock);
  setAgent(newLock, newAgent);

  // Move every waiter from the old lock onto the new lock's wait queue.
  notifyAllWaiters(&oldLock->waiters, enterForkedLock, newLock);

  newAgent->isDuplicate = false;
  startWriting(newLock, newAgent);
}
/**
 * Reserve a reference count increment for a DataVIO and launch it on the
 * dedupe path. If no increments are available, this will roll over to a new
 * hash lock and launch the DataVIO as the writing agent for that lock.
 *
 * @param lock      The hash lock
 * @param dataVIO   The DataVIO to deduplicate using the hash lock
 * @param hasClaim  true if the dataVIO already has claimed an increment
 *                  from the duplicate lock
 **/
static void launchDedupe(HashLock *lock, DataVIO *dataVIO, bool hasClaim)
{
  if (!hasClaim && !claimPBNLockIncrement(lock->duplicateLock)) {
    // Out of increments, so must roll over to a new lock.
    forkHashLock(lock, dataVIO);
    return;
  }

  // Deduplicate against the lock's verified location.
  setDuplicateLocation(dataVIO, lock->duplicate);
  launchDuplicateZoneCallback(dataVIO, shareBlock,
                              THIS_LOCATION("$F;cb=shareBlock"));
}
/**
 * Enter the hash lock state where DataVIOs deduplicate in parallel against a
 * true copy of their data on disk. If the agent itself needs to deduplicate,
 * an increment for it must already have been claimed from the duplicate lock,
 * ensuring the hash lock will still have a DataVIO holding it.
 *
 * @param lock         The hash lock
 * @param agent        The DataVIO acting as the agent for the lock
 * @param agentIsDone  true only if the agent has already written or
 *                     deduplicated against its data
 **/
static void startDeduping(HashLock *lock, DataVIO *agent, bool agentIsDone)
{
  setHashLockState(lock, HASH_LOCK_DEDUPING);

  // We don't take the downgraded allocation lock from the agent unless we
  // actually need to deduplicate against it.
  if (lock->duplicateLock == NULL) {
    ASSERT_LOG_ONLY(!isCompressed(agent->newMapped.state),
                    "compression must have shared a lock");
    ASSERT_LOG_ONLY(agentIsDone, "agent must have written the new duplicate");
    transferAllocationLock(agent);
  }

  ASSERT_LOG_ONLY(isPBNReadLock(lock->duplicateLock),
                  "duplicateLock must be a PBN read lock");

  /*
   * This state is not like any of the other states. There is no designated
   * agent--the agent transitioning to this state and all the waiters will be
   * launched to deduplicate in parallel.
   */
  setAgent(lock, NULL);

  /*
   * Launch the agent (if not already deduplicated) and as many lock waiters
   * as we have available increments for on the dedupe path. If we run out of
   * increments, rollover will be triggered and the remaining waiters will be
   * transferred to the new lock.
   */
  if (!agentIsDone) {
    launchDedupe(lock, agent, true);
    agent = NULL;
  }
  while (hasWaiters(&lock->waiters)) {
    launchDedupe(lock, dequeueLockWaiter(lock), false);
  }

  if (agentIsDone) {
    /*
     * In the degenerate case where all the waiters rolled over to a new lock,
     * this will continue to use the old agent to clean up this lock, and
     * otherwise it just lets the agent exit the lock.
     */
    finishDeduping(lock, agent);
  }
}
/**
 * Handle the result of the agent for the lock comparing its data to the
 * duplicate candidate. This continuation is registered in startVerifying().
 *
 * @param completion  The completion of the DataVIO used to verify dedupe
 **/
static void finishVerifying(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertHashLockAgent(agent, __func__);
  HashLock *lock = agent->hashLock;

  if (completion->result != VDO_SUCCESS) {
    // XXX VDOSTORY-190 should convert verify IO errors to verification failure
    abortHashLock(lock, agent);
    return;
  }

  lock->verified = agent->isDuplicate;

  // Only count the result of the initial verification of the advice as valid
  // or stale, and not any re-verifications due to PBN lock releases.
  if (!lock->verifyCounted) {
    lock->verifyCounted = true;
    if (lock->verified) {
      bumpHashZoneValidAdviceCount(agent->hashZone);
    } else {
      bumpHashZoneStaleAdviceCount(agent->hashZone);
    }
  }

  // Even if the block is a verified duplicate, we can't start to deduplicate
  // unless we can claim a reference count increment for the agent.
  if (lock->verified && !claimPBNLockIncrement(lock->duplicateLock)) {
    agent->isDuplicate = false;
    lock->verified = false;
  }

  if (lock->verified) {
    /*
     * VERIFYING -> DEDUPING transition: The advice is for a true duplicate,
     * so start deduplicating against it, if references are available.
     */
    startDeduping(lock, agent, false);
  } else {
    /*
     * VERIFYING -> UNLOCKING transition: Either the verify failed or we'd try
     * to dedupe and roll over immediately, which would fail because it would
     * leave the lock without an agent to release the PBN lock. In both cases,
     * the data will have to be written or compressed, but first the advice
     * PBN must be unlocked by the VERIFYING agent.
     */
    lock->updateAdvice = true;
    startUnlocking(lock, agent);
  }
}
/**
 * Continue the deduplication path for a hash lock by using the agent to read
 * (and possibly decompress) the data at the candidate duplicate location,
 * comparing it to the data in the agent to verify that the candidate is
 * identical to all the DataVIOs sharing the hash. If so, it can be
 * deduplicated against, otherwise a DataVIO allocation will have to be
 * written to and used for dedupe.
 *
 * @param lock   The hash lock (must be LOCKING)
 * @param agent  The DataVIO to use to read and compare candidate data
 **/
static void startVerifying(HashLock *lock, DataVIO *agent)
{
  setHashLockState(lock, HASH_LOCK_VERIFYING);
  ASSERT_LOG_ONLY(!lock->verified, "hash lock only verifies advice once");

  /*
   * XXX VDOSTORY-190 Optimization: This is one of those places where the zone
   * and continuation we want to use depends on the outcome of the comparison.
   * If we could choose which path in the layer thread before continuing, we
   * could save a thread transition in one of the two cases (assuming we're
   * willing to delay visibility of the hash lock state change).
   */
  VDOCompletion *completion = dataVIOAsCompletion(agent);
  agent->lastAsyncOperation = VERIFY_DEDUPLICATION;
  // Continue in finishVerifying() on the hash zone thread.
  setHashZoneCallback(agent, finishVerifying, THIS_LOCATION(NULL));
  completion->layer->verifyDuplication(agent);
}
/**
 * Handle the result of the agent for the lock attempting to obtain a PBN read
 * lock on the candidate duplicate block. This continuation is registered in
 * lockDuplicatePBN().
 *
 * @param completion  The completion of the DataVIO that attempted to get
 *                    the read lock
 **/
static void finishLocking(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertHashLockAgent(agent, __func__);
  HashLock *lock = agent->hashLock;

  if (completion->result != VDO_SUCCESS) {
    // XXX clearDuplicateLocation()?
    agent->isDuplicate = false;
    abortHashLock(lock, agent);
    return;
  }

  if (!agent->isDuplicate) {
    ASSERT_LOG_ONLY(lock->duplicateLock == NULL,
                    "must not hold duplicateLock if not flagged as a duplicate");
    /*
     * LOCKING -> WRITING transition: The advice block is being modified or
     * has no available references, so try to write or compress the data,
     * remembering to update UDS later with the new advice.
     */
    bumpHashZoneStaleAdviceCount(agent->hashZone);
    lock->updateAdvice = true;
    startWriting(lock, agent);
    return;
  }

  ASSERT_LOG_ONLY(lock->duplicateLock != NULL,
                  "must hold duplicateLock if flagged as a duplicate");

  if (!lock->verified) {
    /*
     * LOCKING -> VERIFYING transition: Continue on the unverified dedupe path,
     * reading the candidate duplicate and comparing it to the agent's data to
     * decide whether it is a true duplicate or stale advice.
     */
    startVerifying(lock, agent);
    return;
  }

  if (!claimPBNLockIncrement(lock->duplicateLock)) {
    /*
     * LOCKING -> UNLOCKING transition: The verified block was re-locked, but
     * has no available increments left. Must first release the useless PBN
     * read lock before rolling over to a new copy of the block.
     */
    agent->isDuplicate = false;
    lock->verified = false;
    lock->updateAdvice = true;
    startUnlocking(lock, agent);
    return;
  }

  /*
   * LOCKING -> DEDUPING transition: Continue on the verified dedupe path,
   * deduplicating against a location that was previously verified or
   * written to.
   */
  startDeduping(lock, agent, false);
}
/**
 * Acquire a read lock on the PBN of the block containing candidate duplicate
 * data (compressed or uncompressed). If the PBN is already locked for
 * writing, the lock attempt is abandoned and isDuplicate will be cleared
 * before calling back. This continuation is launched from startLocking(), and
 * calls back to finishLocking() on the hash zone thread.
 *
 * @param completion  The completion of the DataVIO attempting to acquire the
 *                    physical block lock on behalf of its hash lock
 **/
static void lockDuplicatePBN(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  PhysicalZone *zone = agent->duplicate.zone;
  assertInDuplicateZone(agent);
  // Register the callback up front so every exit path below returns control
  // to finishLocking() on the hash zone thread via continueDataVIO().
  setHashZoneCallback(agent, finishLocking, THIS_LOCATION(NULL));
  // While in the zone that owns it, find out how many additional references
  // can be made to the block if it turns out to truly be a duplicate.
  SlabDepot *depot = getSlabDepot(getVDOFromDataVIO(agent));
  unsigned int incrementLimit = getIncrementLimit(depot, agent->duplicate.pbn);
  if (incrementLimit == 0) {
    // We could deduplicate against it later if a reference happened to be
    // released during verification, but it's probably better to bail out now.
    // XXX clearDuplicateLocation()?
    agent->isDuplicate = false;
    continueDataVIO(agent, VDO_SUCCESS);
    return;
  }
  PBNLock *lock;
  int result = attemptPBNLock(zone, agent->duplicate.pbn, VIO_READ_LOCK,
                              &lock);
  if (result != VDO_SUCCESS) {
    continueDataVIO(agent, result);
    return;
  }
  if (!isPBNReadLock(lock)) {
    /*
     * There are three cases of write locks: uncompressed data block writes,
     * compressed (packed) block writes, and block map page writes. In all
     * three cases, we give up on trying to verify the advice and don't bother
     * to try deduplicate against the data in the write lock holder.
     *
     * 1) We don't ever want to try to deduplicate against a block map page.
     *
     * 2a) It's very unlikely we'd deduplicate against an entire packed block,
     * both because of the chance of matching it, and because we don't record
     * advice for it, but for the uncompressed representation of all the
     * fragments it contains. The only way we'd be getting lock contention is
     * if we've written the same representation coincidentally before, had it
     * become unreferenced, and it just happened to be packed together from
     * compressed writes when we go to verify the lucky advice. Giving up is a
     * minuscule loss of potential dedupe.
     *
     * 2b) If the advice is for a slot of a compressed block, it's about to
     * get smashed, and the write smashing it cannot contain our data--it
     * would have to be writing on behalf of our hash lock, but that's
     * impossible since we're the lock agent.
     *
     * 3a) If the lock is held by a DataVIO with different data, the advice is
     * already stale or is about to become stale.
     *
     * 3b) If the lock is held by a DataVIO that matches us, we may as well
     * either write it ourselves (or reference the copy we already wrote)
     * instead of potentially having many duplicates wait for the lock holder
     * to write, journal, hash, and finally arrive in the hash lock. All we
     * lose is a chance to avoid a UDS update in the very rare case of advice
     * for a free block that just happened to be allocated to a DataVIO with
     * the same hash. In async mode, there's also a chance to save on a block
     * write, at the cost of a block verify. Saving on a full block compare in
     * all stale advice cases almost certainly outweighs saving a UDS update
     * in a lucky case where advice would have been saved from becoming stale.
     */
    // XXX clearDuplicateLocation()?
    agent->isDuplicate = false;
    continueDataVIO(agent, VDO_SUCCESS);
    return;
  }
  if (lock->holderCount == 0) {
    // Ensure that the newly-locked block is referenced.
    Slab *slab = getSlab(depot, agent->duplicate.pbn);
    result = acquireProvisionalReference(slab, agent->duplicate.pbn, lock);
    if (result != VDO_SUCCESS) {
      logWarningWithStringError(result,
                                "Error acquiring provisional reference for "
                                "dedupe candidate; aborting dedupe");
      agent->isDuplicate = false;
      releasePBNLock(zone, agent->duplicate.pbn, &lock);
      continueDataVIO(agent, result);
      return;
    }
    /*
     * The increment limit we grabbed earlier is still valid. The lock now
     * holds the rights to acquire all those references. Those rights will be
     * claimed by hash locks sharing this read lock.
     */
    lock->incrementLimit = incrementLimit;
  }
  // We've successfully acquired a read lock on behalf of the hash lock,
  // so mark it as such.
  setDuplicateLock(agent->hashLock, lock);
  /*
   * XXX VDOSTORY-190 Optimization: Same as startLocking() lazily changing
   * state to save on having to switch back to the hash zone thread. Here we
   * could directly launch the block verify, then switch to a hash thread.
   */
  continueDataVIO(agent, VDO_SUCCESS);
}
/**
 * Continue deduplication for a hash lock that has obtained valid advice
 * of a potential duplicate through its agent. Enters the LOCKING state and
 * sends the agent to the duplicate block's physical zone to attempt a PBN
 * read lock there (lockDuplicatePBN()).
 *
 * @param lock   The hash lock (currently must be QUERYING)
 * @param agent  The DataVIO bearing the dedupe advice
 **/
static void startLocking(HashLock *lock, DataVIO *agent)
{
  ASSERT_LOG_ONLY(lock->duplicateLock == NULL,
                  "must not acquire a duplicate lock when already holding it");
  setHashLockState(lock, HASH_LOCK_LOCKING);
  /*
   * XXX VDOSTORY-190 Optimization: If we arrange to continue on the duplicate
   * zone thread when accepting the advice, and don't explicitly change lock
   * states (or use an agent-local state, or an atomic), we can avoid a thread
   * transition here.
   */
  agent->lastAsyncOperation = ACQUIRE_PBN_READ_LOCK;
  launchDuplicateZoneCallback(agent, lockDuplicatePBN, THIS_LOCATION(NULL));
}
/**
 * Re-entry point for the lock agent after it has finished writing or
 * compressing its copy of the data block. The agent will never need to dedupe
 * against anything, so it's done with the lock, but the lock may not be
 * finished with it, as a UDS update might still be needed.
 *
 * If there are other lock holders, the agent will hand the job to one of them
 * and exit, leaving the lock to deduplicate against the just-written block.
 * If there are no other lock holders, the agent either exits (and later tears
 * down the hash lock), or it remains the agent and updates UDS.
 *
 * @param lock   The hash lock, which must be in state WRITING
 * @param agent  The DataVIO that wrote its data for the lock
 **/
static void finishWriting(HashLock *lock, DataVIO *agent)
{
  // Dedupe against the data block or compressed block slot the agent wrote.
  // Since we know the write succeeded, there's no need to verify it.
  lock->duplicate = agent->newMapped;
  lock->verified = true;
  if (isCompressed(lock->duplicate.state) && lock->registered) {
    // Compression means the location we gave in the UDS query is not the
    // location we're using to deduplicate.
    lock->updateAdvice = true;
  }
  // If there are any waiters, we need to start deduping them.
  if (hasWaiters(&lock->waiters)) {
    /*
     * WRITING -> DEDUPING transition: an asynchronously-written block
     * failed to compress, so the PBN lock on the written copy was already
     * transferred. The agent is done with the lock, but the lock may
     * still need to use it to clean up after rollover.
     */
    startDeduping(lock, agent, true);
    return;
  }
  // There are no waiters and the agent has successfully written, so take a
  // step towards being able to release the hash lock (or just release it).
  if (lock->updateAdvice) {
    /*
     * WRITING -> UPDATING transition: There's no waiter and a UDS update is
     * needed, so retain the WRITING agent and use it to launch the update.
     * This happens on compression, rollover, or the QUERYING agent not having
     * an allocation.
     */
    startUpdating(lock, agent);
  } else if (lock->duplicateLock != NULL) {
    /*
     * WRITING -> UNLOCKING transition: There's no waiter and no update
     * needed, but the compressed write gave us a shared duplicate lock that
     * we must release.
     */
    setDuplicateLocation(agent, lock->duplicate);
    startUnlocking(lock, agent);
  } else {
    /*
     * WRITING -> DESTROYING transition: There's no waiter, no update needed,
     * and no duplicate lock held, so both the agent and lock have no more
     * work to do. The agent will release its allocation lock in cleanup.
     */
    // XXX startDestroying(lock, agent);
    startBypassing(lock, NULL);
    exitHashLock(agent);
  }
}
/**
 * Search through the lock waiters for a DataVIO that has an allocation. If
 * one is found, swap agents, put the old agent at the head of the wait queue,
 * then return the new agent. Otherwise, just return the current agent.
 *
 * @param lock  The hash lock to modify
 *
 * @return The agent (possibly newly-selected) that should perform the write
 **/
static DataVIO *selectWritingAgent(HashLock *lock)
{
  // This should-be-impossible condition is the only cause for
  // enqueueDataVIO() to fail later on, where it would be a pain to handle.
  int result = ASSERT(!isWaiting(dataVIOAsWaiter(lock->agent)),
                      "agent must not be waiting");
  if (result != VDO_SUCCESS) {
    return lock->agent;
  }
  WaitQueue tempQueue;
  initializeWaitQueue(&tempQueue);
  // Move waiters to the temp queue one-by-one until we find an allocation.
  // Not ideal to search, but it only happens when nearly out of space.
  DataVIO *dataVIO;
  while (((dataVIO = dequeueLockWaiter(lock)) != NULL)
         && !hasAllocation(dataVIO)) {
    // Use the lower-level enqueue since we're just moving waiters around.
    // (Distinct name: the outer "result" was previously shadowed here.)
    int enqueueResult = enqueueWaiter(&tempQueue, dataVIOAsWaiter(dataVIO));
    // The only error is the DataVIO already being on a wait queue, and since
    // we just dequeued it, that could only happen due to a memory smash or
    // concurrent use of that DataVIO.
    ASSERT_LOG_ONLY(enqueueResult == VDO_SUCCESS,
                    "impossible enqueueWaiter error");
  }
  if (dataVIO != NULL) {
    // Move the rest of the waiters over to the temp queue, preserving the
    // order they arrived at the lock.
    transferAllWaiters(&lock->waiters, &tempQueue);
    // The current agent is being replaced and will have to wait to dedupe;
    // make it the first waiter since it was the first to reach the lock.
    int requeueResult = enqueueDataVIO(&lock->waiters, lock->agent,
                                       THIS_LOCATION(NULL));
    ASSERT_LOG_ONLY(requeueResult == VDO_SUCCESS,
                    "impossible enqueueDataVIO error after isWaiting checked");
    setAgent(lock, dataVIO);
  } else {
    // No one has an allocation, so keep the current agent.
    dataVIO = lock->agent;
  }
  // Swap all the waiters back onto the lock's queue.
  transferAllWaiters(&tempQueue, &lock->waiters);
  return dataVIO;
}
/**
 * Begin the non-duplicate write path for a hash lock that had no advice,
 * selecting a DataVIO with an allocation as a new agent, if necessary,
 * then resuming the agent on the DataVIO write path.
 *
 * @param lock   The hash lock (currently must be QUERYING)
 * @param agent  The DataVIO currently acting as the agent for the lock
 **/
static void startWriting(HashLock *lock, DataVIO *agent)
{
  setHashLockState(lock, HASH_LOCK_WRITING);
  // The agent might not have received an allocation and so can't be used for
  // writing, but it's entirely possible that one of the waiters did.
  if (!hasAllocation(agent)) {
    agent = selectWritingAgent(lock);
    // If none of the waiters had an allocation, the writes all have to fail.
    if (!hasAllocation(agent)) {
      /*
       * XXX VDOSTORY-190 Should we keep a variant of BYPASSING that causes
       * new arrivals to fail immediately if they don't have an allocation? It
       * might be possible that on some path there would be non-waiters still
       * referencing the lock, so it would remain in the map as everything is
       * currently spelled, even if the agent and all the waiters release.
       */
      startBypassing(lock, agent);
      return;
    }
  }
  // If the agent compresses, it might wait indefinitely in the packer,
  // which would be bad if there are any other DataVIOs waiting.
  if (hasWaiters(&lock->waiters)) {
    // XXX in sync mode, transition directly to LOCKING to start dedupe?
    cancelCompression(agent);
  }
  /*
   * Send the agent to the compress/pack/async-write path in vioWrite. If it
   * succeeds, it will return to the hash lock via continueHashLock() and call
   * finishWriting().
   */
  compressData(agent);
}
/**
 * Process the result of a UDS query performed by the agent for the lock. This
 * continuation is registered in startQuerying() and runs on the hash zone
 * thread.
 *
 * @param completion  The completion of the DataVIO that performed the query
 **/
static void finishQuerying(VDOCompletion *completion)
{
  DataVIO *agent = asDataVIO(completion);
  assertHashLockAgent(agent, __func__);
  HashLock *lock = agent->hashLock;
  if (completion->result != VDO_SUCCESS) {
    abortHashLock(lock, agent);
    return;
  }
  if (agent->isDuplicate) {
    lock->duplicate = agent->duplicate;
    /*
     * QUERYING -> LOCKING transition: Valid advice was obtained from UDS.
     * Use the QUERYING agent to start the hash lock on the unverified dedupe
     * path, verifying that the advice can be used.
     */
    startLocking(lock, agent);
  } else {
    // The agent will be used as the duplicate if it has an allocation; if it
    // does, that location was posted to UDS, so no update will be needed.
    lock->updateAdvice = !hasAllocation(agent);
    /*
     * QUERYING -> WRITING transition: There was no advice or the advice
     * wasn't valid, so try to write or compress the data.
     */
    startWriting(lock, agent);
  }
}
/**
 * Start deduplication for a hash lock that has finished initializing by
 * making the DataVIO that requested it the agent, entering the QUERYING
 * state, and using the agent to perform the UDS query on behalf of the lock.
 * finishQuerying() is registered as the continuation for the query result.
 *
 * @param lock     The initialized hash lock
 * @param dataVIO  The DataVIO that has just obtained the new lock
 **/
static void startQuerying(HashLock *lock, DataVIO *dataVIO)
{
  setAgent(lock, dataVIO);
  setHashLockState(lock, HASH_LOCK_QUERYING);

  dataVIO->lastAsyncOperation = CHECK_FOR_DEDUPLICATION;
  setHashZoneCallback(dataVIO, finishQuerying, THIS_LOCATION(NULL));
  dataVIOAsCompletion(dataVIO)->layer->checkForDuplication(dataVIO);
}
/**
 * Complain that a DataVIO has entered a HashLock that is in an unimplemented
 * or unusable state and continue the DataVIO with an error. The assertion
 * failure is logged, and the resulting error code is passed to the DataVIO
 * so it resumes (and fails) on the compression path.
 *
 * @param lock     The hash lock
 * @param dataVIO  The DataVIO attempting to enter the lock
 **/
static void reportBogusLockState(HashLock *lock, DataVIO *dataVIO)
{
  int result = ASSERT_FALSE("hash lock must not be in unimplemented state %s",
                            getHashLockStateName(lock->state));
  continueDataVIOIn(dataVIO, result, compressDataCallback);
}
/**
 * Entry point for a DataVIO joining a hash lock it has just acquired a
 * reference on. Dispatches on the lock's current state: a fresh lock starts
 * the UDS query, a busy lock queues the DataVIO, and a DEDUPING or BYPASSING
 * lock lets it proceed immediately.
 **/
void enterHashLock(DataVIO *dataVIO)
{
  HashLock *lock = dataVIO->hashLock;
  switch (lock->state) {
  case HASH_LOCK_INITIALIZING:
    // First DataVIO to arrive becomes the agent and launches the UDS query.
    startQuerying(lock, dataVIO);
    break;
  case HASH_LOCK_QUERYING:
  case HASH_LOCK_WRITING:
  case HASH_LOCK_UPDATING:
  case HASH_LOCK_LOCKING:
  case HASH_LOCK_VERIFYING:
  case HASH_LOCK_UNLOCKING:
    // The lock is busy, and can't be shared yet.
    waitOnHashLock(lock, dataVIO);
    break;
  case HASH_LOCK_BYPASSING:
    // Bypass dedupe entirely.
    compressData(dataVIO);
    break;
  case HASH_LOCK_DEDUPING:
    // Join the ongoing dedupe against the verified duplicate location.
    launchDedupe(lock, dataVIO, false);
    break;
  case HASH_LOCK_DESTROYING:
    // A lock in this state should not be acquired by new VIOs.
    reportBogusLockState(lock, dataVIO);
    break;
  default:
    reportBogusLockState(lock, dataVIO);
  }
}
/**
 * Re-entry point for a DataVIO returning to its hash lock after completing
 * an asynchronous operation (such as the agent finishing a write, or a
 * deduping DataVIO finishing its journal/map updates). Dispatches on the
 * lock's current state; most states never expect a re-entry.
 **/
void continueHashLock(DataVIO *dataVIO)
{
  HashLock *lock = dataVIO->hashLock;
  // XXX VDOSTORY-190 Eventually we may be able to fold the error handling
  // in at this point instead of using a separate entry point for it.
  switch (lock->state) {
  case HASH_LOCK_WRITING:
    ASSERT_LOG_ONLY(dataVIO == lock->agent,
                    "only the lock agent may continue the lock");
    finishWriting(lock, dataVIO);
    break;
  case HASH_LOCK_DEDUPING:
    finishDeduping(lock, dataVIO);
    break;
  case HASH_LOCK_BYPASSING:
    // This DataVIO has finished the write path and the lock doesn't need it.
    // XXX This isn't going to be correct if DEDUPING ever uses BYPASSING.
    finishDataVIO(dataVIO, VDO_SUCCESS);
    break;
  case HASH_LOCK_INITIALIZING:
  case HASH_LOCK_QUERYING:
  case HASH_LOCK_UPDATING:
  case HASH_LOCK_LOCKING:
  case HASH_LOCK_VERIFYING:
  case HASH_LOCK_UNLOCKING:
  case HASH_LOCK_DESTROYING:
    // A lock in this state should never be re-entered.
    reportBogusLockState(lock, dataVIO);
    break;
  default:
    reportBogusLockState(lock, dataVIO);
  }
}
/**
 * Re-entry point for a DataVIO returning to its hash lock after an error.
 * Aborts the lock on behalf of the DataVIO; abortHashLock() handles agent
 * hand-off and waiter cleanup.
 **/
void continueHashLockOnError(DataVIO *dataVIO)
{
  // XXX We could simply use continueHashLock() and check for errors in that.
  abortHashLock(dataVIO->hashLock, dataVIO);
}
/**
 * Check whether the data in DataVIOs sharing a lock is different than in a
 * DataVIO seeking to share the lock, which should only be possible in the
 * extremely unlikely case of a hash collision.
 *
 * @param lock       The lock to check
 * @param candidate  The DataVIO seeking to share the lock
 *
 * @return <code>true</code> if the given DataVIO must not share the lock
 *         because it doesn't have the same data as the lock holders
 **/
static bool isHashCollision(HashLock *lock, DataVIO *candidate)
{
  // With no holders there is nothing to compare against, so no collision.
  if (isRingEmpty(&lock->duplicateRing)) {
    return false;
  }

  // Any single holder suffices: all current holders have identical data.
  DataVIO *lockHolder = dataVIOFromLockNode(lock->duplicateRing.next);
  PhysicalLayer *layer = dataVIOAsCompletion(candidate)->layer;
  if (layer->compareDataVIOs(lockHolder, candidate)) {
    bumpHashZoneDataMatchCount(candidate->hashZone);
    return false;
  }

  bumpHashZoneCollisionCount(candidate->hashZone);
  return true;
}
/**
 * Check that a DataVIO is eligible to acquire a hash lock: it must not
 * already hold one, must not be on a hash lock ring, and must not hold a
 * recovery lock.
 *
 * @param dataVIO  The DataVIO to check
 *
 * @return VDO_SUCCESS if all preconditions hold, or an assertion error code
 **/
static inline int assertHashLockPreconditions(const DataVIO *dataVIO)
{
  int result = ASSERT(dataVIO->hashLock == NULL,
                      "must not already hold a hash lock");
  if (result == VDO_SUCCESS) {
    result = ASSERT(isRingEmpty(&dataVIO->hashLockNode),
                    "must not already be a member of a hash lock ring");
  }
  if (result == VDO_SUCCESS) {
    result = ASSERT(dataVIO->recoverySequenceNumber == 0,
                    "must not hold a recovery lock when getting a hash lock");
  }
  return result;
}
/**
 * Acquire or share a hash lock for the chunk name of a DataVIO, and attach
 * it to the DataVIO. In the extremely unlikely case of a hash collision with
 * an existing lock holder, the DataVIO's hashLock field is left unset so it
 * bypasses dedupe entirely.
 *
 * @return VDO_SUCCESS, or an error from precondition checks or lock lookup
 **/
int acquireHashLock(DataVIO *dataVIO)
{
  int result = assertHashLockPreconditions(dataVIO);
  if (result != VDO_SUCCESS) {
    return result;
  }

  HashLock *lock;
  result = acquireHashLockFromZone(dataVIO->hashZone, &dataVIO->chunkName,
                                   NULL, &lock);
  if (result != VDO_SUCCESS) {
    return result;
  }

  if (!isHashCollision(lock, dataVIO)) {
    setHashLock(dataVIO, lock);
  }
  // Otherwise: hash collisions are extremely unlikely, but the bogus dedupe
  // would be a data corruption. Bypass dedupe entirely by leaving hashLock
  // unset. XXX clear hashZone too?
  return VDO_SUCCESS;
}
/**
 * Release a DataVIO's share of its hash lock, if it has one. When this is
 * the last reference, the lock transitions to DESTROYING and is returned to
 * its zone.
 **/
void releaseHashLock(DataVIO *dataVIO)
{
  HashLock *lock = dataVIO->hashLock;
  if (lock == NULL) {
    return;
  }

  setHashLock(dataVIO, NULL);
  if (lock->referenceCount == 0) {
    // This was the last reference; tear the lock down.
    setHashLockState(lock, HASH_LOCK_DESTROYING);
    returnHashLockToZone(dataVIO->hashZone, &lock);
  }
  // Otherwise the lock is still in use by other DataVIOs.
}
/**
 * Transfer a DataVIO's downgraded allocation PBN lock to the DataVIO's hash
 * lock, converting it to a duplicate PBN lock. The DataVIO's allocation
 * fields are cleared since ownership of the lock moves to the hash lock.
 *
 * @param dataVIO  The DataVIO holding the allocation lock to transfer
 **/
static void transferAllocationLock(DataVIO *dataVIO)
{
  ASSERT_LOG_ONLY(dataVIO->newMapped.pbn == getDataVIOAllocation(dataVIO),
                  "transferred lock must be for the block written");
  AllocatingVIO *allocatingVIO = dataVIOAsAllocatingVIO(dataVIO);
  PBNLock *pbnLock = allocatingVIO->allocationLock;
  // Detach the lock from the AllocatingVIO before handing it over.
  allocatingVIO->allocationLock = NULL;
  allocatingVIO->allocation = ZERO_BLOCK;
  ASSERT_LOG_ONLY(isPBNReadLock(pbnLock),
                  "must have downgraded the allocation lock before transfer");
  HashLock *hashLock = dataVIO->hashLock;
  // The just-written block becomes the duplicate location for the lock.
  hashLock->duplicate = dataVIO->newMapped;
  dataVIO->duplicate = dataVIO->newMapped;
  // Since the lock is being transferred, the holder count doesn't change (and
  // isn't even safe to examine on this thread).
  hashLock->duplicateLock = pbnLock;
}
/**
 * Make a DataVIO's hash lock a shared holder of the PBN lock on a compressed
 * block write. The first sharer downgrades the write lock to a read lock,
 * and every sharer claims a reference increment so the block cannot be
 * released before the DataVIO journals its reference.
 **/
void shareCompressedWriteLock(DataVIO *dataVIO, PBNLock *pbnLock)
{
  ASSERT_LOG_ONLY(getDuplicateLock(dataVIO) == NULL,
                  "a duplicate PBN lock should not exist when writing");
  ASSERT_LOG_ONLY(isCompressed(dataVIO->newMapped.state),
                  "lock transfer must be for a compressed write");
  assertInNewMappedZone(dataVIO);
  // First sharer downgrades the lock.
  if (!isPBNReadLock(pbnLock)) {
    downgradePBNWriteLock(pbnLock);
  }
  // Get a share of the PBN lock, ensuring it cannot be released until
  // after this DataVIO has had a chance to journal a reference.
  dataVIO->duplicate = dataVIO->newMapped;
  dataVIO->hashLock->duplicate = dataVIO->newMapped;
  setDuplicateLock(dataVIO->hashLock, pbnLock);
  // Claim a reference for this DataVIO, which is necessary since another
  // HashLock might start deduplicating against it before our incRef.
  bool claimed = claimPBNLockIncrement(pbnLock);
  ASSERT_LOG_ONLY(claimed, "impossible to fail to claim an initial increment");
}