Blame src/memjournal.c

Packit 87b942
/*
Packit 87b942
** 2008 October 7
Packit 87b942
**
Packit 87b942
** The author disclaims copyright to this source code.  In place of
Packit 87b942
** a legal notice, here is a blessing:
Packit 87b942
**
Packit 87b942
**    May you do good and not evil.
Packit 87b942
**    May you find forgiveness for yourself and forgive others.
Packit 87b942
**    May you share freely, never taking more than you give.
Packit 87b942
**
Packit 87b942
*************************************************************************
Packit 87b942
**
Packit 87b942
** This file contains code used to implement an in-memory rollback journal.
Packit 87b942
** The in-memory rollback journal is used to journal transactions for
Packit 87b942
** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
Packit 87b942
**
Packit 87b942
** Update:  The in-memory journal is also used to temporarily cache
Packit 87b942
** smaller journals that are not critical for power-loss recovery.
Packit 87b942
** For example, statement journals that are not too big will be held
Packit 87b942
** entirely in memory, thus reducing the number of file I/O calls, and
Packit 87b942
** more importantly, reducing temporary file creation events.  If these
Packit 87b942
** journals become too large for memory, they are spilled to disk.  But
Packit 87b942
** in the common case, they are usually small and no file I/O needs to
Packit 87b942
** occur.
Packit 87b942
*/
Packit 87b942
#include "sqliteInt.h"
Packit 87b942
Packit 87b942
/* Forward declarations of the structures that are private to this file. */
typedef struct FileChunk FileChunk;
typedef struct FilePoint FilePoint;
typedef struct MemJournal MemJournal;
Packit 87b942
Packit 87b942
/*
Packit 87b942
** The rollback journal is composed of a linked list of these structures.
Packit 87b942
**
Packit 87b942
** The zChunk array is always at least 8 bytes in size - usually much more.
Packit 87b942
** Its actual size is stored in the MemJournal.nChunkSize variable.
Packit 87b942
*/
Packit 87b942
struct FileChunk {
Packit 87b942
  FileChunk *pNext;               /* Next chunk in the journal */
Packit 87b942
  u8 zChunk[8];                   /* Content of this chunk */
Packit 87b942
};
Packit 87b942
Packit 87b942
/*
** Default size, in bytes, of the whole allocation made for one FileChunk
** object (list header plus payload).
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024

/*
** Number of bytes to allocate for a FileChunk whose payload must hold
** nChunkSize bytes.  The 8 that is subtracted accounts for the zChunk[8]
** array already included in sizeof(FileChunk).
*/
#define fileChunkSize(nChunkSize) (((nChunkSize)-8) + sizeof(FileChunk))
Packit 87b942
Packit 87b942
/*
Packit 87b942
** An instance of this object serves as a cursor into the rollback journal.
Packit 87b942
** The cursor can be either for reading or writing.
Packit 87b942
*/
Packit 87b942
struct FilePoint {
Packit 87b942
  sqlite3_int64 iOffset;          /* Offset from the beginning of the file */
Packit 87b942
  FileChunk *pChunk;              /* Specific chunk into which cursor points */
Packit 87b942
};
Packit 87b942
Packit 87b942
/*
Packit 87b942
** This structure is a subclass of sqlite3_file. Each open memory-journal
Packit 87b942
** is an instance of this class.
Packit 87b942
*/
Packit 87b942
struct MemJournal {
Packit 87b942
  const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
Packit 87b942
  int nChunkSize;                 /* In-memory chunk-size */
Packit 87b942
Packit 87b942
  int nSpill;                     /* Bytes of data before flushing */
Packit 87b942
  int nSize;                      /* Bytes of data currently in memory */
Packit 87b942
  FileChunk *pFirst;              /* Head of in-memory chunk-list */
Packit 87b942
  FilePoint endpoint;             /* Pointer to the end of the file */
Packit 87b942
  FilePoint readpoint;            /* Pointer to the end of the last xRead() */
Packit 87b942
Packit 87b942
  int flags;                      /* xOpen flags */
Packit 87b942
  sqlite3_vfs *pVfs;              /* The "real" underlying VFS */
Packit 87b942
  const char *zJournal;           /* Name of the journal file */
Packit 87b942
};
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Read data from the in-memory journal file.  This is the implementation
Packit 87b942
** of the sqlite3_vfs.xRead method.
Packit 87b942
*/
Packit 87b942
static int memjrnlRead(
Packit 87b942
  sqlite3_file *pJfd,    /* The journal file from which to read */
Packit 87b942
  void *zBuf,            /* Put the results here */
Packit 87b942
  int iAmt,              /* Number of bytes to read */
Packit 87b942
  sqlite_int64 iOfst     /* Begin reading at this offset */
Packit 87b942
){
Packit 87b942
  MemJournal *p = (MemJournal *)pJfd;
Packit 87b942
  u8 *zOut = zBuf;
Packit 87b942
  int nRead = iAmt;
Packit 87b942
  int iChunkOffset;
Packit 87b942
  FileChunk *pChunk;
Packit 87b942
Packit 87b942
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
Packit 87b942
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
Packit 87b942
  if( (iAmt+iOfst)>p->endpoint.iOffset ){
Packit 87b942
    return SQLITE_IOERR_SHORT_READ;
Packit 87b942
  }
Packit 87b942
#endif
Packit 87b942
Packit 87b942
  assert( (iAmt+iOfst)<=p->endpoint.iOffset );
Packit 87b942
  assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
Packit 87b942
  if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
Packit 87b942
    sqlite3_int64 iOff = 0;
Packit 87b942
    for(pChunk=p->pFirst; 
Packit 87b942
        ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
Packit 87b942
        pChunk=pChunk->pNext
Packit 87b942
    ){
Packit 87b942
      iOff += p->nChunkSize;
Packit 87b942
    }
Packit 87b942
  }else{
Packit 87b942
    pChunk = p->readpoint.pChunk;
Packit 87b942
    assert( pChunk!=0 );
Packit 87b942
  }
Packit 87b942
Packit 87b942
  iChunkOffset = (int)(iOfst%p->nChunkSize);
Packit 87b942
  do {
Packit 87b942
    int iSpace = p->nChunkSize - iChunkOffset;
Packit 87b942
    int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
Packit 87b942
    memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
Packit 87b942
    zOut += nCopy;
Packit 87b942
    nRead -= iSpace;
Packit 87b942
    iChunkOffset = 0;
Packit 87b942
  } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
Packit 87b942
  p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
Packit 87b942
  p->readpoint.pChunk = pChunk;
Packit 87b942
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Free the list of FileChunk structures headed at MemJournal.pFirst.
Packit 87b942
*/
Packit 87b942
static void memjrnlFreeChunks(MemJournal *p){
Packit 87b942
  FileChunk *pIter;
Packit 87b942
  FileChunk *pNext;
Packit 87b942
  for(pIter=p->pFirst; pIter; pIter=pNext){
Packit 87b942
    pNext = pIter->pNext;
Packit 87b942
    sqlite3_free(pIter);
Packit 87b942
  } 
Packit 87b942
  p->pFirst = 0;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Flush the contents of memory to a real file on disk.
Packit 87b942
*/
Packit 87b942
static int memjrnlCreateFile(MemJournal *p){
Packit 87b942
  int rc;
Packit 87b942
  sqlite3_file *pReal = (sqlite3_file*)p;
Packit 87b942
  MemJournal copy = *p;
Packit 87b942
Packit 87b942
  memset(p, 0, sizeof(MemJournal));
Packit 87b942
  rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
Packit 87b942
  if( rc==SQLITE_OK ){
Packit 87b942
    int nChunk = copy.nChunkSize;
Packit 87b942
    i64 iOff = 0;
Packit 87b942
    FileChunk *pIter;
Packit 87b942
    for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
Packit 87b942
      if( iOff + nChunk > copy.endpoint.iOffset ){
Packit 87b942
        nChunk = copy.endpoint.iOffset - iOff;
Packit 87b942
      }
Packit 87b942
      rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
Packit 87b942
      if( rc ) break;
Packit 87b942
      iOff += nChunk;
Packit 87b942
    }
Packit 87b942
    if( rc==SQLITE_OK ){
Packit 87b942
      /* No error has occurred. Free the in-memory buffers. */
Packit 87b942
      memjrnlFreeChunks(©);
Packit 87b942
    }
Packit 87b942
  }
Packit 87b942
  if( rc!=SQLITE_OK ){
Packit 87b942
    /* If an error occurred while creating or writing to the file, restore
Packit 87b942
    ** the original before returning. This way, SQLite uses the in-memory
Packit 87b942
    ** journal data to roll back changes made to the internal page-cache
Packit 87b942
    ** before this function was called.  */
Packit 87b942
    sqlite3OsClose(pReal);
Packit 87b942
    *p = copy;
Packit 87b942
  }
Packit 87b942
  return rc;
Packit 87b942
}
Packit 87b942
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Write data to the file.
Packit 87b942
*/
Packit 87b942
static int memjrnlWrite(
Packit 87b942
  sqlite3_file *pJfd,    /* The journal file into which to write */
Packit 87b942
  const void *zBuf,      /* Take data to be written from here */
Packit 87b942
  int iAmt,              /* Number of bytes to write */
Packit 87b942
  sqlite_int64 iOfst     /* Begin writing at this offset into the file */
Packit 87b942
){
Packit 87b942
  MemJournal *p = (MemJournal *)pJfd;
Packit 87b942
  int nWrite = iAmt;
Packit 87b942
  u8 *zWrite = (u8 *)zBuf;
Packit 87b942
Packit 87b942
  /* If the file should be created now, create it and write the new data
Packit 87b942
  ** into the file on disk. */
Packit 87b942
  if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
Packit 87b942
    int rc = memjrnlCreateFile(p);
Packit 87b942
    if( rc==SQLITE_OK ){
Packit 87b942
      rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
Packit 87b942
    }
Packit 87b942
    return rc;
Packit 87b942
  }
Packit 87b942
Packit 87b942
  /* If the contents of this write should be stored in memory */
Packit 87b942
  else{
Packit 87b942
    /* An in-memory journal file should only ever be appended to. Random
Packit 87b942
    ** access writes are not required. The only exception to this is when
Packit 87b942
    ** the in-memory journal is being used by a connection using the
Packit 87b942
    ** atomic-write optimization. In this case the first 28 bytes of the
Packit 87b942
    ** journal file may be written as part of committing the transaction. */ 
Packit 87b942
    assert( iOfst==p->endpoint.iOffset || iOfst==0 );
Packit 87b942
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
Packit 87b942
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
Packit 87b942
    if( iOfst==0 && p->pFirst ){
Packit 87b942
      assert( p->nChunkSize>iAmt );
Packit 87b942
      memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
Packit 87b942
    }else
Packit 87b942
#else
Packit 87b942
    assert( iOfst>0 || p->pFirst==0 );
Packit 87b942
#endif
Packit 87b942
    {
Packit 87b942
      while( nWrite>0 ){
Packit 87b942
        FileChunk *pChunk = p->endpoint.pChunk;
Packit 87b942
        int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
Packit 87b942
        int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
Packit 87b942
Packit 87b942
        if( iChunkOffset==0 ){
Packit 87b942
          /* New chunk is required to extend the file. */
Packit 87b942
          FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
Packit 87b942
          if( !pNew ){
Packit 87b942
            return SQLITE_IOERR_NOMEM_BKPT;
Packit 87b942
          }
Packit 87b942
          pNew->pNext = 0;
Packit 87b942
          if( pChunk ){
Packit 87b942
            assert( p->pFirst );
Packit 87b942
            pChunk->pNext = pNew;
Packit 87b942
          }else{
Packit 87b942
            assert( !p->pFirst );
Packit 87b942
            p->pFirst = pNew;
Packit 87b942
          }
Packit 87b942
          p->endpoint.pChunk = pNew;
Packit 87b942
        }
Packit 87b942
Packit 87b942
        memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace);
Packit 87b942
        zWrite += iSpace;
Packit 87b942
        nWrite -= iSpace;
Packit 87b942
        p->endpoint.iOffset += iSpace;
Packit 87b942
      }
Packit 87b942
      p->nSize = iAmt + iOfst;
Packit 87b942
    }
Packit 87b942
  }
Packit 87b942
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Truncate the file.
Packit 87b942
**
Packit 87b942
** If the journal file is already on disk, truncate it there. Or, if it
Packit 87b942
** is still in main memory but is being truncated to zero bytes in size,
Packit 87b942
** ignore 
Packit 87b942
*/
Packit 87b942
static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
Packit 87b942
  MemJournal *p = (MemJournal *)pJfd;
Packit 87b942
  if( ALWAYS(size==0) ){
Packit 87b942
    memjrnlFreeChunks(p);
Packit 87b942
    p->nSize = 0;
Packit 87b942
    p->endpoint.pChunk = 0;
Packit 87b942
    p->endpoint.iOffset = 0;
Packit 87b942
    p->readpoint.pChunk = 0;
Packit 87b942
    p->readpoint.iOffset = 0;
Packit 87b942
  }
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Close the file.
Packit 87b942
*/
Packit 87b942
static int memjrnlClose(sqlite3_file *pJfd){
Packit 87b942
  MemJournal *p = (MemJournal *)pJfd;
Packit 87b942
  memjrnlFreeChunks(p);
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Sync the file.
Packit 87b942
**
Packit 87b942
** If the real file has been created, call its xSync method. Otherwise, 
Packit 87b942
** syncing an in-memory journal is a no-op. 
Packit 87b942
*/
Packit 87b942
static int memjrnlSync(sqlite3_file *pJfd, int flags){
Packit 87b942
  UNUSED_PARAMETER2(pJfd, flags);
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Query the size of the file in bytes.
Packit 87b942
*/
Packit 87b942
static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
Packit 87b942
  MemJournal *p = (MemJournal *)pJfd;
Packit 87b942
  *pSize = (sqlite_int64) p->endpoint.iOffset;
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Table of methods for MemJournal sqlite3_file object.
Packit 87b942
*/
Packit 87b942
static const struct sqlite3_io_methods MemJournalMethods = {
Packit 87b942
  1,                /* iVersion */
Packit 87b942
  memjrnlClose,     /* xClose */
Packit 87b942
  memjrnlRead,      /* xRead */
Packit 87b942
  memjrnlWrite,     /* xWrite */
Packit 87b942
  memjrnlTruncate,  /* xTruncate */
Packit 87b942
  memjrnlSync,      /* xSync */
Packit 87b942
  memjrnlFileSize,  /* xFileSize */
Packit 87b942
  0,                /* xLock */
Packit 87b942
  0,                /* xUnlock */
Packit 87b942
  0,                /* xCheckReservedLock */
Packit 87b942
  0,                /* xFileControl */
Packit 87b942
  0,                /* xSectorSize */
Packit 87b942
  0,                /* xDeviceCharacteristics */
Packit 87b942
  0,                /* xShmMap */
Packit 87b942
  0,                /* xShmLock */
Packit 87b942
  0,                /* xShmBarrier */
Packit 87b942
  0,                /* xShmUnmap */
Packit 87b942
  0,                /* xFetch */
Packit 87b942
  0                 /* xUnfetch */
Packit 87b942
};
Packit 87b942
Packit 87b942
/* 
Packit 87b942
** Open a journal file. 
Packit 87b942
**
Packit 87b942
** The behaviour of the journal file depends on the value of parameter 
Packit 87b942
** nSpill. If nSpill is 0, then the journal file is always create and 
Packit 87b942
** accessed using the underlying VFS. If nSpill is less than zero, then
Packit 87b942
** all content is always stored in main-memory. Finally, if nSpill is a
Packit 87b942
** positive value, then the journal file is initially created in-memory
Packit 87b942
** but may be flushed to disk later on. In this case the journal file is
Packit 87b942
** flushed to disk either when it grows larger than nSpill bytes in size,
Packit 87b942
** or when sqlite3JournalCreate() is called.
Packit 87b942
*/
Packit 87b942
int sqlite3JournalOpen(
Packit 87b942
  sqlite3_vfs *pVfs,         /* The VFS to use for actual file I/O */
Packit 87b942
  const char *zName,         /* Name of the journal file */
Packit 87b942
  sqlite3_file *pJfd,        /* Preallocated, blank file handle */
Packit 87b942
  int flags,                 /* Opening flags */
Packit 87b942
  int nSpill                 /* Bytes buffered before opening the file */
Packit 87b942
){
Packit 87b942
  MemJournal *p = (MemJournal*)pJfd;
Packit 87b942
Packit 87b942
  /* Zero the file-handle object. If nSpill was passed zero, initialize
Packit 87b942
  ** it using the sqlite3OsOpen() function of the underlying VFS. In this
Packit 87b942
  ** case none of the code in this module is executed as a result of calls
Packit 87b942
  ** made on the journal file-handle.  */
Packit 87b942
  memset(p, 0, sizeof(MemJournal));
Packit 87b942
  if( nSpill==0 ){
Packit 87b942
    return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
Packit 87b942
  }
Packit 87b942
Packit 87b942
  if( nSpill>0 ){
Packit 87b942
    p->nChunkSize = nSpill;
Packit 87b942
  }else{
Packit 87b942
    p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
Packit 87b942
    assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) );
Packit 87b942
  }
Packit 87b942
Packit 87b942
  p->pMethod = (const sqlite3_io_methods*)&MemJournalMethods;
Packit 87b942
  p->nSpill = nSpill;
Packit 87b942
  p->flags = flags;
Packit 87b942
  p->zJournal = zName;
Packit 87b942
  p->pVfs = pVfs;
Packit 87b942
  return SQLITE_OK;
Packit 87b942
}
Packit 87b942
Packit 87b942
/*
Packit 87b942
** Open an in-memory journal file.
Packit 87b942
*/
Packit 87b942
void sqlite3MemJournalOpen(sqlite3_file *pJfd){
Packit 87b942
  sqlite3JournalOpen(0, 0, pJfd, 0, -1);
Packit 87b942
}
Packit 87b942
Packit 87b942
#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
/*
** If pJfd is a MemJournal that is allowed to live on disk (it was opened
** with a positive nSpill or, in batch-atomic-write builds, with the
** SQLITE_OPEN_MAIN_JOURNAL flag) and its file has not been created yet,
** create the file now.
**
** Returns the result of memjrnlCreateFile() when the file is created,
** and SQLITE_OK when there is nothing to do.
*/
int sqlite3JournalCreate(sqlite3_file *pJfd){
  MemJournal *p = (MemJournal*)pJfd;

  /* Nothing to do unless the handle is still an in-memory journal. */
  if( p->pMethod!=&MemJournalMethods ){
    return SQLITE_OK;
  }

#ifdef SQLITE_ENABLE_ATOMIC_WRITE
  if( p->nSpill>0 ){
    return memjrnlCreateFile(p);
  }
#else
  /* While this appears to not be possible without ATOMIC_WRITE, the
  ** paths are complex, so it seems prudent to leave the test in as
  ** a NEVER(), in case our analysis is subtly flawed. */
  if( NEVER(p->nSpill>0) ){
    return memjrnlCreateFile(p);
  }
#endif

#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
  if( p->flags & SQLITE_OPEN_MAIN_JOURNAL ){
    return memjrnlCreateFile(p);
  }
#endif

  return SQLITE_OK;
}
#endif
Packit 87b942
Packit 87b942
/*
Packit 87b942
** The file-handle passed as the only argument is open on a journal file.
Packit 87b942
** Return true if this "journal file" is currently stored in heap memory,
Packit 87b942
** or false otherwise.
Packit 87b942
*/
Packit 87b942
int sqlite3JournalIsInMemory(sqlite3_file *p){
Packit 87b942
  return p->pMethods==&MemJournalMethods;
Packit 87b942
}
Packit 87b942
Packit 87b942
/* 
Packit 87b942
** Return the number of bytes required to store a JournalFile that uses vfs
Packit 87b942
** pVfs to create the underlying on-disk files.
Packit 87b942
*/
Packit 87b942
int sqlite3JournalSize(sqlite3_vfs *pVfs){
Packit 87b942
  return MAX(pVfs->szOsFile, (int)sizeof(MemJournal));
Packit 87b942
}