Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions

freelist.c

/*-------------------------------------------------------------------------
 *
 * freelist.c
 *      routines for managing the buffer pool's replacement strategy.
 *
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.66 2009/01/01 17:23:47 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/buf_internals.h"
#include "storage/bufmgr.h"


/*
 * The shared freelist control information.
 */
typedef struct
{
      /* Clock sweep hand: index of next buffer to consider grabbing */
      int               nextVictimBuffer;

      int               firstFreeBuffer;  /* Head of list of unused buffers */
      int               lastFreeBuffer; /* Tail of list of unused buffers */

      /*
       * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
       * when the list is empty)
       */

      /*
       * Statistics.    These counters should be wide enough that they can't
       * overflow during a single bgwriter cycle.
       */
      uint32            completePasses; /* Complete cycles of the clock sweep */
      uint32            numBufferAllocs;  /* Buffers allocated since last reset */
} BufferStrategyControl;

/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;

/*
 * Private (non-shared) state for managing a ring of shared buffers to re-use.
 * This is currently the only kind of BufferAccessStrategy object, but someday
 * we might have more kinds.
 */
typedef struct BufferAccessStrategyData
{
      /* Overall strategy type */
      BufferAccessStrategyType btype;
      /* Number of elements in buffers[] array */
      int               ring_size;

      /*
       * Index of the "current" slot in the ring, ie, the one most recently
       * returned by GetBufferFromRing.
       */
      int               current;

      /*
       * True if the buffer just returned by StrategyGetBuffer had been in the
       * ring already.
       */
      bool        current_was_in_ring;

      /*
       * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
       * have not yet selected a buffer for this ring slot.  For allocation
       * simplicity this is palloc'd together with the fixed fields of the
       * struct.
       */
      Buffer            buffers[1];       /* VARIABLE SIZE ARRAY */
} BufferAccessStrategyData;


/* Prototypes for internal functions */
static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
static void AddBufferToRing(BufferAccessStrategy strategy,
                        volatile BufferDesc *buf);


/*
 * StrategyGetBuffer
 *
 *    Called by the bufmgr to get the next candidate buffer to use in
 *    BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *    the selected buffer must not currently be pinned by anyone.
 *
 *    strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *    To ensure that no one else can pin the buffer before we do, we must
 *    return the buffer with the buffer header spinlock still held.  If
 *    *lock_held is set on exit, we have returned with the BufFreelistLock
 *    still held, as well; the caller must release that lock once the spinlock
 *    is dropped.  We do it that way because releasing the BufFreelistLock
 *    might awaken other processes, and it would be bad to do the associated
 *    kernel calls while holding the buffer header spinlock.
 */
volatile BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
      volatile BufferDesc *buf;
      int               trycounter;

      /*
       * If given a strategy object, see whether it can select a buffer. We
       * assume strategy objects don't need the BufFreelistLock.
       */
      if (strategy != NULL)
      {
            buf = GetBufferFromRing(strategy);
            if (buf != NULL)
            {
                  *lock_held = false;
                  return buf;
            }
      }

      /* Nope, so lock the freelist */
      *lock_held = true;
      LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

      /*
       * We count buffer allocation requests so that the bgwriter can estimate
       * the rate of buffer consumption.  Note that buffers recycled by a
       * strategy object are intentionally not counted here.
       */
      StrategyControl->numBufferAllocs++;

      /*
       * Try to get a buffer from the freelist.  Note that the freeNext fields
       * are considered to be protected by the BufFreelistLock not the
       * individual buffer spinlocks, so it's OK to manipulate them without
       * holding the spinlock.
       */
      while (StrategyControl->firstFreeBuffer >= 0)
      {
            buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
            Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

            /* Unconditionally remove buffer from freelist */
            StrategyControl->firstFreeBuffer = buf->freeNext;
            buf->freeNext = FREENEXT_NOT_IN_LIST;

            /*
             * If the buffer is pinned or has a nonzero usage_count, we cannot use
             * it; discard it and retry.  (This can only happen if VACUUM put a
             * valid buffer in the freelist and then someone else used it before
             * we got to it.  It's probably impossible altogether as of 8.3, but
             * we'd better check anyway.)
             */
            LockBufHdr(buf);
            if (buf->refcount == 0 && buf->usage_count == 0)
            {
                  if (strategy != NULL)
                        AddBufferToRing(strategy, buf);
                  return buf;
            }
            UnlockBufHdr(buf);
      }

      /* Nothing on the freelist, so run the "clock sweep" algorithm */
      trycounter = NBuffers;
      for (;;)
      {
            buf = &BufferDescriptors[StrategyControl->nextVictimBuffer];

            if (++StrategyControl->nextVictimBuffer >= NBuffers)
            {
                  StrategyControl->nextVictimBuffer = 0;
                  StrategyControl->completePasses++;
            }

            /*
             * If the buffer is pinned or has a nonzero usage_count, we cannot use
             * it; decrement the usage_count (unless pinned) and keep scanning.
             */
            LockBufHdr(buf);
            if (buf->refcount == 0)
            {
                  if (buf->usage_count > 0)
                  {
                        buf->usage_count--;
                        trycounter = NBuffers;
                  }
                  else
                  {
                        /* Found a usable buffer */
                        if (strategy != NULL)
                              AddBufferToRing(strategy, buf);
                        return buf;
                  }
            }
            else if (--trycounter == 0)
            {
                  /*
                   * We've scanned all the buffers without making any state changes,
                   * so all the buffers are pinned (or were when we looked at them).
                   * We could hope that someone will free one eventually, but it's
                   * probably better to fail than to risk getting stuck in an
                   * infinite loop.
                   */
                  UnlockBufHdr(buf);
                  elog(ERROR, "no unpinned buffers available");
            }
            UnlockBufHdr(buf);
      }

      /* not reached */
      return NULL;
}

/*
 * StrategyFreeBuffer: put a buffer on the freelist
 */
void
StrategyFreeBuffer(volatile BufferDesc *buf)
{
      LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);

      /*
       * It is possible that we are told to put something in the freelist that
       * is already in it; don't screw up the list if so.
       */
      if (buf->freeNext == FREENEXT_NOT_IN_LIST)
      {
            buf->freeNext = StrategyControl->firstFreeBuffer;
            if (buf->freeNext < 0)
                  StrategyControl->lastFreeBuffer = buf->buf_id;
            StrategyControl->firstFreeBuffer = buf->buf_id;
      }

      LWLockRelease(BufFreelistLock);
}

/*
 * StrategySyncStart -- tell BufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed.      The alloc count is reset after
 * being read.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
      int               result;

      LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
      result = StrategyControl->nextVictimBuffer;
      if (complete_passes)
            *complete_passes = StrategyControl->completePasses;
      if (num_buf_alloc)
      {
            *num_buf_alloc = StrategyControl->numBufferAllocs;
            StrategyControl->numBufferAllocs = 0;
      }
      LWLockRelease(BufFreelistLock);
      return result;
}


/*
 * StrategyShmemSize
 *
 * estimate the size of shared memory used by the freelist-related structures.
 *
 * Note: for somewhat historical reasons, the buffer lookup hashtable size
 * is also determined here.
 */
Size
StrategyShmemSize(void)
{
      Size        size = 0;

      /* size of lookup hash table ... see comment in StrategyInitialize */
      size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));

      /* size of the shared replacement strategy control block */
      size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));

      return size;
}

/*
 * StrategyInitialize -- initialize the buffer cache replacement
 *          strategy.
 *
 * Assumes: All of the buffers are already built into a linked list.
 *          Only called by postmaster and only during initialization.
 */
void
StrategyInitialize(bool init)
{
      bool        found;

      /*
       * Initialize the shared buffer lookup hashtable.
       *
       * Since we can't tolerate running out of lookup table entries, we must be
       * sure to specify an adequate table size here.  The maximum steady-state
       * usage is of course NBuffers entries, but BufferAlloc() tries to insert
       * a new entry before deleting the old.  In principle this could be
       * happening in each partition concurrently, so we could need as many as
       * NBuffers + NUM_BUFFER_PARTITIONS entries.
       */
      InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

      /*
       * Get or create the shared strategy control block
       */
      StrategyControl = (BufferStrategyControl *)
            ShmemInitStruct("Buffer Strategy Status",
                                    sizeof(BufferStrategyControl),
                                    &found);

      if (!found)
      {
            /*
             * Only done once, usually in postmaster
             */
            Assert(init);

            /*
             * Grab the whole linked list of free buffers for our strategy. We
             * assume it was previously set up by InitBufferPool().
             */
            StrategyControl->firstFreeBuffer = 0;
            StrategyControl->lastFreeBuffer = NBuffers - 1;

            /* Initialize the clock sweep pointer */
            StrategyControl->nextVictimBuffer = 0;

            /* Clear statistics */
            StrategyControl->completePasses = 0;
            StrategyControl->numBufferAllocs = 0;
      }
      else
            Assert(!init);
}


/* ----------------------------------------------------------------
 *                      Backend-private buffer ring management
 * ----------------------------------------------------------------
 */


/*
 * GetAccessStrategy -- create a BufferAccessStrategy object
 *
 * The object is allocated in the current memory context.
 */
BufferAccessStrategy
GetAccessStrategy(BufferAccessStrategyType btype)
{
      BufferAccessStrategy strategy;
      int               ring_size;

      /*
       * Select ring size to use.  See buffer/README for rationales. (Currently
       * all cases are the same size, but keep this code structure for
       * flexibility.)
       *
       * Note: if you change the ring size for BAS_BULKREAD, see also
       * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
       */
      switch (btype)
      {
            case BAS_NORMAL:
                  /* if someone asks for NORMAL, just give 'em a "default" object */
                  return NULL;

            case BAS_BULKREAD:
                  ring_size = 256 * 1024 / BLCKSZ;
                  break;
            case BAS_BULKWRITE:
                  ring_size = 256 * 1024 / BLCKSZ;
                  break;
            case BAS_VACUUM:
                  ring_size = 256 * 1024 / BLCKSZ;
                  break;

            default:
                  elog(ERROR, "unrecognized buffer access strategy: %d",
                         (int) btype);
                  return NULL;            /* keep compiler quiet */
      }

      /* Make sure ring isn't an undue fraction of shared buffers */
      ring_size = Min(NBuffers / 8, ring_size);

      /* Allocate the object and initialize all elements to zeroes */
      strategy = (BufferAccessStrategy)
            palloc0(offsetof(BufferAccessStrategyData, buffers) +
                        ring_size * sizeof(Buffer));

      /* Set fields that don't start out zero */
      strategy->btype = btype;
      strategy->ring_size = ring_size;

      return strategy;
}

/*
 * FreeAccessStrategy -- release a BufferAccessStrategy object
 *
 * A simple pfree would do at the moment, but we would prefer that callers
 * don't assume that much about the representation of BufferAccessStrategy.
 */
void
FreeAccessStrategy(BufferAccessStrategy strategy)
{
      /* don't crash if called on a "default" strategy */
      if (strategy != NULL)
            pfree(strategy);
}

/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *          ring is empty.
 *
 * The bufhdr spin lock is held on the returned buffer.
 */
static volatile BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy)
{
      volatile BufferDesc *buf;
      Buffer            bufnum;

      /* Advance to next ring slot */
      if (++strategy->current >= strategy->ring_size)
            strategy->current = 0;

      /*
       * If the slot hasn't been filled yet, tell the caller to allocate a new
       * buffer with the normal allocation strategy.  He will then fill this
       * slot by calling AddBufferToRing with the new buffer.
       */
      bufnum = strategy->buffers[strategy->current];
      if (bufnum == InvalidBuffer)
      {
            strategy->current_was_in_ring = false;
            return NULL;
      }

      /*
       * If the buffer is pinned we cannot use it under any circumstances.
       *
       * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
       * since our own previous usage of the ring element would have left it
       * there, but it might've been decremented by clock sweep since then). A
       * higher usage_count indicates someone else has touched the buffer, so we
       * shouldn't re-use it.
       */
      buf = &BufferDescriptors[bufnum - 1];
      LockBufHdr(buf);
      if (buf->refcount == 0 && buf->usage_count <= 1)
      {
            strategy->current_was_in_ring = true;
            return buf;
      }
      UnlockBufHdr(buf);

      /*
       * Tell caller to allocate a new buffer with the normal allocation
       * strategy.  He'll then replace this ring element via AddBufferToRing.
       */
      strategy->current_was_in_ring = false;
      return NULL;
}

/*
 * AddBufferToRing -- add a buffer to the buffer ring
 *
 * Caller must hold the buffer header spinlock on the buffer.  Since this
 * is called with the spinlock held, it had better be quite cheap.
 */
static void
AddBufferToRing(BufferAccessStrategy strategy, volatile BufferDesc *buf)
{
      strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}

/*
 * StrategyRejectBuffer -- consider rejecting a dirty buffer
 *
 * When a nondefault strategy is used, the buffer manager calls this function
 * when it turns out that the buffer selected by StrategyGetBuffer needs to
 * be written out and doing so would require flushing WAL too.    This gives us
 * a chance to choose a different victim.
 *
 * Returns true if buffer manager should ask for a new victim, and false
 * if this buffer should be written and re-used.
 */
bool
StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
{
      /* We only do this in bulkread mode */
      if (strategy->btype != BAS_BULKREAD)
            return false;

      /* Don't muck with behavior of normal buffer-replacement strategy */
      if (!strategy->current_was_in_ring ||
        strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
            return false;

      /*
       * Remove the dirty buffer from the ring; necessary to prevent infinite
       * loop if all ring members are dirty.
       */
      strategy->buffers[strategy->current] = InvalidBuffer;

      return true;
}

Generated by  Doxygen 1.6.0   Back to index