Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions

pg_stat_statements.c

/*-------------------------------------------------------------------------
 *
 * pg_stat_statements.c
 *          Track statement execution times across a whole database cluster.
 *
 * Note about locking issues: to create or delete an entry in the shared
 * hashtable, one must hold pgss->lock exclusively.  Modifying any field
 * in an entry except the counters requires the same.  To look up an entry,
 * one must hold the lock shared.  To read or update the counters within
 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
 * disappear!) and also take the entry's mutex spinlock.
 *
 *
 * Copyright (c) 2008-2009, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.c,v 1.3 2009/06/11 14:48:51 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <unistd.h>

#include "access/hash.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "executor/instrument.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/guc.h"


PG_MODULE_MAGIC;

/* Location of stats file */
#define PGSS_DUMP_FILE  "global/pg_stat_statements.stat"

/* This constant defines the magic number in the stats file header */
static const uint32 PGSS_FILE_HEADER = 0x20081202;

/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)  (1.0)
#define USAGE_INIT                        (1.0) /* including initial planning */
#define USAGE_DECREASE_FACTOR (0.99)      /* decreased every entry_dealloc */
#define USAGE_DEALLOC_PERCENT 5           /* free this % of entries at once */

/*
 * Hashtable key that defines the identity of a hashtable entry.  The
 * hash comparators do not assume that the query string is null-terminated;
 * this lets us search for an mbcliplen'd string without copying it first.
 *
 * Presently, the query encoding is fully determined by the source database
 * and so we don't really need it to be in the key.  But that might not always
 * be true. Anyway it's notationally convenient to pass it as part of the key.
 *
 * query_ptr has two uses: in a search key built by a caller it points at the
 * caller's own string, while in a key stored inside a hashtable entry
 * entry_alloc() redirects it to that entry's query[] array.
 */
typedef struct pgssHashKey
{
      Oid               userid;                 /* user OID */
      Oid               dbid;             /* database OID */
      int               encoding;         /* query encoding */
      int               query_len;        /* # of valid bytes in query string */
      const char *query_ptr;        /* query string proper */
} pgssHashKey;

/*
 * The actual stats counters kept within pgssEntry.
 *
 * These are the only fields of an entry that may be changed while holding
 * pgss->lock in shared mode; readers and writers take the entry's mutex
 * around every access (see pgss_store and pg_stat_statements).
 *
 * "usage" is not reported to users.  It starts at USAGE_INIT, grows by
 * USAGE_EXEC() on each execution, is multiplied by USAGE_DECREASE_FACTOR on
 * every entry_dealloc() pass, and determines which entries are evicted first.
 */
typedef struct Counters
{
      int64       calls;                  /* # of times executed */
      double            total_time;       /* total execution time in seconds */
      int64       rows;             /* total # of retrieved or affected rows */
      double            usage;                  /* usage factor */
} Counters;

/*
 * Statistics per statement
 *
 * NB: see the file read/write code before changing field order here.
 * pgss_shmem_shutdown() writes, and pgss_shmem_startup() reads back, the
 * leading offsetof(pgssEntry, mutex) bytes of each entry (i.e. key and
 * counters) as a raw image, followed by key.query_len bytes of query text.
 */
typedef struct pgssEntry
{
      pgssHashKey key;              /* hash key of entry - MUST BE FIRST */
      Counters    counters;         /* the statistics for this query */
      slock_t           mutex;                  /* protects the counters only */
      char        query[1];         /* VARIABLE LENGTH ARRAY - MUST BE LAST */
      /* Note: the allocated length of query[] is actually pgss->query_size */
} pgssEntry;

/*
 * Global shared state
 *
 * Created (or attached to) in pgss_shmem_startup().  query_size is captured
 * from pgstat_track_activity_query_size when the struct is first created, so
 * that every process sizes hashtable entries identically.
 */
typedef struct pgssSharedState
{
      LWLockId    lock;             /* protects hashtable search/modification */
      int               query_size;       /* max query length in bytes */
} pgssSharedState;

/*---- Local variables ----*/

/* Current nesting depth of ExecutorRun calls */
static int  nested_level = 0;

/* Saved hook values in case of unload */
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
static ExecutorStart_hook_type prev_ExecutorStart = NULL;
static ExecutorRun_hook_type prev_ExecutorRun = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;

/* Links to shared memory state */
static pgssSharedState *pgss = NULL;
static HTAB *pgss_hash = NULL;

/*---- GUC variables ----*/

/*
 * Tracking levels selectable through the pg_stat_statements.track setting.
 * The value is stored in pgss_track and tested by pgss_enabled().
 *
 * Note: no comma after the last enumerator; a trailing comma is accepted
 * only from C99 on and is rejected by strict C89 compilers.
 */
typedef enum
{
      PGSS_TRACK_NONE,              /* track no statements */
      PGSS_TRACK_TOP,                     /* only top level statements */
      PGSS_TRACK_ALL                      /* all statements, including nested ones */
} PGSSTrackLevel;

/*
 * Mapping from the textual values of pg_stat_statements.track to the
 * PGSSTrackLevel constants; passed to DefineCustomEnumVariable() in
 * _PG_init().  The last entry, with a NULL name, ends the list.
 * NOTE(review): the third member is presumably config_enum_entry's "hidden"
 * flag -- confirm against utils/guc.h.
 */
static const struct config_enum_entry track_options[] = {
      {"none", PGSS_TRACK_NONE, false},
      {"top", PGSS_TRACK_TOP, false},
      {"all", PGSS_TRACK_ALL, false},
      {NULL, 0, false}
};

static int  pgss_max;               /* max # statements to track */
static int  pgss_track;             /* tracking level */
static bool pgss_save;              /* whether to save stats across shutdown */


/*
 * Should the statement being executed right now be tracked?  Yes if every
 * statement is tracked, or if only top-level statements are tracked and we
 * are not inside a nested ExecutorRun call.
 */
#define pgss_enabled() \
      ((pgss_track == PGSS_TRACK_TOP && nested_level == 0) || \
       pgss_track == PGSS_TRACK_ALL)

/*---- Function declarations ----*/

void        _PG_init(void);
void        _PG_fini(void);

Datum       pg_stat_statements_reset(PG_FUNCTION_ARGS);
Datum       pg_stat_statements(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
PG_FUNCTION_INFO_V1(pg_stat_statements);

static void pgss_shmem_startup(void);
static void pgss_shmem_shutdown(int code, Datum arg);
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
static void pgss_ExecutorRun(QueryDesc *queryDesc,
                         ScanDirection direction,
                         long count);
static void pgss_ExecutorEnd(QueryDesc *queryDesc);
static uint32 pgss_hash_fn(const void *key, Size keysize);
static int  pgss_match_fn(const void *key1, const void *key2, Size keysize);
static void pgss_store(const char *query,
               const Instrumentation *instr, uint32 rows);
static Size pgss_memsize(void);
static pgssEntry *entry_alloc(pgssHashKey *key);
static void entry_dealloc(void);
static void entry_reset(void);


/*
 * Module load callback.
 *
 * Defines the module's configuration settings, asks for shared memory and
 * one LWLock, and installs the shmem-startup and executor hooks.
 */
void
_PG_init(void)
{
      /*
       * The shared memory area can only be created when the library is loaded
       * through shared_preload_libraries.  Otherwise leave quietly without
       * touching the main system: raising an error would prevent the
       * pg_stat_statements functions from being created while the module is
       * inactive, which seems worth allowing.  Those functions have to guard
       * against being called in that state themselves.
       */
      if (!process_shared_preload_libraries_in_progress)
            return;

      /* Define (or redefine) the custom GUC variables. */
      DefineCustomIntVariable("pg_stat_statements.max",
                              "Sets the maximum number of statements tracked by pg_stat_statements.",
                              NULL,
                              &pgss_max,
                              1000, 100, INT_MAX,
                              PGC_POSTMASTER,
                              0,
                              NULL, NULL);

      DefineCustomEnumVariable("pg_stat_statements.track",
                               "Selects which statements are tracked by pg_stat_statements.",
                               NULL,
                               &pgss_track,
                               PGSS_TRACK_TOP,
                               track_options,
                               PGC_SUSET,
                               0,
                               NULL, NULL);

      DefineCustomBoolVariable("pg_stat_statements.save",
                               "Save pg_stat_statements statistics across server shutdowns.",
                               NULL,
                               &pgss_save,
                               true,
                               PGC_SIGHUP,
                               0,
                               NULL, NULL);

      EmitWarningsOnPlaceholders("pg_stat_statements");

      /*
       * Ask for the extra shared resources we need.  (Outside the postmaster
       * these requests are no-ops.)  The actual allocation of, or attachment
       * to, those resources happens later in pgss_shmem_startup().
       */
      RequestAddinShmemSpace(pgss_memsize());
      RequestAddinLWLocks(1);

      /* Install our hooks, remembering whatever was there before. */
      prev_ExecutorStart = ExecutorStart_hook;
      ExecutorStart_hook = pgss_ExecutorStart;

      prev_ExecutorRun = ExecutorRun_hook;
      ExecutorRun_hook = pgss_ExecutorRun;

      prev_ExecutorEnd = ExecutorEnd_hook;
      ExecutorEnd_hook = pgss_ExecutorEnd;

      prev_shmem_startup_hook = shmem_startup_hook;
      shmem_startup_hook = pgss_shmem_startup;
}

/*
 * Module unload callback.
 *
 * Puts back the hook values that were saved when the module was loaded.
 */
void
_PG_fini(void)
{
      shmem_startup_hook = prev_shmem_startup_hook;

      ExecutorEnd_hook = prev_ExecutorEnd;
      ExecutorRun_hook = prev_ExecutorRun;
      ExecutorStart_hook = prev_ExecutorStart;
}

/*
 * shmem_startup hook: allocate or attach to shared memory,
 * then load any pre-existing statistics from file.
 *
 * The dump file (written by pgss_shmem_shutdown) consists of a uint32 magic
 * number, an int32 entry count, and then for each entry the leading
 * offsetof(pgssEntry, mutex) bytes of the pgssEntry followed by
 * key.query_len bytes of query text.  Loading is skipped when
 * pg_stat_statements.save is off or the file does not exist.  Any read or
 * format problem is logged and the file is removed; entries loaded before
 * the problem was detected stay in the hash table.
 */
static void
pgss_shmem_startup(void)
{
      bool        found;
      HASHCTL           info;
      FILE     *file;
      uint32            header;
      int32       num;
      int32       i;
      int               query_size;
      int               buffer_size;
      char     *buffer = NULL;

      /* Give any previously installed hook its turn first */
      if (prev_shmem_startup_hook)
            prev_shmem_startup_hook();

      /* reset in case this is a restart within the postmaster */
      pgss = NULL;
      pgss_hash = NULL;

      /*
       * Create or attach to the shared memory state, including hash table
       */
      LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

      pgss = ShmemInitStruct("pg_stat_statements",
                                       sizeof(pgssSharedState),
                                       &found);
      if (!pgss)
            elog(ERROR, "out of shared memory");

      if (!found)
      {
            /* First time through ... */
            pgss->lock = LWLockAssign();
            pgss->query_size = pgstat_track_activity_query_size;
      }

      /* Be sure everyone agrees on the hash table entry size */
      query_size = pgss->query_size;

      memset(&info, 0, sizeof(info));
      info.keysize = sizeof(pgssHashKey);
      info.entrysize = offsetof(pgssEntry, query) +query_size;
      info.hash = pgss_hash_fn;
      info.match = pgss_match_fn;
      pgss_hash = ShmemInitHash("pg_stat_statements hash",
                                            pgss_max, pgss_max,
                                            &info,
                                            HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
      if (!pgss_hash)
            elog(ERROR, "out of shared memory");

      LWLockRelease(AddinShmemInitLock);

      /*
       * If we're in the postmaster (or a standalone backend...), set up a shmem
       * exit hook to dump the statistics to disk.
       */
      if (!IsUnderPostmaster)
            on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);

      /*
       * Attempt to load old statistics from the dump file.
       *
       * Note: we don't bother with locks here, because there should be no other
       * processes running when this is called.
       */
      if (!pgss_save)
            return;

      file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
      if (file == NULL)
      {
            if (errno == ENOENT)
                  return;                       /* ignore not-found error */
            goto error;
      }

      buffer_size = query_size;
      buffer = (char *) palloc(buffer_size);

      if (fread(&header, sizeof(uint32), 1, file) != 1 ||
            header != PGSS_FILE_HEADER ||
            fread(&num, sizeof(int32), 1, file) != 1)
            goto error;

      for (i = 0; i < num; i++)
      {
            pgssEntry   temp;
            pgssEntry  *entry;

            /* Read the fixed-size part: hash key and counters */
            if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
                  goto error;

            /* Encoding is one of the few fields we can easily sanity-check */
            if (!PG_VALID_BE_ENCODING(temp.key.encoding))
                  goto error;

            /*
             * Reject a negative length.  It can only come from a damaged file,
             * and using it below would store the terminator in front of the
             * buffer and hand fread() an enormous byte count.
             */
            if (temp.key.query_len < 0)
                  goto error;

            /* Previous incarnation might have had a larger query_size */
            if (temp.key.query_len >= buffer_size)
            {
                  buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
                  buffer_size = temp.key.query_len + 1;
            }

            if (fread(buffer, 1, temp.key.query_len, file) != temp.key.query_len)
                  goto error;
            buffer[temp.key.query_len] = '\0';

            /* Clip to available length if needed */
            if (temp.key.query_len >= query_size)
                  temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
                                                                                 buffer,
                                                                                 temp.key.query_len,
                                                                                 query_size - 1);

            /* The pointer stored in the file is meaningless; use our buffer */
            temp.key.query_ptr = buffer;

            /* make the hashtable entry (discards old entries if too many) */
            entry = entry_alloc(&temp.key);

            /* copy in the actual stats */
            entry->counters = temp.counters;
      }

      pfree(buffer);
      FreeFile(file);
      return;

error:
      ereport(LOG,
                  (errcode_for_file_access(),
                   errmsg("could not read pg_stat_statement file \"%s\": %m",
                              PGSS_DUMP_FILE)));
      if (buffer)
            pfree(buffer);
      if (file)
            FreeFile(file);
      /* If possible, throw away the bogus file; ignore any error */
      unlink(PGSS_DUMP_FILE);
}

/*
 * shmem_shutdown hook: write the collected statistics to the dump file.
 *
 * No lock is taken here, since no other process should still be running
 * when this hook is invoked.
 *
 * The file receives the magic number, the number of entries, and then for
 * every entry its key and counters followed by the query text.  If anything
 * goes wrong the problem is logged and the file is removed.
 */
static void
pgss_shmem_shutdown(int code, Datum arg)
{
      FILE     *dumpfile;
      HASH_SEQ_STATUS scan;
      int32       nentries;
      pgssEntry  *ent;

      /*
       * Nothing to do when we are exiting because of a crash, when shared
       * memory was never set up (shouldn't happen), or when saving has been
       * turned off.
       */
      if (code != 0 || pgss == NULL || pgss_hash == NULL || !pgss_save)
            return;

      dumpfile = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_W);
      if (dumpfile == NULL)
            goto error;

      /* File header: magic number, then entry count */
      if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, dumpfile) != 1)
            goto error;

      nentries = hash_get_num_entries(pgss_hash);
      if (fwrite(&nentries, sizeof(int32), 1, dumpfile) != 1)
            goto error;

      /* One record per hashtable entry */
      hash_seq_init(&scan, pgss_hash);
      for (;;)
      {
            int               qlen;

            ent = hash_seq_search(&scan);
            if (ent == NULL)
                  break;

            qlen = ent->key.query_len;

            /* fixed-size part first (everything ahead of the mutex) ... */
            if (fwrite(ent, offsetof(pgssEntry, mutex), 1, dumpfile) != 1)
                  goto error;
            /* ... then the query text, without a terminator */
            if (fwrite(ent->query, 1, qlen, dumpfile) != qlen)
                  goto error;
      }

      if (FreeFile(dumpfile) != 0)
      {
            /* already closed; don't let the error path close it again */
            dumpfile = NULL;
            goto error;
      }

      return;

error:
      ereport(LOG,
                  (errcode_for_file_access(),
                   errmsg("could not write pg_stat_statement file \"%s\": %m",
                              PGSS_DUMP_FILE)));
      if (dumpfile != NULL)
            FreeFile(dumpfile);
      unlink(PGSS_DUMP_FILE);
}

/*
 * ExecutorStart hook: arrange for timing information if we will want it.
 *
 * After letting the previous hook (or the standard routine) run, make sure
 * the query descriptor carries an Instrumentation struct whenever this
 * statement is one we track.
 */
static void
pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
      MemoryContext saved_cxt;

      if (prev_ExecutorStart != NULL)
            prev_ExecutorStart(queryDesc, eflags);
      else
            standard_ExecutorStart(queryDesc, eflags);

      /* Done if not tracking, or if somebody already asked for timing */
      if (!pgss_enabled() || queryDesc->totaltime != NULL)
            return;

      /*
       * Request total elapsed time tracking for ExecutorRun.  The struct is
       * allocated in the per-query context so that it is released at
       * ExecutorEnd.
       */
      saved_cxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
      queryDesc->totaltime = InstrAlloc(1);
      MemoryContextSwitchTo(saved_cxt);
}

/*
 * ExecutorRun hook: all we need do is track nesting depth
 *
 * nested_level is raised for the duration of the chained (or standard)
 * ExecutorRun call; pgss_enabled() consults it to tell top-level statements
 * from nested ones.
 */
static void
pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
{
      nested_level++;
      PG_TRY();
      {
            if (prev_ExecutorRun)
                  prev_ExecutorRun(queryDesc, direction, count);
            else
                  standard_ExecutorRun(queryDesc, direction, count);
            /* normal exit: back to the caller's depth */
            nested_level--;
      }
      PG_CATCH();
      {
            /* error exit: restore the depth as well, then pass the error on */
            nested_level--;
            PG_RE_THROW();
      }
      PG_END_TRY();
}

/*
 * ExecutorEnd hook: record the statement's statistics if it is tracked,
 * then hand over to the previous hook or the standard routine.
 */
static void
pgss_ExecutorEnd(QueryDesc *queryDesc)
{
      Instrumentation *timing = queryDesc->totaltime;

      if (timing != NULL && pgss_enabled())
      {
            /*
             * Finish accumulating the timing data.  (It does no harm if more
             * than one hook level does this.)
             */
            InstrEndLoop(timing);

            pgss_store(queryDesc->sourceText,
                           timing,
                           queryDesc->estate->es_processed);
      }

      if (prev_ExecutorEnd != NULL)
            prev_ExecutorEnd(queryDesc);
      else
            standard_ExecutorEnd(queryDesc);
}

/*
 * Hash function for pgssHashKey: combines the hashes of the user OID, the
 * database OID and the first query_len bytes of the query text.  The
 * encoding is deliberately left out of the hash.
 */
static uint32
pgss_hash_fn(const void *key, Size keysize)
{
      const pgssHashKey *hk = (const pgssHashKey *) key;
      uint32            result;

      result = DatumGetUInt32(hash_any((const unsigned char *) hk->query_ptr,
                                                       hk->query_len));
      result ^= hash_uint32((uint32) hk->dbid);
      result ^= hash_uint32((uint32) hk->userid);

      return result;
}

/*
 * Key comparison for the hashtable: returns 0 when the two keys are equal
 * and 1 otherwise.  Query strings are compared over query_len bytes only,
 * so they need not be null-terminated.
 */
static int
pgss_match_fn(const void *key1, const void *key2, Size keysize)
{
      const pgssHashKey *a = (const pgssHashKey *) key1;
      const pgssHashKey *b = (const pgssHashKey *) key2;

      if (a->userid != b->userid)
            return 1;
      if (a->dbid != b->dbid)
            return 1;
      if (a->encoding != b->encoding)
            return 1;
      if (a->query_len != b->query_len)
            return 1;

      /* lengths agree, so a bytewise comparison settles it */
      return (memcmp(a->query_ptr, b->query_ptr, a->query_len) == 0) ? 0 : 1;
}

/*
 * Store some statistics for a statement.
 *
 * query: null-terminated statement text.  For use as hash key it is clipped
 *        with pg_encoding_mbcliplen() so that it fits in pgss->query_size - 1
 *        bytes.
 * instr: instrumentation data; instr->total is added to total_time.
 * rows:  row count to add to the entry's rows counter.
 *
 * The entry for (current user, current database, query text) is looked up
 * and created if missing, and its counters are bumped.  Does nothing when
 * the shared state is not available.
 */
static void
pgss_store(const char *query, const Instrumentation *instr, uint32 rows)
{
      pgssHashKey key;
      double            usage;
      pgssEntry  *entry;

      Assert(query != NULL);

      /* Safety check... */
      if (!pgss || !pgss_hash)
            return;

      /* Set up key for hashtable search */
      key.userid = GetUserId();
      key.dbid = MyDatabaseId;
      key.encoding = GetDatabaseEncoding();
      key.query_len = strlen(query);
      if (key.query_len >= pgss->query_size)
            key.query_len = pg_encoding_mbcliplen(key.encoding,
                                                                    query,
                                                                    key.query_len,
                                                                    pgss->query_size - 1);
      key.query_ptr = query;

      /*
       * Pass the real execution time.  USAGE_EXEC currently ignores its
       * argument, but handing it a declared value keeps this line valid
       * should the macro ever start to use it.
       */
      usage = USAGE_EXEC(instr->total);

      /* Lookup the hash table entry with shared lock. */
      LWLockAcquire(pgss->lock, LW_SHARED);

      entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
      if (!entry)
      {
            /*
             * Must acquire exclusive lock to add a new entry.  Someone else
             * may create it while we wait; entry_alloc copes with that.
             */
            LWLockRelease(pgss->lock);
            LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
            entry = entry_alloc(&key);
      }

      /* Grab the spinlock while updating the counters. */
      {
            volatile pgssEntry *e = (volatile pgssEntry *) entry;

            SpinLockAcquire(&e->mutex);
            e->counters.calls += 1;
            e->counters.total_time += instr->total;
            e->counters.rows += rows;
            e->counters.usage += usage;
            SpinLockRelease(&e->mutex);
      }

      /* Releases whichever lock mode we ended up holding */
      LWLockRelease(pgss->lock);
}

/*
 * Reset all statement statistics.
 */
Datum
pg_stat_statements_reset(PG_FUNCTION_ARGS)
{
      if (!pgss || !pgss_hash)
            ereport(ERROR,
                        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                         errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
      entry_reset();
      PG_RETURN_VOID();
}

#define PG_STAT_STATEMENTS_COLS           6

/*
 * Retrieve statement statistics.
 *
 * Set-returning function that delivers its result in materialize mode: one
 * row per hashtable entry, with columns userid, dbid, query, calls,
 * total_time and rows, is put into a tuplestore that is handed back through
 * rsinfo.  The query text is shown only to superusers and to the user the
 * entry belongs to; everybody else gets a placeholder string.
 *
 * Raises an error if the module's shared state is not available or if the
 * caller cannot accept a materialized set.
 */
Datum
pg_stat_statements(PG_FUNCTION_ARGS)
{
      ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
      TupleDesc   tupdesc;
      Tuplestorestate *tupstore;
      MemoryContext per_query_ctx;
      MemoryContext oldcontext;
      Oid               userid = GetUserId();
      bool        is_superuser = superuser();
      HASH_SEQ_STATUS hash_seq;
      pgssEntry  *entry;

      if (!pgss || !pgss_hash)
            ereport(ERROR,
                        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                         errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));

      /* check to see if caller supports us returning a tuplestore */
      if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
            ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("set-valued function called in context that cannot accept a set")));
      if (!(rsinfo->allowedModes & SFRM_Materialize))
            ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("materialize mode required, but it is not " \
                                    "allowed in this context")));

      /* the tuple descriptor and tuplestore are built in the per-query context */
      per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
      oldcontext = MemoryContextSwitchTo(per_query_ctx);

      tupdesc = CreateTemplateTupleDesc(PG_STAT_STATEMENTS_COLS, false);
      TupleDescInitEntry(tupdesc, (AttrNumber) 1, "userid",
                                 OIDOID, -1, 0);
      TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid",
                                 OIDOID, -1, 0);
      TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query",
                                 TEXTOID, -1, 0);
      TupleDescInitEntry(tupdesc, (AttrNumber) 4, "calls",
                                 INT8OID, -1, 0);
      TupleDescInitEntry(tupdesc, (AttrNumber) 5, "total_time",
                                 FLOAT8OID, -1, 0);
      TupleDescInitEntry(tupdesc, (AttrNumber) 6, "rows",
                                 INT8OID, -1, 0);

      tupstore = tuplestore_begin_heap(true, false, work_mem);
      rsinfo->returnMode = SFRM_Materialize;
      rsinfo->setResult = tupstore;
      rsinfo->setDesc = tupdesc;

      /* shared lock: entries can't be added or removed while we scan */
      LWLockAcquire(pgss->lock, LW_SHARED);

      hash_seq_init(&hash_seq, pgss_hash);
      while ((entry = hash_seq_search(&hash_seq)) != NULL)
      {
            Datum       values[PG_STAT_STATEMENTS_COLS];
            bool        nulls[PG_STAT_STATEMENTS_COLS];
            int               i = 0;
            Counters    tmp;

            /* generate junk in short-term context */
            MemoryContextSwitchTo(oldcontext);

            memset(values, 0, sizeof(values));
            memset(nulls, 0, sizeof(nulls));

            values[i++] = ObjectIdGetDatum(entry->key.userid);
            values[i++] = ObjectIdGetDatum(entry->key.dbid);

            /* only superusers and the entry's own user may see the text */
            if (is_superuser || entry->key.userid == userid)
            {
                  char     *qstr;

                  /* convert from the encoding it was stored in to ours */
                  qstr = (char *)
                        pg_do_encoding_conversion((unsigned char *) entry->query,
                                                              entry->key.query_len,
                                                              entry->key.encoding,
                                                              GetDatabaseEncoding());
                  values[i++] = CStringGetTextDatum(qstr);
                  /* free the converted copy, if one was made */
                  if (qstr != entry->query)
                        pfree(qstr);
            }
            else
                  values[i++] = CStringGetTextDatum("<insufficient privilege>");

            /* copy counters to a local variable to keep locking time short */
            {
                  volatile pgssEntry *e = (volatile pgssEntry *) entry;

                  SpinLockAcquire(&e->mutex);
                  tmp = e->counters;
                  SpinLockRelease(&e->mutex);
            }

            /*
             * NOTE(review): the *Fast macros presumably may refer to tmp's
             * storage rather than copy the value -- confirm; tmp stays in
             * scope until the tuple has been stored below.
             */
            values[i++] = Int64GetDatumFast(tmp.calls);
            values[i++] = Float8GetDatumFast(tmp.total_time);
            values[i++] = Int64GetDatumFast(tmp.rows);

            Assert(i == PG_STAT_STATEMENTS_COLS);

            /* switch to appropriate context while storing the tuple */
            MemoryContextSwitchTo(per_query_ctx);
            tuplestore_putvalues(tupstore, tupdesc, values, nulls);
      }

      LWLockRelease(pgss->lock);

      /* clean up and return the tuplestore */
      tuplestore_donestoring(tupstore);

      MemoryContextSwitchTo(oldcontext);

      /* the result set travels in rsinfo; the return value itself is unused */
      return (Datum) 0;
}

/*
 * Estimate how much shared memory the module needs: the shared state struct
 * plus a hashtable of pgss_max entries, each large enough to hold a query of
 * pgstat_track_activity_query_size bytes.
 */
static Size
pgss_memsize(void)
{
      Size        per_entry;
      Size        hash_bytes;

      per_entry = offsetof(pgssEntry, query) + pgstat_track_activity_query_size;
      hash_bytes = hash_estimate_size(pgss_max, per_entry);

      return add_size(MAXALIGN(sizeof(pgssSharedState)), hash_bytes);
}

/*
 * Find the hashtable entry for the given key, creating it if necessary.
 * The caller must hold pgss->lock exclusively.
 *
 * Finding the entry already present is not an error even though we hold the
 * exclusive lock: pgss_store drops its shared lock before taking the
 * exclusive one, so another backend may have created the entry in between.
 *
 * If the table is full, the least used entries are discarded first.
 */
static pgssEntry *
entry_alloc(pgssHashKey *key)
{
      pgssEntry  *slot;
      bool        existed;

      /* The caller is responsible for clipping the query text */
      Assert(key->query_len < pgss->query_size);

      /* Free up room if the table is full */
      while (hash_get_num_entries(pgss_hash) >= pgss_max)
            entry_dealloc();

      slot = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &existed);
      if (existed)
            return slot;

      /*
       * Brand new entry: set it up.  dynahash copied the key, but its
       * query_ptr still refers to the caller's string; point it at the
       * entry's own storage and copy the text there.
       */
      slot->key.query_ptr = slot->query;
      memcpy(slot->query, key->query_ptr, key->query_len);
      slot->query[key->query_len] = '\0';

      /* start the statistics from scratch */
      memset(&slot->counters, 0, sizeof(Counters));
      slot->counters.usage = USAGE_INIT;

      /* (re)initialize the mutex; nobody can be using it at this point */
      SpinLockInit(&slot->mutex);

      return slot;
}

/*
 * qsort comparator for sorting into increasing usage order
 */
static int
entry_cmp(const void *lhs, const void *rhs)
{
      double            l_usage = (*(const pgssEntry **) lhs)->counters.usage;
      double            r_usage = (*(const pgssEntry **) rhs)->counters.usage;

      if (l_usage < r_usage)
            return -1;
      else if (l_usage > r_usage)
            return +1;
      else
            return 0;
}

/*
 * Evict the least used entries from the hashtable.
 * The caller must hold pgss->lock exclusively.
 *
 * Every entry's usage is first scaled down by USAGE_DECREASE_FACTOR; then
 * the entries are sorted by usage and USAGE_DEALLOC_PERCENT percent of them
 * (but at least 10, and never more than there are) are removed.
 */
static void
entry_dealloc(void)
{
      HASH_SEQ_STATUS scan;
      pgssEntry **sorted;
      pgssEntry  *cur;
      int               count = 0;
      int               nvictims;
      int               v;

      sorted = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));

      /* Collect all entries, aging their usage as we go */
      hash_seq_init(&scan, pgss_hash);
      while ((cur = hash_seq_search(&scan)) != NULL)
      {
            cur->counters.usage *= USAGE_DECREASE_FACTOR;
            sorted[count] = cur;
            count++;
      }

      /* Least used first */
      qsort(sorted, count, sizeof(pgssEntry *), entry_cmp);

      /* Decide how many to drop */
      nvictims = count * USAGE_DEALLOC_PERCENT / 100;
      if (nvictims < 10)
            nvictims = 10;
      if (nvictims > count)
            nvictims = count;

      for (v = 0; v < nvictims; v++)
            hash_search(pgss_hash, &sorted[v]->key, HASH_REMOVE, NULL);

      pfree(sorted);
}

/*
 * Remove every entry from the hashtable.
 *
 * Takes pgss->lock exclusively for the duration of the scan.
 */
static void
entry_reset(void)
{
      HASH_SEQ_STATUS scan;
      pgssEntry  *victim;

      LWLockAcquire(pgss->lock, LW_EXCLUSIVE);

      hash_seq_init(&scan, pgss_hash);
      for (victim = hash_seq_search(&scan);
             victim != NULL;
             victim = hash_seq_search(&scan))
            hash_search(pgss_hash, &victim->key, HASH_REMOVE, NULL);

      LWLockRelease(pgss->lock);
}

Generated by  Doxygen 1.6.0   Back to index