Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions

pgarch.c

/*-------------------------------------------------------------------------
 *
 * pgarch.c
 *
 *    PostgreSQL WAL archiver
 *
 *    All functions relating to archiver are included here
 *
 *    - All functions executed by archiver process
 *
 *    - archiver is forked from postmaster, and the two
 *    processes then communicate using signals. All functions
 *    executed by postmaster are included in this file.
 *
 *    Initial author: Simon Riggs         simon@2ndquadrant.com
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.39 2009/01/01 17:23:46 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>

#include "access/xlog_internal.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "postmaster/fork_process.h"
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "utils/guc.h"
#include "utils/ps_status.h"


/* ----------
 * Timer definitions.
 * ----------
 */
#define PGARCH_AUTOWAKE_INTERVAL 60       /* Seconds since the last copy cycle
                                           * after which the main loop wakes
                                           * itself and polls the archive
                                           * status directory again. */
#define PGARCH_RESTART_INTERVAL 10        /* Minimum number of seconds between
                                           * two launch attempts made by
                                           * pgarch_start(). */

/* ----------
 * Archiver control info.
 *
 * We expect that archivable files within pg_xlog will have names between
 * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
 * appearing in VALID_XFN_CHARS.  The status files in archive_status have
 * corresponding names with ".ready" or ".done" appended.
 *
 * VALID_XFN_CHARS is used as a character set (it is handed to strspn() in
 * pgarch_readyXlog), not as a pattern: the ".history.backup" part merely
 * adds those individual characters to the accepted set.
 * ----------
 */
#define MIN_XFN_CHARS   16
#define MAX_XFN_CHARS   40
#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"

/*
 * Number of failed attempts on a single file after which
 * pgarch_ArchiverCopyLoop gives up for the current cycle.
 */
#define NUM_ARCHIVE_RETRIES 3


/* ----------
 * Local data
 * ----------
 */
/*
 * Time of the most recent launch attempt made by pgarch_start(); used there
 * to enforce PGARCH_RESTART_INTERVAL.  Static storage, so initially zero.
 */
static time_t last_pgarch_start_time;
/*
 * Time at which pgarch_MainLoop first noticed got_SIGTERM; 0 means it has
 * not been noticed yet.
 */
static time_t last_sigterm_time = 0;

/*
 * Flags set by interrupt handlers for later service in the main loop.
 *
 * got_SIGHUP    - set on SIGHUP; the main loop re-reads the config file.
 * got_SIGTERM   - set on SIGTERM; no further archive commands are started.
 * wakened       - set on SIGUSR1 (and by the main loop itself on entry and
 *                 on the autowake timeout); triggers a copy cycle.
 * ready_to_stop - set on SIGUSR2; the main loop runs one more copy cycle
 *                 and then returns.
 */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t got_SIGTERM = false;
static volatile sig_atomic_t wakened = false;
static volatile sig_atomic_t ready_to_stop = false;

/* ----------
 * Local function forward declarations
 * ----------
 */
#ifdef EXEC_BACKEND
static pid_t pgarch_forkexec(void);
#endif

NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
static void pgarch_exit(SIGNAL_ARGS);
static void ArchSigHupHandler(SIGNAL_ARGS);
static void ArchSigTermHandler(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
static void pgarch_waken_stop(SIGNAL_ARGS);
static void pgarch_MainLoop(void);
static void pgarch_ArchiverCopyLoop(void);
static bool pgarch_archiveXlog(char *xlog);
static bool pgarch_readyXlog(char *xlog);
static void pgarch_archiveDone(char *xlog);


/* ------------------------------------------------------------
 * Public functions called from postmaster follow
 * ------------------------------------------------------------
 */

/*
 * pgarch_start
 *
 *    Entry point used by the postmaster, at startup or after an existing
 *    archiver died, to bring up a fresh archiver process.
 *
 *    Returns the PID of the new child, or 0 when no archiver was started:
 *    archiving is not active, the previous attempt was made less than
 *    PGARCH_RESTART_INTERVAL seconds ago, or the fork failed (logged).
 *
 *    Note: after a failure the postmaster main loop calls us again, so
 *    returning 0 simply postpones the launch.
 */
int
pgarch_start(void)
{
      time_t      now;
      pid_t       child;

      /* Archiving switched off: no archiver is needed at all. */
      if (!XLogArchivingActive())
            return 0;

      /*
       * Safety valve against a respawn storm when the archiver dies right
       * after launch: refuse to try again until the restart interval has
       * elapsed.  We get another chance on a later call.
       */
      now = time(NULL);
      if ((unsigned int) (now - last_pgarch_start_time) <
            (unsigned int) PGARCH_RESTART_INTERVAL)
            return 0;
      last_pgarch_start_time = now;

#ifdef EXEC_BACKEND
      child = pgarch_forkexec();
#else
      child = fork_process();
#endif

      if (child == -1)
      {
            ereport(LOG,
                        (errmsg("could not fork archiver: %m")));
            return 0;
      }

#ifndef EXEC_BACKEND
      if (child == 0)
      {
            /* We are the new child: shed what belongs to the postmaster. */

            /* Close the postmaster's sockets */
            ClosePostmasterPorts(false);

            /* Lose the postmaster's on-exit routines */
            on_exit_reset();

            /* Drop our connection to postmaster's shared memory, as well */
            PGSharedMemoryDetach();

            PgArchiverMain(0, NULL);

            /* shouldn't get here */
            return 0;
      }
#endif

      /* Parent side: hand the child's PID back to the caller. */
      return (int) child;
}

/* ------------------------------------------------------------
 * Local functions called by archiver follow
 * ------------------------------------------------------------
 */


#ifdef EXEC_BACKEND

/*
 * pgarch_forkexec() -
 *
 * Build the argument vector for the archiver subprocess and pass it to
 * postmaster_forkexec(), whose result is returned unchanged.
 */
static pid_t
pgarch_forkexec(void)
{
      char     *args[10];
      int         nargs;

      args[0] = "postgres";
      args[1] = "--forkarch";
      args[2] = NULL;               /* filled in by postmaster_forkexec */
      nargs = 3;

      /* terminate the vector; the terminator is not counted in nargs */
      args[nargs] = NULL;
      Assert(nargs < lengthof(args));

      return postmaster_forkexec(nargs, args);
}
#endif   /* EXEC_BACKEND */


/*
 * PgArchiverMain
 *
 *    Main entry point of the archiver process: records process identity,
 *    installs the signal handlers, sets the ps title, and runs
 *    pgarch_MainLoop().  Does not return; the process ends with exit(0)
 *    once the main loop finishes.
 *
 *    The argc/argv parameters are valid only in EXEC_BACKEND case.  However,
 *    since we don't use 'em, it hardly matters...
 */
NON_EXEC_STATIC void
PgArchiverMain(int argc, char *argv[])
{
      IsUnderPostmaster = true;     /* we are a postmaster subprocess now */

      MyProcPid = getpid();         /* reset MyProcPid */

      MyStartTime = time(NULL);     /* record Start Time for logging */

      /*
       * If possible, make this process a group leader, so that the postmaster
       * can signal any child processes too.
       */
#ifdef HAVE_SETSID
      if (setsid() < 0)
            elog(FATAL, "setsid() failed: %m");
#endif

      /*
       * Ignore all signals usually bound to some action in the postmaster,
       * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
       *
       * SIGHUP requests a config reload, SIGTERM stops further archive
       * commands from being started, SIGQUIT exits immediately, SIGUSR1
       * wakes the main loop, and SIGUSR2 asks for one final cycle followed
       * by shutdown.
       */
      pqsignal(SIGHUP, ArchSigHupHandler);
      pqsignal(SIGINT, SIG_IGN);
      pqsignal(SIGTERM, ArchSigTermHandler);
      pqsignal(SIGQUIT, pgarch_exit);
      pqsignal(SIGALRM, SIG_IGN);
      pqsignal(SIGPIPE, SIG_IGN);
      pqsignal(SIGUSR1, pgarch_waken);
      pqsignal(SIGUSR2, pgarch_waken_stop);
      pqsignal(SIGCHLD, SIG_DFL);
      pqsignal(SIGTTIN, SIG_DFL);
      pqsignal(SIGTTOU, SIG_DFL);
      pqsignal(SIGCONT, SIG_DFL);
      pqsignal(SIGWINCH, SIG_DFL);
      PG_SETMASK(&UnBlockSig);

      /*
       * Identify myself via ps
       */
      init_ps_display("archiver process", "", "", "");

      pgarch_MainLoop();

      /* Main loop returned: normal termination. */
      exit(0);
}

/*
 * SIGQUIT signal handler for archiver process
 *
 * Terminates the process immediately with exit status 1.
 */
static void
pgarch_exit(SIGNAL_ARGS)
{
      /*
       * SIGQUIT means curl up and die ...
       *
       * Use _exit() rather than exit(): exit() runs atexit handlers and
       * flushes stdio buffers, neither of which is async-signal-safe, so
       * calling it from a signal handler can deadlock or corrupt state if
       * the signal interrupted one of those operations.  The exit status
       * seen by the parent is unchanged.
       */
      _exit(1);
}

/*
 * SIGHUP signal handler for archiver process
 *
 * Only records that the signal arrived; pgarch_MainLoop clears the flag and
 * calls ProcessConfigFile(PGC_SIGHUP) on its next iteration.
 */
static void
ArchSigHupHandler(SIGNAL_ARGS)
{
      /* set flag to re-read config file at next convenient time */
      got_SIGHUP = true;
}

/*
 * SIGTERM signal handler for archiver process
 *
 * Only sets got_SIGTERM.  The flag is tested by pgarch_ArchiverCopyLoop
 * before each archive command, and by pgarch_MainLoop, which leaves its loop
 * once 60 seconds have passed since it first saw the flag.
 */
static void
ArchSigTermHandler(SIGNAL_ARGS)
{
      /*
       * The postmaster never sends us SIGTERM, so we assume that this means
       * that init is trying to shut down the whole system.  If we hang around
       * too long we'll get SIGKILL'd.  Set flag to prevent starting any more
       * archive commands.
       */
      got_SIGTERM = true;
}

/*
 * SIGUSR1 signal handler for archiver process
 *
 * Only sets the wakened flag; pgarch_MainLoop reacts to it by ending its
 * sleep and running another copy cycle.
 */
static void
pgarch_waken(SIGNAL_ARGS)
{
      /* set flag that there is work to be done */
      wakened = true;
}

/*
 * SIGUSR2 signal handler for archiver process
 *
 * Only sets ready_to_stop; pgarch_MainLoop samples it at the top of each
 * iteration, runs one more copy cycle, and then returns.
 */
static void
pgarch_waken_stop(SIGNAL_ARGS)
{
      /* set flag to do a final cycle and shut down afterwards */
      ready_to_stop = true;
}

/*
 * pgarch_MainLoop
 *
 * Service loop of the archiver.  Each iteration handles a pending config
 * reload, applies the SIGTERM timeout, runs a copy cycle when one has been
 * requested, and then naps until there is a reason to look again.  Returns
 * when the postmaster is gone, after the final cycle requested by SIGUSR2,
 * or 60 seconds after SIGTERM was first noticed.
 */
static void
pgarch_MainLoop(void)
{
      time_t      prev_cycle_time = 0;
      bool        final_pass;

      /*
       * Force a copy cycle on the very first iteration: files may be left
       * over from a previous database run, or from an archiver that died
       * unexpectedly.  Later cycles wait for a signal or the timeout.
       */
      wakened = true;

      for (;;)
      {
            /* When we get SIGUSR2, we do one more archive cycle, then exit */
            final_pass = ready_to_stop;

            /* Pick up configuration changes requested via SIGHUP. */
            if (got_SIGHUP)
            {
                  got_SIGHUP = false;
                  ProcessConfigFile(PGC_SIGHUP);
            }

            /*
             * After SIGTERM we normally just idle until SIGUSR2 shows up.
             * A stray SIGTERM would then disable archiving indefinitely, so
             * give up after 60 seconds and exit; the postmaster can start a
             * new archiver if one is needed.
             */
            if (got_SIGTERM)
            {
                  time_t      now = time(NULL);

                  if (last_sigterm_time == 0)
                        last_sigterm_time = now;
                  else if ((unsigned int) (now - last_sigterm_time) >=
                               (unsigned int) 60)
                        break;
            }

            /* Do what we're here for */
            if (wakened || final_pass)
            {
                  wakened = false;
                  pgarch_ArchiverCopyLoop();
                  prev_cycle_time = time(NULL);
            }

            /*
             * Nothing to do but wait for a signal; still, wake up on our own
             * every PGARCH_AUTOWAKE_INTERVAL seconds to be proactive.
             *
             * Signals do not interrupt the sleep on every platform, so nap
             * in 1-second slices and re-check the flags after each one.
             */
            while (!wakened && !ready_to_stop && !got_SIGHUP &&
                     PostmasterIsAlive(true))
            {
                  pg_usleep(1000000L);
                  if ((unsigned int) (time(NULL) - prev_cycle_time) >=
                        (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
                        wakened = true;
            }

            /*
             * Leave when the postmaster has died (not expected), or when the
             * cycle just completed was the final one requested by SIGUSR2.
             */
            if (!PostmasterIsAlive(true) || final_pass)
                  break;
      }
}

/*
 * pgarch_ArchiverCopyLoop
 *
 * Archive every xlog file currently marked ready, oldest first, and return.
 * Returns early when no archive_command is set, when SIGTERM has been
 * received or the postmaster has died, or when one file has failed
 * NUM_ARCHIVE_RETRIES times.
 */
static void
pgarch_ArchiverCopyLoop(void)
{
      char        xlog[MAX_XFN_CHARS + 1];

      /* can't do anything if no command ... */
      if (!XLogArchiveCommandSet())
      {
            ereport(WARNING,
               (errmsg("archive_mode enabled, yet archive_command is not set")));
            return;
      }

      /*
       * Keep asking for the next .ready file until none is left.  Usually
       * that is a single file, but backends may add more to the list while
       * we are still busy copying earlier ones.
       */
      while (pgarch_readyXlog(xlog))
      {
            int         nfailed = 0;
            bool        archived = false;

            while (!archived)
            {
                  /*
                   * Never start another archive command once SIGTERM has
                   * arrived (init may SIGKILL the command) or once the
                   * postmaster is gone (a newer postmaster may have spawned
                   * its own archiver, and we must not conflict with it).
                   */
                  if (got_SIGTERM || !PostmasterIsAlive(true))
                        return;

                  if (pgarch_archiveXlog(xlog))
                  {
                        /* successful: mark it done and move to the next file */
                        pgarch_archiveDone(xlog);
                        archived = true;
                        continue;
                  }

                  nfailed++;
                  if (nfailed >= NUM_ARCHIVE_RETRIES)
                  {
                        ereport(WARNING,
                                    (errmsg("transaction log file \"%s\" could not be archived: too many failures",
                                                xlog)));
                        return;           /* give up archiving for now */
                  }

                  pg_usleep(1000000L);    /* wait a bit before retrying */
            }
      }
}

/*
 * pgarch_archiveXlog
 *
 * Expand XLogArchiveCommand for the given xlog file name and run it through
 * system(3), reporting progress in the ps display.
 *
 * Placeholders: %p is replaced by the file's path (XLOGDIR "/" name, made
 * native), %f by the bare file name, %% by a single %.  Any other % is
 * copied literally.  Output beyond MAXPGPATH - 1 characters is dropped.
 *
 * Returns true if the command reported success (status 0), false otherwise.
 * When the shell or the command was killed by a signal the failure is
 * reported at FATAL level instead.
 */
static bool
pgarch_archiveXlog(char *xlog)
{
      char        cmdbuf[MAXPGPATH];
      char        srcpath[MAXPGPATH];
      char        psmsg[MAXFNAMELEN + 16];
      char     *out;
      char     *limit;
      const char *in;
      int         status;
      int         elevel;

      snprintf(srcpath, MAXPGPATH, XLOGDIR "/%s", xlog);

      /*
       * Build the command line.  "limit" points at the last byte of the
       * buffer, which is kept as a terminator throughout.
       */
      out = cmdbuf;
      limit = cmdbuf + MAXPGPATH - 1;
      *limit = '\0';

      for (in = XLogArchiveCommand; *in; in++)
      {
            if (*in == '%' && in[1] == 'p')
            {
                  /* %p: relative path of source file */
                  in++;
                  strlcpy(out, srcpath, limit - out);
                  make_native_path(out);
                  out += strlen(out);
            }
            else if (*in == '%' && in[1] == 'f')
            {
                  /* %f: filename of source file */
                  in++;
                  strlcpy(out, xlog, limit - out);
                  out += strlen(out);
            }
            else
            {
                  /*
                   * Ordinary character, a % that is not special, or "%%".
                   * For "%%" step over the first % so that exactly one is
                   * emitted.
                   */
                  if (*in == '%' && in[1] == '%')
                        in++;
                  if (out < limit)
                        *out++ = *in;
            }
      }
      *out = '\0';

      ereport(DEBUG3,
                  (errmsg_internal("executing archive command \"%s\"",
                                           cmdbuf)));

      /* Report archive activity in PS display */
      snprintf(psmsg, sizeof(psmsg), "archiving %s", xlog);
      set_ps_display(psmsg, false);

      status = system(cmdbuf);

      if (status == 0)
      {
            ereport(DEBUG1,
                        (errmsg("archived transaction log file \"%s\"", xlog)));

            snprintf(psmsg, sizeof(psmsg), "last was %s", xlog);
            set_ps_display(psmsg, false);

            return true;
      }

      /*
       * The command failed.  If either the shell itself, or a called
       * command, died on a signal, abort the archiver: system() ignores
       * SIGINT and SIGQUIT while waiting, so such a signal very likely
       * should have interrupted us too.  Overreacting is harmless, the
       * postmaster will just start the archiver again.
       *
       * Per the Single Unix Spec, shells report exit status > 128 when a
       * called command died on a signal.
       */
      if (WIFSIGNALED(status) || WEXITSTATUS(status) > 128)
            elevel = FATAL;
      else
            elevel = LOG;

      if (WIFEXITED(status))
      {
            ereport(elevel,
                        (errmsg("archive command failed with exit code %d",
                                    WEXITSTATUS(status)),
                         errdetail("The failed archive command was: %s",
                                       cmdbuf)));
      }
      else if (WIFSIGNALED(status))
      {
#if defined(WIN32)
            ereport(elevel,
                        (errmsg("archive command was terminated by exception 0x%X",
                                    WTERMSIG(status)),
                         errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
                         errdetail("The failed archive command was: %s",
                                       cmdbuf)));
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
            ereport(elevel,
                        (errmsg("archive command was terminated by signal %d: %s",
                                    WTERMSIG(status),
                                    WTERMSIG(status) < NSIG ? sys_siglist[WTERMSIG(status)] : "(unknown)"),
                         errdetail("The failed archive command was: %s",
                                       cmdbuf)));
#else
            ereport(elevel,
                        (errmsg("archive command was terminated by signal %d",
                                    WTERMSIG(status)),
                         errdetail("The failed archive command was: %s",
                                       cmdbuf)));
#endif
      }
      else
      {
            ereport(elevel,
                        (errmsg("archive command exited with unrecognized status %d",
                                    status),
                         errdetail("The failed archive command was: %s",
                                       cmdbuf)));
      }

      snprintf(psmsg, sizeof(psmsg), "failed on %s", xlog);
      set_ps_display(psmsg, false);

      return false;
}

/*
 * pgarch_readyXlog
 *
 * Find the oldest xlog file that still awaits archiving.  On success the
 * file name (without the ".ready" suffix) is copied into *xlog and true is
 * returned; false means no candidate exists and *xlog is left untouched.
 *
 * Nothing marks the file as "being archived", so this would have to be
 * extended if several archival tasks ever ran concurrently.  After a failure
 * the file is simply copied again in full at the next opportunity.
 *
 * Returning the oldest file matters for two reasons:
 * 1) it maintains the sequential chain of xlogs required for recovery
 * 2) the oldest files are the first to become candidates for recycling at
 * checkpoint time
 *
 * NOTE: "oldest" is decided by plain string comparison of the names, so all
 * segments of a timeline with a smaller ID sort before all segments of a
 * timeline with a larger ID; past timelines therefore get priority.  This
 * seems okay, or at least not obviously worth changing.
 */
static bool
pgarch_readyXlog(char *xlog)
{
      /*
       * Scan the whole status directory for ".ready" entries and remember
       * the smallest name.  This could be optimised, but on the vast
       * majority of calls only a single file is expected.
       */
      char        statusdir[MAXPGPATH];
      char        oldest[MAX_XFN_CHARS + 6 + 1];
      DIR            *dir;
      struct dirent *entry;
      bool        have_candidate = false;

      snprintf(statusdir, MAXPGPATH, XLOGDIR "/archive_status");
      dir = AllocateDir(statusdir);
      if (dir == NULL)
            ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not open archive status directory \"%s\": %m",
                                    statusdir)));

      while ((entry = ReadDir(dir, statusdir)) != NULL)
      {
            const char *fname = entry->d_name;
            int         stemlen = (int) strlen(fname) - 6;

            /* base name (without the 6-character suffix) must be in range */
            if (stemlen < MIN_XFN_CHARS || stemlen > MAX_XFN_CHARS)
                  continue;

            /* base name must consist of permitted characters only */
            if (strspn(fname, VALID_XFN_CHARS) < stemlen)
                  continue;

            /* and the suffix has to be ".ready" */
            if (strcmp(fname + stemlen, ".ready") != 0)
                  continue;

            /* keep the first candidate, or any that sorts before the best */
            if (!have_candidate || strcmp(fname, oldest) < 0)
            {
                  strcpy(oldest, fname);
                  have_candidate = true;
            }
      }
      FreeDir(dir);

      if (!have_candidate)
            return false;

      /* truncate off the .ready */
      oldest[strlen(oldest) - 6] = '\0';
      strcpy(xlog, oldest);

      return true;
}

/*
 * pgarch_archiveDone
 *
 * Record that an xlog file has been archived successfully by renaming its
 * status file from NNN.ready to NNN.done.  Eventually, a checkpoint process
 * will notice this and delete both the NNN.done file and the xlog file
 * itself.
 *
 * A failed rename is reported as a WARNING; nothing else is done about it.
 */
static void
pgarch_archiveDone(char *xlog)
{
      char        ready_path[MAXPGPATH];
      char        done_path[MAXPGPATH];

      StatusFilePath(ready_path, xlog, ".ready");
      StatusFilePath(done_path, xlog, ".done");

      if (rename(ready_path, done_path) >= 0)
            return;

      ereport(WARNING,
                  (errcode_for_file_access(),
                   errmsg("could not rename file \"%s\" to \"%s\": %m",
                              ready_path, done_path)));
}

Generated by  Doxygen 1.6.0   Back to index