Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions

stringutils.c

/*
 * psql - the PostgreSQL interactive terminal
 *
 * Copyright (c) 2000-2009, PostgreSQL Global Development Group
 *
 * $PostgreSQL$
 */
#include "postgres_fe.h"

#include <ctype.h>

#include "common.h"
#include "stringutils.h"


static void strip_quotes(char *source, char quote, char escape, int encoding);


/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -                  string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -        set of non-whitespace separator characters (or NULL)
 * quote -        set of characters that can quote a token (NULL if none)
 * escape -       character that can quote quotes (0 if none)
 * e_strings -    if TRUE, treat E'...' syntax as a valid token
 * del_quotes - if TRUE, strip quotes from the returned token, else return
 *                      it exactly as found in the string
 * encoding -     the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both TRUE is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
            const char *whitespace,
            const char *delim,
            const char *quote,
            char escape,
            bool e_strings,
            bool del_quotes,
            int encoding)
{
      /*
       * Parser state carried from one call to the next.  Every token handed
       * back to the caller points into 'storage', so a token is only valid
       * until storage is released: either by the next call that passes a
       * non-NULL s, or by the call that reports end of input (returns NULL).
       * Because the state is static, only one string can be tokenized at a
       * time.
       */
      static char *storage = NULL;/* private, writable copy of the caller's
                                                 * string */
      static char *string = NULL; /* position in storage at which the next call
                                                 * resumes */

      char     *start;                    /* first byte of the token being returned */
      char     *p;                        /* ends up just past the token's last byte */
      char        thisquote = '\0'; /* opening quote char, if token is quoted */
      bool        quoted = false;         /* did we scan a quoted token? */

      if (s)
      {
            size_t            len = strlen(s);
            char     *fresh;

            /*
             * We may need extra space to insert delimiter nulls for adjacent
             * tokens.  2X the space is a gross overestimate, but it's unlikely
             * that this code will be used on huge strings anyway.
             *
             * Copy into the new buffer before releasing the old one, so that a
             * caller passing a pointer into the previous buffer (for instance a
             * token we returned earlier) does not make us read freed memory.
             */
            fresh = pg_malloc(2 * len + 1);
            memcpy(fresh, s, len + 1);
            free(storage);
            storage = fresh;
            string = storage;
      }

      if (!storage)
            return NULL;

      /* skip leading whitespace */
      start = string + strspn(string, whitespace);

      /* end of string reached? */
      if (*start == '\0')
      {
            /* technically we don't need to free here, but we're nice */
            free(storage);
            storage = NULL;
            string = NULL;
            return NULL;
      }

      if (delim && strchr(delim, *start))
      {
            /* a delimiter character is always a one-character token */
            p = start + 1;
      }
      else
      {
            /* check for E string */
            p = start;
            if (e_strings &&
                  (*p == 'E' || *p == 'e') &&
                  p[1] == '\'')
            {
                  quote = "'";
                  escape = '\\';          /* if std strings before, not any more */
                  p++;
            }

            if (quote && strchr(quote, *p))
            {
                  /* okay, we have a quoted token, now scan for the closer */
                  quoted = true;
                  thisquote = *p++;

                  while (*p)
                  {
                        int               mblen;

                        if (*p == escape && p[1] != '\0')
                              p++;        /* process escaped anything */
                        else if (*p == thisquote && p[1] == thisquote)
                              p++;        /* process doubled quote */
                        else if (*p == thisquote)
                        {
                              p++;        /* skip trailing quote */
                              break;
                        }

                        /*
                         * Step over one character.  *p is known to be non-NUL
                         * here, so we always consume at least one byte, but we
                         * must not trust the encoding's length for the rest: a
                         * truncated multibyte character at the end of the input
                         * would otherwise carry us past the terminating NUL.
                         */
                        mblen = PQmblen(p, encoding);
                        do
                              p++;
                        while (--mblen > 0 && *p != '\0');
                  }
            }
            else
            {
                  /*
                   * Otherwise no quoting character.  Scan till next whitespace,
                   * delimiter or quote.  NB: at this point, *start is known not
                   * to be '\0', whitespace, delim, or quote, so we will consume
                   * at least one character.
                   */
                  size_t            span = strcspn(start, whitespace);

                  if (delim)
                  {
                        size_t            upto = strcspn(start, delim);

                        if (upto < span)
                              span = upto;
                  }

                  if (quote)
                  {
                        size_t            upto = strcspn(start, quote);

                        if (upto < span)
                              span = upto;
                  }

                  p = start + span;
            }
      }

      /*
       * If not at end of string, we need to insert a null to terminate the
       * returned token.  We can just overwrite the next character if it
       * happens to be in the whitespace set ... otherwise move over the rest
       * of the string to make room.  (This is why we allocated extra space
       * above).
       */
      if (*p != '\0')
      {
            if (!strchr(whitespace, *p))
                  memmove(p + 1, p, strlen(p) + 1);
            *p = '\0';
            string = p + 1;
      }
      else
      {
            /* at end of string, so no extra work */
            string = p;
      }

      /* Clean up a quoted token if caller wants that */
      if (quoted && del_quotes)
            strip_quotes(start, thisquote, escape, encoding);

      return start;
}


/*
 * strip_quotes
 *
 * Remove quotes from the string at *source.  Leading and trailing occurrences
 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
 * significance of next character.
 *
 * Note that the source string is overwritten in-place.
 */
static void
strip_quotes(char *source, char quote, char escape, int encoding)
{
      char     *src;                      /* read cursor */
      char     *dst;                      /* write cursor; never gets ahead of src */

      psql_assert(source);
      psql_assert(quote);

      src = dst = source;

      if (*src && *src == quote)
            src++;                              /* skip leading quote */

      while (*src)
      {
            char        c = *src;
            int               i;

            if (c == quote && src[1] == '\0')
                  break;                        /* skip trailing quote */
            else if (c == quote && src[1] == quote)
                  src++;                        /* process doubled quote */
            else if (c == escape && src[1] != '\0')
                  src++;                        /* process escaped character */

            /*
             * Copy one character.  *src is non-NUL on every path that reaches
             * this point, so the first byte is always copied.  The remaining
             * bytes of a multibyte character are copied only while they really
             * exist: if the string ends in a truncated multibyte character we
             * must stop at the NUL rather than copy (and then keep scanning)
             * past the end of the string.
             */
            i = PQmblen(src, encoding);
            do
                  *dst++ = *src++;
            while (--i > 0 && *src != '\0');
      }

      *dst = '\0';
}

Generated by  Doxygen 1.6.0   Back to index