Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions  Download package

encode.c

/*-------------------------------------------------------------------------
 *
 * encode.c
 *      Various data encoding/decoding things.
 *
 * Copyright (c) 2001-2009, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23.2.1 2009/08/30 16:53:37 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>

#include "utils/builtins.h"


/*
 * Callback table describing one encoding.  binary_encode() and
 * binary_decode() obtain a pointer to one of these from pg_find_encoding()
 * and call through it.
 */
struct pg_encoding
{
      /* size estimate used to allocate the result buffer before encoding */
      unsigned    (*encode_len) (const char *data, unsigned dlen);
      /* size estimate used to allocate the result buffer before decoding */
      unsigned    (*decode_len) (const char *data, unsigned dlen);
      /* encode dlen bytes of data into res; returns the number of bytes written */
      unsigned    (*encode) (const char *data, unsigned dlen, char *res);
      /* decode dlen bytes of data into res; returns the number of bytes written */
      unsigned    (*decode) (const char *data, unsigned dlen, char *res);
};

/* Forward declaration: maps an encoding name to its callback table, or NULL. */
static const struct pg_encoding *pg_find_encoding(const char *name);

/*
 * SQL functions.
 */

/*
 * binary_encode
 *      SQL-callable function: render the bytea in argument 0 as text, using
 *      the encoding whose name is given in argument 1.
 *
 * An unknown encoding name raises ERROR.  The result buffer is sized from
 * the encoding's length estimate; if the encoder reports having written
 * more than that, memory past the buffer has already been overwritten, so
 * the condition is reported as FATAL.
 */
Datum
binary_encode(PG_FUNCTION_ARGS)
{
      bytea    *input = PG_GETARG_BYTEA_P(0);
      Datum       encname = PG_GETARG_DATUM(1);
      int               input_len = VARSIZE(input) - VARHDRSZ;
      char     *encname_str = TextDatumGetCString(encname);
      const struct pg_encoding *codec = pg_find_encoding(encname_str);
      text     *output;
      int               estimate;
      int               written;

      if (!codec)
            ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("unrecognized encoding: \"%s\"", encname_str)));

      /* allocate for the estimated size, then let the encoder fill it */
      estimate = codec->encode_len(VARDATA(input), input_len);
      output = palloc(VARHDRSZ + estimate);
      written = codec->encode(VARDATA(input), input_len, VARDATA(output));

      /* Make this FATAL 'cause we've trodden on memory ... */
      if (estimate < written)
            elog(FATAL, "overflow - encode estimate too small");

      /* the real length may be shorter than the estimate */
      SET_VARSIZE(output, VARHDRSZ + written);

      PG_RETURN_TEXT_P(output);
}

/*
 * binary_decode
 *      SQL-callable function: parse the text in argument 0 back into a
 *      bytea, using the encoding whose name is given in argument 1.
 *
 * An unknown encoding name raises ERROR.  The result buffer is sized from
 * the encoding's length estimate; if the decoder reports having written
 * more than that, memory past the buffer has already been overwritten, so
 * the condition is reported as FATAL.
 */
Datum
binary_decode(PG_FUNCTION_ARGS)
{
      text     *input = PG_GETARG_TEXT_P(0);
      Datum       encname = PG_GETARG_DATUM(1);
      int               input_len = VARSIZE(input) - VARHDRSZ;
      char     *encname_str = TextDatumGetCString(encname);
      const struct pg_encoding *codec = pg_find_encoding(encname_str);
      bytea    *output;
      int               estimate;
      int               written;

      if (!codec)
            ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("unrecognized encoding: \"%s\"", encname_str)));

      /* allocate for the estimated size, then let the decoder fill it */
      estimate = codec->decode_len(VARDATA(input), input_len);
      output = palloc(VARHDRSZ + estimate);
      written = codec->decode(VARDATA(input), input_len, VARDATA(output));

      /* Make this FATAL 'cause we've trodden on memory ... */
      if (estimate < written)
            elog(FATAL, "overflow - decode estimate too small");

      /* the real length may be shorter than the estimate */
      SET_VARSIZE(output, VARHDRSZ + written);

      PG_RETURN_BYTEA_P(output);
}


/*
 * HEX
 */

/* Output digit for each 4-bit value; hex_encode() always emits lowercase. */
static const char *hextbl = "0123456789abcdef";

/*
 * Maps a 7-bit character code to the value of that hexadecimal digit, or -1
 * if the character is not a hex digit.  Both 'A'-'F' and 'a'-'f' map to
 * 10-15.  Each row below covers 16 consecutive character codes.
 */
static const int8 hexlookup[128] = {
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
      -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

/*
 * hex_encode
 *      Write two lowercase hex digits to dst for each of the len bytes at
 *      src, high nibble first.  No terminator is written.
 *
 * Returns the number of characters produced, which is always len * 2.
 */
static unsigned
hex_encode(const char *src, unsigned len, char *dst)
{
      const char *const stop = src + len;
      char     *out = dst;

      for (; src < stop; src++)
      {
            char        byte = *src;

            /* masking with 0xF makes the result independent of char signedness */
            out[0] = hextbl[(byte >> 4) & 0xF];
            out[1] = hextbl[byte & 0xF];
            out += 2;
      }

      return len * 2;
}

/*
 * get_hex
 *      Return the numeric value (0-15) of the hexadecimal digit c.
 *
 * Raises ERROR if c is not a hex digit.  Only character codes 1..126 are
 * looked up in hexlookup; anything outside that range is rejected outright.
 */
static char
get_hex(char c)
{
      int               digit;

      if (c <= 0 || c >= 127)
            digit = -1;
      else
            digit = hexlookup[(unsigned char) c];

      if (digit < 0)
            ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("invalid hexadecimal digit: \"%c\"", c)));

      return (char) digit;
}

/*
 * hex_decode
 *      Convert len characters of hex text at src into bytes at dst.
 *
 * Space, newline, tab and carriage return are skipped when they appear
 * where the first digit of a pair is expected; the second digit must follow
 * its first digit immediately.  Raises ERROR (via get_hex) on a non-hex
 * character, or here when the input ends after the first digit of a pair.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
hex_decode(const char *src, unsigned len, char *dst)
{
      const char *in = src;
      const char *const in_end = src + len;
      char     *out = dst;

      while (in < in_end)
      {
            char        ch = *in++;
            char        hi;
            char        lo;

            if (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r')
                  continue;

            hi = get_hex(ch) << 4;

            /* a high nibble with nothing after it means the input is truncated */
            if (in >= in_end)
                  ereport(ERROR,
                              (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                          errmsg("invalid hexadecimal data: odd number of digits")));

            lo = get_hex(*in++);
            *out++ = hi | lo;
      }

      return out - dst;
}

/*
 * hex_enc_len
 *      Result size for hex-encoding srclen bytes: two characters per byte.
 *      The data pointer is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
      return srclen * 2;
}

/*
 * hex_dec_len
 *      Result size for hex-decoding srclen characters: one byte per two
 *      characters, rounded down.  The data pointer is not examined, so
 *      any whitespace in the input is counted too, which makes this an
 *      upper bound.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
      return srclen / 2;
}

/*
 * BASE64
 */

/* Output character for each 6-bit value (0-63); indexed by b64_encode(). */
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Inverse of _base64: maps a 7-bit character code to its 6-bit value, or -1
 * if the character is not part of the base64 alphabet.  '=' is not in the
 * table; b64_decode() handles padding separately.  Each row below covers 16
 * consecutive character codes.
 */
static const int8 b64lookup[128] = {
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
      -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};

/*
 * b64_encode
 *      Base64-encode len bytes from src into dst.
 *
 * Input bytes are packed three at a time into a 24-bit group and emitted as
 * four characters.  A newline is written after every 76 output characters.
 * A trailing partial group of one or two bytes is padded with '='.
 *
 * Returns the number of characters written to dst (no terminator).
 */
static unsigned
b64_encode(const char *src, unsigned len, char *dst)
{
      const char *in = src;
      const char *const in_end = src + len;
      char     *out = dst;
      char     *line_limit = dst + 76;
      uint32            group = 0;
      int               nbytes = 0;       /* bytes currently held in group */

      for (; in < in_end; in++)
      {
            /* first byte lands in bits 16-23, second in 8-15, third in 0-7 */
            group |= (unsigned char) *in << ((2 - nbytes) << 3);
            nbytes++;

            /* a full group: write it out */
            if (nbytes == 3)
            {
                  out[0] = _base64[(group >> 18) & 0x3f];
                  out[1] = _base64[(group >> 12) & 0x3f];
                  out[2] = _base64[(group >> 6) & 0x3f];
                  out[3] = _base64[group & 0x3f];
                  out += 4;

                  nbytes = 0;
                  group = 0;
            }

            /* break the line once 76 characters have been written on it */
            if (out >= line_limit)
            {
                  *out++ = '\n';
                  line_limit = out + 76;
            }
      }

      /* leftover one or two bytes: emit what we have and pad with '=' */
      if (nbytes != 0)
      {
            *out++ = _base64[(group >> 18) & 0x3f];
            *out++ = _base64[(group >> 12) & 0x3f];
            *out++ = (nbytes == 2) ? _base64[(group >> 6) & 0x3f] : '=';
            *out++ = '=';
      }

      return out - dst;
}

/*
 * b64_decode
 *      Decode len characters of base64 text at src into bytes at dst.
 *
 * Space, tab, newline and carriage return are ignored wherever they occur.
 * The first '=' must be the third or fourth symbol of its group and
 * determines how many bytes that group (and any later one) yields.
 * Raises ERROR for a misplaced '=', for a character outside the base64
 * alphabet, or if the input ends in the middle of a four-symbol group.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
b64_decode(const char *src, unsigned len, char *dst)
{
      const char *in = src;
      const char *const in_end = src + len;
      char     *out = dst;
      uint32            group = 0;
      int               nsyms = 0;        /* symbols collected in group */
      int               padmode = 0;      /* 0 = no '=' seen, 1 = "xx==", 2 = "xxx=" */

      for (; in < in_end; in++)
      {
            char        ch = *in;
            int               sixbits;

            if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
                  continue;

            if (ch != '=')
            {
                  /* only character codes 1..126 are looked up in the table */
                  sixbits = (ch > 0 && ch < 127) ? b64lookup[(unsigned char) ch] : -1;
                  if (sixbits < 0)
                        ereport(ERROR,
                                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                     errmsg("invalid symbol")));
            }
            else
            {
                  /* end sequence: only the first '=' decides the padding mode */
                  if (padmode == 0)
                  {
                        if (nsyms == 2)
                              padmode = 1;
                        else if (nsyms == 3)
                              padmode = 2;
                        else
                              ereport(ERROR,
                                          (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                           errmsg("unexpected \"=\"")));
                  }
                  sixbits = 0;
            }

            /* add it to buffer */
            group = (group << 6) + sixbits;
            nsyms++;

            if (nsyms == 4)
            {
                  /* padding suppresses the trailing one or two bytes */
                  *out++ = (group >> 16) & 255;
                  if (padmode != 1)
                        *out++ = (group >> 8) & 255;
                  if (padmode == 0)
                        *out++ = group & 255;

                  group = 0;
                  nsyms = 0;
            }
      }

      if (nsyms != 0)
            ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("invalid end sequence")));

      return out - dst;
}


/*
 * b64_enc_len
 *      Size estimate for base64-encoding srclen bytes; binary_encode()
 *      uses it to allocate the result buffer.  The data pointer is not
 *      examined.
 *
 * 3 bytes will be converted to 4, linefeed after 76 chars.
 *
 * The value equals (srclen + 2) * 4 / 3 + srclen / 57, but the first term
 * is evaluated in quotient/remainder form.  Multiplying (srclen + 2) by 4
 * directly wraps around in unsigned arithmetic once srclen reaches
 * 2^30 - 2, producing an estimate far smaller than what b64_encode()
 * writes.  In this form no intermediate wraps as long as the final result
 * itself fits in an unsigned.
 */
static unsigned
b64_enc_len(const char *src, unsigned srclen)
{
      /* number of source bytes that fill one 76-character output line */
      const unsigned bytes_per_line = 76 * 3 / 4;

      /* floor((4 * srclen + 8) / 3), split so that nothing overflows */
      unsigned    encoded = (srclen / 3) * 4 + ((srclen % 3) * 4 + 8) / 3;

      return encoded + srclen / bytes_per_line;
}

/*
 * b64_dec_len
 *      Size estimate for base64-decoding srclen characters: three bytes
 *      for every four characters, rounded down.  binary_decode() uses it
 *      to allocate the result buffer.  The data pointer is not examined.
 *
 * The value equals (srclen * 3) / 4, evaluated in quotient/remainder form
 * so that the multiplication cannot wrap around in unsigned arithmetic for
 * large srclen (which would yield an estimate that is too small).
 */
static unsigned
b64_dec_len(const char *src, unsigned srclen)
{
      return (srclen / 4) * 3 + ((srclen % 4) * 3) / 4;
}

/*
 * Escape
 * Minimally escape bytea to text.
 * De-escape text to bytea.
 *
 * We must escape zero bytes and high-bit-set bytes to avoid generating
 * text that might be invalid in the current encoding, or that might
 * change to something else if passed through an encoding conversion
 * (leading to failing to de-escape to the original bytea value).
 * Also of course backslash itself has to be escaped.
 *
 * De-escaping processes \\ and any \### octal
 */

/* VAL: numeric value of the digit character CH. */
#define VAL(CH)               ((CH) - '0')
/* DIG: digit character for the small non-negative number VAL (used with 0-7 here). */
#define DIG(VAL)        ((VAL) + '0')

/*
 * esc_encode
 *      Escape srclen bytes from src into dst.
 *
 * A backslash becomes two backslashes; a zero byte or a byte with the high
 * bit set becomes a backslash followed by three octal digits; every other
 * byte is copied unchanged.
 *
 * Returns the number of characters written to dst (no terminator).
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
      const char *const stop = src + srclen;
      char     *out = dst;

      for (; src < stop; src++)
      {
            unsigned char byte = (unsigned char) *src;

            if (byte == '\\')
            {
                  *out++ = '\\';
                  *out++ = '\\';
            }
            else if (byte != '\0' && !IS_HIGHBIT_SET(byte))
            {
                  *out++ = byte;
            }
            else
            {
                  /* \ooo: two bits, then three, then three */
                  *out++ = '\\';
                  *out++ = DIG(byte >> 6);
                  *out++ = DIG((byte >> 3) & 7);
                  *out++ = DIG(byte & 7);
            }
      }

      return out - dst;
}

/*
 * esc_decode
 *      Undo escaping: convert srclen characters at src into bytes at dst.
 *
 * Recognizes a backslash followed by three octal digits (first digit 0-3)
 * and a doubled backslash; all other characters are copied as-is.  Any
 * other use of backslash raises ERROR.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
esc_decode(const char *src, unsigned srclen, char *dst)
{
      const char *const stop = src + srclen;
      char     *out = dst;

      while (src < stop)
      {
            if (src[0] != '\\')
            {
                  *out++ = *src++;
                  continue;
            }

            if (stop - src > 3 &&
                  (src[1] >= '0' && src[1] <= '3') &&
                  (src[2] >= '0' && src[2] <= '7') &&
                  (src[3] >= '0' && src[3] <= '7'))
            {
                  /* backslash + three octal digits = one byte */
                  int               octal = (VAL(src[1]) * 8 + VAL(src[2])) * 8;

                  *out++ = octal + VAL(src[3]);
                  src += 4;
            }
            else if (stop - src > 1 && src[1] == '\\')
            {
                  /* two backslashes = one backslash */
                  *out++ = '\\';
                  src += 2;
            }
            else
            {
                  /*
                   * One backslash, not followed by ### valid octal. Should never
                   * get here, since esc_dec_len does same check.
                   */
                  ereport(ERROR,
                              (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                               errmsg("invalid input syntax for type bytea")));
            }
      }

      return out - dst;
}

/*
 * esc_enc_len
 *      Compute how many characters esc_encode() produces for the srclen
 *      bytes at src: 4 for a zero or high-bit-set byte, 2 for a backslash,
 *      1 for anything else.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
      const char *const stop = src + srclen;
      int               total = 0;

      for (; src < stop; src++)
      {
            if (*src == '\\')
                  total += 2;
            else if (*src != '\0' && !IS_HIGHBIT_SET(*src))
                  total += 1;
            else
                  total += 4;
      }

      return total;
}

/*
 * esc_dec_len
 *      Compute how many bytes esc_decode() produces for the srclen
 *      characters at src, applying the same syntax checks.
 *
 * Each plain character, each backslash + three-octal-digit sequence and
 * each doubled backslash counts as one output byte.  Any other use of
 * backslash raises ERROR.
 */
static unsigned
esc_dec_len(const char *src, unsigned srclen)
{
      const char *const stop = src + srclen;
      int               total = 0;

      while (src < stop)
      {
            if (src[0] != '\\')
            {
                  src += 1;
            }
            else if (stop - src > 3 &&
                         (src[1] >= '0' && src[1] <= '3') &&
                         (src[2] >= '0' && src[2] <= '7') &&
                         (src[3] >= '0' && src[3] <= '7'))
            {
                  /* backslash + valid octal */
                  src += 4;
            }
            else if (stop - src > 1 && src[1] == '\\')
            {
                  /* two backslashes = backslash */
                  src += 2;
            }
            else
            {
                  /* one backslash, not followed by ### valid octal */
                  ereport(ERROR,
                              (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                               errmsg("invalid input syntax for type bytea")));
            }

            total += 1;
      }

      return total;
}

/*
 * Common
 */

/*
 * Table of supported encodings, searched by pg_find_encoding().  Each entry
 * pairs a name with its callbacks in struct pg_encoding field order
 * (encode_len, decode_len, encode, decode).  The entry with a NULL name
 * terminates the list.
 */
static const struct
{
      const char *name;
      struct pg_encoding enc;
}     enclist[] =

{
      {
            "hex",
            {
                  hex_enc_len, hex_dec_len, hex_encode, hex_decode
            }
      },
      {
            "base64",
            {
                  b64_enc_len, b64_dec_len, b64_encode, b64_decode
            }
      },
      {
            "escape",
            {
                  esc_enc_len, esc_dec_len, esc_encode, esc_decode
            }
      },
      {
            /* end-of-list sentinel */
            NULL,
            {
                  NULL, NULL, NULL, NULL
            }
      }
};

/*
 * pg_find_encoding
 *      Return the callback table of the enclist entry whose name matches
 *      the given name according to pg_strcasecmp(), or NULL if no entry
 *      matches.
 */
static const struct pg_encoding *
pg_find_encoding(const char *name)
{
      int               idx = 0;

      /* the list ends at the entry whose name is NULL */
      while (enclist[idx].name != NULL)
      {
            if (pg_strcasecmp(enclist[idx].name, name) == 0)
                  return &enclist[idx].enc;
            idx++;
      }

      return NULL;
}

Generated by  Doxygen 1.6.0   Back to index