Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions  Download package

ts_utils.c
/*-------------------------------------------------------------------------
 *
 * ts_utils.c
 *          various support functions
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.13 2009/01/01 17:23:48 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <ctype.h>

#include "miscadmin.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"


/*
 * Build the full path name of a tsearch configuration file from its base
 * name and extension.
 *
 * 'basename' is treated as user-supplied and is validated here: any
 * character other than a-z, 0-9 or underscore raises an ERROR, which
 * prevents pathname attacks.  'extension' is assumed to be safe and is
 * not checked.
 *
 * The result is a palloc'd buffer of MAXPGPATH bytes holding
 * "<sharepath>/tsearch_data/<basename>.<extension>"; snprintf() bounds the
 * write, so a longer path is cut off at the buffer size.
 */
char *
get_tsearch_config_filename(const char *basename,
                            const char *extension)
{
    static const char allowed[] = "abcdefghijklmnopqrstuvwxyz0123456789_";
    char        sharedir[MAXPGPATH];
    char       *path;

    /*
     * Keep the policy tight: only lowercase ASCII letters, digits and
     * underscores are accepted.  We must not allow access to anything
     * outside the tsearch_data directory, so '/' has to be rejected, and
     * on some platforms '\' and ':' are risky too.  Uppercase could behave
     * differently on case-sensitive versus case-insensitive filesystems,
     * and non-ASCII characters carry risks of their own.
     *
     * strspn() stops at the first character outside the allowed set; the
     * name is acceptable only if that character is the terminating NUL.
     */
    if (basename[strspn(basename, allowed)] != '\0')
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("invalid text search configuration file name \"%s\"",
                        basename)));

    get_share_path(my_exec_path, sharedir);

    path = palloc(MAXPGPATH);
    snprintf(path, MAXPGPATH, "%s/tsearch_data/%s.%s",
             sharedir, basename, extension);

    return path;
}

/*
 * qsort()/bsearch() comparator for arrays of C-string pointers.  Each
 * argument points at a "char *" array element; the strings those elements
 * point to are ordered with strcmp().
 */
static int
comparestr(const void *a, const void *b)
{
    const char *left = *(const char *const *) a;
    const char *right = *(const char *const *) b;

    return strcmp(left, right);
}

/*
 * Load a stop-word file into 's'.
 *
 * 'fname' is the base name of the file; the path is obtained from
 * get_tsearch_config_filename() with extension "stop".  When 'fname' is
 * NULL or empty, no file is read and 's' ends up empty (len 0, stop NULL).
 *
 * Each line is cut at its first whitespace character, and lines that are
 * empty after that are ignored.  If 'wordop' is given, every remaining word
 * is passed through it; wordop may either modify its input in place and
 * return it, or return a newly palloc'd string, in which case the original
 * line is freed here.
 *
 * The resulting array is sorted with comparestr so that it can be binary
 * searched.  Failure to open the file raises an ERROR.
 */
void
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
{
    char      **words = NULL;
    int         capacity = 0;   /* allocated slots in words[] */
    char       *path;
    char       *line;
    tsearch_readline_state trst;

    s->len = 0;

    /* No file requested: leave the list empty */
    if (fname == NULL || *fname == '\0')
    {
        s->stop = NULL;
        return;
    }

    path = get_tsearch_config_filename(fname, "stop");

    if (!tsearch_readline_begin(&trst, path))
        ereport(ERROR,
                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                 errmsg("could not open stop-word file \"%s\": %m",
                        path)));

    while ((line = tsearch_readline(&trst)) != NULL)
    {
        char       *cut;
        char       *word;

        /*
         * Terminate the line at the first whitespace character, advancing
         * one (possibly multibyte) character at a time.
         */
        for (cut = line; *cut != '\0' && !t_isspace(cut); cut += pg_mblen(cut))
            ;
        *cut = '\0';

        /* Nothing left on this line: discard it */
        if (line[0] == '\0')
        {
            pfree(line);
            continue;
        }

        /* Make room: start with 64 slots, then double on each overflow */
        if (s->len >= capacity)
        {
            if (capacity == 0)
            {
                capacity = 64;
                words = (char **) palloc(sizeof(char *) * capacity);
            }
            else
            {
                capacity *= 2;
                words = (char **) repalloc(words, sizeof(char *) * capacity);
            }
        }

        word = line;
        if (wordop)
        {
            word = wordop(line);
            /* wordop produced a separate string, so the raw line is garbage */
            if (word != line)
                pfree(line);
        }

        words[s->len] = word;
        s->len++;
    }

    tsearch_readline_end(&trst);
    pfree(path);

    s->stop = words;

    /* Sort to allow binary searching */
    if (words != NULL && s->len > 0)
        qsort(words, s->len, sizeof(char *), comparestr);
}

/*
 * Report whether 'key' is one of the words in stop list 's'.
 *
 * The lookup is a bsearch() over s->stop using comparestr, so the array
 * must already be sorted in that order.  An empty or unallocated list
 * never matches.
 */
bool
searchstoplist(StopList *s, char *key)
{
    if (s->stop == NULL || s->len <= 0)
        return false;

    return bsearch(&key, s->stop, s->len,
                   sizeof(char *), comparestr) != NULL;
}

Generated by  Doxygen 1.6.0   Back to index