Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions

orindxpath.c

/*-------------------------------------------------------------------------
 *
 * orindxpath.c
 *      Routines to find index paths that match a set of OR clauses
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.90 2009/06/11 14:48:59 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "optimizer/cost.h"
#include "optimizer/paths.h"
#include "optimizer/restrictinfo.h"


/*----------
 * create_or_index_quals
 *      Examine join OR-of-AND quals to see if any useful restriction OR
 *      clauses can be extracted.  If so, add them to the query.
 *
 * Although a join clause must reference other relations overall,
 * an OR of ANDs clause might contain sub-clauses that reference just this
 * relation and can be used to build a restriction clause.
 * For example consider
 *          WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
 * We can transform this into
 *          WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
 *                AND (a.x = 42 OR a.x = 44)
 *                AND (b.y = 43 OR b.z = 45);
 * which opens the potential to build OR indexscans on a and b.  In essence
 * this is a partial transformation to CNF (AND of ORs format).  It is not
 * complete, however, because we do not unravel the original OR --- doing so
 * would usually bloat the qualification expression to little gain.
 *
 * The added quals are partially redundant with the original OR, and therefore
 * will cause the size of the joinrel to be underestimated when it is finally
 * formed.  (This would be true of a full transformation to CNF as well; the
 * fault is not really in the transformation, but in clauselist_selectivity's
 * inability to recognize redundant conditions.)  To minimize the collateral
 * damage, we want to minimize the number of quals added.  Therefore we do
 * not add every possible extracted restriction condition to the query.
 * Instead, we search for the single restriction condition that generates
 * the most useful (cheapest) OR indexscan, and add only that condition.
 * This is a pretty ad-hoc heuristic, but quite useful.
 *
 * We can then compensate for the redundancy of the added qual by poking
 * the recorded selectivity of the original OR clause, thereby ensuring
 * the added qual doesn't change the estimated size of the joinrel when
 * it is finally formed.  This is a MAJOR HACK: it depends on the fact
 * that clause selectivities are cached and on the fact that the same
 * RestrictInfo node will appear in every joininfo list that might be used
 * when the joinrel is formed.      And it probably isn't right in cases where
 * the size estimation is nonlinear (i.e., outer and IN joins).  But it
 * beats not doing anything.
 *
 * NOTE: one might think this messiness could be worked around by generating
 * the indexscan path with a small path->rows value, and not touching the
 * rel's baserestrictinfo or rel->rows.  However, that does not work.
 * The optimizer's fundamental design assumes that every general-purpose
 * Path for a given relation generates the same number of rows.  Without
 * this assumption we'd not be able to optimize solely on the cost of Paths,
 * but would have to take number of output rows into account as well.
 * (Perhaps someday that'd be worth doing, but it's a pretty big change...)
 *
 * 'rel' is the relation entry for which quals are to be created
 *
 * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
 * If no quals available, returns FALSE and doesn't change rel.
 *
 * Note: check_partial_indexes() must have been run previously.
 *----------
 */
bool
create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
{
      BitmapOrPath *bestpath = NULL;
      RestrictInfo *bestrinfo = NULL;
      List     *newrinfos;
      RestrictInfo *or_rinfo;
      Selectivity or_selec,
                        orig_selec;
      ListCell   *i;

      /*
       * Find potentially interesting OR joinclauses.
       *
       * We must ignore clauses for which the target rel is in nullable_relids;
       * that means there's an outer join below the clause and so it can't be
       * enforced at the relation scan level.
       *
       * We must also ignore clauses that are marked !is_pushed_down (ie they
       * are themselves outer-join clauses).    It would be safe to extract an
       * index condition from such a clause if we are within the nullable rather
       * than the non-nullable side of its join, but we haven't got enough
       * context here to tell which applies.    OR clauses in outer-join quals
       * aren't exactly common, so we'll let that case go unoptimized for now.
       */
      foreach(i, rel->joininfo)
      {
            RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);

            if (restriction_is_or_clause(rinfo) &&
                  rinfo->is_pushed_down &&
                  !bms_is_member(rel->relid, rinfo->nullable_relids))
            {
                  /*
                   * Use the generate_bitmap_or_paths() machinery to estimate the
                   * value of each OR clause.  We can use regular restriction
                   * clauses along with the OR clause contents to generate
                   * indexquals.    We pass outer_rel = NULL so that sub-clauses that
                   * are actually joins will be ignored.
                   */
                  List     *orpaths;
                  ListCell   *k;

                  orpaths = generate_bitmap_or_paths(root, rel,
                                                                     list_make1(rinfo),
                                                                     rel->baserestrictinfo,
                                                                     NULL);

                  /* Locate the cheapest OR path */
                  foreach(k, orpaths)
                  {
                        BitmapOrPath *path = (BitmapOrPath *) lfirst(k);

                        Assert(IsA(path, BitmapOrPath));
                        if (bestpath == NULL ||
                              path->path.total_cost < bestpath->path.total_cost)
                        {
                              bestpath = path;
                              bestrinfo = rinfo;
                        }
                  }
            }
      }

      /* Fail if no suitable clauses found */
      if (bestpath == NULL)
            return false;

      /*
       * Convert the path's indexclauses structure to a RestrictInfo tree. We
       * include any partial-index predicates so as to get a reasonable
       * representation of what the path is actually scanning.
       */
      newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
                                                                          true, true);

      /* It's possible we get back something other than a single OR clause */
      if (list_length(newrinfos) != 1)
            return false;
      or_rinfo = (RestrictInfo *) linitial(newrinfos);
      Assert(IsA(or_rinfo, RestrictInfo));
      if (!restriction_is_or_clause(or_rinfo))
            return false;

      /*
       * OK, add it to the rel's restriction list.
       */
      rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);

      /*
       * Adjust the original OR clause's cached selectivity to compensate for
       * the selectivity of the added (but redundant) lower-level qual. This
       * should result in the join rel getting approximately the same rows
       * estimate as it would have gotten without all these shenanigans. (XXX
       * major hack alert ... this depends on the assumption that the
       * selectivity will stay cached ...)
       */
      or_selec = clause_selectivity(root, (Node *) or_rinfo,
                                                  0, JOIN_INNER, NULL);
      if (or_selec > 0 && or_selec < 1)
      {
            orig_selec = clause_selectivity(root, (Node *) bestrinfo,
                                                            0, JOIN_INNER, NULL);
            bestrinfo->norm_selec = orig_selec / or_selec;
            /* clamp result to sane range */
            if (bestrinfo->norm_selec > 1)
                  bestrinfo->norm_selec = 1;
            /* It isn't an outer join clause, so no need to adjust outer_selec */
      }

      /* Tell caller to recompute partial index status and rowcount estimate */
      return true;
}

Generated by  Doxygen 1.6.0   Back to index