Logo Search packages:      
Sourcecode: postgresql-8.4 version File versions

nodeHashjoin.c

/*-------------------------------------------------------------------------
 *
 * nodeHashjoin.c
 *      Routines to handle hash join nodes
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.101 2009/06/11 14:48:57 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "executor/executor.h"
#include "executor/hashjoin.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "utils/memutils.h"


/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
#define HASHJOIN_IS_OUTER(hjstate)  ((hjstate)->hj_NullInnerTupleSlot != NULL)

static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
                                      HashJoinState *hjstate,
                                      uint32 *hashvalue);
static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
                                      BufFile *file,
                                      uint32 *hashvalue,
                                      TupleTableSlot *tupleSlot);
static int  ExecHashJoinNewBatch(HashJoinState *hjstate);


/* ----------------------------------------------------------------
 *          ExecHashJoin
 *
 *          This function implements the Hybrid Hashjoin algorithm.
 *
 *          Note: the relation we build hash table on is the "inner"
 *                  the other one is "outer".
 * ----------------------------------------------------------------
 */
TupleTableSlot *                    /* return: a tuple or NULL */
ExecHashJoin(HashJoinState *node)
{
      EState         *estate;
      PlanState  *outerNode;
      HashState  *hashNode;
      List     *joinqual;
      List     *otherqual;
      TupleTableSlot *inntuple;
      ExprContext *econtext;
      ExprDoneCond isDone;
      HashJoinTable hashtable;
      HashJoinTuple curtuple;
      TupleTableSlot *outerTupleSlot;
      uint32            hashvalue;
      int               batchno;

      /*
       * get information from HashJoin node
       */
      estate = node->js.ps.state;
      joinqual = node->js.joinqual;
      otherqual = node->js.ps.qual;
      hashNode = (HashState *) innerPlanState(node);
      outerNode = outerPlanState(node);

      /*
       * get information from HashJoin state
       */
      hashtable = node->hj_HashTable;
      econtext = node->js.ps.ps_ExprContext;

      /*
       * Check to see if we're still projecting out tuples from a previous join
       * tuple (because there is a function-returning-set in the projection
       * expressions).  If so, try to project another one.
       */
      if (node->js.ps.ps_TupFromTlist)
      {
            TupleTableSlot *result;

            result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
            if (isDone == ExprMultipleResult)
                  return result;
            /* Done with that source tuple... */
            node->js.ps.ps_TupFromTlist = false;
      }

      /*
       * Reset per-tuple memory context to free any expression evaluation
       * storage allocated in the previous tuple cycle.  Note this can't happen
       * until we're done projecting out tuples from a join tuple.
       */
      ResetExprContext(econtext);

      /*
       * if this is the first call, build the hash table for inner relation
       */
      if (hashtable == NULL)
      {
            /*
             * If the outer relation is completely empty, we can quit without
             * building the hash table.  However, for an inner join it is only a
             * win to check this when the outer relation's startup cost is less
             * than the projected cost of building the hash table.      Otherwise it's
             * best to build the hash table first and see if the inner relation is
             * empty.  (When it's an outer join, we should always make this check,
             * since we aren't going to be able to skip the join on the strength
             * of an empty inner relation anyway.)
             *
             * If we are rescanning the join, we make use of information gained on
             * the previous scan: don't bother to try the prefetch if the previous
             * scan found the outer relation nonempty.      This is not 100% reliable
             * since with new parameters the outer relation might yield different
             * results, but it's a good heuristic.
             *
             * The only way to make the check is to try to fetch a tuple from the
             * outer plan node.  If we succeed, we have to stash it away for later
             * consumption by ExecHashJoinOuterGetTuple.
             */
            if (HASHJOIN_IS_OUTER(node) ||
                  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
                   !node->hj_OuterNotEmpty))
            {
                  node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
                  if (TupIsNull(node->hj_FirstOuterTupleSlot))
                  {
                        node->hj_OuterNotEmpty = false;
                        return NULL;
                  }
                  else
                        node->hj_OuterNotEmpty = true;
            }
            else
                  node->hj_FirstOuterTupleSlot = NULL;

            /*
             * create the hash table
             */
            hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
                                                            node->hj_HashOperators);
            node->hj_HashTable = hashtable;

            /*
             * execute the Hash node, to build the hash table
             */
            hashNode->hashtable = hashtable;
            (void) MultiExecProcNode((PlanState *) hashNode);

            /*
             * If the inner relation is completely empty, and we're not doing an
             * outer join, we can quit without scanning the outer relation.
             */
            if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
                  return NULL;

            /*
             * need to remember whether nbatch has increased since we began
             * scanning the outer relation
             */
            hashtable->nbatch_outstart = hashtable->nbatch;

            /*
             * Reset OuterNotEmpty for scan.  (It's OK if we fetched a tuple
             * above, because ExecHashJoinOuterGetTuple will immediately set it
             * again.)
             */
            node->hj_OuterNotEmpty = false;
      }

      /*
       * run the hash join process
       */
      for (;;)
      {
            /*
             * If we don't have an outer tuple, get the next one
             */
            if (node->hj_NeedNewOuter)
            {
                  outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
                                                                                 node,
                                                                                 &hashvalue);
                  if (TupIsNull(outerTupleSlot))
                  {
                        /* end of join */
                        return NULL;
                  }

                  econtext->ecxt_outertuple = outerTupleSlot;
                  node->hj_NeedNewOuter = false;
                  node->hj_MatchedOuter = false;

                  /*
                   * Now we have an outer tuple; find the corresponding bucket for
                   * this tuple in the main hash table or skew hash table.
                   */
                  node->hj_CurHashValue = hashvalue;
                  ExecHashGetBucketAndBatch(hashtable, hashvalue,
                                                        &node->hj_CurBucketNo, &batchno);
                  node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
                                                                                           hashvalue);
                  node->hj_CurTuple = NULL;

                  /*
                   * Now we've got an outer tuple and the corresponding hash bucket,
                   * but it might not belong to the current batch, or it might match
                   * a skew bucket.
                   */
                  if (batchno != hashtable->curbatch &&
                        node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
                  {
                        /*
                         * Need to postpone this outer tuple to a later batch. Save it
                         * in the corresponding outer-batch file.
                         */
                        Assert(batchno > hashtable->curbatch);
                        ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
                                                        hashvalue,
                                                        &hashtable->outerBatchFile[batchno]);
                        node->hj_NeedNewOuter = true;
                        continue;         /* loop around for a new outer tuple */
                  }
            }

            /*
             * OK, scan the selected hash bucket for matches
             */
            for (;;)
            {
                  curtuple = ExecScanHashBucket(node, econtext);
                  if (curtuple == NULL)
                        break;                  /* out of matches */

                  /*
                   * we've got a match, but still need to test non-hashed quals
                   */
                  inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
                                                                   node->hj_HashTupleSlot,
                                                                   false);    /* don't pfree */
                  econtext->ecxt_innertuple = inntuple;

                  /* reset temp memory each time to avoid leaks from qual expr */
                  ResetExprContext(econtext);

                  /*
                   * if we pass the qual, then save state for next call and have
                   * ExecProject form the projection, store it in the tuple table,
                   * and return the slot.
                   *
                   * Only the joinquals determine MatchedOuter status, but all quals
                   * must pass to actually return the tuple.
                   */
                  if (joinqual == NIL || ExecQual(joinqual, econtext, false))
                  {
                        node->hj_MatchedOuter = true;

                        /* In an antijoin, we never return a matched tuple */
                        if (node->js.jointype == JOIN_ANTI)
                        {
                              node->hj_NeedNewOuter = true;
                              break;            /* out of loop over hash bucket */
                        }

                        /*
                         * In a semijoin, we'll consider returning the first match,
                         * but after that we're done with this outer tuple.
                         */
                        if (node->js.jointype == JOIN_SEMI)
                              node->hj_NeedNewOuter = true;

                        if (otherqual == NIL || ExecQual(otherqual, econtext, false))
                        {
                              TupleTableSlot *result;

                              result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);

                              if (isDone != ExprEndResult)
                              {
                                    node->js.ps.ps_TupFromTlist =
                                          (isDone == ExprMultipleResult);
                                    return result;
                              }
                        }

                        /*
                         * If semijoin and we didn't return the tuple, we're still
                         * done with this outer tuple.
                         */
                        if (node->js.jointype == JOIN_SEMI)
                              break;            /* out of loop over hash bucket */
                  }
            }

            /*
             * Now the current outer tuple has run out of matches, so check
             * whether to emit a dummy outer-join tuple. If not, loop around to
             * get a new outer tuple.
             */
            node->hj_NeedNewOuter = true;

            if (!node->hj_MatchedOuter &&
                  HASHJOIN_IS_OUTER(node))
            {
                  /*
                   * We are doing an outer join and there were no join matches for
                   * this outer tuple.  Generate a fake join tuple with nulls for
                   * the inner tuple, and return it if it passes the non-join quals.
                   */
                  econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;

                  if (otherqual == NIL || ExecQual(otherqual, econtext, false))
                  {
                        /*
                         * qualification was satisfied so we project and return the
                         * slot containing the result tuple using ExecProject().
                         */
                        TupleTableSlot *result;

                        result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);

                        if (isDone != ExprEndResult)
                        {
                              node->js.ps.ps_TupFromTlist =
                                    (isDone == ExprMultipleResult);
                              return result;
                        }
                  }
            }
      }
}

/* ----------------------------------------------------------------
 *          ExecInitHashJoin
 *
 *          Init routine for HashJoin node.
 * ----------------------------------------------------------------
 */
HashJoinState *
ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
{
      HashJoinState *hjstate;
      Plan     *outerNode;
      Hash     *hashNode;
      List     *lclauses;
      List     *rclauses;
      List     *hoperators;
      ListCell   *l;

      /* check for unsupported flags */
      Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

      /*
       * create state structure
       */
      hjstate = makeNode(HashJoinState);
      hjstate->js.ps.plan = (Plan *) node;
      hjstate->js.ps.state = estate;

      /*
       * Miscellaneous initialization
       *
       * create expression context for node
       */
      ExecAssignExprContext(estate, &hjstate->js.ps);

      /*
       * initialize child expressions
       */
      hjstate->js.ps.targetlist = (List *)
            ExecInitExpr((Expr *) node->join.plan.targetlist,
                               (PlanState *) hjstate);
      hjstate->js.ps.qual = (List *)
            ExecInitExpr((Expr *) node->join.plan.qual,
                               (PlanState *) hjstate);
      hjstate->js.jointype = node->join.jointype;
      hjstate->js.joinqual = (List *)
            ExecInitExpr((Expr *) node->join.joinqual,
                               (PlanState *) hjstate);
      hjstate->hashclauses = (List *)
            ExecInitExpr((Expr *) node->hashclauses,
                               (PlanState *) hjstate);

      /*
       * initialize child nodes
       *
       * Note: we could suppress the REWIND flag for the inner input, which
       * would amount to betting that the hash will be a single batch.  Not
       * clear if this would be a win or not.
       */
      outerNode = outerPlan(node);
      hashNode = (Hash *) innerPlan(node);

      outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
      innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);

#define HASHJOIN_NSLOTS 3

      /*
       * tuple table initialization
       */
      ExecInitResultTupleSlot(estate, &hjstate->js.ps);
      hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);

      /* note: HASHJOIN_IS_OUTER macro depends on this initialization */
      switch (node->join.jointype)
      {
            case JOIN_INNER:
            case JOIN_SEMI:
                  break;
            case JOIN_LEFT:
            case JOIN_ANTI:
                  hjstate->hj_NullInnerTupleSlot =
                        ExecInitNullTupleSlot(estate,
                                                 ExecGetResultType(innerPlanState(hjstate)));
                  break;
            default:
                  elog(ERROR, "unrecognized join type: %d",
                         (int) node->join.jointype);
      }

      /*
       * now for some voodoo.  our temporary tuple slot is actually the result
       * tuple slot of the Hash node (which is our inner plan).  we do this
       * because Hash nodes don't return tuples via ExecProcNode() -- instead
       * the hash join node uses ExecScanHashBucket() to get at the contents of
       * the hash table.      -cim 6/9/91
       */
      {
            HashState  *hashstate = (HashState *) innerPlanState(hjstate);
            TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;

            hjstate->hj_HashTupleSlot = slot;
      }

      /*
       * initialize tuple type and projection info
       */
      ExecAssignResultTypeFromTL(&hjstate->js.ps);
      ExecAssignProjectionInfo(&hjstate->js.ps, NULL);

      ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
                                      ExecGetResultType(outerPlanState(hjstate)));

      /*
       * initialize hash-specific info
       */
      hjstate->hj_HashTable = NULL;
      hjstate->hj_FirstOuterTupleSlot = NULL;

      hjstate->hj_CurHashValue = 0;
      hjstate->hj_CurBucketNo = 0;
      hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
      hjstate->hj_CurTuple = NULL;

      /*
       * Deconstruct the hash clauses into outer and inner argument values, so
       * that we can evaluate those subexpressions separately.  Also make a list
       * of the hash operator OIDs, in preparation for looking up the hash
       * functions to use.
       */
      lclauses = NIL;
      rclauses = NIL;
      hoperators = NIL;
      foreach(l, hjstate->hashclauses)
      {
            FuncExprState *fstate = (FuncExprState *) lfirst(l);
            OpExpr         *hclause;

            Assert(IsA(fstate, FuncExprState));
            hclause = (OpExpr *) fstate->xprstate.expr;
            Assert(IsA(hclause, OpExpr));
            lclauses = lappend(lclauses, linitial(fstate->args));
            rclauses = lappend(rclauses, lsecond(fstate->args));
            hoperators = lappend_oid(hoperators, hclause->opno);
      }
      hjstate->hj_OuterHashKeys = lclauses;
      hjstate->hj_InnerHashKeys = rclauses;
      hjstate->hj_HashOperators = hoperators;
      /* child Hash node needs to evaluate inner hash keys, too */
      ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;

      hjstate->js.ps.ps_TupFromTlist = false;
      hjstate->hj_NeedNewOuter = true;
      hjstate->hj_MatchedOuter = false;
      hjstate->hj_OuterNotEmpty = false;

      return hjstate;
}

int
ExecCountSlotsHashJoin(HashJoin *node)
{
      return ExecCountSlotsNode(outerPlan(node)) +
            ExecCountSlotsNode(innerPlan(node)) +
            HASHJOIN_NSLOTS;
}

/* ----------------------------------------------------------------
 *          ExecEndHashJoin
 *
 *          clean up routine for HashJoin node
 * ----------------------------------------------------------------
 */
void
ExecEndHashJoin(HashJoinState *node)
{
      /*
       * Free hash table
       */
      if (node->hj_HashTable)
      {
            ExecHashTableDestroy(node->hj_HashTable);
            node->hj_HashTable = NULL;
      }

      /*
       * Free the exprcontext
       */
      ExecFreeExprContext(&node->js.ps);

      /*
       * clean out the tuple table
       */
      ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
      ExecClearTuple(node->hj_OuterTupleSlot);
      ExecClearTuple(node->hj_HashTupleSlot);

      /*
       * clean up subtrees
       */
      ExecEndNode(outerPlanState(node));
      ExecEndNode(innerPlanState(node));
}

/*
 * ExecHashJoinOuterGetTuple
 *
 *          get the next outer tuple for hashjoin: either by
 *          executing a plan node in the first pass, or from
 *          the temp files for the hashjoin batches.
 *
 * Returns a null slot if no more outer tuples.  On success, the tuple's
 * hash value is stored at *hashvalue --- this is either originally computed,
 * or re-read from the temp file.
 */
static TupleTableSlot *
ExecHashJoinOuterGetTuple(PlanState *outerNode,
                                      HashJoinState *hjstate,
                                      uint32 *hashvalue)
{
      HashJoinTable hashtable = hjstate->hj_HashTable;
      int               curbatch = hashtable->curbatch;
      TupleTableSlot *slot;

      if (curbatch == 0)                  /* if it is the first pass */
      {
            /*
             * Check to see if first outer tuple was already fetched by
             * ExecHashJoin() and not used yet.
             */
            slot = hjstate->hj_FirstOuterTupleSlot;
            if (!TupIsNull(slot))
                  hjstate->hj_FirstOuterTupleSlot = NULL;
            else
                  slot = ExecProcNode(outerNode);

            while (!TupIsNull(slot))
            {
                  /*
                   * We have to compute the tuple's hash value.
                   */
                  ExprContext *econtext = hjstate->js.ps.ps_ExprContext;

                  econtext->ecxt_outertuple = slot;
                  if (ExecHashGetHashValue(hashtable, econtext,
                                                       hjstate->hj_OuterHashKeys,
                                                       true,            /* outer tuple */
                                                       HASHJOIN_IS_OUTER(hjstate),
                                                       hashvalue))
                  {
                        /* remember outer relation is not empty for possible rescan */
                        hjstate->hj_OuterNotEmpty = true;

                        return slot;
                  }

                  /*
                   * That tuple couldn't match because of a NULL, so discard it and
                   * continue with the next one.
                   */
                  slot = ExecProcNode(outerNode);
            }

            /*
             * We have just reached the end of the first pass. Try to switch to a
             * saved batch.
             */
            curbatch = ExecHashJoinNewBatch(hjstate);
      }

      /*
       * Try to read from a temp file. Loop allows us to advance to new batches
       * as needed.  NOTE: nbatch could increase inside ExecHashJoinNewBatch, so
       * don't try to optimize this loop.
       */
      while (curbatch < hashtable->nbatch)
      {
            slot = ExecHashJoinGetSavedTuple(hjstate,
                                                             hashtable->outerBatchFile[curbatch],
                                                             hashvalue,
                                                             hjstate->hj_OuterTupleSlot);
            if (!TupIsNull(slot))
                  return slot;
            curbatch = ExecHashJoinNewBatch(hjstate);
      }

      /* Out of batches... */
      return NULL;
}

/*
 * ExecHashJoinNewBatch
 *          switch to a new hashjoin batch
 *
 * Returns the number of the new batch (1..nbatch-1), or nbatch if no more.
 * We will never return a batch number that has an empty outer batch file.
 */
static int
ExecHashJoinNewBatch(HashJoinState *hjstate)
{
      HashJoinTable hashtable = hjstate->hj_HashTable;
      int               nbatch;
      int               curbatch;
      BufFile    *innerFile;
      TupleTableSlot *slot;
      uint32            hashvalue;

start_over:
      nbatch = hashtable->nbatch;
      curbatch = hashtable->curbatch;

      if (curbatch > 0)
      {
            /*
             * We no longer need the previous outer batch file; close it right
             * away to free disk space.
             */
            if (hashtable->outerBatchFile[curbatch])
                  BufFileClose(hashtable->outerBatchFile[curbatch]);
            hashtable->outerBatchFile[curbatch] = NULL;
      }
      else  /* we just finished the first batch */
      {
            /*
             * Reset some of the skew optimization state variables, since we no
             * longer need to consider skew tuples after the first batch. The
             * memory context reset we are about to do will release the skew
             * hashtable itself.
             */
            hashtable->skewEnabled = false;
            hashtable->skewBucket = NULL;
            hashtable->skewBucketNums = NULL;
            hashtable->spaceUsedSkew = 0;
      }

      /*
       * We can always skip over any batches that are completely empty on both
       * sides.  We can sometimes skip over batches that are empty on only one
       * side, but there are exceptions:
       *
       * 1. In an outer join, we have to process outer batches even if the inner
       * batch is empty.
       *
       * 2. If we have increased nbatch since the initial estimate, we have to
       * scan inner batches since they might contain tuples that need to be
       * reassigned to later inner batches.
       *
       * 3. Similarly, if we have increased nbatch since starting the outer
       * scan, we have to rescan outer batches in case they contain tuples that
       * need to be reassigned.
       */
      curbatch++;
      while (curbatch < nbatch &&
               (hashtable->outerBatchFile[curbatch] == NULL ||
                  hashtable->innerBatchFile[curbatch] == NULL))
      {
            if (hashtable->outerBatchFile[curbatch] &&
                  HASHJOIN_IS_OUTER(hjstate))
                  break;                        /* must process due to rule 1 */
            if (hashtable->innerBatchFile[curbatch] &&
                  nbatch != hashtable->nbatch_original)
                  break;                        /* must process due to rule 2 */
            if (hashtable->outerBatchFile[curbatch] &&
                  nbatch != hashtable->nbatch_outstart)
                  break;                        /* must process due to rule 3 */
            /* We can ignore this batch. */
            /* Release associated temp files right away. */
            if (hashtable->innerBatchFile[curbatch])
                  BufFileClose(hashtable->innerBatchFile[curbatch]);
            hashtable->innerBatchFile[curbatch] = NULL;
            if (hashtable->outerBatchFile[curbatch])
                  BufFileClose(hashtable->outerBatchFile[curbatch]);
            hashtable->outerBatchFile[curbatch] = NULL;
            curbatch++;
      }

      if (curbatch >= nbatch)
            return curbatch;        /* no more batches */

      hashtable->curbatch = curbatch;

      /*
       * Reload the hash table with the new inner batch (which could be empty)
       */
      ExecHashTableReset(hashtable);

      innerFile = hashtable->innerBatchFile[curbatch];

      if (innerFile != NULL)
      {
            if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
                  ereport(ERROR,
                              (errcode_for_file_access(),
                           errmsg("could not rewind hash-join temporary file: %m")));

            while ((slot = ExecHashJoinGetSavedTuple(hjstate,
                                                                         innerFile,
                                                                         &hashvalue,
                                                                         hjstate->hj_HashTupleSlot)))
            {
                  /*
                   * NOTE: some tuples may be sent to future batches.  Also, it is
                   * possible for hashtable->nbatch to be increased here!
                   */
                  ExecHashTableInsert(hashtable, slot, hashvalue);
            }

            /*
             * after we build the hash table, the inner batch file is no longer
             * needed
             */
            BufFileClose(innerFile);
            hashtable->innerBatchFile[curbatch] = NULL;
      }

      /*
       * If there's no outer batch file, advance to next batch.
       */
      if (hashtable->outerBatchFile[curbatch] == NULL)
            goto start_over;

      /*
       * Rewind outer batch file, so that we can start reading it.
       */
      if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
            ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not rewind hash-join temporary file: %m")));

      return curbatch;
}

/*
 * ExecHashJoinSaveTuple
 *          save a tuple to a batch file.
 *
 * The data recorded in the file for each tuple is its hash value,
 * then the tuple in MinimalTuple format.
 *
 * Note: it is important always to call this in the regular executor
 * context, not in a shorter-lived context; else the temp file buffers
 * will get messed up.
 */
void
ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
                                BufFile **fileptr)
{
      BufFile    *file = *fileptr;
      size_t            written;

      if (file == NULL)
      {
            /* First write to this batch file, so open it. */
            file = BufFileCreateTemp(false);
            *fileptr = file;
      }

      written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
      if (written != sizeof(uint32))
            ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not write to hash-join temporary file: %m")));

      written = BufFileWrite(file, (void *) tuple, tuple->t_len);
      if (written != tuple->t_len)
            ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not write to hash-join temporary file: %m")));
}

/*
 * ExecHashJoinGetSavedTuple
 *          read the next tuple from a batch file.    Return NULL if no more.
 *
 * On success, *hashvalue is set to the tuple's hash value, and the tuple
 * itself is stored in the given slot.
 */
static TupleTableSlot *
ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
                                      BufFile *file,
                                      uint32 *hashvalue,
                                      TupleTableSlot *tupleSlot)
{
      uint32            header[2];
      size_t            nread;
      MinimalTuple tuple;

      /*
       * Since both the hash value and the MinimalTuple length word are uint32,
       * we can read them both in one BufFileRead() call without any type
       * cheating.
       */
      nread = BufFileRead(file, (void *) header, sizeof(header));
      if (nread == 0)                     /* end of file */
      {
            ExecClearTuple(tupleSlot);
            return NULL;
      }
      if (nread != sizeof(header))
            ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not read from hash-join temporary file: %m")));
      *hashvalue = header[0];
      tuple = (MinimalTuple) palloc(header[1]);
      tuple->t_len = header[1];
      nread = BufFileRead(file,
                                    (void *) ((char *) tuple + sizeof(uint32)),
                                    header[1] - sizeof(uint32));
      if (nread != header[1] - sizeof(uint32))
            ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not read from hash-join temporary file: %m")));
      return ExecStoreMinimalTuple(tuple, tupleSlot, true);
}


void
ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
{
      /*
       * In a multi-batch join, we currently have to do rescans the hard way,
       * primarily because batch temp files may have already been released. But
       * if it's a single-batch join, and there is no parameter change for the
       * inner subnode, then we can just re-use the existing hash table without
       * rebuilding it.
       */
      if (node->hj_HashTable != NULL)
      {
            if (node->hj_HashTable->nbatch == 1 &&
                  ((PlanState *) node)->righttree->chgParam == NULL)
            {
                  /*
                   * okay to reuse the hash table; needn't rescan inner, either.
                   *
                   * What we do need to do is reset our state about the emptiness of
                   * the outer relation, so that the new scan of the outer will
                   * update it correctly if it turns out to be empty this time.
                   * (There's no harm in clearing it now because ExecHashJoin won't
                   * need the info.  In the other cases, where the hash table
                   * doesn't exist or we are destroying it, we leave this state
                   * alone because ExecHashJoin will need it the first time
                   * through.)
                   */
                  node->hj_OuterNotEmpty = false;
            }
            else
            {
                  /* must destroy and rebuild hash table */
                  ExecHashTableDestroy(node->hj_HashTable);
                  node->hj_HashTable = NULL;

                  /*
                   * if chgParam of subnode is not null then plan will be re-scanned
                   * by first ExecProcNode.
                   */
                  if (((PlanState *) node)->righttree->chgParam == NULL)
                        ExecReScan(((PlanState *) node)->righttree, exprCtxt);
            }
      }

      /* Always reset intra-tuple state */
      node->hj_CurHashValue = 0;
      node->hj_CurBucketNo = 0;
      node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
      node->hj_CurTuple = NULL;

      node->js.ps.ps_TupFromTlist = false;
      node->hj_NeedNewOuter = true;
      node->hj_MatchedOuter = false;
      node->hj_FirstOuterTupleSlot = NULL;

      /*
       * if chgParam of subnode is not null then plan will be re-scanned by
       * first ExecProcNode.
       */
      if (((PlanState *) node)->lefttree->chgParam == NULL)
            ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
}

Generated by  Doxygen 1.6.0   Back to index