MRS  1.0
A C++ Class Library for Statistical Set Processing
subpavings::MCMCPartitionGenerator Class Reference

A class to be able to generate a random partition of a binary tree into a given number of leaves. More...

List of all members.

Public Member Functions

 MCMCPartitionGenerator (unsigned long int seed)
 Default constructor.
 ~MCMCPartitionGenerator ()
 Destructor.
unsigned long int generateStatePartition (unsigned long int numLeaves) const
 Generate a partition of numLeaves to return a value in {1, 2, ..., numLeaves-1}, each with equal probability.
unsigned long int generateNaturalStatePartition (unsigned long int numLeaves) const
int generateKnuthDecision (unsigned long int p, unsigned long int q, bool across=false) const
cxsc::real getLnCatalanRatio (unsigned long int k, unsigned long int kPrime) const
void initialiseInstructions (size_t numLeaves) const
std::vector< unsigned long int > getInstructions () const
void clearInstructions () const

Detailed Description

A class to be able to generate a random partition of a binary tree into a given number of leaves.


Constructor & Destructor Documentation

subpavings::MCMCPartitionGenerator::MCMCPartitionGenerator ( unsigned long int  seed) [explicit]

Default constructor.

Parameters:
seeda seed for a random number generator.
   : rgsl(NULL), instructionsPtr(NULL), workspacePtr(NULL)
  {

    try {
      
      makeCatalans();
      rgsl = gsl_rng_alloc(gsl_rng_mt19937);
      
      gsl_rng_set(rgsl, seed);
      
    }
    catch (...) {
      try {
        if (rgsl != NULL) gsl_rng_free(rgsl);
      }
      catch (...) {}
      clearInstructions();
      throw;
    }
  }

Member Function Documentation

unsigned long int subpavings::MCMCPartitionGenerator::generateStatePartition ( unsigned long int  numLeaves) const

Generate a partition of numLeaves to return a value in {1, 2, ..., numLeaves-1}, each with equal probability.

The value returned can be thought of as the number going to 'one side' of a partition of numLeaves, where the allowable partitions, as pairs (one side, other side) are (1, numLeaves-1), (2, numLeaves-2), ... (numLeaves-1, 1).

  {
  
    #ifdef MYDEBUG
      cout << "\nIn generateState, numLeaves = " << numLeaves << endl;
      cout << "\n" << endl;
    #endif
    
    if (!numLeaves) throw std::logic_error(
      "MCMCPartitionGenerator::generateStatePartition(...) : numLeaves = 0");
    
    unsigned long int left = 0;
    
    if (numLeaves <= 2) left = numLeaves - 1; // 1 if numLeaves is 2
    
    else { // numLeaves > 2 
    
      unsigned long int k = numLeaves-1; // k > 1
    
      /* pick some partition of numLeaves at random
       * anywhere between 1 and numLeaves-1 */
      
      // rand in (0,1]
      double rand = 1.0-gsl_rng_uniform(rgsl);
      //double rand = 0.500005; fix missed probability for 9 leaves '(also 0.499995)
      
      /* If we can use the precomputed Catalan numbers it is much quicker */
      if (numLeaves <= catalans.size()) { // catalans included C0
      
        //pick any of the numbers in 0..Ck-1
        unsigned long int catK = catalans[k];
        #ifdef MYDEBUG
          cout << "using catalans table, catK = " << catK << endl;
          cout << "1/catK = " << (cxsc::real(1.0)/(1.0*catK)) << endl;
          cout << "1/catK = " << (cxsc::interval(1.0)/(1.0*catK)) << endl;
          cout << "gsl_rng_max = " << gsl_rng_max(rgsl) << endl;
        #endif
        
        //unsigned long int choose = gsl_rng_uniform_int(rgsl, catK);
        unsigned long int sum = 0;
        
        #ifdef MYDEBUG
          cout << "rand = " << rand << endl;
        #endif
      
        
        for (unsigned long int i = 1; i <= numLeaves/2; ++i) {
          
          long unsigned catComp = catalans[i-1]*catalans[k-i];
          sum += catComp;
          
          #ifdef MYDEBUG
            cout << "adding " << catComp 
                << " to sum, sum = " << sum << endl;
          #endif
          if (rand <= (sum*1.0)/catK) {
            left = i;
            #ifdef MYDEBUG
              cout << "picked " << left << endl;
            #endif
          
            break;
          }
          if (rand > (catK - (sum*1.0))/catK) {
            left = k - i + 1;
            #ifdef MYDEBUG
              cout << "picked " << (k - i + 1) << endl;
            #endif
            break;
          }
        }
      
      }
      
      else { // can't use catalans table
        
            
        /* prob of split 1|numLeaves-2 is Ck-1/Ck where k = numLeaves-1
        Ck-1/Ck = 0.5*k+1/(2k-1) (which = 1 if k = 1 ie numLeaves = 1*/
        double firstEdgeBand = 0.5*(k+1.0)/(2*k-1.0);
        
        // where we choose to go
        unsigned long int chosenS = 1;
        
        #ifdef MYDEBUG
          cout << "k = " << k << endl;
          cout << "rand = " << rand << endl;
          cout << "choose " << chosenS << " if rand <= " << firstEdgeBand << endl;
          cout << "choose k if rand > " << (1.0-firstEdgeBand) << endl;
        #endif
        
        if ( rand <= firstEdgeBand ) {
          left = chosenS;
          #ifdef MYDEBUG
            cout << "choose " << chosenS << endl;
          #endif
        }
        /* if k == 1, we should have gone to 1 by now */
        else if ( rand > 1.0 - firstEdgeBand ) {
          left = k - chosenS + 1;  // = k = numLeaves - 1
          #ifdef MYDEBUG
            cout << "choose k" << endl;
          #endif
        }
        /* if k == 2, we should have gone to either 1 or k = 2 by now */
        else { // not going down the edge
        
          assert( k > 2);
        
          ++chosenS; // = 2;
        
          /* the next probability is firstEdgeBand*1*0.5*(k/(2k-3)), so 
           * adding that to firstEdgeBand we get 
           * cut-off firstEdgeBand*(1+0.5*(k/(2k-3))) */
          double secondEdgeBand = firstEdgeBand * (1+ 0.5*k/(2*k-3.0));
          
          #ifdef MYDEBUG
            cout << "not on the edge, try 1 in from edge " << k << endl;
            cout << "choose " << chosenS << " if rand <= " << secondEdgeBand << endl;
            cout << "choose k-1 = " << (k - chosenS + 1) << " if rand > " << (1.0-secondEdgeBand) << endl;
          #endif
          
          
          if (rand <= secondEdgeBand) {
            left = chosenS;
            #ifdef MYDEBUG
              cout << "choose " << chosenS << endl;
            #endif
          }
          /* if k == 3, we should have gone to either 1 or 2 or 3 by now */
          else if ( rand > 1.0 - secondEdgeBand) {
            left = k - chosenS + 1; // k-1
            #ifdef MYDEBUG
              cout << "choose k-1 = " << (k - chosenS + 1) << endl;
            #endif
          }
          /* if k == 4, we should have gone to either 1 or 2 or 3 or 4 by now */
          else {
            
            assert( k > 4);
            
            ++ chosenS; //= 3;
            
            /* the next probability is 
             *  2 * (secondEdgeBand - firstEdgeBand) * 0.5 * (k-1.0)/(2*k-5.0)), 
             * which we add to secondEdgeBand to get the cut-off point */
            double thirdEdgeBand = secondEdgeBand + 
              (secondEdgeBand - firstEdgeBand) * (k-1.0)/(2*k-5.0);
          
            #ifdef MYDEBUG
              cout << "not on the edge, try 2 in from edge " << k << endl;
              cout << "choose " << chosenS << " if rand <= " << thirdEdgeBand << endl;
              cout << "choose k-2 = " << (k - chosenS + 1) << " if rand > " << (1.0-thirdEdgeBand) << endl;
            #endif
            
            
            if (rand <= thirdEdgeBand) {
              left = chosenS;
              #ifdef MYDEBUG
                cout << "choose " << chosenS << endl;
              #endif
            }
            /* if k == 5, we should have gone to either 1 or 2 or 3 or 4 or 5 by now */
            else if ( rand > 1.0 - thirdEdgeBand) {
              left = k - chosenS + 1; // k-2
              #ifdef MYDEBUG
                cout << "choose k-2 = " << (k - chosenS + 1) << endl;
              #endif
            }
            /* if k == 6, we should have gone to either 1 or 2 or 3 or 4 or 5 or 6 by now */
            
            else {
              // use our approximated probabilities
                
              assert(k > 6);
              
              ++chosenS; // = 4
              unsigned long int startChosenS = chosenS;
                
              cxsc::real realK(1.0*k);
              
              /* rescale rand to go from 0.0 up and in units without the big divisor */
              cxsc::real rescaledRand( (rand-thirdEdgeBand)
                    * 4.0*cxsc::SqrtPi_real/(cxsc::sqrt(realK)*(realK+1)) );
              /* top of range similarly */
              cxsc::real upperBound( (1.0 - 2*thirdEdgeBand) * 
                  4.0*cxsc::SqrtPi_real/(cxsc::sqrt(realK)*(realK+1)) );
              
              cxsc::real fudge(1.0);
              
              #ifdef MYDEBUG
                cout << "not on the first or second or third edge" << endl;
                cout << "rescaledRand = " << _double(rescaledRand) << endl;
                cout << "upperBound = " << _double(upperBound) << endl;
              #endif
              
              assert (rescaledRand <= upperBound);
              
              cxsc::dotprecision finalAccProbsRescaled(0.0);
              
              int tries = 0;
              while (!left && (tries < MAX_TRIES)) {
                
                cxsc::dotprecision accProbsRescaled(0.0);
                
                #ifdef MYDEBUG
                  cout << "try number " << (tries + 1) << endl;
                  cout << "fudge = " << fudge << endl;
                #endif
                
                #ifdef MYDEBUG_SPECIAL
                  if (tries) {
                    cout << "try number " << (tries + 1) << endl;
                    cout << "fudge = " << fudge << endl;
                  }
                #endif
              
                // step up from here, looking to send to left = S or left = k - S + 1
                for (chosenS = startChosenS ; chosenS <= numLeaves/2; ++chosenS) {
                  
                  cxsc::real realS(1.0*chosenS); 
                  
                  // what's the new top of the lower band, ie to send to left = S
                  cxsc::accumulate( accProbsRescaled, 
                        cxsc::exp(-1.0/8.0*(realS/(realK*(realK - realS))+1/(realS-1)))
                      /cxsc::sqrt((realS - 1.0)*(realK - realS)),
                        fudge/(realS*(realK - realS + 1)) );
                  
                  
                  #ifdef CHECK_MIN
                    if (cxsc::rnd(accProbsRescaled) < cxsc::MinReal) {
                      cout << "\nAlert: cxsc::rnd(accProbsRescaled) = " << cxsc::rnd(accProbsRescaled)
                        << " < cxsc::MinReal = " << cxsc::MinReal << endl;
                      throw std::runtime_error("MinReal exceeded: accProbsRescaled");
                    }
                    if (cxsc::rnd(upperBound - accProbsRescaled) < cxsc::MinReal) {
                      cout << "\nAlert: cxsc::rnd(upperBound - accProbsRescaled) = " << cxsc::rnd(upperBound - accProbsRescaled)
                        << " < cxsc::MinReal = " << cxsc::MinReal << endl;
                      throw std::runtime_error("MinReal exceeded: upperBound - accProbsRescaled");
                    }
                  #endif
                  #ifdef MYDEBUG
                  {
                    cout << "S = " << chosenS << endl;
                    cout << "fudge = " << fudge << endl;
                    cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl;
                    cout << "choose " << chosenS << " if rescaledRand <= accProbsRescaled" << endl;
                    cout << "choose " << (k - chosenS + 1) 
                        << " if rescaledRand > (upperBound - accProbsRescaled = " 
                        << _double(cxsc::rnd(upperBound - accProbsRescaled))  << endl;
                  }
                  #endif
                  
                  
                  if (rescaledRand <= accProbsRescaled) {
                    left = chosenS;
                    #ifdef MYDEBUG
                      cout << "choose left = " << left << endl;
                    #endif
                    
                    break;
                  }
                  else if (rescaledRand > (upperBound - accProbsRescaled)) {
                    left = k - chosenS + 1;
                    #ifdef MYDEBUG
                      cout << "choose left = " << left << endl;
                    #endif
                    
                    break;
                  }
                  #ifdef MYDEBUG_SPECIAL
                  {
                    if (tries && left) {
                      cout << "S = " << chosenS << endl;
                      cout << "fudge = " << fudge << endl;
                      cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl;
                      cout << "choose " << chosenS << " if rescaledRand <= accProbsRescaled" << endl;
                      cout << "choose " << (k - chosenS + 1) 
                          << " if rescaledRand > (upperBound - accProbsRescaled = " 
                          << _double(cxsc::rnd(upperBound - accProbsRescaled))  << endl;
                      
                      cout << "\nchoose left = " << left << endl;
                    }
                  }
                  #endif
                } // end of for loop
                
                finalAccProbsRescaled = accProbsRescaled;
                
                if (!left) {
                  /* possibility we could get to the end and still have not found left,
                   * because our probabilities in this for loop are approximated
                   * and are slightly too low ... so try again with a fudge factor
                   * and work out from the middle - state we want will be towards the middle*/ 
                                  
                  /* gap = upperBound - 2*accProbsRescaled
                     want fudge = upperBound/(upperBound-gap) 
                          = upperBound/(2*accProbsRescaled) */
                  fudge = upperBound/(2*cxsc::rnd(accProbsRescaled));
                  #ifdef MYDEBUG_SPECIAL
                    cout << "first not found left" << endl;
                    cout << "rand = " << rand << endl;
                    cout << "rescaledRand = " << rescaledRand << endl;
                    cout << "upperBound = " << upperBound << endl;
                    cout << "2*cxsc::rnd(accProbsRescaled) = " << (2*cxsc::rnd(accProbsRescaled)) << endl;
                  #endif
                  
                  accProbsRescaled = 0.0;
                  accumulate(accProbsRescaled, 0.5, upperBound);
                  
                  #ifdef MYDEBUG_SPECIAL
                    cout << "adjusted accProbsRescaled) = " << (cxsc::rnd(accProbsRescaled)) << endl;
                  #endif
                  
                  // step down from here, looking to send to left = S or left = k - S + 1
                  for (chosenS = numLeaves/2; chosenS >= startChosenS; --chosenS) {
                    
                    cxsc::real realS(1.0*chosenS); 
                    
                    // what's the new top of the lower band, ie to send to left = S
                    cxsc::accumulate( accProbsRescaled, 
                          -cxsc::exp(-1.0/8.0*(realS/(realK*(realK - realS))+1/(realS-1)))
                        /cxsc::sqrt((realS - 1.0)*(realK - realS)),
                          fudge/(realS*(realK - realS + 1)) );
                    
                    #ifdef MYDEBUG_SPECIAL
                    {
                      cout << "S = " << chosenS << endl;
                      cout << "fudge = " << fudge << endl;
                      cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl;
                      cout << "choose " << chosenS 
                          << " if rescaledRand <= 0.5*upperBound && rescaledRand > accProbsRescaled" << endl;
                      cout << "else choose " << (k - chosenS + 1) 
                          << " if rescaledRand > 0.5*upperBound && rescaledRand <= (upperBound - accProbsRescaled = " 
                          << _double(cxsc::rnd(upperBound - accProbsRescaled))  << endl;
                      
                    }
                    #endif
                    
                                      
                    if ((rescaledRand <= 0.5*upperBound) && (rescaledRand > accProbsRescaled)) {
                      left = chosenS;
                      #ifdef MYDEBUG
                        cout << "choose left = " << left << endl;
                      #endif
                      
                      break;
                    }
                    else if ((rescaledRand > 0.5*upperBound) && 
                          (rescaledRand <= (upperBound - accProbsRescaled)) ){
                      left = k - chosenS + 1;
                      #ifdef MYDEBUG
                        cout << "choose left = " << left << endl;
                      #endif
                      
                      break;
                    }
                    #ifdef MYDEBUG_SPECIAL
                    {
                      if (left) {
                        cout << "S = " << chosenS << endl;
                        cout << "fudge = " << fudge << endl;
                        cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl;
                        cout << "choose " << chosenS << " if rescaledRand > accProbsRescaled" << endl;
                        cout << "choose " << (k - chosenS + 1) 
                            << " if rescaledRand <= (upperBound - accProbsRescaled = " 
                            << _double(cxsc::rnd(upperBound - accProbsRescaled))  << endl;
                        cout << "\nchoose left = " << left << endl;
                      }
                    }
                    #endif
                  } // end of for second loop
                  
                  
                  
                } // end of if !left
                
                /* still possibility we could get to the end and still have not found left?
                 * because our probabilities in this for loop are approximated
                 * and are slightly too low ... so try again with a fudge factor*/ 
                if (!left) {
                  
                  /* gap = upperBound - 2*accProbsRescaled
                     want fudge = upperBound/(upperBound-gap) 
                          = upperBound/(2*accProbsRescaled) */
                  fudge = upperBound/(2*cxsc::rnd(finalAccProbsRescaled));
                  #ifdef MYDEBUG_SPECIAL
                    cout << "** STILL not found left, upperBound = " << upperBound << endl;
                    cout << "rand = " << rand << endl;
                    cout << "2*cxsc::rnd(finalAccProbsRescaled) = " << (2*cxsc::rnd(finalAccProbsRescaled)) << endl;
                  #endif
                }
                
                ++tries;
              } // end of tries while loop
              
              /* remote possibility we could get to the end and still have not found left?
               * I think that if that's the case we should pick the middle*/
              if (!left) {
                chosenS = numLeaves/2; // integer division
                left = chosenS;
                if (rand > 0.5) left = k - chosenS + 1; // same if numLeaves is even
                #if defined (MYDEBUG_SPECIAL) || defined (MYDEBUG_MISSED_PROBS)
                {
                  cout << "\n********* missed probability **************\n" << endl;
                
                  cout << "S = " << chosenS << endl;
                  cout << "rescaledRand = " << _double(rescaledRand) << endl;
                  cout << "accProbsRescaled = " << _double(cxsc::rnd(finalAccProbsRescaled)) << endl;
                  cout << "upperBound - accProbsRescaled = " 
                      << _double(cxsc::rnd(upperBound - finalAccProbsRescaled))  << endl;
                                  
                  cout << "\n*******************************************\n" << endl;
                }
                #endif
                
              }  
            } // end the else to deal with cases where we are not in first or second or third 'edge'     
          }// end the else to deal with cases where we are not in first or second
        }// end the else to deal with cases where we are not in first 'edge'
      
      } // end else we cannot use catalans table  
      assert (left != 0);
    } // end if numLeaves > 2
      
    #ifdef MYDEBUG
      cout << "left = " << left << endl;
    #endif
    
    if (NULL != instructionsPtr) instructionsPtr->push_back(left);
    
    return left;
    
  }

The documentation for this class was generated from the following files:
 All Classes Namespaces Functions Variables Typedefs Enumerations Friends