A class to be able to generate a random partition of a binary tree into a given number of leaves. More...
Public Member Functions | |
MCMCPartitionGenerator (unsigned long int seed) | |
Default constructor. | |
~MCMCPartitionGenerator () | |
Destructor. | |
unsigned long int | generateStatePartition (unsigned long int numLeaves) const |
Generate a partition of numLeaves to return a value in {1, 2, ..., numLeaves-1}, each with equal probability. | |
unsigned long int | generateNaturalStatePartition (unsigned long int numLeaves) const |
int | generateKnuthDecision (unsigned long int p, unsigned long int q, bool across=false) const |
cxsc::real | getLnCatalanRatio (unsigned long int k, unsigned long int kPrime) const |
void | initialiseInstructions (size_t numLeaves) const |
std::vector< unsigned long int > | getInstructions () const |
void | clearInstructions () const |
A class to be able to generate a random partition of a binary tree into a given number of leaves.
subpavings::MCMCPartitionGenerator::MCMCPartitionGenerator | ( | unsigned long int | seed | ) | [explicit] |
Default constructor.
seed | a seed for a random number generator. |
: rgsl(NULL), instructionsPtr(NULL), workspacePtr(NULL) { try { makeCatalans(); rgsl = gsl_rng_alloc(gsl_rng_mt19937); gsl_rng_set(rgsl, seed); } catch (...) { try { if (rgsl != NULL) gsl_rng_free(rgsl); } catch (...) {} clearInstructions(); throw; } }
unsigned long int subpavings::MCMCPartitionGenerator::generateStatePartition | ( | unsigned long int | numLeaves | ) | const |
Generate a partition of numLeaves to return a value in {1, 2, ..., numLeaves-1}, each with equal probability.
The value returned can be thought of as the number going to 'one side' of a partition of numLeaves, where the allowable partitions, as pairs (one side, other side) are (1, numLeaves-1), (2, numLeaves-2), ... (numLeaves-1, 1).
{ #ifdef MYDEBUG cout << "\nIn generateState, numLeaves = " << numLeaves << endl; cout << "\n" << endl; #endif if (!numLeaves) throw std::logic_error( "MCMCPartitionGenerator::generateStatePartition(...) : numLeaves = 0"); unsigned long int left = 0; if (numLeaves <= 2) left = numLeaves - 1; // 1 if numLeaves is 2 else { // numLeaves > 2 unsigned long int k = numLeaves-1; // k > 1 /* pick some partition of numLeaves at random * anywhere between 1 and numLeaves-1 */ // rand in (0,1] double rand = 1.0-gsl_rng_uniform(rgsl); //double rand = 0.500005; fix missed probability for 9 leaves '(also 0.499995) /* If we can use the precomputed Catalan numbers it is much quicker */ if (numLeaves <= catalans.size()) { // catalans included C0 //pick any of the numbers in 0..Ck-1 unsigned long int catK = catalans[k]; #ifdef MYDEBUG cout << "using catalans table, catK = " << catK << endl; cout << "1/catK = " << (cxsc::real(1.0)/(1.0*catK)) << endl; cout << "1/catK = " << (cxsc::interval(1.0)/(1.0*catK)) << endl; cout << "gsl_rng_max = " << gsl_rng_max(rgsl) << endl; #endif //unsigned long int choose = gsl_rng_uniform_int(rgsl, catK); unsigned long int sum = 0; #ifdef MYDEBUG cout << "rand = " << rand << endl; #endif for (unsigned long int i = 1; i <= numLeaves/2; ++i) { long unsigned catComp = catalans[i-1]*catalans[k-i]; sum += catComp; #ifdef MYDEBUG cout << "adding " << catComp << " to sum, sum = " << sum << endl; #endif if (rand <= (sum*1.0)/catK) { left = i; #ifdef MYDEBUG cout << "picked " << left << endl; #endif break; } if (rand > (catK - (sum*1.0))/catK) { left = k - i + 1; #ifdef MYDEBUG cout << "picked " << (k - i + 1) << endl; #endif break; } } } else { // can't use catalans table /* prob of split 1|numLeaves-2 is Ck-1/Ck where k = numLeaves-1 Ck-1/Ck = 0.5*k+1/(2k-1) (which = 1 if k = 1 ie numLeaves = 1*/ double firstEdgeBand = 0.5*(k+1.0)/(2*k-1.0); // where we choose to go unsigned long int chosenS = 1; #ifdef MYDEBUG cout << "k = " << k << endl; cout << "rand = " << rand << endl; cout << "choose " << chosenS << " if rand <= " << firstEdgeBand << endl; cout << "choose k if rand > " << (1.0-firstEdgeBand) << endl; #endif if ( rand <= firstEdgeBand ) { left = chosenS; #ifdef MYDEBUG cout << "choose " << chosenS << endl; #endif } /* if k == 1, we should have gone to 1 by now */ else if ( rand > 1.0 - firstEdgeBand ) { left = k - chosenS + 1; // = k = numLeaves - 1 #ifdef MYDEBUG cout << "choose k" << endl; #endif } /* if k == 2, we should have gone to either 1 or k = 2 by now */ else { // not going down the edge assert( k > 2); ++chosenS; // = 2; /* the next probability is firstEdgeBand*1*0.5*(k/(2k-3)), so * adding that to firstEdgeBand we get * cut-off firstEdgeBand*(1+0.5*(k/(2k-3))) */ double secondEdgeBand = firstEdgeBand * (1+ 0.5*k/(2*k-3.0)); #ifdef MYDEBUG cout << "not on the edge, try 1 in from edge " << k << endl; cout << "choose " << chosenS << " if rand <= " << secondEdgeBand << endl; cout << "choose k-1 = " << (k - chosenS + 1) << " if rand > " << (1.0-secondEdgeBand) << endl; #endif if (rand <= secondEdgeBand) { left = chosenS; #ifdef MYDEBUG cout << "choose " << chosenS << endl; #endif } /* if k == 3, we should have gone to either 1 or 2 or 3 by now */ else if ( rand > 1.0 - secondEdgeBand) { left = k - chosenS + 1; // k-1 #ifdef MYDEBUG cout << "choose k-1 = " << (k - chosenS + 1) << endl; #endif } /* if k == 4, we should have gone to either 1 or 2 or 3 or 4 by now */ else { assert( k > 4); ++ chosenS; //= 3; /* the next probability is * 2 * (secondEdgeBand - firstEdgeBand) * 0.5 * (k-1.0)/(2*k-5.0)), * which we add to secondEdgeBand to get the cut-off point */ double thirdEdgeBand = secondEdgeBand + (secondEdgeBand - firstEdgeBand) * (k-1.0)/(2*k-5.0); #ifdef MYDEBUG cout << "not on the edge, try 2 in from edge " << k << endl; cout << "choose " << chosenS << " if rand <= " << thirdEdgeBand << endl; cout << "choose k-2 = " << (k - chosenS + 1) << " if rand > " << (1.0-thirdEdgeBand) << endl; #endif if (rand <= thirdEdgeBand) { left = chosenS; #ifdef MYDEBUG cout << "choose " << chosenS << endl; #endif } /* if k == 5, we should have gone to either 1 or 2 or 3 or 4 or 5 by now */ else if ( rand > 1.0 - thirdEdgeBand) { left = k - chosenS + 1; // k-2 #ifdef MYDEBUG cout << "choose k-2 = " << (k - chosenS + 1) << endl; #endif } /* if k == 6, we should have gone to either 1 or 2 or 3 or 4 or 5 or 6 by now */ else { // use our approximated probabilities assert(k > 6); ++chosenS; // = 4 unsigned long int startChosenS = chosenS; cxsc::real realK(1.0*k); /* rescale rand to go from 0.0 up and in units without the big divisor */ cxsc::real rescaledRand( (rand-thirdEdgeBand) * 4.0*cxsc::SqrtPi_real/(cxsc::sqrt(realK)*(realK+1)) ); /* top of range similarly */ cxsc::real upperBound( (1.0 - 2*thirdEdgeBand) * 4.0*cxsc::SqrtPi_real/(cxsc::sqrt(realK)*(realK+1)) ); cxsc::real fudge(1.0); #ifdef MYDEBUG cout << "not on the first or second or third edge" << endl; cout << "rescaledRand = " << _double(rescaledRand) << endl; cout << "upperBound = " << _double(upperBound) << endl; #endif assert (rescaledRand <= upperBound); cxsc::dotprecision finalAccProbsRescaled(0.0); int tries = 0; while (!left && (tries < MAX_TRIES)) { cxsc::dotprecision accProbsRescaled(0.0); #ifdef MYDEBUG cout << "try number " << (tries + 1) << endl; cout << "fudge = " << fudge << endl; #endif #ifdef MYDEBUG_SPECIAL if (tries) { cout << "try number " << (tries + 1) << endl; cout << "fudge = " << fudge << endl; } #endif // step up from here, looking to send to left = S or left = k - S + 1 for (chosenS = startChosenS ; chosenS <= numLeaves/2; ++chosenS) { cxsc::real realS(1.0*chosenS); // what's the new top of the lower band, ie to send to left = S cxsc::accumulate( accProbsRescaled, cxsc::exp(-1.0/8.0*(realS/(realK*(realK - realS))+1/(realS-1))) /cxsc::sqrt((realS - 1.0)*(realK - realS)), fudge/(realS*(realK - realS + 1)) ); #ifdef CHECK_MIN if (cxsc::rnd(accProbsRescaled) < cxsc::MinReal) { cout << "\nAlert: cxsc::rnd(accProbsRescaled) = " << cxsc::rnd(accProbsRescaled) << " < cxsc::MinReal = " << cxsc::MinReal << endl; throw std::runtime_error("MinReal exceeded: accProbsRescaled"); } if (cxsc::rnd(upperBound - accProbsRescaled) < cxsc::MinReal) { cout << "\nAlert: cxsc::rnd(upperBound - accProbsRescaled) = " << cxsc::rnd(upperBound - accProbsRescaled) << " < cxsc::MinReal = " << cxsc::MinReal << endl; throw std::runtime_error("MinReal exceeded: upperBound - accProbsRescaled"); } #endif #ifdef MYDEBUG { cout << "S = " << chosenS << endl; cout << "fudge = " << fudge << endl; cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl; cout << "choose " << chosenS << " if rescaledRand <= accProbsRescaled" << endl; cout << "choose " << (k - chosenS + 1) << " if rescaledRand > (upperBound - accProbsRescaled = " << _double(cxsc::rnd(upperBound - accProbsRescaled)) << endl; } #endif if (rescaledRand <= accProbsRescaled) { left = chosenS; #ifdef MYDEBUG cout << "choose left = " << left << endl; #endif break; } else if (rescaledRand > (upperBound - accProbsRescaled)) { left = k - chosenS + 1; #ifdef MYDEBUG cout << "choose left = " << left << endl; #endif break; } #ifdef MYDEBUG_SPECIAL { if (tries && left) { cout << "S = " << chosenS << endl; cout << "fudge = " << fudge << endl; cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl; cout << "choose " << chosenS << " if rescaledRand <= accProbsRescaled" << endl; cout << "choose " << (k - chosenS + 1) << " if rescaledRand > (upperBound - accProbsRescaled = " << _double(cxsc::rnd(upperBound - accProbsRescaled)) << endl; cout << "\nchoose left = " << left << endl; } } #endif } // end of for loop finalAccProbsRescaled = accProbsRescaled; if (!left) { /* possibility we could get to the end and still have not found left, * because our probabilities in this for loop are approximated * and are slightly too low ... so try again with a fudge factor * and work out from the middle - state we want will be towards the middle*/ /* gap = upperBound - 2*accProbsRescaled want fudge = upperBound/(upperBound-gap) = upperBound/(2*accProbsRescaled) */ fudge = upperBound/(2*cxsc::rnd(accProbsRescaled)); #ifdef MYDEBUG_SPECIAL cout << "first not found left" << endl; cout << "rand = " << rand << endl; cout << "rescaledRand = " << rescaledRand << endl; cout << "upperBound = " << upperBound << endl; cout << "2*cxsc::rnd(accProbsRescaled) = " << (2*cxsc::rnd(accProbsRescaled)) << endl; #endif accProbsRescaled = 0.0; accumulate(accProbsRescaled, 0.5, upperBound); #ifdef MYDEBUG_SPECIAL cout << "adjusted accProbsRescaled) = " << (cxsc::rnd(accProbsRescaled)) << endl; #endif // step down from here, looking to send to left = S or left = k - S + 1 for (chosenS = numLeaves/2; chosenS >= startChosenS; --chosenS) { cxsc::real realS(1.0*chosenS); // what's the new top of the lower band, ie to send to left = S cxsc::accumulate( accProbsRescaled, -cxsc::exp(-1.0/8.0*(realS/(realK*(realK - realS))+1/(realS-1))) /cxsc::sqrt((realS - 1.0)*(realK - realS)), fudge/(realS*(realK - realS + 1)) ); #ifdef MYDEBUG_SPECIAL { cout << "S = " << chosenS << endl; cout << "fudge = " << fudge << endl; cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl; cout << "choose " << chosenS << " if rescaledRand <= 0.5*upperBound && rescaledRand > accProbsRescaled" << endl; cout << "else choose " << (k - chosenS + 1) << " if rescaledRand > 0.5*upperBound && rescaledRand <= (upperBound - accProbsRescaled = " << _double(cxsc::rnd(upperBound - accProbsRescaled)) << endl; } #endif if ((rescaledRand <= 0.5*upperBound) && (rescaledRand > accProbsRescaled)) { left = chosenS; #ifdef MYDEBUG cout << "choose left = " << left << endl; #endif break; } else if ((rescaledRand > 0.5*upperBound) && (rescaledRand <= (upperBound - accProbsRescaled)) ){ left = k - chosenS + 1; #ifdef MYDEBUG cout << "choose left = " << left << endl; #endif break; } #ifdef MYDEBUG_SPECIAL { if (left) { cout << "S = " << chosenS << endl; cout << "fudge = " << fudge << endl; cout << "accProbsRescaled = " << _double(cxsc::rnd(accProbsRescaled)) << endl; cout << "choose " << chosenS << " if rescaledRand > accProbsRescaled" << endl; cout << "choose " << (k - chosenS + 1) << " if rescaledRand <= (upperBound - accProbsRescaled = " << _double(cxsc::rnd(upperBound - accProbsRescaled)) << endl; cout << "\nchoose left = " << left << endl; } } #endif } // end of for second loop } // end of if !left /* still possibility we could get to the end and still have not found left? * because our probabilities in this for loop are approximated * and are slightly too low ... so try again with a fudge factor*/ if (!left) { /* gap = upperBound - 2*accProbsRescaled want fudge = upperBound/(upperBound-gap) = upperBound/(2*accProbsRescaled) */ fudge = upperBound/(2*cxsc::rnd(finalAccProbsRescaled)); #ifdef MYDEBUG_SPECIAL cout << "** STILL not found left, upperBound = " << upperBound << endl; cout << "rand = " << rand << endl; cout << "2*cxsc::rnd(finalAccProbsRescaled) = " << (2*cxsc::rnd(finalAccProbsRescaled)) << endl; #endif } ++tries; } // end of tries while loop /* remote possibility we could get to the end and still have not found left? * I think that if that's the case we should pick the middle*/ if (!left) { chosenS = numLeaves/2; // integer division left = chosenS; if (rand > 0.5) left = k - chosenS + 1; // same if numLeaves is even #if defined (MYDEBUG_SPECIAL) || defined (MYDEBUG_MISSED_PROBS) { cout << "\n********* missed probability **************\n" << endl; cout << "S = " << chosenS << endl; cout << "rescaledRand = " << _double(rescaledRand) << endl; cout << "accProbsRescaled = " << _double(cxsc::rnd(finalAccProbsRescaled)) << endl; cout << "upperBound - accProbsRescaled = " << _double(cxsc::rnd(upperBound - finalAccProbsRescaled)) << endl; cout << "\n*******************************************\n" << endl; } #endif } } // end the else to deal with cases where we are not in first or second or third 'edge' }// end the else to deal with cases where we are not in first or second }// end the else to deal with cases where we are not in first 'edge' } // end else we cannot use catalans table assert (left != 0); } // end if numLeaves > 2 #ifdef MYDEBUG cout << "left = " << left << endl; #endif if (NULL != instructionsPtr) instructionsPtr->push_back(left); return left; }