Constructor. More...
Public Member Functions | |
MCMCGRDiagnosticInterval (cxsc::real t, size_t si, double percent, bool req) | |
Constructor. | |
MCMCGRDiagnosticInterval (cxsc::real t, size_t si, size_t sm, double percent, bool req) | |
Constructor. | |
virtual | ~MCMCGRDiagnosticInterval () |
Destructor. | |
bool | isRequired () const |
Get whether this diagnostic is required for burn-in. | |
virtual void | clean () |
Clean old calculations out. | |
virtual void | initialise (size_t ch, size_t res, bool logs=false) |
Clean old calculations out and initialise for new calculations. | |
virtual void | initialiseChainValues (const std::vector< ChangeOfStateInformationAutoMCMC > &infoObjs) |
Initialise values for each chain. | |
virtual void | updateChainValuesInLoop (const std::vector< ChangeOfStateInformationAutoMCMC > &infoObjs) |
Update calculations for each chain. | |
virtual int | calcDiagnosticsForLoop () |
Do convergence diagnostic calculations for a loop or iteration of the MCMC process, ie using current values assuming all chains have been iterated through for this loop. | |
void | outputResults (const std::string &filenameGRScalars, const RealVec &sampledInd, int precData=5) const |
Output results including the contents of sampledIndPtr. | |
void | outputResults (const std::string &filenameGRScalars, int precData=5) const |
Output results. | |
void | outputCalculations (const std::string &filenameGRWorkingCalcs, int precData=5) const |
Output values used in the calculations. | |
const std::vector< std::vector < cxsc::real > > & | getScalarsRef () const |
Get a const reference to the scalars held by this. | |
cxsc::real | getCurrentRhatValue () const |
Get the current estimated R held. | |
virtual std::string | getGRWorkingCalcsFilename () const =0 |
Get the name used by this in output files for working calculations. | |
virtual std::string | getGRDiagnosticsFilename () const =0 |
Get the name used by this in results files. | |
virtual std::string | getScalarsName () const =0 |
Get the name used by this for the scalar type it is using. | |
virtual cxsc::real | getTolerance () const |
Get the tolerance used by this. |
Constructor.
Type to do Gelman-Rubin potential scale reduction factor (PSRF) diagnostic calculations.
Calculations use the (approximate) last half of the scalar values from each chain only. If the number of states in the chain is n, the number used for the calculations is n-(n/2) where the division is integer division, ie if n is odd we effectively use the last (n+1)/2 values and if n is even we use the last n/2 values.
MCMCGRDiagnosticInterval::MCMCGRDiagnosticInterval | ( | cxsc::real | t, |
size_t | si, | ||
double | percent, | ||
bool | req | ||
) |
Constructor.
The largest number of states used for the calculation of each within chain interval will be 1,000,000 or the next integer multiple of above that figure.
t | The tolerance to use for the Gelman-Rubin convergence criteria. |
si | the size of the sampling interval to use for recalculating the diagnostic, eg si = 1 will recalculate for every change of state, si = 1000 will recalculate every 1000 changes of state, etc. |
percent | the percentage interval to calculate. |
req | flag for whether this diagnostic must be satisfied for burn-in. |
: tol(t), samplingInterval(si), maxStatesForCalcs(1000000), p((1-percent)/2), required(req), chains(0), states(0), statesNotInCalcs(0), rhatDiagnosticFlag(0) { if (!(tol > 0.0)) throw std::invalid_argument( "MCMCGRDiagnosticInterval::MCMCGRDiagnostic(...) : t"); if (!(percent > 0.0) || !(percent < 1.0)) throw std::invalid_argument( "MCMCGRDiagnosticInterval::MCMCGRDiagnostic(...) : percent"); if (si == 0) throw std::invalid_argument( "MCMCGRDiagnosticInterval::MCMCGRDiagnostic(...) : si"); if (maxStatesForCalcs%si) { maxStatesForCalcs = (maxStatesForCalcs/si + 1) * si; } }
MCMCGRDiagnosticInterval::MCMCGRDiagnosticInterval | ( | cxsc::real | t, |
size_t | si, | ||
size_t | sm, | ||
double | percent, | ||
bool | req | ||
) |
Constructor.
t | The tolerance to use for the Gelman-Rubin convergence criteria. |
si | the size of the sampling interval to use for recalculating the diagnostic, eg si = 1 will recalculate for every change of state, si = 1000 will recalculate every 1000 changes of state, etc. |
sm | the maximum number of states to be used for any one within-chain interval. If this is not an integer multiple of si then the maximum number of states used in the calculations will be sm rounded up to the next integer multiple of si greater than sm. |
percent | the percentage interval to calculate. |
req | flag for whether this diagnostic must be satisfied for burn-in. |
: tol(t), samplingInterval(si), maxStatesForCalcs(ms), p((1-percent)/2), required(req), chains(0), states(0), statesNotInCalcs(0), rhatDiagnosticFlag(0) { if (!(tol > 0.0)) throw std::invalid_argument( "MCMCGRDiagnosticInterval::MCMCGRDiagnostic(...) : t"); if (!(percent > 0.0) || !(percent < 1.0)) throw std::invalid_argument( "MCMCGRDiagnosticInterval::MCMCGRDiagnostic(...) : percent"); if (si == 0) throw std::invalid_argument( "MCMCGRDiagnosticInterval::MCMCGRDiagnostic(...) : si"); if (maxStatesForCalcs%si) { maxStatesForCalcs = (maxStatesForCalcs/si + 1) * si; } }
int MCMCGRDiagnosticInterval::calcDiagnosticsForLoop | ( | ) | [virtual] |
Do convergence diagnostic calculations for a loop or iteration of the MCMC process, ie using current values assuming all chains have been iterated through for this loop.
Implements subpavings::MCMCGRAuto::Diagnostic.
{ //if we are updating if ( (states % samplingInterval == 0) && ((states - statesNotInCalcs) % samplingInterval == 0) ) { calculationStates.push_back(states); //get the interval length for the overall set #ifdef MYDEBUG_CALCS_EXTRA cout << "\noverall set = "<< endl; #endif real len(0.0); if (states - statesNotInCalcs > 1) len = getIntervalLength(overallSets); #ifdef MYDEBUG_CALCS_EXTRA cout << "overall len = " << len << endl; #endif //clear out the overallSets container std::multiset < cxsc::real >().swap( overallSets ); #ifdef MYDEBUG_CALCS_EXTRA cout << "chain interval lens are:" << endl; ostream_iterator<real> out_it (cout,"\t"); copy ( currentIntervalLengths.begin(),currentIntervalLengths.end(), out_it ); cout << endl; #endif real meanChainIntervalLength(0.0); meanChainIntervalLength = std::accumulate(currentIntervalLengths.begin(), currentIntervalLengths.end(), meanChainIntervalLength)/(1.0*chains); /* keep the diagnostics */ overallIntervalLengths.push_back(len); interchainIntervalStatistic.push_back(meanChainIntervalLength); real thisRhat(0.0); if (meanChainIntervalLength > 0) thisRhat = len/meanChainIntervalLength; rhat.push_back(thisRhat); #ifdef MYDEBUG_CALCS_EXTRA cout << "meanChainIntervalLength = " << meanChainIntervalLength << endl; cout << "thisRhat = " << thisRhat << " - press any key " << endl; getchar(); #endif if ( !((thisRhat > 1.0 + tol) || (thisRhat < 1.0 - tol)) ) { // if we have not been converged before on this scalar value if (!rhatDiagnosticFlag) { #ifdef MYDEBUG cout << "\n" << getScalarsName() << " convergence test satisfied in state " << states << " (states in calcs = " << (states - statesNotInCalcs) << ")"<< endl; #endif // set the flag for this scalar value rhatDiagnosticFlag = 1; } } else { // not converged on this scalar value // if we were okay on this scalar value before if (rhatDiagnosticFlag) { #ifdef MYDEBUG cout << "\n--------- Note: " << getScalarsName() << " convergence test now NOT satisfied in state " << states << endl; #endif rhatDiagnosticFlag = 0; // update the flag } } #ifdef MYDEBUG_CALCS cout << "\nsampling" << endl; cout << "\nstates = " << states << endl; cout << "statesNotInCalcs = " << statesNotInCalcs << endl; #endif } else { // not updating, just replicate values calculationStates.push_back(calculationStates.back()); overallIntervalLengths.push_back(overallIntervalLengths.back()); interchainIntervalStatistic.push_back(interchainIntervalStatistic.back()); rhat.push_back(rhat.back()); } if (keepLogs) { // store the flag as well, as a real, which is a fudge... rhatFlag.push_back(rhatDiagnosticFlag); } // old value if not updated return rhatDiagnosticFlag; } // end calculations
cxsc::real MCMCGRDiagnosticInterval::getCurrentRhatValue | ( | ) | const [virtual] |
Get the current estimated R held.
Implements subpavings::MCMCGRAuto::Diagnostic.
{ if (rhat.empty()) throw std::runtime_error( "MCMCGRDiagnosticInterval::getCurrentRhatValue(size_t): no value to get"); return rhat.back(); }
void MCMCGRDiagnosticInterval::initialise | ( | size_t | ch, |
size_t | res, | ||
bool | logs = false |
||
) | [virtual] |
Clean old calculations out and initialise for new calculations.
ch | The number of chains to initialise for. |
ch | Suggested number of loops to reserve space for: this can choose to reserve more but should not reserve less capacity than this. |
logs | Indicator for whether this is to keep additional logs for calculations. |
Implements subpavings::MCMCGRAuto::Diagnostic.
{ clean(); keepLogs = logs; chains = ch; rhatDiagnosticFlag = 0; states = 1; statesNotInCalcs = 0; scalars = std::vector <RealVec > (chains); for (size_t ci = 0; ci < chains; ++ci) { scalars[ci].reserve(res); } currentIntervalLengths = vector < real >(chains, 0.0); if (keepLogs) { intervalLengthsColNames = vector<string>(chains); intervalLengths = std::vector <RealVec > (chains); for (size_t ci = 0; ci < chains; ++ci) { intervalLengths[ci].reserve(res); intervalLengths[ci].push_back(0.0); { std::ostringstream stm; stm << baseIntervalLengthsColName << ci; intervalLengthsColNames[ci] = stm.str(); } } /* keep a vector of the flag for convergence * (it's not a real, but easier to output it if we treat it like one) */ rhatFlag = RealVec(1, cxsc::real(0.0)); rhatFlag.reserve(res); } rhat = RealVec(1, cxsc::real (0.0) ); // to hold the rhats rhat.reserve(res); overallIntervalLengths.reserve(res); overallIntervalLengths.push_back(0); interchainIntervalStatistic.reserve(res); interchainIntervalStatistic.push_back(0); calculationStates.reserve(res); calculationStates.push_back(0); }
void MCMCGRDiagnosticInterval::initialiseChainValues | ( | const std::vector< ChangeOfStateInformationAutoMCMC > & | infoObjs | ) | [virtual] |
Initialise values for each chain.
infoObjs | The collection of ChangeOfStateInformationAutoMCMC objects that this can ask to initialise its scalar values for each chain. |
Implements subpavings::MCMCGRAuto::Diagnostic.
{ for (size_t ci = 0; ci < chains; ++ci) { //get the last value real lastValue = getScalarValue(infoObjs[ci]); scalars.at(ci).push_back(lastValue); } }
void MCMCGRDiagnosticInterval::outputCalculations | ( | const std::string & | filenameGRWorkingCalcs, |
int | precData = 5 |
||
) | const [virtual] |
Output values used in the calculations.
Outputs the intermediary values held and calculations for the the Gelman-Rubin convergence diagnostic calculations.
precData | The precision for the output. |
Implements subpavings::MCMCGRAuto::Diagnostic.
{ std::vector <std::string > colNames; getScalarColNames(colNames); /* output working calcs: all v's for each chain, * running sums for each chain, sample variances, * overall running sums */ colNames.insert(colNames.end(), intervalLengthsColNames.begin(), intervalLengthsColNames.end()); colNames.push_back(interchainIntervalStatisticColName); colNames.push_back(overallIntervalLengthsColName); std::vector < const RealVec* > data; data = addDataPtrs(data, scalars); data = addDataPtrs(data, intervalLengths); data.push_back(&interchainIntervalStatistic); data.push_back(&overallIntervalLengths); outputToFileVertical(calculationStates, data, colNames, filenameGRWorkingCalcs, precData); }
void MCMCGRDiagnosticInterval::outputResults | ( | const std::string & | filenameGRScalars, |
const RealVec & | sampledInd, | ||
int | precData = 5 |
||
) | const [virtual] |
Output results including the contents of sampledIndPtr.
Outputs the results of the Gelman-Rubin convergence diagnostic calculations (W, B, estimated variance, estimated R, whether convergence criteria satisfied, and sampledIndPtr values).
sampledIndPtr | A pointer to a collection of values, each one corresponding to a state for which this holds calcuations, indicating whether sampling took place in that state. |
precData | The precision for the output. |
Implements subpavings::MCMCGRAuto::Diagnostic.
{ /* sampledInd may contain more values than we monitored */ RealVec::const_iterator it = sampledInd.begin(); advance(it, scalars.size()); RealVec sampledIndTmp(sampledInd.begin(), it); std::vector < std::string > colNames; getScalarColNames(colNames); colNames.push_back(interchainIntervalStatisticColName); colNames.push_back(overallIntervalLengthsColName); colNames.push_back("rhat"); colNames.push_back("rhatFlag"); colNames.push_back("sampled?"); std::vector < const RealVec* > data; addDataPtrs(data, scalars); data.push_back(&interchainIntervalStatistic); data.push_back(&overallIntervalLengths); data.push_back(&rhat); data.push_back(&rhatFlag); data.push_back(&sampledIndTmp); outputToFileVertical(calculationStates, data, colNames, filenameGRScalars, precData); }
void MCMCGRDiagnosticInterval::outputResults | ( | const std::string & | filenameGRScalars, |
int | precData = 5 |
||
) | const [virtual] |
Output results.
Outputs the results of the Gelman-Rubin convergence diagnostic calculations (W, B, estimated variance, estimated R, whether convergence criteria satisfied).
precData | The precision for the output. |
Implements subpavings::MCMCGRAuto::Diagnostic.
{ std::vector <std::string > colNames; getScalarColNames(colNames); colNames.push_back(interchainIntervalStatisticColName); colNames.push_back(overallIntervalLengthsColName); colNames.push_back("rhat"); std::vector < const RealVec* > data; addDataPtrs(data, scalars); data.push_back(&interchainIntervalStatistic); data.push_back(&overallIntervalLengths); data.push_back(&rhat); outputToFileVertical(calculationStates, data, colNames, filenameGRScalars, precData); }
void MCMCGRDiagnosticInterval::updateChainValuesInLoop | ( | const std::vector< ChangeOfStateInformationAutoMCMC > & | infoObjs | ) | [virtual] |
Update calculations for each chain.
infoObjs | The collection of ChangeOfStateInformationAutoMCMC objects that this can ask for updates to its scalar values for each chain. |
Implements subpavings::MCMCGRAuto::Diagnostic.
{ ++states; // states is effectively number of transitions + 1 statesNotInCalcs = states/2; /* but limit states used for calcs */ if ( (states - statesNotInCalcs) > maxStatesForCalcs ) { statesNotInCalcs = states - maxStatesForCalcs; } #ifdef OLDCALCMETHOD statesNotInCalcs = 0; #endif /*On the first run, states will be 2 but statesForCalcs will be 1)*/ size_t statesForCalcs = states - statesNotInCalcs; //If we are updating if ( (states % samplingInterval == 0) && (statesForCalcs % samplingInterval == 0) ) { #ifdef MYDEBUG_CALCS cout << "\nsampling" << endl; cout << "\nstates for calcs = " << statesForCalcs << endl; cout << "statesNotInCalcs = " << statesNotInCalcs << endl; #endif vector < real >(chains, 0.0).swap(currentIntervalLengths); //the overallSets container should be clear assert(overallSets.empty()); for (size_t ci = 0; ci < chains; ++ci) { //get the last value real lastValue = getScalarValue(infoObjs[ci]); scalars.at(ci).push_back(lastValue); /* for each chain put the values to be used into the set*/ std::vector < real >::iterator it = scalars[ci].begin(); advance(it, statesNotInCalcs); // a new set from the scalars std::multiset < cxsc::real > thisSet( it, scalars[ci].end() ); // and also add to the overall set overallSets.insert( it, scalars[ci].end() ); #ifdef MYDEBUG_CALCS_EXTRA cout << "\n chain = " << ci << endl; #endif //get the interval length real len(0.0); if (statesForCalcs > 1) len = getIntervalLength(thisSet); currentIntervalLengths[ci] = len; if (keepLogs) intervalLengths[ci].push_back(len); #ifdef MYDEBUG_CALCS_EXTRA cout << "len = " << len << endl; #endif } } else { // not updating, get the scalar values only for (size_t ci = 0; ci < chains; ++ci) { //get the last value real lastValue = getScalarValue(infoObjs[ci]); scalars.at(ci).push_back(lastValue); // and put old values into log files if (keepLogs) intervalLengths[ci].push_back( currentIntervalLengths[ci]); } } }