MRS  1.0
A C++ Class Library for Statistical Set Processing
subpavings::MCMCGRDiagnosticPSRF Class Reference

Constructor. More...

+ Inheritance diagram for subpavings::MCMCGRDiagnosticPSRF:
+ Collaboration diagram for subpavings::MCMCGRDiagnosticPSRF:

List of all members.

Public Member Functions

 MCMCGRDiagnosticPSRF (cxsc::real t, bool req)
 Constructor.
virtual ~MCMCGRDiagnosticPSRF ()
 Destructor.
bool isRequired () const
 Get whether this diagnostic is required for burn-in.
virtual void clean ()
 Clean old calculations out.
virtual void initialise (size_t ch, size_t res, bool logs=false)
 Clean old calculations out and initialise for new calculations.
virtual void initialiseChainValues (const std::vector< ChangeOfStateInformationAutoMCMC > &infoObjs)
 Initialise values for each chain.
virtual void updateChainValuesInLoop (const std::vector< ChangeOfStateInformationAutoMCMC > &infoObjs)
 Update calculations for each chain.
virtual int calcDiagnosticsForLoop ()
 Do convergence diagnostic calculations for a loop or iteration of the MCMC process, ie using current values assuming all chains have been iterated through for this loop.
void outputResults (const std::string &filenameGRScalars, const RealVec &sampledInd, int precData=5) const
 Output results including the contents of sampledIndPtr.
void outputResults (const std::string &filenameGRScalars, int precData=5) const
 Output results.
void outputCalculations (const std::string &filenameGRWorkingCalcs, int precData=5) const
 Output values used in the calculations.
const std::vector< std::vector
< cxsc::real > > & 
getScalarsRef () const
 Get a const reference to the scalars held by this.
cxsc::real getCurrentRhatValue () const
 Get the current estimated R held.
virtual std::string getGRWorkingCalcsFilename () const =0
 Get the name used by this in output files for working calculations.
virtual std::string getGRDiagnosticsFilename () const =0
 Get the name used by this in results files.
virtual std::string getScalarsName () const =0
 Get the name used by this for the scalar type it is using.
virtual cxsc::real getTolerance () const
 Get the tolerance used by this.

Detailed Description

Constructor.

Type to do Gelman-Rubin potential scale reduction factor (PSRF) diagnostic calculations.

Calculations use the (approximate) last half of the scalar values from each chain only. If the number of states in the chain is n, the number used for the calculations is n-(n/2) where the division is integer division, ie if n is odd we effectively use the last (n+1)/2 values and if n is even we use the last n/2 values.


Constructor & Destructor Documentation

MCMCGRDiagnosticPSRF::MCMCGRDiagnosticPSRF ( cxsc::real  t,
bool  req 
) [explicit]

Constructor.

Parameters:
tThe tolerance to use for the Gelman-Rubin convergence criteria.
reqflag for whether this diagnostic must be satisfied for burn-in.
Precondition:
t > 0.0.
  : tol(t), required(req),
  chains(0),
  states(0),
  statesNotInCalcs(0),
  rhatDiagnosticFlag(0),
  runningSumAllChains(0.0),
  initialSumOfSquaresOfRunningSums(0.0)
{
  if (!(tol > 0.0)) throw std::invalid_argument(
      "MCMCGRDiagnosticPSRF::MCMCGRDiagnostic(...) : t");
}

Member Function Documentation

Do convergence diagnostic calculations for a loop or iteration of the MCMC process, ie using current values assuming all chains have been iterated through for this loop.

Returns:
1 if convergence criteria satisfied, 0 otherwise

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  if (keepLogs) {
    // store the current runningSumAllChains as well
    runningSumOverall.push_back(runningSumAllChains);
  }

  // convergence diagnostics calculations for v's

  // the Ws: average, over chains, of sample variance of scalar value
  cxsc::real thisW = sumOfSampleVariancesOverChains/(chains * 1.0); 
  
  #ifdef MYDEBUG_CALCS_EXTRA
      cout << "and thisW = " << thisW << endl;
  #endif
  
        
  Ws.push_back(thisW); 
  // the Bs
  size_t statesForCalcs = states - statesNotInCalcs;
  
  cxsc::real thisB = (1.0/( (chains - 1) * statesForCalcs ) 
            * ( sumOfSquaresOfRunningSums 
            - (runningSumAllChains 
            * runningSumAllChains/(chains * 1.0)) ) );
  Bs.push_back(thisB); 
  
  #ifdef MYDEBUG_CALCS
    //check thisB is correct, doing it the long way
    // runningSumPtr has one running sum for each chain
    RealVec chainAverages;
    cxsc::real accRunningSums(0.0);
    for (RealVecItr it = runningSum.begin(); it < runningSum.end(); ++it) {
      cxsc::real thisChainRunningSum = (*it);
      cxsc::real thisChainAv = thisChainRunningSum/(statesForCalcs * 1.0);
      chainAverages.push_back(thisChainAv);
      accRunningSums+=thisChainRunningSum;
    }
    cxsc::real overallAv = accRunningSums/(statesForCalcs * chains * 1.0);
    cxsc::dotprecision accDiffs(0.0);
    for (RealVecItr it = chainAverages.begin(); it < chainAverages.end(); ++it) {
      cxsc::real thisDiff = (*it) - overallAv;
      // sum up the squares of the differences compared to overall average
      cxsc::accumulate(accDiffs, thisDiff, thisDiff);
    }
    cxsc::real altB = rnd(accDiffs)*( statesForCalcs/(chains - 1.0) );
    
    cout << "\nthisB for v's is\t" << thisB << endl;
    cout << "altB for v's is\t" << altB << endl;
    //assert(thisB == altB);
  
  #endif
  
  // the estimated var(v)
  cxsc::real thisVarV(0.0);
  if (statesForCalcs > 1) {
    thisVarV = statesForCalcs/(statesForCalcs-1.0) 
          * thisW + (1.0/statesForCalcs)*thisB;
  }
  #ifndef OLDCALCMETHOD
    if (statesForCalcs > 1) thisVarV +=thisB/(1.0*chains*statesForCalcs);
  #endif
  
  estVarV.push_back(thisVarV); 
  // the rhats
  cxsc::real thisRhat(0.0);
  // allow division by 0 if w = 0 when var does not
  if (thisW > 0.0 || thisVarV > 0.0) {
    thisRhat = thisVarV/thisW;
  }
  rhat.push_back(thisRhat); 
  
  #ifdef MYDEBUG_CALCS_EXTRA
      cout << "thisRhat = " << thisRhat << " - press any key " << endl;
      getchar();
  #endif
  
  if ( (thisRhat <= 1.0 + tol) 
        && (thisRhat >= 1.0 - tol) ) {
    // if we have not been converged before on this scalar value
    if (!rhatDiagnosticFlag)  {
      #ifdef MYDEBUG
        cout << "\n" << getScalarsName() 
          << " convergence test satisfied in state " 
            << states << " (states in calcs = " 
            << statesForCalcs << ")"<< endl;
      #endif
      // set the flag for this scalar value
      rhatDiagnosticFlag = 1;
    }
  } 
  else { // not converged on this scalar value
    // if we were okay on this scalar value before
    if (rhatDiagnosticFlag) {
      #ifdef MYDEBUG
        cout << "\n--------- Note: " << getScalarsName() 
        << " convergence test now NOT satisfied in state " 
          << states << endl;
    
      #endif
      
      rhatDiagnosticFlag = 0; // update the flag
      
    } 
  }
  
  if (keepLogs) {
    // store the flag as well, as a real, which is a fudge...
    rhatFlag.push_back(rhatDiagnosticFlag);
  }

  // end of checking diagnostic for v's

  return rhatDiagnosticFlag;
} // end calculations
cxsc::real MCMCGRDiagnosticPSRF::getCurrentRhatValue ( ) const [virtual]

Get the current estimated R held.

Note:
Used for debugging. *
Returns:
The current estimated R value.
Precondition:
This holds at least one estimated R.

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  if (rhat.empty())
    throw std::runtime_error(
      "MCMCGRDiagnosticPSRF::getCurrentRhatValue(size_t): no value to get");
  return rhat.back();
}
void MCMCGRDiagnosticPSRF::initialise ( size_t  ch,
size_t  res,
bool  logs = false 
) [virtual]

Clean old calculations out and initialise for new calculations.

Parameters:
chThe number of chains to initialise for.
chSuggested number of loops to reserve space for: this can choose to reserve more but should not reserve less capacity than this.
logsIndicator for whether this is to keep additional logs for calculations.

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  clean();
  
  keepLogs = logs;
  
  chains = ch;
  
  rhatDiagnosticFlag = 0;
  
  states = 1;
  
  statesNotInCalcs = 0;
  
  runningSumColNames = vector<string>(chains);
  sampleVarianceColNames = vector<string>(chains);
  
  scalars = std::vector <RealVec > (chains);
  for (size_t ci = 0; ci < chains; ++ci) {
    scalars[ci].reserve(res);
  }
  
  /* vector containing one running sum of v's for each chain
  we can work out the average v for each chain so far from this
  start with a running sum of 0.0 for each chain */
  runningSum = RealVec (chains, cxsc::real(0.0));

  if (keepLogs) {
    // keep a vector of all the overall running sums as well
    runningSumOverall = RealVec(1, cxsc::real(0.0));
    runningSumOverall.reserve(res);
    // keep a vector of the runningsums for each chain as well
    runningSumChains = std::vector < RealVec >(chains);
    // keep a vector of the sample variances for each chain as well
    sampleVariances = std::vector < RealVec >(
          chains, RealVec(1, cxsc::real(0.0)) );
    
    for (size_t ci = 0; ci < chains; ++ci) {
      runningSumChains[ci].reserve(res);
      sampleVariances[ci].reserve(res);
    }
    
    /* keep a vector of the flag for convergence
     * (it's not a real, but easier to output it if we treat it like one) */
    rhatFlag = RealVec(1, cxsc::real(0.0));
    rhatFlag.reserve(res);
    
  }

  Ws = RealVec(1, cxsc::real (0.0) ); // to hold the Ws
  Ws.reserve(res);
  Bs = RealVec(1, cxsc::real (0.0) ); // to hold the Bs
  Bs.reserve(res);
  estVarV = RealVec(1, cxsc::real (0.0) ); // to hold the estimated var(v)
  estVarV.reserve(res);
  rhat = RealVec(1, cxsc::real (0.0) ); // to hold the rhats
  rhat.reserve(res);

  /* vector containing one running sum of 
  squared v's for each chain
  we can work out the average of the squared v's ie v^2
  for each chain so far from this
  start with a running sum of 0.0 for each chain.
  (Use a dotprecision for each running sum to keep accuracy 
  when accumulating products of reals) */
  runningSumSquared = VecDotPrec(chains, cxsc::dotprecision(0.0));

  /* 
  runningSumAllChains and
  initialSumOfSquaresOfRunningSums  are done in clean() */
  if (keepLogs) {
    for (size_t ci = 0; ci < chains; ci++) {
      {
        std::ostringstream stm;
        stm << baseRunningSumColName << ci;
        runningSumColNames[ci] = stm.str();
      }
      
      {
        std::ostringstream stm;
        stm << baseSampleVarianceColName << ci;
        sampleVarianceColNames[ci] = stm.str();
      }
      
    } // end loop initialising stuff for each chain
  }
  
}
void MCMCGRDiagnosticPSRF::initialiseChainValues ( const std::vector< ChangeOfStateInformationAutoMCMC > &  infoObjs) [virtual]

Initialise values for each chain.

Parameters:
infoObjsThe collection of ChangeOfStateInformationAutoMCMC objects that this can ask to initialise its scalar values for each chain.
Precondition:
infoObjs.size() == chains .

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  for (size_t ci = 0; ci < chains; ++ci) {
    //get the last value
    real lastValue = getScalarValue(infoObjs[ci]);
    scalars.at(ci).push_back(lastValue);
    
    // update the running sum of v's for the chain, held in runningSum
    cxsc::real newRunningSum = runningSum.at(ci) 
                  + lastValue;
    runningSum.at(ci) = newRunningSum;
        
    // accumulate the square of the running sum of v's 
    initialSumOfSquaresOfRunningSums += newRunningSum*newRunningSum;
        
    /* update the running sum of squared v's over this chain
     *  held in runningSumSquared as a dot precision */
    cxsc::accumulate( runningSumSquared[ci], lastValue, lastValue );
    
    // update  the overall running sum runningSumAllChains 
    runningSumAllChains += lastValue;
    
    if (keepLogs) {
      //sampleVariances.at(ci) was initialised to 0.0
      runningSumChains.at(ci).push_back (newRunningSum);
      // store the current runningSumAllChains as well
      runningSumOverall.back() += newRunningSum;
    }
  }
}
void MCMCGRDiagnosticPSRF::outputCalculations ( const std::string &  filenameGRWorkingCalcs,
int  precData = 5 
) const [virtual]

Output values used in the calculations.

Note:
Used for debugging. Results go to filename returned by getGRWorkingCalcsFilename().

Outputs the intermediary values held and calculations for the the Gelman-Rubin convergence diagnostic calculations.

Parameters:
precDataThe precision for the output.

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  
  std::vector <std::string > colNames;
  
  getScalarColNames(colNames);
  
  /* output working calcs: all v's for each chain, 
   * running sums for each chain, sample variances,
   * overall running sums */
  colNames.insert(colNames.end(), runningSumColNames.begin(), runningSumColNames.end());
  colNames.insert(colNames.end(), sampleVarianceColNames.begin(), sampleVarianceColNames.end());
  colNames.push_back(overallRunningSumColName);
  
  std::vector < const RealVec* > data;
  data = addDataPtrs(data, scalars);
  data = addDataPtrs(data, runningSumChains);
  data = addDataPtrs(data, sampleVariances);
  data.push_back(&runningSumOverall);
  
  outputToFileVertical(data, colNames, filenameGRWorkingCalcs, precData);
  
}
void MCMCGRDiagnosticPSRF::outputResults ( const std::string &  filenameGRScalars,
const RealVec sampledInd,
int  precData = 5 
) const [virtual]

Output results including the contents of sampledIndPtr.

Note:
Results go to filename returned by getGRDiagnosticsFilename().

Outputs the results of the Gelman-Rubin convergence diagnostic calculations (W, B, estimated variance, estimated R, whether convergence criteria satisfied, and sampledIndPtr values).

Note:
This version used for debugging.
Parameters:
sampledIndPtrA pointer to a collection of values, each one corresponding to a state for which this holds calcuations, indicating whether sampling took place in that state.
precDataThe precision for the output.

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  /* sampledInd may contain more values than we monitored */
  RealVec::const_iterator it;
  advance(it, scalars.size());
  RealVec sampledIndTmp(sampledInd.begin(), it);
  
  std::vector < std::string > colNames;
  getScalarColNames(colNames);
  colNames.push_back("W");
  colNames.push_back("B");
  colNames.push_back("estVarV");
  colNames.push_back("rhat");
  
    colNames.push_back("rhatFlag");
    colNames.push_back("sampled?");
  
  std::vector < const RealVec* > data;
  addDataPtrs(data, scalars);
  data.push_back(&Ws);
  data.push_back(&Bs);
  data.push_back(&estVarV);
  data.push_back(&rhat);
  
    data.push_back(&rhatFlag);
    data.push_back(&sampledIndTmp);
  
  outputToFileVertical(data, colNames, 
  filenameGRScalars, precData);
} 
void MCMCGRDiagnosticPSRF::outputResults ( const std::string &  filenameGRScalars,
int  precData = 5 
) const [virtual]

Output results.

Note:
Results go to filename returned by getGRDiagnosticsFilename().

Outputs the results of the Gelman-Rubin convergence diagnostic calculations (W, B, estimated variance, estimated R, whether convergence criteria satisfied).

Parameters:
precDataThe precision for the output.

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  std::vector <std::string > colNames;
  getScalarColNames(colNames);
  colNames.push_back("W");
  colNames.push_back("B");
  colNames.push_back("estVarV");
  colNames.push_back("rhat");
  
  std::vector < const RealVec* > data;
  addDataPtrs(data, scalars);
  data.push_back(&Ws);
  data.push_back(&Bs);
  data.push_back(&estVarV);
  data.push_back(&rhat);
  
  outputToFileVertical(data, colNames, 
  filenameGRScalars, precData);
} 
void MCMCGRDiagnosticPSRF::updateChainValuesInLoop ( const std::vector< ChangeOfStateInformationAutoMCMC > &  infoObjs) [virtual]

Update calculations for each chain.

Parameters:
infoObjsThe collection of ChangeOfStateInformationAutoMCMC objects that this can ask for updates to its scalar values for each chain.
Precondition:
infoObjs.size() == chains .

Implements subpavings::MCMCGRAuto::Diagnostic.

{
  // do initial values for everything so far
  /* we want to accumulate the sample variance of the scalar summary
   * for each chain up to the point reached in this loop */
  sumOfSampleVariancesOverChains = 0.0;
  
  /* also accumulate sum over all chains of the square of 
   * the running sum of v's 
   * for each chain up to the point reached in this loop */
  sumOfSquaresOfRunningSums = 0.0;
  
  ++states;
  
  size_t newStatesNotInCalcs = states/2;
  #ifdef OLDCALCMETHOD
    newStatesNotInCalcs = 0;
  #endif
  
  
  int dropped = newStatesNotInCalcs - statesNotInCalcs;
  assert((dropped == 0) || (dropped == 1));
  statesNotInCalcs = newStatesNotInCalcs;
  
  /*On the first run, states will be 2 but statesForCalcs will be 1)*/
  size_t statesForCalcs = states - statesNotInCalcs;
  #ifdef MYDEBUG_CALCS
    cout << "\nstates for calcs = " << statesForCalcs << endl;
    cout << "dropped = " << dropped << endl;
    cout << "statesNotInCalcs = " << statesNotInCalcs << endl;
  #endif  
  
  for (size_t ci = 0; ci < chains; ++ci) {
    //get the last value
    real lastValue = getScalarValue(infoObjs[ci]);
    real droppedValue = 0.0;
    if (dropped) droppedValue = scalars.at(ci)[statesNotInCalcs-1];
    #ifdef MYDEBUG_CALCS
      cout << "\nchain = " << ci << endl;
      cout << "droppedValue = " << droppedValue << endl;
    #endif
    scalars.at(ci).push_back(lastValue);
    
    // update the running sum of v's for the chain, held in runningSum
    cxsc::real newRunningSum = runningSum.at(ci) 
            + lastValue - droppedValue;
    runningSum.at(ci) = newRunningSum;
    
    // accumulate the square of the running sum of v's 
    sumOfSquaresOfRunningSums += newRunningSum*newRunningSum;
    
    /* update the running sum of squared v's over this chain
     *  held in runningSumSquared as a dot precision */
    cxsc::accumulate( runningSumSquared[ci], 
          lastValue, lastValue );
    cxsc::accumulate( runningSumSquared[ci], 
          -droppedValue, droppedValue );
          
    // update  the overall running sum runningSumAllChains 
    runningSumAllChains += (lastValue - droppedValue);
    
    /* accumulate the sample variance for v's for this chain: 
     * sample variance for the scalar summary v
     * calculated as (sum of squares - n * square of averages)/(n-1)
     * which equals (sum of squares - square of sums/n)/(n-1) */
    
    cxsc::real thisSampleVariance = 0.0;
    if (statesForCalcs > 1) {
      thisSampleVariance =  ( 1.0/(statesForCalcs - 1) )
        *( cxsc::rnd(runningSumSquared[ci])
        -  (newRunningSum*newRunningSum/(statesForCalcs * 1.0)) ) ;
    }
    
    #ifdef MYDEBUG_CALCS_EXTRA
      cout << "statesForCalcs = " << statesForCalcs << endl;
      cout << "runningSumSquared[ci] = " << runningSumSquared[ci] << endl;
      cout << "newRunningSum = " << newRunningSum << endl;
      cout << "thisSampleVariance = " << thisSampleVariance << endl;
    #endif
    
    sumOfSampleVariancesOverChains += thisSampleVariance;
    
    #ifdef MYDEBUG_CALCS_EXTRA
      cout << "now, sumOfSampleVariancesOverChains = " << sumOfSampleVariancesOverChains << endl;
    #endif
    
    if (keepLogs) {
      sampleVariances.at(ci).push_back( thisSampleVariance );
      runningSumChains.at(ci).push_back (newRunningSum);
    }
    
    #ifdef MYDEBUG_CALCS
    
      //check thisSampleVariance is correct, doing it the long way
      // scalars[ci] has the v_ij for each chain i
      
      real acc(0.0);
      {
        RealVecItr it = scalars.at(ci).begin();
        advance(it, statesNotInCalcs);
        assert( distance(it, scalars.at(ci).end()) == statesForCalcs);
        for ( ; it < scalars.at(ci).end(); ++it) {
          acc+= (*it);
        }
      }
      
      cxsc::real av = acc/(1.0*statesForCalcs);
      cxsc::dotprecision accDiffs(0.0);
      
      {
        RealVecItr it = scalars.at(ci).begin();
        advance(it, statesNotInCalcs);
        
        for ( ; it < scalars.at(ci).end(); ++it) {
          cxsc::real thisDiff = (*it) - av;
          // sum up the squares of the differences compared to overall average
          cxsc::accumulate(accDiffs, thisDiff, thisDiff);
        }
      }
      cxsc::real altVar = 0.0;
      if (statesForCalcs > 1) altVar = rnd(accDiffs)/( statesForCalcs- 1.0 );
      
      cout << "\nthisSampleVariance is\t" 
          << sampleVariances.at(ci).back() << endl;
      cout << "and value calculated from basics is \t" << altVar << endl;
  
    #endif
  }
}

The documentation for this class was generated from the following files:
 All Classes Namespaces Functions Variables Typedefs Enumerations Friends