A wrapper or manager for an SPSnode aka StatsSubPaving in conjunction with massive amounts of sample data. More...
Classes | |||||||||||||||||||||
class | ChangeOfStateInformation | ||||||||||||||||||||
Type for collecting change of state information. More... | |||||||||||||||||||||
class | NodePtrMeasurePair | ||||||||||||||||||||
class | PrioritySplitQueueEvaluator | ||||||||||||||||||||
Types for evaluating priority split queues. More... | |||||||||||||||||||||
class | PrivatePrioritySplitQueueEvaluator | ||||||||||||||||||||
Inner type for evaluating priority split queues. | |||||||||||||||||||||
Public Member Functions | |||||||||||||||||||||
AdaptiveHistogram () | |||||||||||||||||||||
Default constructor. | |||||||||||||||||||||
AdaptiveHistogram (bool as, int lab=0) | |||||||||||||||||||||
Initialised constructor. | |||||||||||||||||||||
AdaptiveHistogram (const ivector &v, bool as=false) | |||||||||||||||||||||
Initialised constructor. | |||||||||||||||||||||
AdaptiveHistogram (const ivector &v, bool as, int lab) | |||||||||||||||||||||
Initialised constructor. | |||||||||||||||||||||
AdaptiveHistogram (const AdaptiveHistogram &other) | |||||||||||||||||||||
Copy constructor. | |||||||||||||||||||||
AdaptiveHistogram & | operator= (AdaptiveHistogram rhs) | ||||||||||||||||||||
Copy assignment operator. | |||||||||||||||||||||
AdaptiveHistogram & | operator+= (const AdaptiveHistogram &rhs) | ||||||||||||||||||||
Overloaded add-to-self operator. | |||||||||||||||||||||
const AdaptiveHistogram | operator+ (const AdaptiveHistogram &rhs) const | ||||||||||||||||||||
Overloaded addition operator. | |||||||||||||||||||||
~AdaptiveHistogram () | |||||||||||||||||||||
Destructor. | |||||||||||||||||||||
SPSnode * | getSubPaving () const | ||||||||||||||||||||
Return a pointer to the SPSnode this manages. | |||||||||||||||||||||
int | getLabel () const | ||||||||||||||||||||
Return the label for this. | |||||||||||||||||||||
BigDataCollection | getDataCollection () const | ||||||||||||||||||||
Return a copy of the data collection this manages. | |||||||||||||||||||||
bool | getHoldAllStats () const | ||||||||||||||||||||
get the value of holdAllStats field. | |||||||||||||||||||||
bool | hasSubPaving () const | ||||||||||||||||||||
Get whether this has a subpaving to manage. | |||||||||||||||||||||
cxsc::ivector | getRootBox () const | ||||||||||||||||||||
Get the box of the subpaving managed by this. | |||||||||||||||||||||
real | getRootVolume () const | ||||||||||||||||||||
get the volume of root box of the subpaving this manages. | |||||||||||||||||||||
int | getDimensions () const | ||||||||||||||||||||
get the dimensions of the subpaving this manages. | |||||||||||||||||||||
rvector | getRootPavingMean () const | ||||||||||||||||||||
Gets sample mean for data held. | |||||||||||||||||||||
RealVec | getRootPavingVarCovar () const | ||||||||||||||||||||
Gets sample variance covariance vector for data held. | |||||||||||||||||||||
rvector | getDataCollectionMean () const | ||||||||||||||||||||
Gets sample mean for data held in the dataCollection. | |||||||||||||||||||||
RealVec | getDataCollectionVarCovar () const | ||||||||||||||||||||
Gets sample variance covariance vector for data held in the data collection. | |||||||||||||||||||||
size_t | getRootCounter () const | ||||||||||||||||||||
Gets count in the rootpaving in the root paving. | |||||||||||||||||||||
size_t | getRootLeaves () const | ||||||||||||||||||||
Gets number of leaf nodes in the root paving. | |||||||||||||||||||||
size_t | getRootCherries () const | ||||||||||||||||||||
Gets number of cherry nodes in the root paving. | |||||||||||||||||||||
unsigned long int | getRootTotalLeafDepth () const | ||||||||||||||||||||
Gets the total depth over all the leaves in the root. | |||||||||||||||||||||
real | getRootSumLeafCountOverVol () const | ||||||||||||||||||||
Gets the sum of leaf count over volume in root paving. | |||||||||||||||||||||
real | getEMPScoreCOPERR () const | ||||||||||||||||||||
get the EMP part of the COPERR score. | |||||||||||||||||||||
real | getEMPScoreAIC () const | ||||||||||||||||||||
get the EMP part of the AIC score. | |||||||||||||||||||||
real | getScoreCOPERR (const PenObj &pen, bool verbose=false) const | ||||||||||||||||||||
get the COPERR score. | |||||||||||||||||||||
real | getScoreAIC (const PenObj &pen, bool verbose=false) const | ||||||||||||||||||||
get AIC score. | |||||||||||||||||||||
real | getPENValue (const PenObj &pen, int deltaLeaf=0) const | ||||||||||||||||||||
get the PEN value. | |||||||||||||||||||||
double | getMinVol (double minVolB) const | ||||||||||||||||||||
get the value of the minimum volume for a splittable node. | |||||||||||||||||||||
IntVec | getLeafLevels () const | ||||||||||||||||||||
std::string | getLeafLevelsString () const | ||||||||||||||||||||
Size_tVec | getLeafCounts () const | ||||||||||||||||||||
void | outputLog (const std::string &s, int i, int prec=5) const | ||||||||||||||||||||
Append current state of histogram to a txt log file. | |||||||||||||||||||||
void | outputLogPlain (const std::string &s, int i, int prec=5) const | ||||||||||||||||||||
Append current state of histogram to a txt log file. | |||||||||||||||||||||
bool | insertOne (rvector newdata, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Insert a single data point into AdaptiveHistogram object. | |||||||||||||||||||||
bool | priorityMerge (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Priority merge to reduce number of leaves in histogram. | |||||||||||||||||||||
bool | priorityMerge (const NodeCompObj &compTest, size_t minLeaves, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Priority merge to reduce number of leaves in histogram. | |||||||||||||||||||||
bool | mergeUp () | ||||||||||||||||||||
Merge a multileaf histogram up to just root. | |||||||||||||||||||||
std::pair< size_t, cxsc::real > | getNonEmptyBoxSummary () const | ||||||||||||||||||||
Get summary information on non-empty box numbers and volumes in this histogram. | |||||||||||||||||||||
double | findCoverage (const rvector &pt) const | ||||||||||||||||||||
Find the coverage value for a data point. | |||||||||||||||||||||
double | findEmpiricalDensity (const rvector &pt) const | ||||||||||||||||||||
Find the empirical density for a data point. | |||||||||||||||||||||
bool | histBoxContains (const rvector &pt) const | ||||||||||||||||||||
Get whether the root box for this contains a given point. | |||||||||||||||||||||
bool | splitToShape (std::string instruction) | ||||||||||||||||||||
Split a histogram to a specified shape. | |||||||||||||||||||||
cxsc::real | getLogLikelihood () const | ||||||||||||||||||||
Get the log-likelihood for this histogram. | |||||||||||||||||||||
bool | outputGraphDot () const | ||||||||||||||||||||
Make a .dot graph file from histogram structure. | |||||||||||||||||||||
std::ostream & | outputToStreamTabs (std::ostream &os, int prec=5) const | ||||||||||||||||||||
Output the subpaving managed by this to a given stream. | |||||||||||||||||||||
void | reshapeToUnion (const PiecewiseConstantFunction &other) | ||||||||||||||||||||
Change this so that the subpaving it manages is the union of this's subpaving and the subpaving of that of a PiecewiseConstantFunction. | |||||||||||||||||||||
void | reshapeToUnion (const PiecewiseConstantFunction &other, size_t minChildPoints) | ||||||||||||||||||||
Change this so that the subpaving it manages is as close as possible to the union of this's subpaving and the subpaving of that of a PiecewiseConstantFunction. | |||||||||||||||||||||
cxsc::real | getL1Distance (const AdaptiveHistogram &other) const | ||||||||||||||||||||
void | setHoldAllStats (bool setTo) | ||||||||||||||||||||
void | setLabel (int lab) | ||||||||||||||||||||
void | clearAllHistData () | ||||||||||||||||||||
void | swap (AdaptiveHistogram &adh) | ||||||||||||||||||||
Swap the contents of this and another histogram. | |||||||||||||||||||||
std::string | stringSummary () const | ||||||||||||||||||||
Get a string summary of this histogram's properties. | |||||||||||||||||||||
std::string | doubleCheckStringSummary () const | ||||||||||||||||||||
Get a string summary of the properties of this which can be used for checking and debugging. | |||||||||||||||||||||
Insert rvectors in a txt file into AdaptiveHistogram object. | |||||||||||||||||||||
Reads in lines of data representing rvectors from a txt file. The dimensions of the data may given or may be deduced from existing data in this or from the data in the file. Expects one line per rvector with the elements separated by white space (space or tabs), or by commas. Input files can be checked to try to ensure data conforms with some given dimensions and/or has no illegal characters. Illegal characters are any characters other than those in the string "eE+-.,0123456789 \t". Note that this allows numbers in scientific format, and with space, tab, or comma delimiters. However, allowing the characters 'e' and 'E' as part of scientific format, and the delimiters, also allows the possibility that these are erroneously present. The level of checking for illegal characters is not rigorous and will not pick up the inadvertent or erroneous presence of these non-numeric characters. This includes the presence of delimiters before the first numeric character, which may result in 0.0 being inserted into the first dimension(s) of the rvector read from the relevant data line. Ordinary checking means that:
Note that under ordinary checking, data lines after the first valid line are not checked to ensure that they conform to expected dimensions: data in excess of the given dimensions will be ignored and 'missing' dimensions will be padded with 0.0's. Paranoid checking means that expected data dimensions must be given and checking is carried out as for Ordinary checking plus further checking:
Fast checking effectively means no checking (fast reading of data). Data is assumed to conform to the given dimensions: data in excess of the given dimensions will be ignored and all data lines which are less than the given dimensions will result in an rvector "padded" with 0.0 in the additional dimensions. Illegal characters in the input, including initial delimiters before the first numeric values, may be interpreted as part of a real vector, or as delimiters, or ignored. No guarantees are given about the resulting data read in if the data does not conform to the values given for the parameters for headerlines and data dimensions or if the data is corrupted by illegal characters. While reading of the file continues (has not been aborted, for example because the dimension of the data does not seem to be as expected) , input lines which do not pass checks are rejected and logged but the rest of the file will continue to be processed. Blank lines are ignored. If no data dimension is specified by the user (dim = -1), then the expected dimension dim will be taken as the dimension of the first valid data line read in from the file. If this has no subpaving to manage, then a subpaving to manage will be created by the method. The subpaving created will be of the same dimensions as the expected dimensions of the data and will have a box big enough to contain all the data of the expected dimensions. Throws an invalid_argument exception if dim is supplied by the user and dim < 1. For example, a string "12.04 1.00005e-10 -30.0006" will be read as a 3-dimensional rvector, a string "12.04 1.00005E-10 -30.0006" will be read as a 3-dimensional rvector, a string "-30.0006" will be read as a 1-dimensional rvector and a string "30 20" will be will be read as a 2-dimensional rvector
| |||||||||||||||||||||
bool | insertOneDimDataFromTxt (const std::string &s, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, no splitting, 1-dimensional data. | |||||||||||||||||||||
bool | insertOneDimDataFromTxt (const std::string &s, const SplitDecisionObj &boolTest, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, adaptive splitting with each data point inserted, 1-dimensional data. | |||||||||||||||||||||
bool | insertRvectorsFromTxt (const std::string &s, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, no splitting, no dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxt (const std::string &s, const SplitDecisionObj &boolTest, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, adaptive splitting with each data point inserted, no dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxt (const std::string &s, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, no splitting, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxt (const std::string &s, const SplitDecisionObj &boolTest, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, adaptive splitting with each data point inserted, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtOrd (const std::string &s, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, no splitting, no dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtOrd (const std::string &s, const SplitDecisionObj &boolTest, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, adaptive splitting with each data point inserted, no dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtOrd (const std::string &s, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, no splitting, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtOrd (const std::string &s, const SplitDecisionObj &boolTest, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, adaptive splitting with each data point inserted, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtOrd (const std::string &s, const std::vector< int > &reqDims, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, no splitting, selected required dimensions. | |||||||||||||||||||||
bool | insertRvectorsFromTxtOrd (const std::string &s, const SplitDecisionObj &boolTest, const std::vector< int > &reqDims, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Ordinary level checking, adaptive splitting with each data point inserted, , selected required dimensions. | |||||||||||||||||||||
bool | insertRvectorsFromTxtParanoid (const std::string &s, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Paranoid level checking, no splitting, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtParanoid (const std::string &s, const SplitDecisionObj &boolTest, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Paranoid level checking, adaptive splitting with each data point inserted, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtParanoid (const std::string &s, const std::vector< int > &reqDims, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Paranoid level checking, no splitting, selected required dimensions. | |||||||||||||||||||||
bool | insertRvectorsFromTxtParanoid (const std::string &s, const SplitDecisionObj &boolTest, const std::vector< int > &reqDims, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Paranoid level checking, adaptive splitting with each data point inserted, selected required dimensions. | |||||||||||||||||||||
bool | insertRvectorsFromTxtFast (const std::string &s, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Fast level checking, no splitting, dimensions specified. | |||||||||||||||||||||
bool | insertRvectorsFromTxtFast (const std::string &s, const SplitDecisionObj &boolTest, int dim, const std::size_t headerlines=0, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Fast level checking, adaptive splitting with each data point inserted, dimensions specified. | |||||||||||||||||||||
Insert rvectors from a vector of vectors of doubles | |||||||||||||||||||||
into AdaptiveHistogram object. Reads in vectors of doubles, representing rvectors, from a std::vector. The dimensions of the rvector can be given or (if dim is not given) deduced from from the first vector in the input vector inputData. The rest of the data then is expected to have the same dimension as that given or found from the frst vector in the input vector. Any data with dimensions less than the expected dimensions will be rejected. If data has more than the expected dimensions, only the values on the first expected dimensions will be read in (the rest will be ignored). Expects one inner vector per rvector. Throws an invalid_argument exception if dim is supplied by the user and dim < 1.
| |||||||||||||||||||||
bool | insertRvectorsFromVectorOfVecDbls (const std::vector< VecDbl > &inputData, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
All rvectors are associated with the root paving, no splitting. | |||||||||||||||||||||
bool | insertRvectorsFromVectorOfVecDbls (const std::vector< VecDbl > &inputData, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Adaptive splitting with each data point inserted. | |||||||||||||||||||||
bool | insertRvectorsFromVectorOfVecDbls (const std::vector< VecDbl > &inputData, int dim, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
All rvectors are associated with the root paving, no splitting, dims given. | |||||||||||||||||||||
bool | insertRvectorsFromVectorOfVecDbls (const std::vector< VecDbl > &inputData, const SplitDecisionObj &boolTest, int dim, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Adaptive splitting with each data point inserted, dims given. | |||||||||||||||||||||
Insert rvectors from a container of rvectors. | |||||||||||||||||||||
If checkDims == false, appends the entire contents of rvec to the end of data unless the first element in rvec is an empty (0-d) rvector. If checkDims == true, checks the dimensions of each element in inputData and only inserts those elements where with dimension equal to that of the first element in inputData . Throws an illegal_argument exception if the first element in inputData is an empty rvector.
| |||||||||||||||||||||
bool | insertFromRVec (const RVecData &inputData, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertFromRVec (const RVecData &inputData, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertFromRVec (const RVecData &inputData, bool checkDims, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertFromRVec (const RVecData &inputData, const SplitDecisionObj &boolTest, bool checkDims, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Insert a set number of rvectors from a container. | |||||||||||||||||||||
The overloaded versions which take a random number generator as a parameter can be used to take successive samples from the same container. Otherwise the random number generator is created and destroyed during the scope of the function and repeating the identical function call will produce an identical sample from the container.
| |||||||||||||||||||||
bool | insertSampleFromRVec (size_t samplesize, gsl_rng *rgsl, const RVecData &rvec, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertSampleFromRVec (size_t samplesize, int seed, const RVecData &rvec, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertSampleFromRVec (size_t samplesize, const RVecData &rvec, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertSampleFromRVec (size_t samplesize, gsl_rng *rgsl, const RVecData &rvec, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertSampleFromRVec (size_t samplesize, int seed, const RVecData &rvec, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
bool | insertSampleFromRVec (size_t samplesize, const RVecData &rvec, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging=NOLOG) | ||||||||||||||||||||
Insert all rvectors from an RSSample object. | |||||||||||||||||||||
Insert rvectors from the labeled point samples of an RSSample object. If a label parameter is not specified, insert only points from sample where the point label matches the label for this. Otherwise insert only points where the point label matches label.
| |||||||||||||||||||||
bool | insertFromRSSample (const RSSample &rss, int lab, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertFromRSSample (const RSSample &rss, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertFromRSSample (const RSSample &rss, int lab, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertFromRSSample (const RSSample &rss, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
Insert a set number of rvectors from an RSSample object. | |||||||||||||||||||||
Insert a set number of rvectors from the labeled point samples of an RSSample object. Sampling is random with replacement. If a label parameter is not specified, insert only points from sample where the point label matches the label for this. Otherwise insert only points where the point label matches label. The overloaded versions which take a random number generator as a parameter can be used to take successive samples from the same RSSample object. Otherwise the random number generator is created and destroyed during the scope of the function and repeating the identical function call will produce an identical sample from the RSSample object.
| |||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, gsl_rng *rgsl, const RSSample &rss, int lab, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, gsl_rng *rgsl, const RSSample &rss, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, int seed, const RSSample &rss, int lab, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, int seed, const RSSample &rss, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, const RSSample &rss, int lab, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, const RSSample &rss, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, gsl_rng *rgsl, const RSSample &rss, int lab, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, gsl_rng *rgsl, const RSSample &rss, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, int seed, const RSSample &rss, int lab, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, int seed, const RSSample &rss, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, const RSSample &rss, int lab, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | insertSampleFromRSSample (size_t samplesize, const RSSample &rss, const SplitDecisionObj &boolTest, LOGGING_LEVEL logging) | ||||||||||||||||||||
prioritySplit methods. | |||||||||||||||||||||
These methods takes a histogram and progressively splits using a priority queue to determine which node to split first. Splitting continues until there are maxLeaves leaves, or there are no more splittable nodes.
Nodes are considered to be splittable if they satisfy two criteria: First, their volume is greater than the minimum volume specified for the histogram as a whole through minVolB. Second if both prospective children would have at least minChildPoints data points associated with them. If more than one node is equally 'large', on the basis of the node comparison compTest used, then a random choice is made between all equally large nodes to find the node which will be split. The random number generator used for random selection between equally 'large' nodes uses a default seed to ensure that results can be replicated. If you are looking at distributions of results across mulitple histograms, supply the random number generator to the priority queue to ensure that each histogram will make different random choices. Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer. Throws an std::logic_error if the state of this does not satisfy the criteria implied by minVolB and minChildPoints, ie if a cherry not satisfying the implied min volume and child points rules has been split. Throws an std::logic_error if the split becomes muddled because of some failure within the logic of the algorithm itself. Aborts if there are no splittable leaves left (or none at the start).
| |||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, size_t minChildPoints) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, double minVolB) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, size_t minChildPoints, double minVolB) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, size_t minChildPoints, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, double minVolB, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, const HistEvalObj &he, LOGGING_LEVEL logging, size_t minChildPoints, double minVolB, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, size_t minChildPoints) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, double minVolB) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, size_t minChildPoints, double minVolB) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, size_t minChildPoints, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, double minVolB, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||||||||||
bool | prioritySplit (const NodeCompObj &compTest, size_t maxLeaves, LOGGING_LEVEL logging, size_t minChildPoints, double minVolB, gsl_rng *rgsl) | ||||||||||||||||||||
Get a PiecewiseConstantFunction as the average | |||||||||||||||||||||
of samples from a Markov-Chain Monte Carlo process starting from this. The leaves of the SPSnode tree represent the partition of the data space (the root box of the tree). A histogram state is a particular partition of the root box which will be represented by a particular tree number and disposition of nodes of the tree. The Markov-Chain Monte Carlo process considers possible histogram states, given data, as a probability distribution. The the Metropolis-Hastings algorithm is used on the SPSnode tree managed by this Adaptive Histogram to generate PiecewiseConstantFunction samples from the histogram state probability density. This method returns the average over the samples MCMC requires a prior distribution over the state space and a likelihood of being in a particular state given the data, from which can be found (up to proportionality) a posterior distribution proportional to the likelihood x prior. A change in state can take place by either splitting a leaf node (bisecting the box represesented by that leaf and sending the data associated with the box down to the two new children) or absorbing two sibling leaf nodes back into their parent so that that parent becomes a leaf of the tree. The parent node of two sibling leaf nodes is referred to as a 'cherry' and the reabsorbtion of the sibling leaf children of a cherry is referred to as 'merging' a cherry. minPoints restricts the possible states by not allowing the chain to include a state where a leaf node has less than minPoints data points associated with it, unless that child leaf node has 0 points and its sibling has all the parent's points and number of parent's points >= minPoints. The final condition allows a node to be split when all the data goes to just one child provided that the number of points in the node >= minPoints so that the process can 'home in' on small peaks of data. | |||||||||||||||||||||
PiecewiseConstantFunction | MCMC (unsigned int loops, unsigned int burnin, unsigned int thinout, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging) | ||||||||||||||||||||
MCMC average method using a random number generator set and seeded internally, using the gsl default generator and seed. | |||||||||||||||||||||
PiecewiseConstantFunction | MCMC (unsigned int loops, unsigned int burnin, unsigned int thinout, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, long unsigned int seed) | ||||||||||||||||||||
MCMC average method using a random number generator set internally using the gsl default generator, and with seed set by the user. | |||||||||||||||||||||
PiecewiseConstantFunction | MCMC (unsigned int loops, unsigned int burnin, unsigned int thinout, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||||||||||
MCMC average method using a random number generator supplied by the user. | |||||||||||||||||||||
PiecewiseConstantFunction | MCMC_IMH (size_t maxLeaves, unsigned int loops, unsigned int burnin, unsigned int thinout, LogMCMCIMHPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging) | ||||||||||||||||||||
MCMC average method using a random number generator set and seeded internally, using the gsl default generator and seed. | |||||||||||||||||||||
PiecewiseConstantFunction | MCMC_IMH (size_t maxLeaves, unsigned int loops, unsigned int burnin, unsigned int thinout, LogMCMCIMHPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, long unsigned int seed) | ||||||||||||||||||||
MCMC average method using a random number generator set internally using the gsl default generator, and with seed set by the user. | |||||||||||||||||||||
PiecewiseConstantFunction | MCMC_IMH (size_t maxLeaves, unsigned int loops, unsigned int burnin, unsigned int thinout, LogMCMCIMHPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||||||||||
MCMC average method using a random number generator supplied by the user. | |||||||||||||||||||||
Generating MCMC samples from histogram state space. | |||||||||||||||||||||
The leaves of the SPSnode tree represent the partition of the data space (the root box of the tree). A histogram state is a particular partition of the root box which will be represented by a particular tree number and disposition of nodes of the tree. The Markov-Chain Monte Carlo process considers possible histogram states, given data, as a probability distribution. In this implementation the the Metropolis-Hastings algorithm is used on the SPSnode tree managed by this Adaptive Histogram to generate PiecewiseConstantFunction samples from the histogram state probability density. MCMC requires a prior distribution over the state space and a likelihood of being in a particular state given the data, from which can be found (up to proportionality) a posterior distribution proportional to the likelihood x prior. A change in state can take place by either splitting a leaf node (bisecting the box represesented by that leaf and sending the data associated with the box down to the two new children) or absorbing two sibling leaf nodes back into their parent so that that parent becomes a leaf of the tree. The parent node of two sibling leaf nodes is referred to as a 'cherry' and the reabsorbtion of the sibling leaf children of a cherry is referred to as 'merging' a cherry. minPoints restricts the possible states by not allowing the chain to include a state where a leaf node has less than minPoints data points associated with it, unless that child leaf node has 0 points and its sibling has all the parent's points and number of parent's points >= minPoints. The final condition allows a node to be split when all the data goes to just one child provided that the number of points in the node >= minPoints so that the process can 'home in' on small peaks of data. | |||||||||||||||||||||
std::vector < PiecewiseConstantFunction > & | MCMCsamples (std::vector< PiecewiseConstantFunction > &samples, unsigned int loops, unsigned int burnin, unsigned int thinout, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging) | ||||||||||||||||||||
MCMC samples method using a random number generator set and seeded internally, using the gsl default generator and seed. | |||||||||||||||||||||
std::vector < PiecewiseConstantFunction > & | MCMCsamples (std::vector< PiecewiseConstantFunction > &samples, unsigned int loops, unsigned int burnin, unsigned int thinout, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, long unsigned int seed) | ||||||||||||||||||||
MCMC samples method using a random number generator set internally using the gsl default generator, and with seed set by the user. | |||||||||||||||||||||
std::vector < PiecewiseConstantFunction > & | MCMCsamples (std::vector< PiecewiseConstantFunction > &samples, unsigned int loops, unsigned int burnin, unsigned int thinout, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||||||||||
MCMC samples method using a random number generator supplied by the user. | |||||||||||||||||||||
Generating MCMC samples from histogram state space using | |||||||||||||||||||||
an Independent Metropolis Hastings chain. /note This is an experimental routine that has not been fully tested and where the implemententation possibly requires further work. It is not recommended for use in anything other than an experimental setting. The leaves of the SPSnode tree represent the partition of the data space (the root box of the tree). A histogram state is a particular partition of the root box which will be represented by a particular tree number and disposition of nodes of the tree. The Markov-Chain Monte Carlo process considers possible histogram states, given data, as a probability distribution. In this implementation the the Independent Metropolis-Hastings algorithm is used on the SPSnode tree managed by this AdaptiveHistogram to generate PiecewiseConstantFunction samples from the histogram state probability density. MCMC requires a prior distribution over the state space and a likelihood of being in a particular state given the data, from which can be found (up to proportionality) a posterior distribution proportional to the likelihood x prior. A change in state can take place by either splitting a leaf node (bisecting the box represesented by that leaf and sending the data associated with the box down to the two new children) or absorbing two sibling leaf nodes back into their parent so that that parent becomes a leaf of the tree. The parent node of two sibling leaf nodes is referred to as a 'cherry' and the reabsorbtion of the sibling leaf children of a cherry is referred to as 'merging' a cherry. minPoints restricts the possible states by not allowing the chain to include a state where a leaf node has less than minPoints data points associated with it, unless that child leaf node has 0 points and its sibling has all the parent's points and number of parent's points >= minPoints. The final condition allows a node to be split when all the data goes to just one child provided that the number of points in the node >= minPoints so that the process can 'home in' on small peaks of data. | |||||||||||||||||||||
std::vector < PiecewiseConstantFunction > & | MCMCsamplesIMH (size_t maxLeaves, std::vector< PiecewiseConstantFunction > &samples, unsigned int loops, unsigned int burnin, unsigned int thinout, LogMCMCIMHPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging) | ||||||||||||||||||||
MCMC samples method using a random number generator set and seeded internally, using the gsl default generator and seed. | |||||||||||||||||||||
std::vector < PiecewiseConstantFunction > & | MCMCsamplesIMH (size_t maxLeaves, std::vector< PiecewiseConstantFunction > &samples, unsigned int loops, unsigned int burnin, unsigned int thinout, LogMCMCIMHPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, long unsigned int seed) | ||||||||||||||||||||
MCMC samples method using a random number generator set internally using the gsl default generator, and with seed set by the user. | |||||||||||||||||||||
std::vector < PiecewiseConstantFunction > & | MCMCsamplesIMH (size_t maxLeaves, std::vector< PiecewiseConstantFunction > &samples, unsigned int loops, unsigned int burnin, unsigned int thinout, LogMCMCIMHPrior &logPrior, size_t minPoints, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||||||||||
MCMC samples method using a random number generator supplied by the user. | |||||||||||||||||||||
Change the state of this Adaptive Histogram using MCMC process | |||||||||||||||||||||
and capture changes in an information object. | |||||||||||||||||||||
AdaptiveHistogram::ChangeOfStateInformation & | changeMCMCState (SPSnodeList &nodes, size_t &numLeaves, size_t &numCherries, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, const RealMappedSPnode &rmsp, RealMappedSPnode::ListPtrs &leaves, RealMappedSPnode::ListPtrs &cherries, AdaptiveHistogram::ChangeOfStateInformation &info, gsl_rng *rgsl, LOGGING_LEVEL logging, const std::string &sHist, int i) | ||||||||||||||||||||
Version of method using minVol. | |||||||||||||||||||||
AdaptiveHistogram::ChangeOfStateInformation & | changeMCMCState (SPSnodeList &nodes, size_t &numLeaves, size_t &numCherries, MCMCProposal &proposal, LogMCMCPrior &logPrior, size_t minPoints, real minVol, const RealMappedSPnode &rmsp, RealMappedSPnode::ListPtrs &leaves, RealMappedSPnode::ListPtrs &cherries, AdaptiveHistogram::ChangeOfStateInformation &info, gsl_rng *rgsl, LOGGING_LEVEL logging, const std::string &sHist, int i) | ||||||||||||||||||||
bool | changeMCMCStateIMH (size_t maxLeaves, const MCMCPartitionGenerator &partitioner, const LogMCMCIMHPrior &logPrior, size_t minPoints, RealMappedSPnode &rmsp, size_t &numLeaves, real &unscaledLogLik, real &logPosterior, gsl_rng *rgsl, LOGGING_LEVEL logging, const std::string &sDec, int i, const string &failureLogFilename) | ||||||||||||||||||||
Find maximum log-posteriors using two priorty split functions. | |||||||||||||||||||||
Do a number of priority queue splits with two priority queue functions, operating one after the other, and find the maximum log-posterior points reached by each queue overall. The PrioritySplitQueueEvaluator psqe controls the first part of each queue and the PrioritySplitQueueEvaluator psqePosterior controls the second part of each queue. Each PrioritySplitQueueEvaluator will specify a maximum number of leaves. One queue using just the psqePosterior controlled queue is always run. The parameter checkMaxStep effectively controls the number of different versions of the psqe controlled queue that are run: each one is run to cover a number of interations (splits) that increments by checkMaxStep each time, until the overall maximum number of leaves specified in psqe is reached or exceeded. Each psqe controlled queue thus started is monitored and the maximum support-adjusted log-posterior point in the last checkMaxStep splis is recorded and used as the start point for a psqePosterior controlled queue that runs until the maximum number of leaves specified in pqsePosterior is reached (or until there are no more splittable nodes left). If checkMaxStep = 0, no psqe controlled queues are run (just the first queue using just psqePosterior). A support-adjusted log-posterior is unnormalised and is calculated by adjusting the unnormalised log-posterior for the total empty box volume over the whole of the root box (ie, tries to use the actual support as represented by boxes with at least some points in them). The effect is thus to run the psqe controlled queue in blocks and within each block, find the maximum support-adjusted log-posterior point, and use that as the start point for a controlled queue, and record the (un-normalised) log-posterior for the combined two-phase queue for the block. This approach is used as a kind of trial-and-error method of finding an appropriate number of leaves to run the first phase (typically the carving phase) for when we have very little information to go on. For each overall queue, including the one just controlled by pqsePosterior, the maximum log-posterior point is found and recorded using carvedLaunchPoints (the number of leaves the first psqe controlled queue runs to - for the queue that only uses the psqePosterior queue, this is taken as the number of leaves in this at the start of the operation), maxPosteriorPoints (the number of leaves that the psqePosterior controlled queue runs to) and maxPosteriors (the (unnormalised) value of the maximum log-posterior found)). emptyBoxVolsVec and posteriorSupportVec collect information from the total or overall pqse phase, ie the whole psqe controlled queue run until the maximum number of leaves specified in psqe is reached. emptyBoxVolsVec is used to store the total volume of the empty boxes in this for each loop (split) in the psqe phase, and posteriorSupportVec is used to store the (un-normalised) support-adjusted log-posterior of this after each split in the psqe phase. stopOnMaxPosterior controls how long the psqePosterior queue is run for after an (apparent) maximum posterior point is found. This is only relevant to the queues that include a psqe controlled phase (the first, pqsePosterior only controlled, queue should be run until the maximum leaves specified in pqsePosterior is reached or until there are no more splittable leaves). If stopOnMaxPosterior is false then each controlled phase of a queue with an initial phase controlled by pqse is also run until the maximum leaves specified in pqsePosterior is reached (or until there are no more splittable leaves). If stopOnMaxPosterior is true then each controlled queue with an initial phase controlled by pqse is still run for some distance past a point that appears to represent a maximum (to check that this is not just a local maximum), but not necessarily until the maximum leaves specified in pqsePosterior is reached. In each queue, if the log-posteriors are still going up at the end of the queue, ie when the maximum leaves specified in pqsePosterior is reached, then the final log-posterior is recorded. | |||||||||||||||||||||
bool | prioritySplitWithSupportPosteriorMaxLik (const PrioritySplitQueueEvaluator &psqe, const PrioritySplitQueueEvaluator &psqePosterior, LOGGING_LEVEL logging, size_t minChildPoints, double minVol, LogMCMCPrior &logPrior, std::vector< real > &emptyBoxVolsVec, std::vector< real > &posteriorSupportVec, int checkMaxStep, bool stopOnMaxPosterior, std::vector< size_t > &carvedLaunchPoints, std::vector< size_t > &maxPosteriorPoints, std::vector< real > &maxPosteriors, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version taking minVol parameter. | |||||||||||||||||||||
bool | prioritySplitWithSupportPosteriorMaxLik (const PrioritySplitQueueEvaluator &psqe, const PrioritySplitQueueEvaluator &psqePosterior, LOGGING_LEVEL logging, size_t minChildPoints, LogMCMCPrior &logPrior, std::vector< real > &emptyBoxVolsVec, std::vector< real > &posteriorSupportVec, int checkMaxStep, bool stopOnMaxPosterior, std::vector< size_t > &carvedLaunchPoints, std::vector< size_t > &maxPosteriorPoints, std::vector< real > &maxPosteriors, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version without minVol parameter. | |||||||||||||||||||||
Do a priority queue split using two priority queue | |||||||||||||||||||||
functions, operating one after the other. Typically used to get this into the state previously found using prioritySplitWithSupportPosteriorMaxLik or some operation that manipulates the results of prioritySplitWithSupportPosteriorMaxLik to find other states of interest. The PrioritySplitQueueEvaluator psqe controls the first part of each queue and the PrioritySplitQueueEvaluator psqePosterior controls the second part of each queue. Each PrioritySplitQueueEvaluator will specify a maximum number of leaves.
| |||||||||||||||||||||
bool | prioritySplitMaxLik (const PrioritySplitQueueEvaluator &psqe, const PrioritySplitQueueEvaluator &psqePosterior, LOGGING_LEVEL logging, size_t minChildPoints, double minVol, LogMCMCPrior &logPrior, std::vector< real > &posteriorVec, std::vector< real > &loglikVec, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version taking minVol parameter. | |||||||||||||||||||||
bool | prioritySplitMaxLik (const PrioritySplitQueueEvaluator &psqe, const PrioritySplitQueueEvaluator &psqePosterior, LOGGING_LEVEL logging, size_t minChildPoints, LogMCMCPrior &logPrior, std::vector< real > &posteriorVec, std::vector< real > &loglikVec, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version without minVol parameter. | |||||||||||||||||||||
Run a single priority queue recording the | |||||||||||||||||||||
log-posterior and log-likelihood. The PrioritySplitQueueEvaluator psqe will specify a maximum number of leaves to run the queue to. Typically pqse will contain some sort of SEB queue function. posteriorVec and loglikVec collect information from the queue.
| |||||||||||||||||||||
bool | prioritySplitWithSupportPosterior (const PrioritySplitQueueEvaluator &psqe, LOGGING_LEVEL logging, size_t minChildPoints, double minVol, LogMCMCPrior &logPrior, std::vector< real > &emptyBoxVolsVec, std::vector< real > &posteriorSupportVec, std::vector< real > &posteriorVec, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version taking minVol parameter. | |||||||||||||||||||||
bool | prioritySplitWithSupportPosterior (const PrioritySplitQueueEvaluator &psqe, LOGGING_LEVEL logging, size_t minChildPoints, LogMCMCPrior &logPrior, std::vector< real > &emptyBoxVolsVec, std::vector< real > &posteriorSupportVec, std::vector< real > &posteriorVec, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version without minVol parameter. | |||||||||||||||||||||
bool | prioritySplitWithPosterior (const PrioritySplitQueueEvaluator &psqe, LOGGING_LEVEL logging, size_t minChildPoints, LogMCMCPrior &logPrior, std::vector< real > &posteriorVec, std::vector< real > &loglikVec, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version taking minVol parameter. | |||||||||||||||||||||
bool | prioritySplitWithPosterior (const PrioritySplitQueueEvaluator &psqe, LOGGING_LEVEL logging, size_t minChildPoints, double minVol, LogMCMCPrior &logPrior, std::vector< real > &posteriorVec, std::vector< real > &loglikVec, gsl_rng *rgsl, bool shiftCatalan) | ||||||||||||||||||||
Version without minVol parameter. | |||||||||||||||||||||
void | outputToTxtTabs (const std::string &s, int prec=5) const | ||||||||||||||||||||
Output the subpaving managed by this to a txt file. | |||||||||||||||||||||
void | outputToTxtTabs (const std::string &s, int prec, bool confirm) const | ||||||||||||||||||||
void | outputToTxtTabsWithEMPs (const std::string &s, int prec=5) const | ||||||||||||||||||||
Output the subpaving managed by this to a txt file. | |||||||||||||||||||||
void | outputToTxtTabsWithEMPs (const std::string &s, int prec, bool confirm) const | ||||||||||||||||||||
void | outputRootToTxt (const std::string &s, int prec=5) const | ||||||||||||||||||||
Output details of full sample (from root) to txt tile. | |||||||||||||||||||||
void | outputRootToTxt (const std::string &s, int prec, bool confirm) const |
A wrapper or manager for an SPSnode aka StatsSubPaving in conjunction with massive amounts of sample data.
Here sample data is multi-dimensional point-valued data in a cxsc::rvector container. The AdaptiveHistogram class manages SPSnode objects (StatsSubPavings ) for the purpose of creating adaptive histograms from sample data and also manages the container for the sample data itself.
An SPSnode (a pointer to an SPSnode is aliased as StatsSubPaving) is a binary tree representation of a regular subpaving which can be used for processing statistical sample data. SPSnodes do not actually hold data, they only need to know where the data they are associated with is stored. The leaf nodes in the SPSnode tree controlled by an AdaptiveHistogram object have a vector of iterators into the BigDataCollection, a dataCollection managed by that AdaptiveHistogram object.
The AdaptiveHistogram class uses the C-XSC library class rvector for sample data points. rvectors can have 1 or many dimensions.
Each AdaptiveHistogram has an integer label.
Default constructor.
By default, only counts are maintained in subpaving this manages, rather than all available stats. The default label is 0;
: label(0), rootPaving(NULL), holdAllStats(false), scaledEMPSumCOPERR(0.0), scaledEMPSumAIC(0.0) { // nothing happens to dataCollection when object is constructed }
AdaptiveHistogram::AdaptiveHistogram | ( | bool | as, |
int | lab = 0 |
||
) | [explicit] |
Initialised constructor.
as | indicator controlling whether all available statistics be maintained in the SPSnode tree managed by this AdaptiveHistogram (true for all stats, false for counts only). |
lab | value for the label for this (defaults to 0). |
: label(lab), rootPaving(NULL),
holdAllStats(as),
scaledEMPSumCOPERR(0.0), scaledEMPSumAIC(0.0)
{
// nothing happens to dataCollection when object is constructed
}
AdaptiveHistogram::AdaptiveHistogram | ( | const ivector & | v, |
bool | as = false |
||
) | [explicit] |
Initialised constructor.
Initialised with domain box. By default, only counts are maintained as stats in the SPSnode tree managed by this. Default label is 0.
Throws a MalconstructedBox_Error if the box is not suitable as the basis of a subpaving (eg, box has no dimensions, or the box has a thin interval on at least one dimension).
Ideal constructor when the support domain of data is known a priori or has been transformed to a known domain but splitting criteria have not been determined a priori.
: label(0), rootPaving(NULL), holdAllStats(as), scaledEMPSumCOPERR(0.0), scaledEMPSumAIC(0.0) { try { // check the box here if (!checkBox(v)) { throw subpavings::MalconstructedBox_Error( "AdaptiveHistogram::AdaptiveHistogram(const ivector&, bool)"); } rootPaving = new SPSnode(v, !as); } catch (exception const& e) { constructor_error_handler(); } }
AdaptiveHistogram::AdaptiveHistogram | ( | const ivector & | v, |
bool | as, | ||
int | lab | ||
) |
Initialised constructor.
Initialised with domain box, label and indicator for whether all stats are maintained in the in the SPSnode tree managed by this or only counts.
Throws a MalconstructedBox_Error if the box is not suitable as the basis of a subpaving (eg, box has no dimensions, or the box has a thin interval on at least one dimension).
Ideal constructor when the support domain of data is known a priori or has been transformed to a known domain but splitting criteria have not been determined a priori, and a specific label is to be specified.
: label(lab), rootPaving(NULL), holdAllStats(as), scaledEMPSumCOPERR(0.0), scaledEMPSumAIC(0.0) { try { // check the box here if (!checkBox(v)) { throw subpavings::MalconstructedBox_Error( "AdaptiveHistogram::AdaptiveHistogram(const ivector&, bool)"); } rootPaving = new SPSnode(v, !as); } catch (exception const& e) { constructor_error_handler(); } }
void AdaptiveHistogram::clearAllHistData | ( | ) |
Clear all the data in this.
Clears all the data from this's dataCollection and also from the subpaving managed.
Throws a NullSubpavingPointer_Error if the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::clearAllHistData()"); } getSubPaving()->clearAllDataHeld(); BigDataCollection tmp; tmp.swap(dataCollection); // swap and clear scaledEMPSumCOPERR = cxsc::dotprecision(0.0); scaledEMPSumAIC = scaledEMPSumCOPERR; }
std::string AdaptiveHistogram::doubleCheckStringSummary | ( | ) | const |
Get a string summary of the properties of this which can be used for checking and debugging.
Includes details of the subpaving that this manages including addresses of the values associated with leaf nodes of the paving.
{ std::ostringstream oss; oss << "This address = " << this << endl; oss << "Label = " << label << endl; oss << "holdAllStats = " << getHoldAllStats() << endl; oss << "address of data collection is " << &dataCollection << " and data collection addresses are: " << endl; std::string delim = "\t"; for (BigDataConstItr it = dataCollection.begin(); it != dataCollection.end(); ++it) { oss << &(*it) << delim; } oss << endl; if (hasSubPaving()) { oss << "Subpaving is: " << endl; oss << getSubPaving()->doubleCheckStringSummary(); } else oss << "Subpaving is NULL" << endl; return oss.str(); }
double AdaptiveHistogram::findCoverage | ( | const rvector & | pt | ) | const |
Find the coverage value for a data point.
The coverage value is 1 - (sum of density of all the boxes with heights > the height of the box where the data point is).
If the point is not in the histogram at all, coverage = 0; If the point is in the lowest box in the histogram, coverage = count lowest box / total count; If the point is in the highest box of the histogram, coverage = 1
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws an IncompatibleDimensions_Error if the given point does not have the same dimensions as the subpaving that this manages.
pt | the point to find coverage for |
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::findCoverage(const rvector&)"); } if (getDimensions() != (Ub(pt) - Lb(pt) + 1)) { throw IncompatibleDimensions_Error( "AdaptiveHistogram::findCoverage(const rvector&)"); } return _coverage(pt); }
double AdaptiveHistogram::findEmpiricalDensity | ( | const rvector & | pt | ) | const |
Find the empirical density for a data point.
The empirical density is the relative density of the histogram at the box the given data point is in.
If the point is not in the histogram at all, empirical density = 0; If the point is in the some box in the histogram with height d (d = count in box / (total count x box volume), then d is the empirical density.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws an IncompatibleDimensions_Error if the given point does not have the same dimensions as the subpaving that this manages.
pt | the point to find empirical density for. |
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::findEmpiricalDensity(const rvector&)"); } if (getDimensions() != (Ub(pt) - Lb(pt) + 1)) { throw IncompatibleDimensions_Error( "AdaptiveHistogram::findEmpiricalDensity(const rvector&)"); } return _empiricalDensity(pt); }
rvector AdaptiveHistogram::getDataCollectionMean | ( | ) | const |
Gets sample mean for data held in the dataCollection.
This calculates the maean for the data in the data collection and returns it as a d*d-dimensional rvector, where d is the dimension of the data.
If means are held (see holdAllStats) then this should give the same result as getMean(), but if this does not hold all stats then getMean() will give a collection of NaN values whereas this method recalculates the variance-covariances directly from the data collection.
{
return calculateMeanFromBigData();
}
Gets sample variance covariance vector for data held in the data collection.
This calculates the sample variance-covariance matrix for the data in the data collection and returns it as a d*d-dimensional vector of reals, where d is the dimension of the data.
If variance-covariances are held (see holdAllStats) then this should give the same result as getVarCov(), but if this does not hold all stats then getVarCov() will give a collection of NaN values whereas this method recalculates the variance-covariances directly from the data collection.
cov(i,j) is at index i*d+j in the returned RealVec (indices from 0 to d*d-1, where d is the dimension of the data).
{
return calculateVarCovarFromBigData();
}
int AdaptiveHistogram::getDimensions | ( | ) | const |
get the dimensions of the subpaving this manages.
{ int retValue = 0; if (hasSubPaving()) { retValue = getSubPaving()->getDimension(); } return retValue; }
real AdaptiveHistogram::getEMPScoreAIC | ( | ) | const |
get the EMP part of the AIC score.
{ //recalcScaledEMPSumAIC(); // default cxsc rounding dotprecision rnd_next return rnd(scaledEMPSumAIC); }
real AdaptiveHistogram::getEMPScoreCOPERR | ( | ) | const |
get the EMP part of the COPERR score.
{ //recalcScaledEMPSumCOPERR(); // default cxsc rounding dotprecision rnd_next return rnd(scaledEMPSumCOPERR); }
bool AdaptiveHistogram::getHoldAllStats | ( | ) | const |
get the value of holdAllStats field.
This determines whether the histrogram's rootPaving will maintain all available stats (true) or just the counts (false).
{
return holdAllStats;
}
cxsc::real AdaptiveHistogram::getL1Distance | ( | const AdaptiveHistogram & | other | ) | const |
Gets the L1 distance between this and another subpaving.
The L1 distance is defined as the sum of the absolute values of the differences in 'area' represented by the leaf nodes of this and the other paving. The 'area' represented by a leaf node is the proportion of the total count of the tree the leaf is part of that is in that leaf node.
Throws the following exceptions:
other | the histogram to calculate the distance against the L1 distance against. |
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getL1Distance(const AdaptiveHistogram&)"); } return getSubPaving()->getL1Distance(other.getSubPaving()); }
Size_tVec AdaptiveHistogram::getLeafCounts | ( | ) | const |
Get a vector of the leaf node counts.
{ Size_tVec counts; // empty container if (hasSubPaving()) { getSubPaving()->getLeafNodeCounts(counts); //levels has now been filled in } return counts; }
IntVec AdaptiveHistogram::getLeafLevels | ( | ) | const |
Get a vector of the leaf node levels.
Root is level 0, next level down is 1, etc.
{ IntVec levels; // empty container if (hasSubPaving()) { getSubPaving()->getLeafNodeLevels(levels, 0); //levels has now been filled in } return levels; }
std::string AdaptiveHistogram::getLeafLevelsString | ( | ) | const |
Get a string of the leaf node levels.
Root is level 0, next level down is 1, etc. Example return string "3,3,2,1"
{ string retValue = ""; if (hasSubPaving()) retValue = getSubPaving()->getLeafNodeLevelsString(); return retValue; }
cxsc::real AdaptiveHistogram::getLogLikelihood | ( | ) | const |
Get the log-likelihood for this histogram.
The log-likelihood is the sum over leaves of ln(leaf count/(leaf vol * total pts in histogram))
What to do if the histogram has no points in? Could return Nan, or 0. I have decided to return 0.
{ if ( !hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getLogLikelihood()"); } cxsc::real loglik(0.0); size_t totalPoints = getRootCounter(); if (totalPoints > 0) { // unscaled loglik = getSubPaving()->getUnscaledTreeLogLik(); /* scaling is -n x ln (n x vol) where n is the total points and vol is the volume of the root box */ loglik -= (1.0*totalPoints * cxsc::ln( getSubPaving()->nodeRealVolume() * (1.0*totalPoints))); } return loglik; }
double AdaptiveHistogram::getMinVol | ( | double | minVolB | ) | const |
get the value of the minimum volume for a splittable node.
Minimum volume = minVolB * (log n) ^2/n where n is points in histogram. Minimum volume is used in COPERR or AIC priority queue splitting to limit which nodes can be split.
minVolB | the multiplier applied to log n) ^2/n to find the minimum allowed node volume at which a node can be split (children will half the volume of the parent node). |
{ double retValue = 0.0; if (hasSubPaving()) { size_t counter = getRootCounter(); retValue = minVolB * log(1.0*counter)*log(1.0*counter)/counter; } return retValue; }
std::pair< size_t, cxsc::real > AdaptiveHistogram::getNonEmptyBoxSummary | ( | ) | const |
Get summary information on non-empty box numbers and volumes in this histogram.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getNonEmptyBoxSummary()"); } return getSubPaving()->getNonEmptyBoxSummary(); }
real AdaptiveHistogram::getPENValue | ( | const PenObj & | pen, |
int | deltaLeaf = 0 |
||
) | const |
get the PEN value.
Get the value of the PEN given penalty function object pen.
pen | is the penalty function object |
deltaLeaf | the number of additional leaves (can be negative) to calulate the PEN with: allows changes in histogram shape to be evaluated. |
{ return pen(this, deltaLeaf); }
cxsc::ivector AdaptiveHistogram::getRootBox | ( | ) | const |
Get the box of the subpaving managed by this.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getRootBox()"); } return getSubPaving()->getBox(); }
size_t AdaptiveHistogram::getRootCherries | ( | ) | const |
Gets number of cherry nodes in the root paving.
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getRootCherries()"); } return getSubPaving()->getNumberCherries(); }
size_t AdaptiveHistogram::getRootCounter | ( | ) | const |
Gets count in the rootpaving in the root paving.
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error("AdaptiveHistogram::getRootCounter()"); } return getSubPaving()->getCounter(); }
size_t AdaptiveHistogram::getRootLeaves | ( | ) | const |
Gets number of leaf nodes in the root paving.
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error("AdaptiveHistogram::getRootLeaves()"); } return getSubPaving()->getNumberLeaves(); }
rvector AdaptiveHistogram::getRootPavingMean | ( | ) | const |
Gets sample mean for data held.
This calculates the mean for the data and returns it as a d*d-dimensional rvector, where d is the dimension of the data.
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error("AdaptiveHistogram::getRootPavingMean()"); } return getSubPaving()->getMean(); }
Gets sample variance covariance vector for data held.
This calculates the sample variance-covariance matrix for the data and returns it as a d*d-dimensional vector of reals, where d is the dimension of the data.
cov(i,j) is at index i*d+j in the returned RealVec (indices from 0 to d*d-1, where d is the dimension of the data).
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error("AdaptiveHistogram::getRootPavingVarCovar()"); } return getSubPaving()->getVarCovar(); }
real AdaptiveHistogram::getRootSumLeafCountOverVol | ( | ) | const |
Gets the sum of leaf count over volume in root paving.
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error("AdaptiveHistogram::getRootSumLeafCountOverVol()"); } return getSubPaving()->getSumLeafCountOverVol(); }
unsigned long int AdaptiveHistogram::getRootTotalLeafDepth | ( | ) | const |
Gets the total depth over all the leaves in the root.
Throws NullSubpavingPointer_Error is the subpaving that this manages is a NULL pointer.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getRootTotalLeafDepth()"); } return getSubPaving()->getTotalLeafDepth(); }
cxsc::real AdaptiveHistogram::getRootVolume | ( | ) | const |
get the volume of root box of the subpaving this manages.
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::getRootVolume()"); } return getSubPaving()->nodeRealVolume(); }
real AdaptiveHistogram::getScoreAIC | ( | const PenObj & | pen, |
bool | verbose = false |
||
) | const |
get AIC score.
pen | is the penalty function object |
verbose | option: true for extra console output (default false) |
{ dotprecision temptotal = scaledEMPSumAIC; if (hasSubPaving()) { size_t counter = getRootCounter(); if (verbose) { std::cout << "AIC EMP is " << rnd(scaledEMPSumAIC) << std::endl; std::cout << "AIC penalty is " << getPENValue(pen, 0) << std::endl; std::cout << "Total AIC score is " << rnd(scaledEMPSumAIC) + getPENValue(pen, 0) << std::endl; } // getPENValue(pen, 0) gives value of PEN under pen accumulate(temptotal, getPENValue(pen, 0), 1.0); // temptotal now holds scaledEMPSumAIC + PEN) } // default cxsc rounding dotprecision rnd_next return rnd(temptotal); }
real AdaptiveHistogram::getScoreCOPERR | ( | const PenObj & | pen, |
bool | verbose = false |
||
) | const |
get the COPERR score.
pen | is the penalty function object |
verbose | option: true for extra console output (default false) |
{ dotprecision temptotal = scaledEMPSumCOPERR; if (hasSubPaving()) { size_t counter = getRootCounter(); if (verbose) { std::cout << "COPERR EMP is " << rnd(scaledEMPSumCOPERR) << std::endl; std::cout << "COPERR penalty is " << getPENValue(pen, 0) << std::endl; std::cout << "Total COPERR score is " << rnd(scaledEMPSumCOPERR) + getPENValue(pen, 0) << std::endl; } // getPENValue(pen, 0) gives value of PEN under pen accumulate(temptotal, getPENValue(pen), 1.0); // temptotal now holds scaledEMPSumCOPERR + PEN) } // default cxsc rounding dotprecision rnd_next return rnd(temptotal); }
SPSnode * AdaptiveHistogram::getSubPaving | ( | ) | const |
bool AdaptiveHistogram::hasSubPaving | ( | ) | const |
Get whether this has a subpaving to manage.
{ return ( getSubPaving() != NULL ); }
bool AdaptiveHistogram::histBoxContains | ( | const rvector & | pt | ) | const |
Get whether the root box for this contains a given point.
Does the support of the histogram include the point pt?
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws an IncompatibleDimensions_Error if the given point does not have the same dimensions as the subpaving that this manages.
pt | the point to check. |
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::histBoxContains(const rvector&)"); } if (getDimensions() != (Ub(pt) - Lb(pt) + 1)) { throw IncompatibleDimensions_Error( "AdaptiveHistogram::histBoxContains(const rvector&)"); } return ( getSubPaving()->findContainingNode(pt) != NULL ); }
bool subpavings::AdaptiveHistogram::insertFromRSSample | ( | const RSSample & | rss, |
int | lab, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no splitting, inserts points with label lab.
{ SplitNever sn; // a dummy split decision object return insertFromRSSample(rss, lab, sn, logging); }
bool subpavings::AdaptiveHistogram::insertFromRSSample | ( | const RSSample & | rss, |
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no splitting, inserts only points with same label as this.
{ int lab = label; // label for this SplitNever sn; // a dummy split decision object return insertFromRSSample(rss, lab, sn, logging); }
bool AdaptiveHistogram::insertFromRSSample | ( | const RSSample & | rss, |
int | lab, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) |
Adaptive splitting with each data point inserted, inserts points with label lab.
{ bool retValue = false; RVecData myDataRvectors; // container for the rvectors we take in // try to get data from rss.Samples and check how many data points found // uses the label from this size_t numberFound = getRvectorsFromRSSample(myDataRvectors, rss, lab); if (numberFound > 0) { // complete the data insertion retValue = completeDataInsertionFromVec(myDataRvectors, boolTest, logging); } return retValue; }
bool subpavings::AdaptiveHistogram::insertFromRSSample | ( | const RSSample & | rss, |
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
Adaptive splitting with each data point inserted, inserts only points with same label as this.
{ int lab = label; // label for this return insertFromRSSample(rss, lab, boolTest, logging); }
bool AdaptiveHistogram::insertFromRVec | ( | const RVecData & | inputData, |
LOGGING_LEVEL | logging = NOLOG |
||
) |
All rvectors are associated with the root paving, no splitting.
{ bool retValue = false; RVecData theData; // container for the rvectors we take in size_t numberFound = getRvectorsFromRVec(theData, inputData); if (numberFound) { SplitNever boolTest; // a dummy split decision object retValue = completeDataInsertionFromVec(theData, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertFromRVec | ( | const RVecData & | inputData, |
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Adaptive splitting with each data point inserted.
{ bool retValue = false; RVecData theData; // container for the rvectors we take in size_t numberFound = getRvectorsFromRVec(theData, inputData); if (numberFound) { retValue = completeDataInsertionFromVec(theData, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertFromRVec | ( | const RVecData & | inputData, |
bool | checkDims, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
All rvectors are associated with the root paving, no splitting checkDims specified.
{ bool retValue = false; RVecData theData; // container for the rvectors we take in size_t numberFound = getRvectorsFromRVec(theData, inputData, checkDims); if (numberFound) { SplitNever boolTest; // a dummy split decision object retValue = completeDataInsertionFromVec(theData, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertFromRVec | ( | const RVecData & | inputData, |
const SplitDecisionObj & | boolTest, | ||
bool | checkDims, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Adaptive splitting with each data point inserted, checkDims specified
{ bool retValue = false; RVecData theData; // container for the rvectors we take in size_t numberFound = getRvectorsFromRVec(theData, inputData, checkDims); if (numberFound) { retValue = completeDataInsertionFromVec(theData, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertOne | ( | rvector | newdata, |
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Insert a single data point into AdaptiveHistogram object.
A method which attempts to insert a datapoint into this AdaptiveHistogram object's dataCollection and associate the datapoint with one of the leaves of the tree pointed to by this's rootPaving. If the datapoint falls outside the boundaries of the rootBox it will not be inserted and a message will be printed to standard output.
newdata | the datapoint to be inserted, an rvector. |
boolTest | is a reference to an object providing a function operator determining whether to split a node when a data point arrives. |
logging | an enum controlling whether histogram creation output is sent to a log file (defaults to no logging). |
Throws the following exceptions:
{ // make sure we have a paving and then try inserting if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "insertOne(rvector, const SplitDecisionObj&, LOGGING_LEVEL)"); } // make sure we have a paving and then try inserting if (getSubPaving()->isEmpty()) { throw NoBox_Error( "insertOne(rvector, const SplitDecisionObj&, LOGGING_LEVEL)"); } // check the dimensions if( (Ub(newData)-Lb(newData) + 1) != getDimensions() ) { throw IncompatibleDimensions_Error( "insertOne(rvector, const SplitDecisionObj&, LOGGING_LEVEL)"); } // for logging output to keep track of splits int i = 0; std::string baseFileName = ""; std::string s = ""; if (logging != NOLOG) { baseFileName = "splitOutput"; s = getUniqueFilename(baseFileName); #ifdef INSERTDEBUG cout << "logging to " << s << endl; #endif outputLogStart(s); // log the current state of the histogram outputLogPlain(s, i); #ifdef INSERTDEBUG cout << "logged state with i = " << i << endl; #endif i++; } bool retValue = false; BigDataItr it = dataCollection.end(); it = dataCollection.insert(it, newData); SPSnode* insertedInto = NULL; // try inserting insertedInto = getSubPaving()->insertOneFind(it, ON_PARENT, boolTest); if (insertedInto==NULL) { // failed to insert dataCollection.erase(it); std::cerr << "Failed to insert point "; prettyPrint(std::cerr, newData); std::cerr << std::endl; } else { // insertion succeeded retValue = true; // if we split on insert we may want to log if(!(insertedInto->isLeaf()) && logging != NOLOG) { // log the current state of the histogram #ifdef INSERTDEBUG cout << "insertion successful, about to log current state" << endl; #endif outputLogPlain(s, i); #ifdef INSERTDEBUG cout << "logged state with i = " << i << endl; #endif i++; } //recalculate the scaled EMP sum values; recalcScaledEMPSumCOPERR(); recalcScaledEMPSumAIC(); } if (logging != NOLOG) { // add leaf node levels string to log outputFile(s, getLeafLevelsString()); #ifdef INSERTDEBUG cout << "added leaf node levels string to log s= " << s << endl; #endif } return retValue; }
bool AdaptiveHistogram::insertOneDimDataFromTxt | ( | const std::string & | s, |
const std::size_t | headerlines = 0 , |
||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Ordinary level checking, no splitting, 1-dimensional data.
{ int dim = 1; RVecData theData; // container for the rvectors we take in bool retValue = readRvectorsFromTxtOrd(theData, s, headerlines, dim); if (retValue) { SplitNever boolTest; // a dummy split decision object retValue = completeDataInsertionFromVec(theData, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertOneDimDataFromTxt | ( | const std::string & | s, |
const SplitDecisionObj & | boolTest, | ||
const std::size_t | headerlines = 0 , |
||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Ordinary level checking, adaptive splitting with each data point inserted, 1-dimensional data.
{ int dim = 1; RVecData theData; // container for the rvectors we take in bool retValue = readRvectorsFromTxtOrd(theData, s, headerlines, dim); if (retValue) { retValue = completeDataInsertionFromVec(theData, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertRvectorsFromTxt | ( | const std::string & | s, |
const std::size_t | headerlines = 0 , |
||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Ordinary level checking, no splitting, no dimensions specified.
{ return insertRvectorsFromTxtOrd(s, headerlines, logging); }
bool AdaptiveHistogram::insertRvectorsFromTxt | ( | const std::string & | s, |
const SplitDecisionObj & | boolTest, | ||
const std::size_t | headerlines = 0 , |
||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Ordinary level checking, adaptive splitting with each data point inserted, no dimensions specified.
{ return insertRvectorsFromTxtOrd(s, boolTest, headerlines, logging); }
bool AdaptiveHistogram::insertRvectorsFromTxt | ( | const std::string & | s, |
int | dim, | ||
const std::size_t | headerlines = 0 , |
||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Ordinary level checking, no splitting, dimensions specified.
{ return insertRvectorsFromTxtOrd(s, dim, headerlines, logging); }
bool AdaptiveHistogram::insertRvectorsFromTxt | ( | const std::string & | s, |
const SplitDecisionObj & | boolTest, | ||
int | dim, | ||
const std::size_t | headerlines = 0 , |
||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Ordinary level checking, adaptive splitting with each data point inserted, dimensions specified.
{ return insertRvectorsFromTxtOrd(s, boolTest, dim, headerlines, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
gsl_rng * | rgsl, | ||
const RSSample & | rss, | ||
int | lab, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no spliting, random number generator supplied.
{ SplitNever sn; // a dummy split decision object return insertSampleFromRSSample(samplesize, rgsl, rss, lab, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
gsl_rng * | rgsl, | ||
const RSSample & | rss, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no spliting, random number generator supplied.
{ int lab = label; // label for this SplitNever sn; // a dummy split decision object return insertSampleFromRSSample(samplesize, rgsl, rss, lab, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
int | seed, | ||
const RSSample & | rss, | ||
int | lab, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no spliting, seed for creating a random number generator supplied.
{ SplitNever sn; // a dummy split decision object return insertSampleFromRSSample(samplesize, seed, rss, lab, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
int | seed, | ||
const RSSample & | rss, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no spliting, seed for creating a random number generator supplied.
{ int lab = label; // label for this SplitNever sn; // a dummy split decision object return insertSampleFromRSSample(samplesize, seed, rss, lab, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
const RSSample & | rss, | ||
int | lab, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no spliting, no random number generator supplied, default will be created.
{ SplitNever sn; // a dummy split decision object return insertSampleFromRSSample(samplesize, rss, lab, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
const RSSample & | rss, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
All rvectors are associated with the root paving, no spliting, no random number generator supplied, default will be created.
{ int lab = label; // label for this SplitNever sn; // a dummy split decision object return insertSampleFromRSSample(samplesize, rss, lab, sn, logging); }
bool AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
gsl_rng * | rgsl, | ||
const RSSample & | rss, | ||
int | lab, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) |
Adaptive splitting with each data point inserted, random number generator supplied.
{ bool retValue = false; RVecData myDataRvectors; // container for the rvectors we take in // try to sample data from rss.Samples and check how many data points found size_t numberTaken = getSampleRvectorsFromRSSample(myDataRvectors, rgsl, samplesize, rss, lab); if (numberTaken > 0) { /* switch on for more output during histogram creation // confirm the amount of data taken from the RSSample std::cout << "End of taking sample from data from RSSample: " << numberTaken << " data points used for sample" << std::endl; */ retValue = completeDataInsertionFromVec(myDataRvectors, boolTest, logging); } return retValue; }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
gsl_rng * | rgsl, | ||
const RSSample & | rss, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
Adaptive splitting with each data point inserted, random number generator supplied.
{ int lab = label; // label for this return insertSampleFromRSSample(samplesize, rgsl, rss, lab, boolTest, logging); }
bool AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
int | seed, | ||
const RSSample & | rss, | ||
int | lab, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) |
Adaptive splitting with each data point inserted, seed for creating random number generator supplied.
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed gsl_rng_set (rgsl, seed); // change the seed bool retValue = insertSampleFromRSSample(samplesize, rgsl, rss, lab, boolTest, logging); gsl_rng_free(rgsl); // free the random number generator rgsl = NULL; return retValue; } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
int | seed, | ||
const RSSample & | rss, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
Adaptive splitting with each data point inserted, seed for creating random number generator supplied.
{ int lab = label; // label for this return insertSampleFromRSSample(samplesize, seed, rss, lab, boolTest, logging); }
bool AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
const RSSample & | rss, | ||
int | lab, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) |
Adaptive splitting with each data point inserted, no random number generator supplied, default will be created.
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed long unsigned int seed = 1234; gsl_rng_set (rgsl, seed); // change the seed bool retValue = insertSampleFromRSSample(samplesize, rgsl, rss, lab, boolTest, logging); gsl_rng_free(rgsl); // free the random number generator rgsl = NULL; return retValue; } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool subpavings::AdaptiveHistogram::insertSampleFromRSSample | ( | size_t | samplesize, |
const RSSample & | rss, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
Adaptive splitting with each data point inserted, no random number generator supplied, default will be created.
{ int lab = label; // label for this return insertSampleFromRSSample(samplesize, rss, lab, boolTest, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRVec | ( | size_t | samplesize, |
gsl_rng * | rgsl, | ||
const RVecData & | rvec, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) | [inline] |
All rvectors are associated with the root paving, no splitting, random number generator supplied.
{ SplitNever sn; // a dummy split decision object return insertSampleFromRVec(samplesize, rgsl, rvec, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRVec | ( | size_t | samplesize, |
int | seed, | ||
const RVecData & | rvec, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) | [inline] |
All rvectors are associated with the root paving, no splitting, seed for creating a random number generator supplied.
{ SplitNever sn; // a dummy split decision object return insertSampleFromRVec(samplesize, seed, rvec, sn, logging); }
bool subpavings::AdaptiveHistogram::insertSampleFromRVec | ( | size_t | samplesize, |
const RVecData & | rvec, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) | [inline] |
All rvectors are associated with the root paving, no splitting, no random number generator supplied, default will be created.
{ SplitNever sn; // a dummy split decision object return insertSampleFromRVec(samplesize, rvec, sn, logging); }
bool AdaptiveHistogram::insertSampleFromRVec | ( | size_t | samplesize, |
gsl_rng * | rgsl, | ||
const RVecData & | rvec, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Adaptive splitting with each data point inserted, random number generator supplied.
{ bool retValue = false; RVecData myDataRvectors; // container for the rvectors we take in // try to sample data from the container, check how many data points found size_t numberTaken = getSampleRvectorsFromRVec(myDataRvectors, rgsl, samplesize, rvec); // complete the data insertion if (numberTaken > 0) { /* Switch on for more output // confirm the amount of data taken from the RSSample std::cout << "End of taking sample from data from the container: " << numberTaken << " data points used for sample" << std::endl; */ retValue = completeDataInsertionFromVec(myDataRvectors, boolTest, logging); } return retValue; }
bool AdaptiveHistogram::insertSampleFromRVec | ( | size_t | samplesize, |
int | seed, | ||
const RVecData & | rvec, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Adaptive splitting with each data point inserted, seed for creating random number generator supplied.
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed gsl_rng_set (rgsl, seed); // change the seed bool retValue = insertSampleFromRVec(samplesize, rgsl, rvec, boolTest, logging); gsl_rng_free(rgsl); // free the random number generator rgsl = NULL; return retValue; } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool AdaptiveHistogram::insertSampleFromRVec | ( | size_t | samplesize, |
const RVecData & | rvec, | ||
const SplitDecisionObj & | boolTest, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Adaptive splitting with each data point inserted, no random number generator supplied, default will be created.
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed long unsigned int seed = 1234; gsl_rng_set (rgsl, seed); // change the seed bool retValue = insertSampleFromRVec(samplesize, rgsl, rvec, boolTest, logging); gsl_rng_free(rgsl); // free the random number generator rgsl = NULL; return retValue; } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
PiecewiseConstantFunction AdaptiveHistogram::MCMC | ( | unsigned int | loops, |
unsigned int | burnin, | ||
unsigned int | thinout, | ||
MCMCProposal & | proposal, | ||
LogMCMCPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
long unsigned int | seed | ||
) |
MCMC average method using a random number generator set internally using the gsl default generator, and with seed set by the user.
seed | is the seed to use for the random number generator. |
{ gsl_rng * rgsl = NULL; try { if (thinout == 0) throw std::invalid_argument( "AdaptiveHistogram::MCMC(...) : thinout == 0"); // set up a random number generator for uniform rvs const gsl_rng_type * tgsl; // set the library variables *gsl_rng_default and // gsl_rng_default_seed to default environmental vars gsl_rng_env_setup(); tgsl = gsl_rng_default; // make tgsl the default type rgsl = gsl_rng_alloc (tgsl); // set up with default seed gsl_rng_set (rgsl, seed); // change seed std::vector < PiecewiseConstantFunction > samples; bool average = true; bool volChecking = false; real minVol(0.0); // use the internal method -> non normalised samples _MCMCsamples(samples, average, loops, burnin, thinout, proposal, logPrior, minPoints, volChecking, minVol, logging, rgsl); gsl_rng_free (rgsl); rgsl = NULL; if (samples.empty()) throw std::runtime_error( "AdaptiveHistogram::MCMC(...) : no samples collected"); samples.back().normalise(); return samples.back(); } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
PiecewiseConstantFunction AdaptiveHistogram::MCMC | ( | unsigned int | loops, |
unsigned int | burnin, | ||
unsigned int | thinout, | ||
MCMCProposal & | proposal, | ||
LogMCMCPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
gsl_rng * | rgsl | ||
) |
MCMC average method using a random number generator supplied by the user.
rgsl | is the random number generator to use. |
{ std::vector < PiecewiseConstantFunction > samples; bool average = true; bool volChecking = false; real minVol(0.0); // use the internal method -> non normalised samples _MCMCsamples(samples, average, loops, burnin, thinout, proposal, logPrior, minPoints, volChecking, minVol, logging, rgsl); samples.back().normalise(); return samples.back(); }
PiecewiseConstantFunction AdaptiveHistogram::MCMC_IMH | ( | size_t | maxLeaves, |
unsigned int | loops, | ||
unsigned int | burnin, | ||
unsigned int | thinout, | ||
LogMCMCIMHPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
long unsigned int | seed | ||
) |
MCMC average method using a random number generator set internally using the gsl default generator, and with seed set by the user.
seed | is the seed to use for the random number generator. |
{ gsl_rng * rgsl = NULL; try { if (thinout == 0) throw std::invalid_argument( "AdaptiveHistogram::MCMC_IMH(...) : thinout == 0"); // set up a random number generator for uniform rvs const gsl_rng_type * tgsl; // set the library variables *gsl_rng_default and // gsl_rng_default_seed to default environmental vars gsl_rng_env_setup(); tgsl = gsl_rng_default; // make tgsl the default type rgsl = gsl_rng_alloc (tgsl); // set up with default seed gsl_rng_set (rgsl, seed); // change seed std::vector < PiecewiseConstantFunction > samples; bool average = true; // use the internal method -> non normalised samples _MCMCsamplesIMH(samples, average, maxLeaves, loops, burnin, thinout, logPrior, minPoints, logging, rgsl); gsl_rng_free (rgsl); rgsl = NULL; if (samples.empty()) throw std::runtime_error( "AdaptiveHistogram::MCMC_IMH(...) : no samples collected"); samples.back().normalise(); return samples.back(); } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
PiecewiseConstantFunction AdaptiveHistogram::MCMC_IMH | ( | size_t | maxLeaves, |
unsigned int | loops, | ||
unsigned int | burnin, | ||
unsigned int | thinout, | ||
LogMCMCIMHPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
gsl_rng * | rgsl | ||
) |
MCMC average method using a random number generator supplied by the user.
rgsl | is the random number generator to use. |
{ std::vector < PiecewiseConstantFunction > samples; bool average = true; // use the internal method -> non normalised samples _MCMCsamplesIMH(samples, average, maxLeaves, loops, burnin, thinout, logPrior, minPoints, logging, rgsl); samples.back().normalise(); return samples.back(); }
std::vector< PiecewiseConstantFunction > & AdaptiveHistogram::MCMCsamples | ( | std::vector< PiecewiseConstantFunction > & | samples, |
unsigned int | loops, | ||
unsigned int | burnin, | ||
unsigned int | thinout, | ||
MCMCProposal & | proposal, | ||
LogMCMCPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
long unsigned int | seed | ||
) |
MCMC samples method using a random number generator set internally using the gsl default generator, and with seed set by the user.
seed | is the seed to use for the random number generator. |
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed gsl_rng_set (rgsl, seed); // change seed bool average = false; bool volChecking = false; real minVol(0.0); // use the internal method _MCMCsamples(samples, average, loops, burnin, thinout, proposal, logPrior, minPoints, volChecking, minVol, logging, rgsl); for (std::vector < PiecewiseConstantFunction >::iterator it = samples.begin(); it < samples.end(); ++it) { it->normalise(); } // free the random number generator try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } } catch (...) {} // catch and swallow return samples; } catch (...) { // catch anything try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } } catch (...) {} // catch and swallow throw; // rethrow original exception } }
std::vector< PiecewiseConstantFunction > & AdaptiveHistogram::MCMCsamples | ( | std::vector< PiecewiseConstantFunction > & | samples, |
unsigned int | loops, | ||
unsigned int | burnin, | ||
unsigned int | thinout, | ||
MCMCProposal & | proposal, | ||
LogMCMCPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
gsl_rng * | rgsl | ||
) |
MCMC samples method using a random number generator supplied by the user.
rgsl | is the random number generator to use. |
{ bool average = false; bool volChecking = false; real minVol(0.0); _MCMCsamples( samples, average, loops, burnin, thinout, proposal, logPrior, minPoints, volChecking, minVol, logging, rgsl); for (std::vector < PiecewiseConstantFunction >::iterator it = samples.begin(); it < samples.end(); ++it) { it->normalise(); } return samples; }
std::vector< PiecewiseConstantFunction > & AdaptiveHistogram::MCMCsamplesIMH | ( | size_t | maxLeaves, |
std::vector< PiecewiseConstantFunction > & | samples, | ||
unsigned int | loops, | ||
unsigned int | burnin, | ||
unsigned int | thinout, | ||
LogMCMCIMHPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
long unsigned int | seed | ||
) |
MCMC samples method using a random number generator set internally using the gsl default generator, and with seed set by the user.
seed | is the seed to use for the random number generator. |
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed gsl_rng_set (rgsl, seed); // change seed bool average = false; // use the internal method _MCMCsamplesIMH( samples, average, maxLeaves, loops, burnin, thinout, logPrior, minPoints, logging, rgsl); for (std::vector < PiecewiseConstantFunction >::iterator it = samples.begin(); it < samples.end(); ++it) { it->normalise(); } // free the random number generator try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } } catch (...) {} // catch and swallow return samples; } catch (...) { // catch anything try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } } catch (...) {} // catch and swallow throw; // rethrow original exception } }
std::vector< PiecewiseConstantFunction > & AdaptiveHistogram::MCMCsamplesIMH | ( | size_t | maxLeaves, |
std::vector< PiecewiseConstantFunction > & | samples, | ||
unsigned int | loops, | ||
unsigned int | burnin, | ||
unsigned int | thinout, | ||
LogMCMCIMHPrior & | logPrior, | ||
size_t | minPoints, | ||
LOGGING_LEVEL | logging, | ||
gsl_rng * | rgsl | ||
) |
MCMC samples method using a random number generator supplied by the user.
rgsl | is the random number generator to use. |
{ bool average = false; _MCMCsamplesIMH( samples, average, maxLeaves, loops, burnin, thinout, logPrior, minPoints, logging, rgsl); for (std::vector < PiecewiseConstantFunction >::iterator it = samples.begin(); it < samples.end(); ++it) { it->normalise(); } return samples; }
bool AdaptiveHistogram::mergeUp | ( | ) |
Merge a multileaf histogram up to just root.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws an std::logic_error if the merge becomes muddled.
No prioritisation, just brute force
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error("AdaptiveHistogram::mergeUp()"); } bool merged = false; if (!getSubPaving()->isLeaf()) { merged = true; SPSnodePtrs subleaves; getSubPaving()->getSubLeaves(subleaves); // subleaves contains the subleaves if(subleaves.empty()) { throw std::logic_error( "AdaptiveHistogram::mergeUp()) : Error in queue"); } bool canmerge = true; // merge until there is only one leaf while (canmerge) { SPSnode* target = *(subleaves.rbegin ()); // the last in the vector // merge the biggest one target->nodeReabsorbChildren(); SPSnodePtrsItr it = subleaves.end(); it--; subleaves.erase(it, subleaves.end());// take the last out of the vector // if target had a leaf sibling, target's parent is now a cherry // and should be inserted into the multiset if (target->hasLeafSibling()) { subleaves.push_back(target->getParent()); } canmerge = (subleaves.size()>0); } if(!canmerge) { std::cout << "Merged to root" << std::endl; } // EMPSums are not adjusted during the merging process recalcScaledEMPSumAIC(); recalcScaledEMPSumCOPERR(); } else { std::cerr << "Nothing to be done - root paving is already a leaf " << std::endl; } return merged; }
const AdaptiveHistogram AdaptiveHistogram::operator+ | ( | const AdaptiveHistogram & | rhs | ) | const |
Overloaded addition operator.
Return the result of adding another histogram to this.
If before the operation, one of either this or rhs have no subpaving, the result will have the non-null subpaving. If before the operation, both this and rhs have no subpaving, the result will have no subpaving.
rhs | the histogram to add to this. |
{ /* result of += will have the holdAllStats of the lhs operand * so if just one of them holds all stats, we want this * one to be on the left, and if both or neither hold all stats, either * can be on the left*/ AdaptiveHistogram temp; if (!getHoldAllStats() && rhs.getHoldAllStats() ) { temp = rhs; temp += (*this); } else { temp = (*this); temp += rhs; } /* temp will have label of rhs or this: if these are different * we want to reset temp's label to 0*/ if ( getLabel() != rhs.getLabel() ) temp.setLabel(0); return temp; }
AdaptiveHistogram & AdaptiveHistogram::operator+= | ( | const AdaptiveHistogram & | rhs | ) |
Overloaded add-to-self operator.
This becomes the result of adding this and rhs together using a non-minimal union of subpavings.
If before the operation, one of either this or rhs have no subpaving, after the operation this will have the non-null subpaving. If before the operation, both this or rhs have no subpaving, after the operation this will have no subpaving.
holdAllStats will be unchanged as a result of this operation. label will be unchanged as a result of this operation.
rhs | the histogram to add to this. |
{ //nothing to add, just return this if ( !rhs.hasSubPaving() || rhs.getSubPaving()->isEmpty() ) { return *this; } // we now know that rhs subpaving exists and is not empty /* to avoid expense of multiple copies for large hists * I no longer use a temp as a work space, and at the end copy it into this * AdaptiveHistogram temp, but that means that failure cannot be reversed */ // if this has no subpaving or an empty one, it can just copy the other one if ( !hasSubPaving() || getSubPaving()->isEmpty() ) { bool thisHoldAllStats = getHoldAllStats(); int lab = getLabel(); (*this) = rhs; // reset label to be the one we had before setLabel(lab); // but only finished if the hold all stats are the same // otherwise we have to reverse the new holdAllStats. if ( thisHoldAllStats != rhs.getHoldAllStats() ) { setHoldAllStats(thisHoldAllStats); } } else { getSubPaving()->unionTreeStructure(rhs.getSubPaving()); // put all the data from the two histograms into this one. RVecData allData; BigDataCollection tmp1; tmp1.swap(dataCollection); // this clears my data collection and puts the contents // into tmp1 assert(dataCollection.empty()); BigDataCollection tmp2 = rhs.getDataCollection(); allData.reserve( tmp1.size() + tmp2.size() ); // copy from the dataCollections into allData; allData.assign(tmp1.begin(), tmp1.end()); allData.insert(allData.end(), tmp2.begin(), tmp2.end()); // and put the data into the histogram insertFromRVec(allData, NOLOG); } return *this; }
bool AdaptiveHistogram::outputGraphDot | ( | ) | const |
Make a .dot graph file from histogram structure.
Makes a simple .dot graph from the histogram using node names and also makes the .png image for this graph.
{ bool success = false; if (hasSubPaving()) { success = getSubPaving()->outputGraphDot(); } else { std::cerr << "Sorry, you can't make a graph without a root paving" << std::endl; } return success; }
void AdaptiveHistogram::outputLog | ( | const std::string & | s, |
int | i, | ||
int | prec = 5 |
||
) | const |
Append current state of histogram to a txt log file.
Format is a tab-delimited file of numeric data. Output includes node contributions to unscaled EMP under COPERR and AIC and the changes in EMP that would result from splitting the node.
s | the name of the txt file to send output to. |
i | the number of pass (ie, 0, 1, 2, 3 etc) in process. |
prec | the precision for output formatting. ie, number of decimal places. |
{ // To add output of the AdaptiveHistogram object to file ofstream os(s.c_str(), ios::app); // append if (os.is_open()) { size_t n = getRootCounter(); os << std::endl; os << "Pass " << i << std::endl; // numbering getSubPaving()->leavesOutputTabsWithEMPs(n, os, prec); // the output os.close(); } else { std::cerr << "Error: could not open file named " << s << std::endl << std::endl; } }
void AdaptiveHistogram::outputLogPlain | ( | const std::string & | s, |
int | i, | ||
int | prec = 5 |
||
) | const |
Append current state of histogram to a txt log file.
Format is a tab-delimited file of numeric data. Output is plain: just vols, counters, and boxes.
s | the name of the txt file to send output to. |
i | the number of pass (ie, 0, 1, 2, 3 etc) in process. |
prec | the precision for output formatting. ie, number of decimal places. |
{ // To add output of the AdaptiveHistogram object to file ofstream os(s.c_str(), ios::app); // append if (os.is_open()) { os << std::endl; os << "Pass " << i << std::endl; // numbering getSubPaving()->leavesOutputTabs(os, prec); // the output os.close(); } else { std::cerr << "Error: could not open file named " << s << std::endl << std::endl; } }
void AdaptiveHistogram::outputRootToTxt | ( | const std::string & | s, |
int | prec = 5 |
||
) | const |
Output details of full sample (from root) to txt tile.
Format is a mixture of alpha and numeric data.
s | the name of the txt file to send output to. |
prec | the precision for output formatting. ie, number of decimal places. |
confirm | is a boolean controlling whether confirmation goes to console output. |
{ outputRootToTxt(s, prec, false); }
std::ostream & AdaptiveHistogram::outputToStreamTabs | ( | std::ostream & | os, |
int | prec = 5 |
||
) | const |
Output the subpaving managed by this to a given stream.
Format is a tab-delimited file of numeric data starting with nodeName, then the node box volume, then the node counter, then the description of the node box as a tab-delimited list of interval upper and lower bounds.
os | is a reference to the stream to output the histogramm to. |
prec | the precision for output formatting. ie, number of decimal places. |
{ if (hasSubPaving()) { return getSubPaving()->leavesOutputTabs(os, prec); // the output } else return os; }
void AdaptiveHistogram::outputToTxtTabs | ( | const std::string & | s, |
int | prec = 5 |
||
) | const |
Output the subpaving managed by this to a txt file.
Format is a tab-delimited file of numeric data starting with nodeName, then the node box volume, then the node counter, then the description of the node box as a tab-delimited list of interval upper and lower bounds.
s | the name of the txt file to send output to. |
prec | the precision for output formatting. ie, number of decimal places. |
confirm | is a boolean controlling whether confirmation goes to console output. |
{ outputToTxtTabs(s, prec, false); }
void AdaptiveHistogram::outputToTxtTabsWithEMPs | ( | const std::string & | s, |
int | prec = 5 |
||
) | const |
Output the subpaving managed by this to a txt file.
Format is a tab-delimited file of numeric data starting with nodeName, then the node box volume, then the node counter, then node contribution to EMP under COPERR, the change that would result in the EMP under COPERR if the node were split, the node contribution to EMP under AIC, the change that would result in the EMP under AIC if the node were split, and then the node box as a tab-delimited list of interval upper and lower bounds.
s | the name of the txt file to send output to. |
prec | the precision for output formatting. ie, number of decimal places. |
confirm | is a boolean controlling whether confirmation goes to console output. |
{ outputToTxtTabsWithEMPs(s, prec, false); }
bool AdaptiveHistogram::priorityMerge | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Priority merge to reduce number of leaves in histogram.
This method takes a histogram where where all the data is associated with multiple nodes and progressively merges the children of sub-terminal leaves using a priority queue to determine which node to merge first. Merging continues until some criteria applying either to individual nodes or to the histogram as a whole is satisfied, or the histogram only has one bin.
If more than one node is equally 'small', on the basis of the node comparison compTest used, then a random choice is made between all equally small nodes to find the node which will be merged.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws an UnfulfillableRequest_Error if the subpaving that this manages is a leaf.
Throws an std::logic_error if the merge becomes muddled.
compTest | is an instance of a class providing a function for comparing spsnodes, to order the nodes to prioitise splitting. |
he | is an instance of a class which provides a function to determine when to stop merging. |
logging | an enum controlling whether histogram creation output is sent to a log file (defaults to no logging) |
{ gsl_rng * rgsl = NULL; try { if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::priorityMerge(const NodeCompObj&, const HistEvalObj&, LOGGING_LEVEL)"); } if (getSubPaving()->isLeaf()) { throw UnfulfillableRequest_Error( "AdaptiveHistogram::priorityMerge(const NodeCompObj&, const HistEvalObj&, LOGGING_LEVEL)"); } size_t n = getRootCounter(); // number of points in histogram int i = 0; std::string baseFileName = ""; std::string s = ""; if (logging != NOLOG) { // pass to log output to keep track of splits baseFileName = "pqMergeOutput"; s = getUniqueFilename(baseFileName); } if (logging != NOLOG) { // Start log file with filename and timestamp outputLogStart(s); // log the current state of the histogram outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } // a multiset for the queue (key values are not necessarily unique) multiset<SPSnode*, MyCompare> pq((MyCompare(compTest))); _setupPriorityMerge(pq); size_t numLeaves = getRootLeaves(); // actual number of leaves // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed long unsigned int seed = 1234; gsl_rng_set (rgsl, seed); // change the seed bool canContinue = !pq.empty(); if (canContinue) { bool bigEnough = true; // merge until the HistEvalObj he () operator returns true while (bigEnough && !he(this)) { bigEnough = AdaptiveHistogram::_priorityMergeLoop( pq, n,rgsl); if (logging != NOLOG) { // To add current state of histogram to log file outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } if (bigEnough) numLeaves--; } } else { std::cerr << "No mergable cherries - aborting" << std::endl; } if (canContinue && logging != NOLOG) { // log the leaf levels line outputFile(s, getLeafLevelsString()); } // EMPSums will have been adjusted during the merging process if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } return canContinue; // true unless we could not even start merge } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool AdaptiveHistogram::priorityMerge | ( | const NodeCompObj & | compTest, |
size_t | minLeaves, | ||
LOGGING_LEVEL | logging = NOLOG |
||
) |
Priority merge to reduce number of leaves in histogram.
This method takes a histogram where where all the data is associated with multiple nodes and progressively merges the children of sub-terminal leaves using a priority queue to determine which node to merge first. Merging continues until histogram has only minLeaves leaves or merging has had to be aborted.
If more than one node is equally 'small', on the basis of the node comparison compTest used, then a random choice is made between all equally small nodes to find the node which will be merged.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws an UnfulfillableRequest_Error if the subpaving that this manages is a leaf.
Throws an std::logic_error if the merge becomes muddled.
compTest | is an instance of a class providing a function for comparing spsnodes, to order the nodes to prioitise splitting. |
minLeaves | is the number of leaves to aim for. |
logging | an enum controlling whether histogram creation output is sent to a log file (defaults to no logging) |
{ gsl_rng * rgsl = NULL; try { if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::priorityMerge(const NodeCompObj&, const HistEvalObj&, LOGGING_LEVEL)"); } if (getSubPaving()->isLeaf()) { throw UnfulfillableRequest_Error( "AdaptiveHistogram::priorityMerge(const NodeCompObj&, size_t, LOGGING_LEVEL)"); } size_t n = getRootCounter(); // number of points in histogram int i = 0; std::string baseFileName = ""; std::string s = ""; if (logging != NOLOG) { // pass to log output to keep track of splits baseFileName = "pqMergeOutput"; s = getUniqueFilename(baseFileName); } if (logging != NOLOG) { // Start log file with filename and timestamp outputLogStart(s); // log the current state of the histogram outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } // a multiset for the queue (key values are not necessarily unique) multiset<SPSnode*, MyCompare> pq((MyCompare(compTest))); _setupPriorityMerge(pq); size_t numLeaves = getRootLeaves(); // actual number of leaves // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed long unsigned int seed = 1234; gsl_rng_set (rgsl, seed); // change the seed bool canContinue = !pq.empty(); if (canContinue) { bool bigEnough = true; // merge until the HistEvalObj he () operator returns true while (bigEnough && numLeaves > minLeaves) { bigEnough = AdaptiveHistogram::_priorityMergeLoop( pq, n,rgsl); if (logging != NOLOG) { // To add current state of histogram to log file outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } if (bigEnough) numLeaves--; } } else { std::cerr << "No mergable cherries - aborting" << std::endl; } if (canContinue && logging != NOLOG) { // log the leaf levels line outputFile(s, getLeafLevelsString()); } // EMPSums will have been adjusted during the merging process if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } return canContinue; // true unless we could not even start merge } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints | ||
) | [inline] |
HistEvalObj to stop splitting. Only minChildPoints supplied, no minvolB, no random number generator.
{ return prioritySplit(compTest, he, logging, minChildPoints, 0.0); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
double | minVolB | ||
) | [inline] |
HistEvalObj to stop splitting. Only minVolB supplied, no minChildPoints, no random number generator.
{ return prioritySplit(compTest, he, logging, 0, minVolB); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
HistEvalObj to stop splitting. Neither minVolB nor minChildPoints supplied, no random number generator.
{ return prioritySplit(compTest, he, logging, 0, 0.0); }
bool AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints, | ||
double | minVolB | ||
) |
HistEvalObj to stop splitting. minVolB and minChildPoints supplied but no random number generator.
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed long unsigned int seed = 1234; gsl_rng_set (rgsl, seed); // change the seed bool retValue = prioritySplit(compTest, he, logging, minChildPoints, minVolB, rgsl); gsl_rng_free (rgsl); rgsl = NULL; return retValue; } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints, | ||
gsl_rng * | rgsl | ||
) | [inline] |
HistEvalObj to stop splitting. With random number generator. Only minChildPoints supplied, no minvolB.
{ return prioritySplit(compTest, he, logging, minChildPoints, 0.0, rgsl); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
double | minVolB, | ||
gsl_rng * | rgsl | ||
) | [inline] |
HistEvalObj to stop splitting. With random number generator. Only minVolB supplied, no minChildPoints.
{ return prioritySplit(compTest, he, logging, 0, minVolB, rgsl); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
gsl_rng * | rgsl | ||
) | [inline] |
HistEvalObj to stop splitting. With random number generator. Neither minVolB nor minChildPoints supplied.
{ return prioritySplit(compTest, he, logging, 0, 0.0, rgsl); }
bool AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
const HistEvalObj & | he, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints, | ||
double | minVolB, | ||
gsl_rng * | rgsl | ||
) |
HistEvalObj to stop splitting. With random number generator. All other parameters supplied.
{ string errorMsg("AdaptiveHistogram::prioritySplit(const NodeCompObj&, const HistEvalObj&, LOGGING_LEVEL, size_t, double, gsl_rng *)"); if (!hasSubPaving()) { throw NullSubpavingPointer_Error(errorMsg); } bool volChecking = false; // record if we need to check volume before split double minVol = -1.0; // minimum volume (used only if checking) size_t n = 0; // for number of points in histogram // make volChecking true if minVolB is > 0.0 if (minVolB > 0.0) { // minimum volume of a splittable node is minVolB(log n)^2/n minVol = getMinVol(minVolB); volChecking = true; } #ifdef MYDEBUG cout << "logging = " << logging << endl; cout << "minChildPoints = " << minChildPoints << endl; cout << "volChecking = " << volChecking << endl; cout << "minVol = " << minVol << endl; #endif // a multiset for the queue (key values are not necessarily unique) multiset<SPSnode*, MyCompare> pq((MyCompare(compTest))); _setupPrioritySplit(pq, minChildPoints, minVol); // throws an exception if current state not legal std::string baseFileName = ""; std::string s = ""; if (logging != NOLOG) { // pass to log output to keep track of splits baseFileName = "pqOutput"; s = getUniqueFilename(baseFileName); } n = getRootCounter(); // number of points in histogram #ifdef MYDEBUG cout << "rootCounter = " << n << endl; #endif int i = 0; if (logging != NOLOG) { // Start log file with filename and timestamp outputLogStart(s); // log the current state of the histogram outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } size_t numLeaves = getRootLeaves(); // actual number of leaves bool canContinue = !pq.empty(); if(!canContinue) { std::cerr << "No splittable leaves to split - aborting" << std::endl; } /* split until the HistEvalObj he () operator returns true we only put splittable nodes into the set, so we don't have to check that they are splittable when we take them out */ while (canContinue && !he(this)) { canContinue = _prioritySplitLoop(pq, n, minChildPoints, minVol, rgsl); if (logging != NOLOG) { // To add current state of histogram to log file outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } /*any successful split will have increased actual number of leaves */ if (canContinue) numLeaves++; } if (canContinue && (logging != NOLOG)) { // log the leaf levels line outputFile(s, getLeafLevelsString()); } // EMPSums will have been adjusted during the splitting process return canContinue; // true if fe satisfied }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints | ||
) | [inline] |
maxLeaves to stop splitting. Only minChildPoints supplied, no minvolB, no random number generator.
{ return prioritySplit(compTest, maxLeaves, logging, minChildPoints, 0.0); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
double | minVolB | ||
) | [inline] |
maxLeaves to stop splitting. Only minVolB supplied, no minChildPoints, no random number generator.
{ return prioritySplit(compTest, maxLeaves, logging, 0, minVolB); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging | ||
) | [inline] |
maxLeaves to stop splitting. Neither minVolB nor minChildPoints supplied, no random number generator.
{ return prioritySplit(compTest, maxLeaves, logging, 0, 0.0); }
bool AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints, | ||
double | minVolB | ||
) |
maxLeaves to stop splitting. minVolB and minChildPoints supplied but no random number generator.
{ gsl_rng * rgsl = NULL; try { // make use mt19937 for generator rgsl = gsl_rng_alloc (gsl_rng_mt19937); // set up with default seed long unsigned int seed = 1234; gsl_rng_set (rgsl, seed); // change the seed bool retValue = prioritySplit(compTest, maxLeaves, logging, minChildPoints, minVolB, rgsl); gsl_rng_free (rgsl); rgsl = NULL; return retValue; } catch (exception const& e) { try { if (NULL != rgsl) { gsl_rng_free(rgsl); rgsl = NULL; } // free the random number generator } catch (exception const& ee) {} // catch and swallow throw; // rethrow original exception } }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints, | ||
gsl_rng * | rgsl | ||
) | [inline] |
maxLeaves to stop splitting. With random number generator. Only minChildPoints supplied, no minvolB.
{ return prioritySplit(compTest, maxLeaves, logging, minChildPoints, 0.0, rgsl); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
double | minVolB, | ||
gsl_rng * | rgsl | ||
) | [inline] |
maxLeaves to stop splitting. With random number generator. Only minVolB supplied, no minChildPoints.
{ return prioritySplit(compTest, maxLeaves, logging, 0, minVolB, rgsl); }
bool subpavings::AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
gsl_rng * | rgsl | ||
) | [inline] |
maxLeaves to stop splitting. With random number generator. Neither minVolB nor minChildPoints supplied.
{ return prioritySplit(compTest, maxLeaves, logging, 0, 0.0, rgsl); }
bool AdaptiveHistogram::prioritySplit | ( | const NodeCompObj & | compTest, |
size_t | maxLeaves, | ||
LOGGING_LEVEL | logging, | ||
size_t | minChildPoints, | ||
double | minVolB, | ||
gsl_rng * | rgsl | ||
) |
maxLeaves to stop splitting. With random number generator. All other parameters supplied.
{ string errorMsg("AdaptiveHistogram::prioritySplit(const NodeCompObj&, size_t, LOGGING_LEVEL, size_t, double, gsl_rng *)"); if (!hasSubPaving()) { throw NullSubpavingPointer_Error(errorMsg); } bool volChecking = false; // record if we need to check volume before split double minVol = -1.0; // minimum volume (used only if checking) size_t n = 0; // for number of points in histogram // make volChecking true if minVolB is > 0.0 if (minVolB > 0.0) { // minimum volume of a splittable node is minVolB(log n)^2/n minVol = getMinVol(minVolB); volChecking = true; } #ifdef MYDEBUG cout << "logging = " << logging << endl; cout << "minChildPoints = " << minChildPoints << endl; cout << "volChecking = " << volChecking << endl; cout << "minVol = " << minVol << endl; #endif // a multiset for the queue (key values are not necessarily unique) multiset<SPSnode*, MyCompare> pq((MyCompare(compTest))); _setupPrioritySplit(pq, minChildPoints, minVol); // throws an exception if current state not legal std::string baseFileName = ""; std::string s = ""; if (logging != NOLOG) { // pass to log output to keep track of splits baseFileName = "pqOutput"; s = getUniqueFilename(baseFileName); } n = getRootCounter(); // number of points in histogram #ifdef MYDEBUG cout << "rootCounter = " << n << endl; #endif int i = 0; if (logging != NOLOG) { // Start log file with filename and timestamp outputLogStart(s); // log the current state of the histogram outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } size_t numLeaves = getRootLeaves(); // actual number of leaves bool canContinue = !pq.empty(); if(!canContinue) { std::cerr << "No splittable leaves to split - aborting" << std::endl; } /* split until we have maxLeaves leaves we only put splittable nodes into the set, so we don't have to check that they are splittable when we take them out */ while (canContinue && numLeaves < maxLeaves) { canContinue = _prioritySplitLoop(pq, n, minChildPoints, minVol, rgsl); if (logging != NOLOG) { // To add current state of histogram to log file outputLogPlain(s, i); #ifdef LOGEMPS outputLogEMPAIC(s); // add AIC scores #endif i++; } /*any successful split will have increased actual number of leaves */ if (canContinue) numLeaves++; } if (canContinue && (logging != NOLOG)) { // log the leaf levels line outputFile(s, getLeafLevelsString()); } // EMPSums will have been adjusted during the splitting process return canContinue; // true if fe satisfied }
void AdaptiveHistogram::reshapeToUnion | ( | const PiecewiseConstantFunction & | other | ) |
Change this so that the subpaving it manages is the union of this's subpaving and the subpaving of that of a PiecewiseConstantFunction.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer or if the subpaving managed by other is a NULL pointer.
Throws a NoBox_Error if the subpaving of this has no box or if the subpaving of other has no box.
Throws an IncompatibleDimensions_Error if the subpaving boxes of this and other are not identical.
There will be no change in this if the subpaving of is everywhere less split than the subpaving of this.
other | is the PiecewiseConstantFunction to make the union against. |
{ if ( !hasSubPaving() || !other.hasSubPaving() ) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::reshapeToUnion(const PiecewiseConstantFunction&)"); } getSubPaving()->reshapeToUnion(other.getCopySubPaving()); }
void AdaptiveHistogram::reshapeToUnion | ( | const PiecewiseConstantFunction & | other, |
size_t | minChildPoints | ||
) |
Change this so that the subpaving it manages is as close as possible to the union of this's subpaving and the subpaving of that of a PiecewiseConstantFunction.
If other has a subpaving that is more split than the subpaving managed by this at any node, this will not exactly follow the shape of other if the resulting nodes would not splittable according to SPSnode::isSplittableNode(size_t minChildPoints). If any node cannot be split to follow the shape of other due to minChildPoints, a message will be printed to std::cerr.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer or if the subpaving managed by other is a NULL pointer.
Throws a NoBox_Error if the subpaving of this has no box or if the subpaving of other has no box.
Throws an IncompatibleDimensions_Error if the subpaving boxes of this and other are not identical.
There will be no change in this if the subpaving of is everywhere less split than the subpaving of this.
other | is the PiecewiseConstantFunction to make the union against. |
minChildPoints | is the minumum child points to use to check if this can be split in order to follow other. |
{ if ( !hasSubPaving() || !other.hasSubPaving() ) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::reshapeToUnion(const PiecewiseConstantFunction&, size_t)"); } getSubPaving()->reshapeToUnion(other.getCopySubPaving(), minChildPoints); }
void AdaptiveHistogram::setHoldAllStats | ( | bool | setTo | ) |
Set holdAllStats
Changes the holdAllStats indicator to the required value and also ensures that the paving is changed appropriately, ie that paving properties and statistics held are altered.
Throws a NullSubpavingPointer_Error if the subpaving that this manages is a NULL pointer.
setTo | The value to set the member holdAllStats to; |
{ if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::setHoldAllStats(bool)"); } if (getHoldAllStats() != setTo) { //if setTo is false, paving's countsOnly needs to be true //if setTo is true, paving's countsOnly needs to be false getSubPaving()->setCountsOnly(!setTo); holdAllStats = setTo; } }
void AdaptiveHistogram::setLabel | ( | int | lab | ) |
Set the label for this.
lab | the label for this. |
{ label = lab; }
bool AdaptiveHistogram::splitToShape | ( | std::string | instruction | ) |
Split a histogram to a specified shape.
Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer.
Throws a NoBox_Error if the subpaving box is empty.
Prints a message to the standard error output if the instruction could not be carried out.
instruction | specifies the required shape, eg "3, 3, 2, 1" |
{ // checks: is there a root paving, is the string properly formed? if (!hasSubPaving()) { throw NullSubpavingPointer_Error( "AdaptiveHistogram::splitToShape()"); } bool success = false; SPSnode temp(*getSubPaving()); // copy to temp try { if (instruction.length() == 0) { throw std::invalid_argument( "AdaptiveHistogram::splitToShape() : No instruction"); } std::string legal(", 0123456789"); if (instruction.find_first_not_of(legal) != std::string::npos) { throw std::invalid_argument( "AdaptiveHistogram::splitToShape() : Illegal character"); } // all seems to be okay, we can start splitting the root paving success = getSubPaving()->splitRootToShape(instruction); if (!success) { handleSplitToShapeError(temp); } } catch (std::invalid_argument const& ia) { cerr << ia.what() << endl; handleSplitToShapeError(temp); success = false; } catch (std::logic_error const& le) { cerr << le.what() << endl; handleSplitToShapeError(temp); success = false; } return success; // any other exceptions are unhandled }
std::string AdaptiveHistogram::stringSummary | ( | ) | const |
Get a string summary of this histogram's properties.
A string description of this. Includes the address of the subpaving managed but not details of that subpaving.
{ std::ostringstream oss; oss << "This address = " << (this) << endl; oss << "Label = " << label << endl; oss << "holdAllStats = " << getHoldAllStats() << endl; oss << "AIC score = " << getEMPScoreAIC() << ", and COPERR score = " << getEMPScoreCOPERR() << endl; oss << "addresss of dataCollection is " << &dataCollection << " and dataCollection is:" << endl; std::string delim = "\t"; for (BigDataConstItr it = dataCollection.begin(); it != dataCollection.end(); ++it) { prettyPrint(oss, (*it)); oss << delim; } oss << endl; if (hasSubPaving()) oss << "Address of subpaving is " << getSubPaving() << endl; else oss << "Subpaving is NULL" << endl; return oss.str(); }
void AdaptiveHistogram::swap | ( | AdaptiveHistogram & | adh | ) |
Swap the contents of this and another histogram.
{ std::swap(label, adh.label); std::swap(dataCollection, adh.dataCollection); // use stl specialisation of swap std::swap(holdAllStats, adh.holdAllStats); // cxsc don't seem to have a swap for dot precisions dotprecision tempCOPERR(adh.scaledEMPSumCOPERR); adh.scaledEMPSumCOPERR = scaledEMPSumCOPERR; scaledEMPSumCOPERR = tempCOPERR; dotprecision tempAIC(adh.scaledEMPSumAIC); adh.scaledEMPSumAIC = scaledEMPSumAIC; scaledEMPSumAIC = tempAIC; std::swap(rootPaving, adh.rootPaving); // just swap the pointers }