A wrapper or manager for a RealMappedSPnode tree to be used for function estimation based on a kernel density estimate (KDE) function available pointwise. More...
Classes | |||||||||||||
class | NodePtrMeasurePair | ||||||||||||
class | TotalVariationMergeMeasurer | ||||||||||||
class | TotalVariationSplitMeasurer | ||||||||||||
Public Member Functions | |||||||||||||
FunctionEstimatorKDE (const ivector &v, const RealPointwiseFunctionEstimator &f, int lab=0) | |||||||||||||
Initialised constructor. | |||||||||||||
FunctionEstimatorKDE (const SPnode &spn, const RealPointwiseFunctionEstimator &f, int lab=0) | |||||||||||||
Initialised constructor. | |||||||||||||
FunctionEstimatorKDE (const FunctionEstimatorKDE &other) | |||||||||||||
Copy constructor. | |||||||||||||
~FunctionEstimatorKDE () | |||||||||||||
Destructor. | |||||||||||||
const RealPointwiseFunctionEstimator & | getFobjReference () const | ||||||||||||
Get the reference to the function object used by this. | |||||||||||||
int | getLabel () const | ||||||||||||
Get the label. | |||||||||||||
void | setLabel (int lab) | ||||||||||||
Set the label. | |||||||||||||
bool | hasSubPaving () const | ||||||||||||
Get whether this has a subpaving to manage. | |||||||||||||
cxsc::ivector | getRootBox () const | ||||||||||||
Get the box of the subpaving managed by this. | |||||||||||||
int | getDimensions () const | ||||||||||||
get the dimensions of the subpaving this manages. | |||||||||||||
cxsc::real | getDomainVolume () const | ||||||||||||
get volume of the root box of the subpaving this manages. | |||||||||||||
size_t | getRootLeaves () const | ||||||||||||
Gets number of leaf nodes in the root paving. | |||||||||||||
IntVec | getLeafLevels () const | ||||||||||||
std::string | getLeafLevelsString () const | ||||||||||||
subpavings::PiecewiseConstantFunction | makePiecewiseConstantFunction () const | ||||||||||||
Make a PiecewiseConstantFunction out of this estimator. | |||||||||||||
bool | splitToShape (std::string instruction) | ||||||||||||
Split an estimator to a specified shape. | |||||||||||||
cxsc::real | getTotalIntegralOfRealEstimate () const | ||||||||||||
Get the total integral of the function as estimated by this. | |||||||||||||
std::ostream & | outputToStreamTabs (std::ostream &os, int prec=5) const | ||||||||||||
Output the subpaving managed by this to a given stream. | |||||||||||||
prioritySplit methods. | |||||||||||||
These methods take an estimator and progressively split it, using a priority queue of splittable nodes to determine which node to split first. The ordering for the queue is referred to here as the measure.
The measure of a node used is the total variation between the function estimate on the node and the function estimates on the node's prospective children. Pieces with the largest total variation will be split first. Note that this measure does not provide a globally optimal ordering for the queue in the sense that it will not necessarily provide the best function estimate for a given number of leaves. Splitting continues until there are maxLeaves leaves, or there are no more splittable nodes. Each node in the subpaving managed by this decides for itself whether it is splittable, using isSplittableNode(). If more than one splittable node is equally 'large', on the basis of the measure used, then a random choice is made between all equally large nodes to find the node which will be split. The seed for the random number generator used for random selection between equally 'large' nodes can be specified by the user or set by this. If you are looking at distributions of results across multiple estimates, supply the random number generator seed, or your own random number generator, to the priority queue to ensure that each estimator will make different random choices each time; use of the internally set seed will give the same results each time. Throws a NullSubpavings_Error if the subpaving that this manages is a NULL pointer. Throws an std::logic_error if the state of this is not 'legal', ie if this contains cherries that do not pass isSplittableNode(). Throws an std::logic_error if the split becomes muddled because of some failure within the logic of the algorithm itself. Aborts if there are no splittable leaves left (or none at the start).
| |||||||||||||
bool | prioritySplit (const RealMappedSPnode::Measurer &measure, size_t maxLeaves, LOGGING_LEVEL logging, long unsigned int seed=1234) | ||||||||||||
Version where the measure is supplied, with a pseudo-random number generator seed. | |||||||||||||
bool | prioritySplit (const RealMappedSPnode::Measurer &measure, real maxMeasure, size_t maxLeaves, LOGGING_LEVEL logging, long unsigned int seed=1234) | ||||||||||||
Version where the measure is supplied, and the maxMeasure, with a pseudo-random number generator seed. | |||||||||||||
bool | prioritySplit (const RealMappedSPnode::Measurer &measure, size_t maxLeaves, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||
Version where the measure is supplied, with a pseudo-random number generator. | |||||||||||||
bool | prioritySplit (const RealMappedSPnode::Measurer &measure, real maxMeasure, size_t maxLeaves, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||
Version where the measure is supplied, and the maxMeasure, with a pseudo-random number generator. | |||||||||||||
bool | prioritySplit (size_t maxLeaves, LOGGING_LEVEL logging, long unsigned int seed=1234) | ||||||||||||
Version using the default measure, the total variation, and a pseudo-random number generator seed. | |||||||||||||
bool | prioritySplit (real maxMeasure, size_t maxLeaves, LOGGING_LEVEL logging, long unsigned int seed=1234) | ||||||||||||
Version where the default measure, the total variation, is used, with the maxMeasure supplied. | |||||||||||||
bool | prioritySplit (size_t maxLeaves, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||
Version using the default measure, the total variation, and a pseudo-random number generator is supplied. | |||||||||||||
bool | prioritySplit (real maxMeasure, size_t maxLeaves, LOGGING_LEVEL logging, gsl_rng *rgsl) | ||||||||||||
Version using the default measure, the total variation, and a maxMeasure and a pseudo-random number generator are supplied. | |||||||||||||
Output the subpaving managed by this to a txt file. | |||||||||||||
Format is a tab-delimited file of numeric data starting with nodeName, then the node box volume, then the node counter, then the description of the node box as a tab-delimited list of interval upper and lower bounds.
| |||||||||||||
void | outputToTxtTabs (const std::string &s, int prec=5) const | ||||||||||||
void | outputToTxtTabs (const std::string &s, int prec, bool confirm) const | ||||||||||||
Output details of full sample (from root) to txt file. | |||||||||||||
Format is a mixture of alpha and numeric data.
| |||||||||||||
void | outputRootToTxt (const std::string &s, int prec=5) const | ||||||||||||
void | outputRootToTxt (const std::string &s, int prec, bool confirm) const | ||||||||||||
Output all nodes of the subpaving managed by this to a given stream. | |||||||||||||
Format is tab-delimited data giving details of all nodes.
| |||||||||||||
std::ostream & | outputRootToStreamTabs (std::ostream &os, int prec=5) const | ||||||||||||
void | outputLog (const std::string &s, int i, int prec=5) const | ||||||||||||
Append current state of estimator to a txt log file. | |||||||||||||
std::string | stringSummary () const | ||||||||||||
Get a string summary of this estimator's properties. |
A wrapper or manager for a RealMappedSPnode tree to be used for function estimation based on a kernel density estimate (KDE) function available pointwise.
FunctionEstimatorKDE::FunctionEstimatorKDE | ( | const ivector & | v, |
const RealPointwiseFunctionEstimator & | f, | ||
int | lab = 0 |
||
) |
Initialised constructor.
Initialised with domain box.
Throws a MalconstructedBox_Error if the box is not suitable as the basis of a subpaving (eg, box has no dimensions, or the box has a thin interval on at least one dimension).
Ideal constructor when the support domain of the function is set a priori.
v | The box to use for the subpaving to be managed. |
f | An estimator for the kde to be represented by this. |
lab | The label for this (defaults to 0). |
: rootPaving(NULL), fobj(f), label(lab)
{
    // Build a single-node subpaving on box v, then let the function object
    // visit it. Any std::exception raised on the way is passed to
    // constructor_error_handler().
    try {
        // checkBox() is defined elsewhere; a false result means v cannot be
        // used as the basis of a subpaving.
        if (!checkBox(v)) {
            // The location string names the real parameter type
            // (RealPointwiseFunctionEstimator), matching the signature.
            throw subpavings::MalconstructedBox_Error(
                "FunctionEstimatorKDE::FunctionEstimatorKDE(const ivector&, const RealPointwiseFunctionEstimator&, int lab)");
        }
        rootPaving = new RealMappedSPnode(v);
        rootPaving->acceptSPValueVisitor(fobj);
    }
    catch (exception const&) {
        // NOTE(review): presumably releases rootPaving and rethrows - confirm
        // against the definition of constructor_error_handler().
        constructor_error_handler();
    }
}
FunctionEstimatorKDE::FunctionEstimatorKDE | ( | const SPnode & | spn, |
const RealPointwiseFunctionEstimator & | f, | ||
int | lab = 0 |
||
) |
Initialised constructor.
Initialised with a subpaving.
spn | A subpaving to copy as the subpaving to be managed. |
f | An estimator for the kde to be represented by this. |
lab | The label for this (defaults to 0). |
: rootPaving(NULL), fobj(f), label(lab)
{
    // Copy the supplied subpaving into a RealMappedSPnode tree, then let the
    // function object visit it. Any std::exception raised on the way is
    // passed to constructor_error_handler().
    try {
        // A subpaving reporting isEmpty() cannot be copied; the code treats
        // this as a missing-box error.
        if (spn.isEmpty()) {
            // The location string is balanced and names the real parameter
            // types, matching the signature.
            throw subpavings::NoBox_Error(
                "FunctionEstimatorKDE::FunctionEstimatorKDE(const SPnode&, const RealPointwiseFunctionEstimator&, int lab)");
        }
        rootPaving = new RealMappedSPnode(spn);
        rootPaving->acceptSPValueVisitor(fobj);
    }
    catch (exception const&) {
        // NOTE(review): presumably releases rootPaving and rethrows - confirm
        // against the definition of constructor_error_handler().
        constructor_error_handler();
    }
}
int FunctionEstimatorKDE::getDimensions | ( | ) | const |
get the dimensions of the subpaving this manages.
{
    // With no subpaving to manage there is no box, so report 0 dimensions.
    if (!hasSubPaving()) {
        return 0;
    }
    // Otherwise defer to the root node of the managed subpaving.
    return getSubPaving()->getDimension();
}
cxsc::real FunctionEstimatorKDE::getDomainVolume | ( | ) | const |
get volume of the root box of the subpaving this manages.
{
    // With no subpaving to manage the volume is reported as 0.0.
    if (!hasSubPaving()) {
        return real(0.0);
    }
    // Otherwise the volume is that of the root node of the subpaving.
    return getSubPaving()->nodeRealVolume();
}
const RealPointwiseFunctionEstimator & FunctionEstimatorKDE::getFobjReference | ( | ) | const |
Get the reference to the function object used by this.
{
    // Accessor: hands back the fobj member (returned by const reference,
    // so no copy of the function object is made here).
    return fobj;
}
int FunctionEstimatorKDE::getLabel | ( | ) | const |
Get the label.
{
    // Accessor: returns the current value of the label member.
    return label;
}
IntVec FunctionEstimatorKDE::getLeafLevels | ( | ) | const |
Get a vector of the leaf node levels.
Root is level 0, next level down is 1, etc.
{
    // Starts empty and stays empty when there is no subpaving to manage.
    IntVec leafLevels;

    if (hasSubPaving()) {
        // The root node fills the container in; 0 is the level passed for
        // the root itself.
        getSubPaving()->getLeafNodeLevels(leafLevels, 0);
    }

    return leafLevels;
}
std::string FunctionEstimatorKDE::getLeafLevelsString | ( | ) | const |
Get a string of the leaf node levels.
Root is level 0, next level down is 1, etc. Example return string "3,3,2,1"
{
    // No subpaving: the description is the empty string.
    if (!hasSubPaving()) {
        return std::string();
    }
    // Otherwise the root node of the subpaving supplies the string.
    return getSubPaving()->getLeafNodeLevelsString();
}
cxsc::ivector FunctionEstimatorKDE::getRootBox | ( | ) | const |
Get the box of the subpaving managed by this.
{
    // Only a managed subpaving has a box to hand back.
    if (hasSubPaving()) {
        return getSubPaving()->getBox();
    }

    // No subpaving at all: report it with the location of the failure.
    throw NullSubpavingPointer_Error(
        "FunctionEstimatorKDE::getRootBox()");
}
size_t FunctionEstimatorKDE::getRootLeaves | ( | ) | const |
Gets number of leaf nodes in the root paving.
Throws NullSubpavingPointer_Error if the subpaving that this manages is a NULL pointer.
{
    // The leaf count comes from the root node of the managed subpaving.
    if (hasSubPaving()) {
        return getSubPaving()->getNumberLeaves();
    }

    // Nothing to count when the subpaving pointer is NULL.
    throw NullSubpavingPointer_Error("FunctionEstimatorKDE::getRootLeaves()");
}
cxsc::real FunctionEstimatorKDE::getTotalIntegralOfRealEstimate | ( | ) | const |
Get the total integral of the function as estimated by this.
The integral is calculated as the sum over all the leaves of the subpaving managed by this of the absolute value of the real range on the leaf multiplied by the volume of the box represented by the leaf.
{
    // The integral is only defined when there is a subpaving to sum over.
    if (!hasSubPaving()) {
        // Location string uses the same "Class::method()" form as the other
        // members of this class.
        throw NullSubpavingPointer_Error(
            "FunctionEstimatorKDE::getTotalIntegralOfRealEstimate()");
    }
    // The root node of the subpaving performs the calculation.
    return getSubPaving()->getTotalAbsLeafAreaRangeWithBox();
}
bool FunctionEstimatorKDE::hasSubPaving | ( | ) | const |
Get whether this has a subpaving to manage.
{
    // True exactly when getSubPaving() yields a non-NULL pointer.
    return ( getSubPaving() != NULL );
}
Make a PiecewiseConstantFunction out of this estimator.
{
    // Guard the dereference below: with no subpaving there is nothing to
    // build the function from. The error type matches the one thrown by
    // getRootBox() and getRootLeaves() in the same situation.
    if (!hasSubPaving()) {
        throw NullSubpavingPointer_Error(
            "FunctionEstimatorKDE::makePiecewiseConstantFunction()");
    }
    // Construct the result from the managed subpaving and this estimator's
    // label.
    return PiecewiseConstantFunction(*getSubPaving(), getLabel());
}
void FunctionEstimatorKDE::outputLog | ( | const std::string & | s, |
int | i, | ||
int | prec = 5 |
||
) | const |
Append current state of estimator to a txt log file.
Format is a tab-delimited file of numeric data. Output includes node contributions to unscaled EMP under COPERR and AIC and the changes in EMP that would result from splitting the node.
s | the name of the txt file to send output to. |
i | the number of pass (ie, 0, 1, 2, 3 etc) in process. |
prec | the precision for output formatting. ie, number of decimal places. |
{
    // Appends a "Pass i" header followed by the leaf output of the managed
    // subpaving to the file named s. If the file cannot be opened, a message
    // is written to std::cerr and nothing else happens.
    ofstream os(s.c_str(), ios::app);         // append
    if (os.is_open()) {
        os << std::endl;
        os << "Pass " << i << std::endl; // numbering
        // Guard the dereference: when there is no subpaving only the pass
        // header is logged (outputToStreamTabs() likewise writes nothing
        // for a NULL subpaving).
        if (hasSubPaving()) {
            getSubPaving()->leavesOutputTabs(os, prec); // the output
        }
        os.close();
    }
    else {
        std::cerr << "Error: could not open file named "
            << s << std::endl << std::endl;
    }
}
std::ostream & FunctionEstimatorKDE::outputToStreamTabs | ( | std::ostream & | os, |
int | prec = 5 |
||
) | const |
Output the subpaving managed by this to a given stream.
Format is tab-delimited data giving details of leaf nodes.
os | is a reference to the stream to output the estimator to. |
prec | the precision for output formatting. ie, number of decimal places. |
{
    // Nothing is written when there is no subpaving; the stream is handed
    // back untouched.
    if (!hasSubPaving()) {
        return os;
    }

    // cxsc io manipulators are needed here: save the stream's current
    // options, set the requested precision, and restore afterwards.
    os << cxsc::SaveOpt;
    os << cxsc::Variable << cxsc::SetPrecision(prec+2,prec);

    getSubPaving()->leavesOutputTabs(os); // the leaf output itself

    os << cxsc::RestoreOpt;
    return os;
}
bool FunctionEstimatorKDE::splitToShape | ( | std::string | instruction | ) |
Split an estimator to a specified shape.
Throws a NullSubpavingPointer_Error if the subpaving that this manages is a NULL pointer.
Throws a NoBox_Error if the subpaving box is empty.
Prints a message to the standard error output if the instruction could not be carried out.
instruction | specifies the required shape, eg "3, 3, 2, 1" |
{
    // Splits the managed subpaving according to the instruction string and
    // re-applies the function object to the result. Returns true only if
    // splitRootToShape() reports success and no invalid_argument or
    // logic_error was caught on the way.

    // checks: is there a root paving, is the string properly formed?
    if (!hasSubPaving()) {
        throw NullSubpavingPointer_Error(
            "FunctionEstimatorKDE::splitToShape()");
    }
    bool success = false;
    // Snapshot taken before any splitting; it is what gets passed to
    // handleSplitToShapeError() on every failure path below.
    RealMappedSPnode temp(*getSubPaving()); // copy to temp
    try {
        if (instruction.length() == 0) {
            throw std::invalid_argument(
                "FunctionEstimatorKDE::splitToShape() : No instruction");
        }
        // Only commas, spaces and decimal digits are accepted.
        std::string legal(", 0123456789");
        if (instruction.find_first_not_of(legal) != std::string::npos) {
            throw std::invalid_argument(
                "FunctionEstimatorKDE::splitToShape() : Illegal character");
        }
        // all seems to be okay, we can start splitting the root paving
        success = getSubPaving()->splitRootToShape(instruction);
        // Note: the visitor is applied before success is examined, so it
        // runs even when the split reported failure.
        rootPaving->acceptSPValueVisitor(fobj);
        if (!success) {
            // NOTE(review): presumably restores the subpaving from temp -
            // confirm against handleSplitToShapeError().
            handleSplitToShapeError(temp);
        }
    }
    // invalid_argument derives from logic_error, so it must be caught first.
    catch (std::invalid_argument const& ia) {
        cerr << ia.what() << endl;
        handleSplitToShapeError(temp);
        success = false;
    }
    catch (std::logic_error const& le) {
        cerr << le.what() << endl;
        handleSplitToShapeError(temp);
        success = false;
    }
    return success;
    // any other exceptions are unhandled
}
std::string FunctionEstimatorKDE::stringSummary | ( | ) | const |
Get a string summary of this estimator's properties.
A string description of this. Includes the address of the subpaving managed but not details of that subpaving.
{
    // Collects the addresses of this object, of its function object and of
    // the managed subpaving (if there is one) into a single string.
    std::ostringstream summary;

    summary << "This address = " << (this) << endl;
    summary << "Reference to kde function object is = " << (&fobj) << endl;

    if (!hasSubPaving()) {
        summary << "Subpaving is NULL" << endl;
    }
    else {
        summary << "Address of subpaving is " << getSubPaving() << endl;
    }

    return summary.str();
}