Point Cloud Library (PCL)  1.9.1
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
decision_tree_trainer.h
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2010-2011, Willow Garage, Inc.
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer in the documentation and/or other materials provided
18  * with the distribution.
19  * * Neither the name of Willow Garage, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #ifndef PCL_ML_DT_DECISION_TREE_TRAINER_H_
39 #define PCL_ML_DT_DECISION_TREE_TRAINER_H_
40 
41 #include <pcl/common/common.h>
42 
43 #include <pcl/ml/dt/decision_tree.h>
44 #include <pcl/ml/feature_handler.h>
45 #include <pcl/ml/stats_estimator.h>
46 #include <pcl/ml/dt/decision_tree_data_provider.h>
47 
48 #include <vector>
49 
50 namespace pcl
51 {
52 
53  /** \brief Trainer for decision trees. */
54  template <
55  class FeatureType,
56  class DataSet,
57  class LabelType,
58  class ExampleIndex,
59  class NodeType >
60  class PCL_EXPORTS DecisionTreeTrainer
61  {
62 
63  public:
64 
65  /** \brief Constructor. */
67  /** \brief Destructor. */
68  virtual
70 
71  /** \brief Sets the feature handler used to create and evaluate features.
72  * \param[in] feature_handler The feature handler.
73  */
74  inline void
76  {
77  feature_handler_ = &feature_handler;
78  }
79 
80  /** \brief Sets the object for estimating the statistics for tree nodes.
81  * \param[in] stats_estimator The statistics estimator.
82  */
83  inline void
85  {
86  stats_estimator_ = &stats_estimator;
87  }
88 
89  /** \brief Sets the maximum depth of the learned tree.
90  * \param[in] max_tree_depth Maximum depth of the learned tree.
91  */
92  inline void
93  setMaxTreeDepth (const size_t max_tree_depth)
94  {
95  max_tree_depth_ = max_tree_depth;
96  }
97 
98  /** \brief Sets the number of features used to find optimal decision features.
99  * \param[in] num_of_features The number of features.
100  */
101  inline void
102  setNumOfFeatures (const size_t num_of_features)
103  {
104  num_of_features_ = num_of_features;
105  }
106 
107  /** \brief Sets the number of thresholds tested for finding the optimal decision threshold on the feature responses.
108  * \param[in] num_of_threshold The number of thresholds.
109  */
110  inline void
111  setNumOfThresholds (const size_t num_of_threshold)
112  {
113  num_of_thresholds_ = num_of_threshold;
114  }
115 
116  /** \brief Sets the input data set used for training.
117  * \param[in] data_set The data set used for training.
118  */
119  inline void
120  setTrainingDataSet (DataSet & data_set)
121  {
122  data_set_ = data_set;
123  }
124 
125  /** \brief Example indices that specify the data used for training.
126  * \param[in] examples The examples.
127  */
128  inline void
129  setExamples (std::vector<ExampleIndex> & examples)
130  {
131  examples_ = examples;
132  }
133 
134  /** \brief Sets the label data corresponding to the example data.
135  * \param[in] label_data The label data.
136  */
137  inline void
138  setLabelData (std::vector<LabelType> & label_data)
139  {
140  label_data_ = label_data;
141  }
142 
143  /** \brief Sets the minimum number of examples to continue growing a tree.
144  * \param[in] n Number of examples
145  */
146  inline void
148  {
149  min_examples_for_split_ = n;
150  }
151 
152  /** \brief Specify the thresholds to be used when evaluating features.
153  * \param[in] thres The threshold values.
154  */
155  void
156  setThresholds (std::vector<float> & thres)
157  {
158  thresholds_ = thres;
159  }
160 
161  /** \brief Specify the data provider.
162  * \param[in] dtdp The data provider that should implement getDatasetAndLabels(...) function
163  */
164  void
166  {
167  decision_tree_trainer_data_provider_ = dtdp;
168  }
169 
170  /** \brief Specify if the features are randomly generated at each split node.
171  * \param[in] b Do it or not.
172  */
173  void
175  {
176  random_features_at_split_node_ = b;
177  }
178 
179  /** \brief Trains a decision tree using the set training data and settings.
180  * \param[out] tree Destination for the trained tree.
181  */
182  void
183  train (DecisionTree<NodeType> & tree);
184 
185  protected:
186 
187  /** \brief Trains a decision tree node from the specified features, label data, and examples.
188  * \param[in] features The feature pool used for training.
189  * \param[in] examples The examples used for training.
190  * \param[in] label_data The label data corresponding to the examples.
191  * \param[in] max_depth The maximum depth of the remaining tree.
192  * \param[out] node The resulting node.
193  */
194  void
195  trainDecisionTreeNode (std::vector<FeatureType> & features,
196  std::vector<ExampleIndex> & examples,
197  std::vector<LabelType> & label_data,
198  size_t max_depth,
199  NodeType & node);
200 
201  /** \brief Creates uniformely distrebuted thresholds over the range of the supplied values.
202  * \param[in] num_of_thresholds The number of thresholds to create.
203  * \param[in] values The values for estimating the expected value range.
204  * \param[out] thresholds The resulting thresholds.
205  */
206  static void
207  createThresholdsUniform (const size_t num_of_thresholds,
208  std::vector<float> & values,
209  std::vector<float> & thresholds);
210 
211  private:
212 
213  /** \brief Maximum depth of the learned tree. */
214  size_t max_tree_depth_;
215  /** \brief Number of features used to find optimal decision features. */
216  size_t num_of_features_;
217  /** \brief Number of thresholds. */
218  size_t num_of_thresholds_;
219 
220  /** \brief FeatureHandler instance, responsible for creating and evaluating features. */
222  /** \brief StatsEstimator instance, responsible for gathering stats about a node. */
224 
225  /** \brief The training data set. */
226  DataSet data_set_;
227  /** \brief The label data. */
228  std::vector<LabelType> label_data_;
229  /** \brief The example data. */
230  std::vector<ExampleIndex> examples_;
231 
232  /** \brief Minimum number of examples to split a node. */
233  size_t min_examples_for_split_;
234  /** \brief Thresholds to be used instead of generating uniform distributed thresholds. */
235  std::vector<float> thresholds_;
236  /** \brief The data provider which is called before training a specific tree, if pointer is NULL, then data_set_ is used. */
237  boost::shared_ptr<pcl::DecisionTreeTrainerDataProvider<FeatureType, DataSet, LabelType, ExampleIndex, NodeType> > decision_tree_trainer_data_provider_;
238  /** \brief If true, random features are generated at each node, otherwise, at start of training the tree */
239  bool random_features_at_split_node_;
240  };
241 
242 }
243 
244 #include <pcl/ml/impl/dt/decision_tree_trainer.hpp>
245 
246 #endif
void setLabelData(std::vector< LabelType > &label_data)
Sets the label data corresponding to the example data.
void setMinExamplesForSplit(size_t n)
Sets the minimum number of examples to continue growing a tree.
Trainer for decision trees.
Class representing a decision tree.
Definition: decision_tree.h:51
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
void setMaxTreeDepth(const size_t max_tree_depth)
Sets the maximum depth of the learned tree.
void setExamples(std::vector< ExampleIndex > &examples)
Example indices that specify the data used for training.
void setNumOfFeatures(const size_t num_of_features)
Sets the number of features used to find optimal decision features.
void setTrainingDataSet(DataSet &data_set)
Sets the input data set used for training.
void setFeatureHandler(pcl::FeatureHandler< FeatureType, DataSet, ExampleIndex > &feature_handler)
Sets the feature handler used to create and evaluate features.
void setNumOfThresholds(const size_t num_of_threshold)
Sets the number of thresholds tested for finding the optimal decision threshold on the feature responses.
void setDecisionTreeDataProvider(boost::shared_ptr< pcl::DecisionTreeTrainerDataProvider< FeatureType, DataSet, LabelType, ExampleIndex, NodeType > > &dtdp)
Specify the data provider.
void setStatsEstimator(pcl::StatsEstimator< LabelType, NodeType, DataSet, ExampleIndex > &stats_estimator)
Sets the object for estimating the statistics for tree nodes.
void setThresholds(std::vector< float > &thres)
Specify the thresholds to be used when evaluating features.
Utility class interface which is used for creating and evaluating features.
void setRandomFeaturesAtSplitNode(bool b)
Specify if the features are randomly generated at each split node.