160 std::vector<ExampleIndex>& examples,
161 std::vector<LabelDataType>& label_data,
162 std::vector<float>& results,
163 std::vector<unsigned char>& flags,
164 const float threshold)
const
// Computes a variance-reduction score for splitting the given examples:
// the variance of all labels minus the count-weighted variances of the
// labels that fall into each branch.
// NOTE(review): the start of the signature, the braces and several
// statements are not visible in this excerpt; comments below describe only
// the visible lines.
166 const std::size_t num_of_examples = examples.size();
167 const std::size_t num_of_branches = getNumOfBranches();
// One accumulator slot per branch, plus a final slot (index num_of_branches)
// that aggregates over all examples.
170 std::vector<LabelDataType> sums(num_of_branches + 1, 0);
171 std::vector<LabelDataType> sqr_sums(num_of_branches + 1, 0);
172 std::vector<std::size_t> branch_element_count(num_of_branches + 1, 0);
// Every branch count starts at 1 and the total slot is bumped once per
// branch, so the divisions further down never see a zero count.
// NOTE(review): presumably intentional smoothing for empty branches; it also
// biases the means/weights slightly - confirm.
174 for (std::size_t branch_index = 0; branch_index < num_of_branches; ++branch_index) {
175 branch_element_count[branch_index] = 1;
176 ++branch_element_count[num_of_branches];
// Accumulate label statistics per example.
179 for (std::size_t example_index = 0; example_index < num_of_examples;
// branch_index is an output of a call whose callee line is not visible here;
// it receives the example's result, flag and the threshold.
181 unsigned char branch_index;
183 results[example_index], flags[example_index], threshold, branch_index);
// NOTE(review): label_data is indexed by loop position, not by
// examples[example_index] - assumes label_data is aligned with examples.
185 LabelDataType label = label_data[example_index];
// Sum of labels: into the example's branch slot and into the total slot.
187 sums[branch_index] += label;
188 sums[num_of_branches] += label;
// Sum of squared labels: same two slots.
190 sqr_sums[branch_index] += label * label;
191 sqr_sums[num_of_branches] += label * label;
// Element counts: same two slots.
193 ++branch_element_count[branch_index];
194 ++branch_element_count[num_of_branches];
// Per-slot variance as (mean of squares) - (mean)^2, including the total slot.
197 std::vector<float> variances(num_of_branches + 1, 0);
198 for (std::size_t branch_index = 0; branch_index < num_of_branches + 1;
200 const float mean_sum =
201 static_cast<float>(sums[branch_index]) / branch_element_count[branch_index];
202 const float mean_sqr_sum =
static_cast<float>(sqr_sums[branch_index]) /
203 branch_element_count[branch_index];
204 variances[branch_index] = mean_sqr_sum - mean_sum * mean_sum;
// Start from the variance over all examples and subtract each branch's
// variance weighted by its share of the total element count.
207 float information_gain = variances[num_of_branches];
208 for (std::size_t branch_index = 0; branch_index < num_of_branches; ++branch_index) {
211 const float weight =
static_cast<float>(branch_element_count[branch_index]) /
212 static_cast<float>(branch_element_count[num_of_branches]);
213 information_gain -= weight * variances[branch_index];
216 return information_gain;
228 std::vector<unsigned char>& flags,
229 const float threshold,
230 std::vector<unsigned char>& branch_indices)
const
// Fills branch_indices with one branch index per entry of results.
// NOTE(review): the start of the signature (including the declaration of
// `results`), the braces and the callee of the per-result call are not
// visible in this excerpt.
232 const std::size_t num_of_results = results.size();
// NOTE(review): num_of_branches is not used by any visible line below.
233 const std::size_t num_of_branches = getNumOfBranches();
// Output is resized to match results; any previous size is discarded.
235 branch_indices.resize(num_of_results);
236 for (std::size_t result_index = 0; result_index < num_of_results; ++result_index) {
// branch_index is written by the (not visible) call that takes the result,
// its flag and the threshold, then stored at the same position.
237 unsigned char branch_index;
239 results[result_index], flags[result_index], threshold, branch_index);
240 branch_indices[result_index] = branch_index;
271 std::vector<ExampleIndex>& examples,
272 std::vector<LabelDataType>& label_data,
273 NodeType& node)
const
// Computes label statistics over the given examples and stores the variance
// on the node.
// NOTE(review): the start of the signature, the braces and some statements
// are not visible in this excerpt; comments describe only the visible lines.
275 const std::size_t num_of_examples = examples.size();
277 LabelDataType sum = 0.0f;
278 LabelDataType sqr_sum = 0.0f;
279 for (std::size_t example_index = 0; example_index < num_of_examples;
// NOTE(review): label_data is indexed by loop position, not by
// examples[example_index] - assumes label_data is aligned with examples.
281 const LabelDataType label = label_data[example_index];
// NOTE(review): only the squared-label accumulation is visible; the statement
// that accumulates `sum` is presumably on an elided line - confirm.
284 sqr_sum += label * label;
// Turn the accumulated totals into means.
// NOTE(review): no visible guard for num_of_examples == 0, which would make
// these divisions by zero - confirm callers never pass an empty set.
287 sum /= num_of_examples;
288 sqr_sum /= num_of_examples;
// Variance as (mean of squares) - (mean)^2.
290 const float variance = sqr_sum - sum * sum;
293 node.variance = variance;