I want to optimize the parameters of the decision tree learner using EvolutionaryParameterOptimization.
That's my model:
<?xml version="1.0" encoding="US-ASCII"?>
<process version="4.3">
<operator name="Root" class="Process" expanded="yes">
<operator name="CSVExampleSource" class="CSVExampleSource">
<parameter key="filename" value="example.csv"/>
<parameter key="label_name" value="label"/>
</operator>
<operator name="EvolutionaryParameterOptimization" class="EvolutionaryParameterOptimization" expanded="yes">
<list key="parameters">
<parameter key="DecisionTree.maximal_depth" value="[-1.0;10000.0]"/>
<parameter key="DecisionTree.minimal_leaf_size" value="[1.0;10000.0]"/>
<parameter key="DecisionTree.confidence" value="[1.0E-7;0.5]"/>
<parameter key="DecisionTree.minimal_size_for_split" value="[1.0;10000.0]"/>
<parameter key="DecisionTree.minimal_gain" value="[0.0;Infinity]"/>
</list>
<parameter key="population_size" value="10"/>
<operator name="IteratingPerformanceAverage" class="IteratingPerformanceAverage" expanded="yes">
<parameter key="iterations" value="3"/>
<operator name="XValidation" class="XValidation" expanded="yes">
<parameter key="sampling_type" value="shuffled sampling"/>
<operator name="DecisionTree" class="DecisionTree">
<parameter key="confidence" value="0.33223030703745715"/>
<parameter key="maximal_depth" value="6341"/>
<parameter key="minimal_gain" value="Infinity"/>
<parameter key="minimal_leaf_size" value="1424"/>
<parameter key="minimal_size_for_split" value="2961"/>
</operator>
<operator name="OperatorChain" class="OperatorChain" expanded="yes">
<operator name="ModelApplier" class="ModelApplier">
<list key="application_parameters">
</list>
</operator>
<operator name="ClassificationPerformance" class="ClassificationPerformance">
<parameter key="absolute_error" value="true"/>
<parameter key="accuracy" value="true"/>
<list key="class_weights">
</list>
<parameter key="classification_error" value="true"/>
</operator>
</operator>
</operator>
</operator>
<operator name="ProcessLog" class="ProcessLog">
<parameter key="filename" value="process3.log"/>
<list key="log">
<parameter key="iteration" value="operator.XValidation.value.iteration"/>
<parameter key="time" value="operator.XValidation.value.time"/>
<parameter key="deviation" value="operator.XValidation.value.deviation"/>
<parameter key="accuracy" value="operator.XValidation.value.performance1"/>
<parameter key="max_depth" value="operator.DecisionTree.parameter.maximal_depth"/>
<parameter key="max_leaf_size" value="operator.DecisionTree.parameter.minimal_leaf_size"/>
<parameter key="confidence" value="operator.DecisionTree.parameter.confidence"/>
</list>
<parameter key="persistent" value="true"/>
</operator>
</operator>
<operator name="PerformanceWriter" class="PerformanceWriter">
<parameter key="performance_file" value="final_performance.per"/>
</operator>
<operator name="ParameterSetWriter" class="ParameterSetWriter">
<parameter key="parameter_file" value="parameters.par"/>
</operator>
</operator>
</process>
The strange thing is that the accuracy remains always (almost) the same. Here are the first
lines of the generated log file:
# Generated by ProcessLog[com.rapidminer.operator.visualization.ProcessLogOperator]
# iteration time deviation accuracy max_depth max_leaf_size confidence
10.0 23.0 0.05129744389973656 0.7932203389830508 9839.0 1251.0 0.34154054826704505
10.0 23.0 0.05507822647211116 0.7932203389830507 6341.0 7767.0 0.32651644400635255
10.0 23.0 0.0392401250942013 0.793220338983051 3707.0 48.0 0.397380892497813
10.0 22.0 0.0726246958834861 0.7932203389830509 9008.0 5164.0 0.40415961507086146
10.0 25.0 0.03774755500223652 0.7932203389830509 7652.0 3992.0 0.27277836592326715
10.0 28.0 0.055078226472110144 0.7932203389830507 293.0 1424.0 0.03868159703154637
10.0 27.0 0.06287198979997247 0.7932203389830507 615.0 6926.0 0.12025785562389255
10.0 46.0 0.026037782196170964 0.7932203389830507 4846.0 1825.0 0.4842464140080626
Even when I replace the decision tree learned by a neural network, the accuracy values are always
the same. But I can't imagine that the number of correctly classified examples is always the same. Any
ideas what I'm dong wrong?
Chris