Boosting, Bagging and Random Forest

anyavery1
New Altair Community Member
Hi
I ran AdaBoost, Bagging and Random Forest on my data. The performance vector results were not really different from the Decision Tree performance vector result. So I used the Sonar data and ran a Decision Tree, followed by AdaBoost, Bagging and Random Forest. Although the tutorial mentions that the performance vector accuracy improves, I still got similar results.
I would appreciate any input.
Regards
Anya
I ran AdaBoost, Bagging and Random Forest on my data. The performance vector results were not really different from the Decision Tree performance vector result. So I used the Sonar data and ran a Decision Tree, followed by AdaBoost, Bagging and Random Forest. Although the tutorial mentions that the performance vector accuracy improves, I still got similar results.
I would appreciate any input.
Regards
Anya
Tagged:
0
Answers
-
Here's an example of using the Stacking operator for Ensemble modelling. It creates a Forest of Forests to produce greater mine detection accuracy.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 7.0.001): stacked ensemble on the Sonar sample data.

  Flow, as wired by the <connect> elements below:
    Retrieve Sonar -> Generate ID -> Sample (2) -> StackingMagic
  "Sample (2)" feeds its sampled output to StackingMagic "in 1" (training data)
  and its "original" output to StackingMagic "in 2" (source of the test rows).
  StackingMagic returns the model on "out 1" and the performance vector on "out 2".
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the Sonar example set from the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="85">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <!-- Adds an ID to every row before sampling. NOTE(review): presumably "Set Minus"
           inside StackingMagic relies on these IDs to match rows; confirm against the
           operator documentation. -->
      <operator activated="true" class="generate_id" compatibility="7.0.001" expanded="true" height="82" name="Generate ID" width="90" x="45" y="187"/>
      <!-- Per-class probability sampling: Mine rows are kept with probability 0.2,
           Rock rows with probability 0.5. -->
      <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample (2)" width="90" x="179" y="85">
        <parameter key="sample" value="probability"/>
        <parameter key="balance_data" value="true"/>
        <list key="sample_size_per_class"/>
        <list key="sample_ratio_per_class"/>
        <list key="sample_probability_per_class">
          <parameter key="Mine" value="0.2"/>
          <parameter key="Rock" value="0.5"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">This undersamples the class 'Mine' to make it even more difficult to detect mines</description>
      </operator>
      <!-- Subprocess: trains the stacking model on "in 1" and scores it on the rows of
           "in 2" that are left after subtracting the training rows. -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="StackingMagic" width="90" x="380" y="34">
        <process expanded="true">
          <!-- Duplicates the training sample: output 1 trains Stacking, output 2 is the
               subtrahend of Set Minus. -->
          <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="124" name="Multiply" width="90" x="45" y="85"/>
          <operator activated="true" class="stacking" compatibility="7.0.001" expanded="true" height="68" name="Stacking" width="90" x="179" y="34">
            <parameter key="keep_all_attributes" value="false"/>
            <!-- First inner process of Stacking: produces the base model(s). -->
            <process expanded="true">
              <!-- Stores the count statistic for value "Mine" of attribute "class" in the
                   macro posClass; the inner Sample operator uses it as the per-class size. -->
              <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="45" y="34">
                <parameter key="macro" value="posClass"/>
                <parameter key="macro_type" value="statistics"/>
                <parameter key="statistics" value="count"/>
                <parameter key="attribute_name" value="class"/>
                <parameter key="attribute_value" value="Mine"/>
                <list key="additional_macros"/>
              </operator>
              <!-- 20 iterations; the iteration macro is enabled so that %{iteration} can
                   seed the sampling inside the loop. -->
              <operator activated="true" class="loop" compatibility="7.0.001" expanded="true" height="82" name="Loop Trees" width="90" x="179" y="34">
                <parameter key="set_iteration_macro" value="true"/>
                <parameter key="iterations" value="20"/>
                <process expanded="true">
                  <!-- Balanced sample: %{posClass} rows requested for each of Mine and Rock,
                       with the local random seed set to the current iteration number. -->
                  <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="45" y="34">
                    <parameter key="balance_data" value="true"/>
                    <list key="sample_size_per_class">
                      <parameter key="Mine" value="%{posClass}"/>
                      <parameter key="Rock" value="%{posClass}"/>
                    </list>
                    <list key="sample_ratio_per_class"/>
                    <list key="sample_probability_per_class"/>
                    <parameter key="use_local_random_seed" value="true"/>
                    <parameter key="local_random_seed" value="%{iteration}"/>
                  </operator>
                  <!-- Only "output 1" is connected; this Multiply is the extension point
                       for attaching further learners. -->
                  <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="82" name="Multiply (2)" width="90" x="112" y="136">
                    <description align="center" color="transparent" colored="false" width="126">You can add other models as you like here.</description>
                  </operator>
                  <!-- Random Forest with no parameters overridden in this file. -->
                  <operator activated="true" class="parallel_random_forest" compatibility="7.0.001" expanded="true" height="82" name="Random Forest" width="90" x="246" y="136"/>
                  <connect from_port="input 1" to_op="Sample" to_port="example set input"/>
                  <connect from_op="Sample" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
                  <connect from_op="Multiply (2)" from_port="output 1" to_op="Random Forest" to_port="training set"/>
                  <connect from_op="Random Forest" from_port="model" to_port="output 1"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="source_input 2" spacing="0"/>
                  <portSpacing port="sink_output 1" spacing="0"/>
                  <portSpacing port="sink_output 2" spacing="0"/>
                </process>
                <description align="center" color="transparent" colored="false" width="126">Each loop builds a new tree using a different balanced data sample. The loop iteration is used as the RandomSeed so the more loops, the more trees.</description>
              </operator>
              <connect from_port="training set 1" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Loop Trees" to_port="input 1"/>
              <connect from_op="Loop Trees" from_port="output 1" to_port="base model 1"/>
              <portSpacing port="source_training set 1" spacing="0"/>
              <portSpacing port="source_training set 2" spacing="0"/>
              <portSpacing port="sink_base model 1" spacing="0"/>
              <portSpacing port="sink_base model 2" spacing="0"/>
            </process>
            <!-- Second inner process of Stacking: the learner trained on the "stacking
                 examples" port. W-Logistic is the one wired in; Naive Bayes is deactivated
                 and has no connections. -->
            <process expanded="true">
              <operator activated="false" class="naive_bayes" compatibility="7.0.001" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="85"/>
              <operator activated="true" class="weka:W-Logistic" compatibility="7.0.000" expanded="true" height="82" name="W-Logistic" width="90" x="45" y="187"/>
              <connect from_port="stacking examples" to_op="W-Logistic" to_port="training set"/>
              <connect from_op="W-Logistic" from_port="model" to_port="stacking model"/>
              <portSpacing port="source_stacking examples" spacing="0"/>
              <portSpacing port="sink_stacking model" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">This is the operator where things are interesting.</description>
          </operator>
          <!-- Test set: the data arriving on "in 2" minus the training sample
               (Multiply output 2 is connected to the "subtrahend" port). -->
          <operator activated="true" class="set_minus" compatibility="7.0.001" expanded="true" height="82" name="Set Minus" width="90" x="179" y="289">
            <description align="center" color="transparent" colored="false" width="126">Keeps it fair (ineffect it's a split test, but I went the complicated way)</description>
          </operator>
          <!-- Applies the stacking model to the Set Minus output. -->
          <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="313" y="136">
            <list key="application_parameters"/>
          </operator>
          <!-- Computes the performance vector from the labelled data of Apply Model. -->
          <operator activated="true" class="performance" compatibility="7.0.001" expanded="true" height="82" name="Performance Loop Stacking" width="90" x="380" y="34"/>
          <connect from_port="in 1" to_op="Multiply" to_port="input"/>
          <connect from_port="in 2" to_op="Set Minus" to_port="example set input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Stacking" to_port="training set"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Set Minus" to_port="subtrahend"/>
          <connect from_op="Stacking" from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Set Minus" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance Loop Stacking" to_port="labelled data"/>
          <connect from_op="Apply Model" from_port="model" to_port="out 1"/>
          <connect from_op="Performance Loop Stacking" from_port="performance" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="189"/>
          <portSpacing port="source_in 3" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">This uses a loop to generate many decision trees using different samples of the data for a more complete picture.</description>
      </operator>
      <!-- Top-level wiring: result 1 receives the model, result 2 the performance vector. -->
      <connect from_op="Retrieve Sonar" from_port="output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Sample (2)" to_port="example set input"/>
      <connect from_op="Sample (2)" from_port="example set output" to_op="StackingMagic" to_port="in 1"/>
      <connect from_op="Sample (2)" from_port="original" to_op="StackingMagic" to_port="in 2"/>
      <connect from_op="StackingMagic" from_port="out 1" to_port="result 1"/>
      <connect from_op="StackingMagic" from_port="out 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>