Is it possible to loop over a collection of models and obtain a collection of predictions?

earmijo
earmijo New Altair Community Member
edited November 2024 in Community Q&A

I'm trying to implement Blagging as described by Tom Fawcett in "Learning from Imbalanced Classes".

 

I summarized the algorithm:

 

1) Obtain bootstrap samples from the original imbalanced data set

2) Balance each sample by downsampling 

3) Estimate a model (e.g. tree)

4) Put the individual trees to vote

 

I'm trying to implement the algorithm in RapidMiner using collections. I can create a collection of models successfully, but when I try to loop over this collection to produce a second collection, this time of predictions, I run into trouble. Am I violating a rule about working with collections?

 

I'm using the abalone data set as an example. I've attached the files.

 

Any help will be appreciated.

 

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Blagging (balanced bagging) example process.

  Stage 1 ("Loop"): for each of 3 iterations, draw a bootstrap sample of the
  training data, balance it by down-sampling the negative class to the size
  of the positive class, and train a decision tree on the balanced sample.

  Stage 2 ("Loop Collection"): apply every trained tree to the test data and
  keep only the Id and confidence(positive) columns, yielding one scored
  example set per tree.

  Repository entries "training" and "test" must exist relative to where this
  process is stored.
-->
<process version="7.5.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.5.003" expanded="true" height="68" name="Retrieve training" width="90" x="45" y="187">
        <parameter key="repository_entry" value="training"/>
      </operator>

      <!-- Stage 1: train one tree per balanced bootstrap sample. -->
      <operator activated="true" class="concurrency:loop" compatibility="7.5.003" expanded="true" height="82" name="Loop" width="90" x="246" y="187">
        <parameter key="number_of_iterations" value="3"/>
        <process expanded="true">
          <operator activated="true" class="sample_bootstrapping" compatibility="7.5.003" expanded="true" height="82" name="Sample (Bootstrapping)" width="90" x="112" y="34"/>
          <!-- Positive (minority) examples of the bootstrap sample. -->
          <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="187">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="Class.equals.positive"/>
            </list>
          </operator>
          <!-- Stores a value taken from the positive subset in macro "numfraud";
               no macro type is set, so the operator default applies
               (presumably the number of examples; verify in the operator docs). -->
          <operator activated="true" class="extract_macro" compatibility="7.5.003" expanded="true" height="68" name="Extract Macro" width="90" x="581" y="34">
            <parameter key="macro" value="numfraud"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Negative (majority) examples, taken from the unfiltered "original" port. -->
          <operator activated="true" class="filter_examples" compatibility="7.5.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="380" y="238">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="Class.equals.negative"/>
            </list>
          </operator>
          <!-- Down-sample the negatives to the size held in "numfraud". -->
          <operator activated="true" class="sample" compatibility="7.5.003" expanded="true" height="82" name="Sample" width="90" x="581" y="238">
            <parameter key="sample_size" value="%{numfraud}"/>
            <list key="sample_size_per_class"/>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.5.003" expanded="true" height="103" name="Append" width="90" x="715" y="136"/>
          <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="7.5.003" expanded="true" height="82" name="Decision Tree" width="90" x="849" y="85">
            <parameter key="criterion" value="gini_index"/>
            <parameter key="maximal_depth" value="5"/>
            <parameter key="apply_prepruning" value="false"/>
          </operator>
          <connect from_port="input 1" to_op="Sample (Bootstrapping)" to_port="example set input"/>
          <connect from_op="Sample (Bootstrapping)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Filter Examples" from_port="original" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Append" to_port="example set 1"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Sample" to_port="example set input"/>
          <connect from_op="Sample" from_port="example set output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>

      <!-- unfold=true flattens any collection arriving at the input, so the
           next operator receives a flat collection of individual models
           instead of a collection nested inside another collection. -->
      <operator activated="true" class="collect" compatibility="7.5.003" expanded="true" height="82" name="Collect" width="90" x="380" y="187">
        <parameter key="unfold" value="true"/>
      </operator>

      <!-- Stage 2: score the test data once per model. -->
      <operator activated="true" class="loop_collection" compatibility="7.5.003" expanded="true" height="82" name="Loop Collection" width="90" x="581" y="187">
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="7.5.003" expanded="true" height="68" name="Retrieve test" width="90" x="112" y="187">
            <parameter key="repository_entry" value="test"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="7.5.003" expanded="true" height="82" name="Apply Model" width="90" x="246" y="136">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="confidence(positive)|Id"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- The "single" port already carries the current element of the
               collection (one model per iteration), so it feeds Apply Model
               directly. Running it through a Select operator is wrong here:
               Select expects a collection, not a single model. -->
          <connect from_port="single" to_op="Apply Model" to_port="model"/>
          <connect from_op="Retrieve test" from_port="output" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_single" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>

      <connect from_op="Retrieve training" from_port="output" to_op="Loop" to_port="input 1"/>
      <connect from_op="Loop" from_port="output 1" to_op="Collect" to_port="input 1"/>
      <connect from_op="Collect" from_port="collection" to_op="Loop Collection" to_port="collection"/>
      <connect from_op="Loop Collection" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Answers

  • earmijo
    earmijo New Altair Community Member

    Forgot to attach the training set.

  • MartinLiebig
    MartinLiebig
    Altair Employee

    Hey,

     

    Can't you just use a bagging operator and balance the classes inside (e.g. with Generate Weight (Stratification))?

     

    Best,

    Martin

  • Telcontar120
    Telcontar120 New Altair Community Member

    Alternatively, when you create the original models, don't store them in a collection, but rather as separate models in the repository.  Then you can simply use the "Vote" ensemble operator or similar to get your final prediction.