🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

How to plot a Learning Curve for a given model?

User: "meloamaury"
New Altair Community Member
Updated by Jocelyn

Hi all,

 

I am new to RapidMiner Studio and I am trying to figure out how to plot a learning curve for a given model (basically, plot the training and testing performance as a function of the number of examples). In principle, the learning curve would be a good indicator of the robustness of the model (showing the bias-versus-variance trade-off).

I could not find an operator in RapidMiner, or any video examples, that cover this. I tried collecting the values with the Log operator placed after my Cross Validation operator so that I could plot them afterwards, but without success.

Any guidance would be very much appreciated.

 

Best,

Amaury

Find more posts tagged with

Sort by:
1 - 2 of 21
    User: "Thomas_Ott"
    New Altair Community Member
    Accepted Answer

    The learning curve operator has been deprecated since about v7.3. Let me see if I can find a process that creates this for you.

    User: "IngoRM"
    New Altair Community Member
    Accepted Answer

    Hi,

     

    I use the operator "Loop Parameters" for this and the inner "Sample" operator uses ratios between 5% and 100%.  Make sure that you evaluate the model with a cross-validation with a fixed local random seed since otherwise the influence of the data splits might be bigger than that of the additional examples...

     

    Below is a process which you can use as a building block for this.

     

    Hope this helps,

    Ingo

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process (version 7.5.001) that produces the data for a learning curve:
      it sweeps the sample ratio of the inner "Sample" operator, cross-validates a
      Naive Bayes model on each sample, and logs the ratio next to the resulting performance.
    -->
    <process version="7.5.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.5.001" expanded="true" name="Process">
        <process expanded="true">

          <!-- Synthetic classification data: 10000 examples, 2 attributes. -->
          <operator activated="true" class="generate_data" compatibility="7.5.001" expanded="true" height="68" name="Generate Data" width="90" x="45" y="34">
            <parameter key="target_function" value="global and local models classification"/>
            <parameter key="number_examples" value="10000"/>
            <parameter key="number_of_attributes" value="2"/>
          </operator>

          <!-- Add Noise is configured with 20 random attributes; the per-attribute noise list is empty. -->
          <operator activated="true" class="add_noise" compatibility="7.5.001" expanded="true" height="103" name="Add Noise" width="90" x="179" y="34">
            <parameter key="random_attributes" value="20"/>
            <list key="noise"/>
          </operator>

          <!-- Sweep Sample.sample_ratio over the grid [0.05;1.0;19;linear], i.e. from 5% to 100% of the data. -->
          <operator activated="true" class="loop_parameters" compatibility="7.5.001" expanded="true" height="82" name="Loop Parameters" width="90" x="313" y="34">
            <list key="parameters">
              <parameter key="Sample.sample_ratio" value="[0.05;1.0;19;linear]"/>
            </list>
            <process expanded="true">

              <!-- Relative sampling; sample_ratio is the parameter targeted by the enclosing loop. -->
              <operator activated="true" class="sample" compatibility="7.5.001" expanded="true" height="82" name="Sample" width="90" x="45" y="34">
                <parameter key="sample" value="relative"/>
                <parameter key="sample_ratio" value="1.0"/>
                <list key="sample_size_per_class"/>
                <list key="sample_ratio_per_class"/>
                <list key="sample_probability_per_class"/>
              </operator>

              <!--
                A local random seed is enabled so the data splits do not vary between runs;
                otherwise the influence of the splits might outweigh that of the additional examples.
              -->
              <operator activated="true" class="concurrency:cross_validation" compatibility="7.5.001" expanded="true" height="145" name="Cross Validation" width="90" x="179" y="34">
                <parameter key="use_local_random_seed" value="true"/>

                <!-- Training subprocess: fit Naive Bayes on the training set and pass the model on. -->
                <process expanded="true">
                  <operator activated="true" class="naive_bayes" compatibility="7.5.001" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="34"/>
                  <connect from_port="training set" to_op="Naive Bayes" to_port="training set"/>
                  <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
                  <portSpacing port="source_training set" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>

                <!-- Testing subprocess: apply the model to the test set and compute its performance. -->
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="7.5.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance" compatibility="7.5.001" expanded="true" height="82" name="Performance" width="90" x="179" y="34"/>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
                  <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_test set results" spacing="0"/>
                  <portSpacing port="sink_performance 1" spacing="0"/>
                  <portSpacing port="sink_performance 2" spacing="0"/>
                </process>
              </operator>

              <!--
                Log the current sample ratio next to the cross-validation's main performance
                criterion; plotting these two logged columns gives the learning curve.
              -->
              <operator activated="true" class="log" compatibility="7.5.001" expanded="true" height="82" name="Log" width="90" x="313" y="34">
                <list key="log">
                  <parameter key="ratio" value="operator.Sample.parameter.sample_ratio"/>
                  <parameter key="performance" value="operator.Cross Validation.value.performance main criterion"/>
                </list>
              </operator>

              <!-- Inner wiring: loop input, then Sample, Cross Validation, Log, and out to the loop's performance port. -->
              <connect from_port="input 1" to_op="Sample" to_port="example set input"/>
              <connect from_op="Sample" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
              <connect from_op="Cross Validation" from_port="performance 1" to_op="Log" to_port="through 1"/>
              <connect from_op="Log" from_port="through 1" to_port="performance"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_result 1" spacing="0"/>
            </process>
          </operator>

          <!-- Top-level wiring: Generate Data, then Add Noise, then Loop Parameters. -->
          <connect from_op="Generate Data" from_port="output" to_op="Add Noise" to_port="example set input"/>
          <connect from_op="Add Noise" from_port="example set output" to_op="Loop Parameters" to_port="input 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
    </process>