Breakpoint
rfeigel
New Altair Community Member
I've set a breakpoint after an Apply Model operator in a decision tree process that uses cross validation. After the breakpoint executes correctly, the process won't proceed; it just keeps returning to the same breakpoint. I don't have this problem with processes that have no subprocesses. The XML follows. Any ideas?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.3.015).
  Flow: retrieve the training data, assign attribute roles, cross-validate a
  decision tree, and write the model delivered by the validation to a file.
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.015"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Read the training example set from the repository entry below. -->
      <operator activated="true"
                class="retrieve"
                compatibility="5.3.015"
                expanded="true"
                height="60"
                name="eReader Train"
                width="90"
                x="45"
                y="30">
        <parameter key="repository_entry"
                   value="//NewLocalRepository/Data Mining for the Masses/Chap 10/Chap 10 Training Data"/>
      </operator>

      <!-- Give User_ID the id role and eReader_Adoption the label role. -->
      <operator activated="true"
                class="set_role"
                compatibility="5.3.015"
                expanded="true"
                height="76"
                name="Set Role"
                width="90"
                x="179"
                y="30">
        <parameter key="attribute_name" value="User_ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles">
          <parameter key="eReader_Adoption" value="label"/>
          <!-- NOTE(review): User_ID already receives the id role through
               attribute_name/target_role above; this entry repeats it. -->
          <parameter key="User_ID" value="id"/>
        </list>
      </operator>

      <!--
        Cross validation with two nested subprocesses: the first trains a
        model, the second applies it and measures performance. No parameters
        are set on this operator here, so its defaults apply.
      -->
      <operator activated="true"
                class="x_validation"
                compatibility="5.3.015"
                expanded="true"
                height="112"
                name="Validation"
                width="90"
                x="313"
                y="30">

        <!-- Training subprocess: decision tree using the gini_index criterion. -->
        <process expanded="true">
          <operator activated="true"
                    class="decision_tree"
                    compatibility="5.3.015"
                    expanded="true"
                    height="76"
                    name="Decision Tree"
                    width="90"
                    x="45"
                    y="30">
            <parameter key="criterion" value="gini_index"/>
          </operator>
          <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing subprocess: apply the model to the test set, then score it. -->
        <process expanded="true">
          <!-- breakpoints="after": execution pauses each time this operator
               finishes, i.e. every time this subprocess is run. -->
          <operator activated="true"
                    breakpoints="after"
                    class="apply_model"
                    compatibility="5.3.015"
                    expanded="true"
                    height="76"
                    name="Apply Model"
                    width="90"
                    x="45"
                    y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true"
                    class="performance_classification"
                    compatibility="5.3.015"
                    expanded="true"
                    height="76"
                    name="Performance"
                    width="90"
                    x="179"
                    y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Write the model received from Validation to model_file as XML. -->
      <operator activated="true"
                class="write_model"
                compatibility="5.3.015"
                expanded="true"
                height="60"
                name="Write Model"
                width="90"
                x="581"
                y="120">
        <parameter key="model_file" value="F:\Data\RapidMiner 5.0\RapidMiner Book\Chapter 13 Model"/>
        <parameter key="output_type" value="XML"/>
      </operator>

      <!-- Top-level wiring: Retrieve, Set Role, Validation, Write Model. -->
      <connect from_op="eReader Train" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Write Model" to_port="input"/>

      <!-- Process results: example set, averaged performance, written model. -->
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Write Model" from_port="through" to_port="result 3"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.3.015).
  Flow: retrieve the training data, assign attribute roles, cross-validate a
  decision tree, and write the model delivered by the validation to a file.
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.015"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Read the training example set from the repository entry below. -->
      <operator activated="true"
                class="retrieve"
                compatibility="5.3.015"
                expanded="true"
                height="60"
                name="eReader Train"
                width="90"
                x="45"
                y="30">
        <parameter key="repository_entry"
                   value="//NewLocalRepository/Data Mining for the Masses/Chap 10/Chap 10 Training Data"/>
      </operator>

      <!-- Give User_ID the id role and eReader_Adoption the label role. -->
      <operator activated="true"
                class="set_role"
                compatibility="5.3.015"
                expanded="true"
                height="76"
                name="Set Role"
                width="90"
                x="179"
                y="30">
        <parameter key="attribute_name" value="User_ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles">
          <parameter key="eReader_Adoption" value="label"/>
          <!-- NOTE(review): User_ID already receives the id role through
               attribute_name/target_role above; this entry repeats it. -->
          <parameter key="User_ID" value="id"/>
        </list>
      </operator>

      <!--
        Cross validation with two nested subprocesses: the first trains a
        model, the second applies it and measures performance. No parameters
        are set on this operator here, so its defaults apply.
      -->
      <operator activated="true"
                class="x_validation"
                compatibility="5.3.015"
                expanded="true"
                height="112"
                name="Validation"
                width="90"
                x="313"
                y="30">

        <!-- Training subprocess: decision tree using the gini_index criterion. -->
        <process expanded="true">
          <operator activated="true"
                    class="decision_tree"
                    compatibility="5.3.015"
                    expanded="true"
                    height="76"
                    name="Decision Tree"
                    width="90"
                    x="45"
                    y="30">
            <parameter key="criterion" value="gini_index"/>
          </operator>
          <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing subprocess: apply the model to the test set, then score it. -->
        <process expanded="true">
          <!-- breakpoints="after": execution pauses each time this operator
               finishes, i.e. every time this subprocess is run. -->
          <operator activated="true"
                    breakpoints="after"
                    class="apply_model"
                    compatibility="5.3.015"
                    expanded="true"
                    height="76"
                    name="Apply Model"
                    width="90"
                    x="45"
                    y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true"
                    class="performance_classification"
                    compatibility="5.3.015"
                    expanded="true"
                    height="76"
                    name="Performance"
                    width="90"
                    x="179"
                    y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Write the model received from Validation to model_file as XML. -->
      <operator activated="true"
                class="write_model"
                compatibility="5.3.015"
                expanded="true"
                height="60"
                name="Write Model"
                width="90"
                x="581"
                y="120">
        <parameter key="model_file" value="F:\Data\RapidMiner 5.0\RapidMiner Book\Chapter 13 Model"/>
        <parameter key="output_type" value="XML"/>
      </operator>

      <!-- Top-level wiring: Retrieve, Set Role, Validation, Write Model. -->
      <connect from_op="eReader Train" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Write Model" to_port="input"/>

      <!-- Process results: example set, averaged performance, written model. -->
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Write Model" from_port="through" to_port="result 3"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
-
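The cross validation operator will execute the subprocess 10 times (or whatever the 'number of validations' parameter is set to), so a breakpoint placed inside that subprocess is reached once per iteration. That is why the breakpoint appears to repeat.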
Each iteration trains a model on a subset of the data and tests it on another subset. The model is discarded but the performance is kept for later. The iterations repeat this for different partitions of the data.
At the end, the performances are averaged. The model produced by the operator is built on all the data and the average performance can be regarded as an estimate of how this model would perform on unseen data.
Andrew0