How to store performance metrics from each fold of a 10-fold cross validation?

Hi experts,
I need help storing the performance metrics calculated for each fold of a 10-fold cross validation. I used the Log operator, but it only shows three performance values with standard deviation.
Please help.
Thank you,
Archana
Best Answer
-
Hi Archana,
I think there are several ways to do this. A quick one is to store each fold's result in the repo under a different name. Attached is a process doing it. It uses the macro %{a}, which always holds the number of times the operator has been executed. Be careful if you use this in a loop/optimize. You need to store the process in the repo in order to make it work.
~Martin
<?xml version="1.0" encoding="UTF-8"?><process version="7.3.001">
<!-- Process context: no inputs, outputs or macros are declared. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!-- Root operator: Golf sample data is fed into a Cross Validation whose
     second inner process stores the performance of every execution. -->
<operator activated="true" class="process" compatibility="7.3.001" expanded="true" name="Process">
  <process expanded="true">
    <!-- Load the Golf sample data set from the repository. -->
    <operator activated="true" class="retrieve" compatibility="7.3.001" expanded="true" height="68" name="Retrieve Golf" width="90" x="112" y="85">
      <parameter key="repository_entry" value="//Samples/data/Golf"/>
    </operator>

    <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation" width="90" x="246" y="85">
      <!-- First inner process: a Decision Tree is built from the
           "training set" port and handed to the "model" port. -->
      <process expanded="true">
        <operator activated="true" class="parallel_decision_tree" compatibility="7.3.001" expanded="true" height="82" name="Decision Tree" width="90" x="246" y="34"/>
        <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
        <connect from_op="Decision Tree" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>

      <!-- Second inner process: the model is applied to the "test set",
           the performance is computed, stored, and passed on. -->
      <process expanded="true">
        <operator activated="true" class="apply_model" compatibility="7.3.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator activated="true" class="performance_classification" compatibility="7.3.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34">
          <list key="class_weights"/>
        </operator>
        <!-- The entry name contains the macro %{a}, so each execution of
             Store writes to a differently named entry. The path is
             relative (../results/), which presumably is why the process
             itself has to be saved in the repository first (confirm). -->
        <operator activated="true" class="store" compatibility="7.3.001" expanded="true" height="68" name="Store" width="90" x="447" y="34">
          <parameter key="repository_entry" value="../results/performance fold %{a}"/>
        </operator>
        <connect from_port="model" to_op="Apply Model" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
        <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
        <connect from_op="Performance" from_port="performance" to_op="Store" to_port="input"/>
        <!-- Store passes the performance through to the validation's
             "performance 1" port. -->
        <connect from_op="Store" from_port="through" to_port="performance 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_test set results" spacing="0"/>
        <portSpacing port="sink_performance 1" spacing="0"/>
        <portSpacing port="sink_performance 2" spacing="0"/>
      </process>
    </operator>

    <!-- Only the data connection is wired at this level; no Cross
         Validation output is connected to a result port. -->
    <connect from_op="Retrieve Golf" from_port="output" to_op="Cross Validation" to_port="example set"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
  </process>
</operator>
</process>
1
Answers
-
Hi Archana,
I think there are several ways to do this. A quick one is to store each fold's result in the repo under a different name. Attached is a process doing it. It uses the macro %{a}, which always holds the number of times the operator has been executed. Be careful if you use this in a loop/optimize. You need to store the process in the repo in order to make it work.
~Martin
<?xml version="1.0" encoding="UTF-8"?><process version="7.3.001">
<!-- Process context: no inputs, outputs or macros are declared. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!-- Root operator: Golf sample data is fed into a Cross Validation whose
     second inner process stores the performance of every execution. -->
<operator activated="true" class="process" compatibility="7.3.001" expanded="true" name="Process">
  <process expanded="true">
    <!-- Load the Golf sample data set from the repository. -->
    <operator activated="true" class="retrieve" compatibility="7.3.001" expanded="true" height="68" name="Retrieve Golf" width="90" x="112" y="85">
      <parameter key="repository_entry" value="//Samples/data/Golf"/>
    </operator>

    <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation" width="90" x="246" y="85">
      <!-- First inner process: a Decision Tree is built from the
           "training set" port and handed to the "model" port. -->
      <process expanded="true">
        <operator activated="true" class="parallel_decision_tree" compatibility="7.3.001" expanded="true" height="82" name="Decision Tree" width="90" x="246" y="34"/>
        <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
        <connect from_op="Decision Tree" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>

      <!-- Second inner process: the model is applied to the "test set",
           the performance is computed, stored, and passed on. -->
      <process expanded="true">
        <operator activated="true" class="apply_model" compatibility="7.3.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator activated="true" class="performance_classification" compatibility="7.3.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34">
          <list key="class_weights"/>
        </operator>
        <!-- The entry name contains the macro %{a}, so each execution of
             Store writes to a differently named entry. The path is
             relative (../results/), which presumably is why the process
             itself has to be saved in the repository first (confirm). -->
        <operator activated="true" class="store" compatibility="7.3.001" expanded="true" height="68" name="Store" width="90" x="447" y="34">
          <parameter key="repository_entry" value="../results/performance fold %{a}"/>
        </operator>
        <connect from_port="model" to_op="Apply Model" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
        <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
        <connect from_op="Performance" from_port="performance" to_op="Store" to_port="input"/>
        <!-- Store passes the performance through to the validation's
             "performance 1" port. -->
        <connect from_op="Store" from_port="through" to_port="performance 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_test set results" spacing="0"/>
        <portSpacing port="sink_performance 1" spacing="0"/>
        <portSpacing port="sink_performance 2" spacing="0"/>
      </process>
    </operator>

    <!-- Only the data connection is wired at this level; no Cross
         Validation output is connected to a result port. -->
    <connect from_op="Retrieve Golf" from_port="output" to_op="Cross Validation" to_port="example set"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
  </process>
</operator>
</process>
1 -
Hey,
it should create a new folder named results containing the performance vectors. Did you check for this?
~Martin
0 -
And me again,
alternatively, you can do this another way by misusing the App Objects. Just have a look.
~Martin
<?xml version="1.0" encoding="UTF-8"?><process version="7.3.001">
<!-- Process context: no inputs, outputs or macros are declared. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!-- Root operator: Cross Validation on the Golf sample data. Each
     execution of the second inner process adds its performance to an
     app object named "perf", which is recalled after the validation. -->
<operator activated="true" class="process" compatibility="7.3.001" expanded="true" name="Process">
  <process expanded="true">
    <!-- Load the Golf sample data set from the repository. -->
    <operator activated="true" class="retrieve" compatibility="7.3.001" expanded="true" height="68" name="Retrieve Golf" width="90" x="112" y="85">
      <parameter key="repository_entry" value="//Samples/data/Golf"/>
    </operator>

    <operator activated="true" class="concurrency:cross_validation" compatibility="7.3.001" expanded="true" height="145" name="Cross Validation" width="90" x="246" y="85">
      <!-- First inner process: a Decision Tree is built from the
           "training set" port and handed to the "model" port. -->
      <process expanded="true">
        <operator activated="true" class="parallel_decision_tree" compatibility="7.3.001" expanded="true" height="82" name="Decision Tree" width="90" x="246" y="34"/>
        <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
        <connect from_op="Decision Tree" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>

      <!-- Second inner process: apply the model, compute the performance,
           then split it between the collector and the validation output. -->
      <process expanded="true">
        <operator activated="true" class="apply_model" compatibility="7.3.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator activated="true" class="performance_classification" compatibility="7.3.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34">
          <list key="class_weights"/>
        </operator>
        <!-- Deactivated and not connected to anything in this process. -->
        <operator activated="false" class="store" compatibility="7.3.001" expanded="true" height="68" name="Store" width="90" x="380" y="289">
          <parameter key="repository_entry" value="../results/performance fold %{a}"/>
        </operator>
        <!-- Output 1 goes to Handle Exception, output 2 to the
             validation's "performance 1" port. -->
        <operator activated="true" class="multiply" compatibility="7.3.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="85"/>

        <operator activated="true" class="handle_exception" compatibility="7.3.001" expanded="true" height="82" name="Handle Exception" width="90" x="581" y="34">
          <!-- First subprocess: recall the app object "perf", collect it
               together with the incoming performance, and publish the
               collection under "perf" again. -->
          <process expanded="true">
            <!-- Deactivated and not connected to anything. -->
            <operator activated="false" class="remember" compatibility="7.3.001" expanded="true" height="68" name="Remember" width="90" x="447" y="34">
              <parameter key="name" value="perf"/>
              <parameter key="io_object" value="IOObjectCollection"/>
            </operator>
            <operator activated="true" class="recall_from_app" compatibility="7.3.001" expanded="true" height="82" name="Recall from App" width="90" x="45" y="85">
              <parameter key="name" value="perf"/>
            </operator>
            <operator activated="true" class="collect" compatibility="7.3.001" expanded="true" height="103" name="Collect" width="90" x="179" y="34">
              <parameter key="unfold" value="true"/>
            </operator>
            <operator activated="true" class="publish_to_app" compatibility="7.3.001" expanded="true" height="68" name="Publish to App" width="90" x="313" y="34">
              <parameter key="name" value="perf"/>
            </operator>
            <connect from_port="in 1" to_op="Collect" to_port="input 1"/>
            <connect from_op="Recall from App" from_port="result" to_op="Collect" to_port="input 2"/>
            <connect from_op="Collect" from_port="collection" to_op="Publish to App" to_port="store"/>
            <connect from_op="Publish to App" from_port="stored" to_port="out 1"/>
            <portSpacing port="source_in 1" spacing="0"/>
            <portSpacing port="source_in 2" spacing="0"/>
            <portSpacing port="sink_out 1" spacing="0"/>
            <portSpacing port="sink_out 2" spacing="0"/>
          </process>

          <!-- Second subprocess: publish the incoming performance under
               "perf" as is. Presumably this is the fallback that runs
               when the first subprocess fails, e.g. when nothing named
               "perf" has been published yet (TODO confirm). -->
          <process expanded="true">
            <operator activated="true" class="publish_to_app" compatibility="7.3.001" expanded="true" height="68" name="Publish to App (2)" width="90" x="112" y="34">
              <parameter key="name" value="perf"/>
            </operator>
            <connect from_port="in 1" to_op="Publish to App (2)" to_port="store"/>
            <connect from_op="Publish to App (2)" from_port="stored" to_port="out 1"/>
            <portSpacing port="source_in 1" spacing="0"/>
            <portSpacing port="source_in 2" spacing="0"/>
            <portSpacing port="sink_out 1" spacing="0"/>
            <portSpacing port="sink_out 2" spacing="0"/>
          </process>
        </operator>

        <connect from_port="model" to_op="Apply Model" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
        <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
        <connect from_op="Performance" from_port="performance" to_op="Multiply" to_port="input"/>
        <connect from_op="Multiply" from_port="output 1" to_op="Handle Exception" to_port="in 1"/>
        <connect from_op="Multiply" from_port="output 2" to_port="performance 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_test set results" spacing="0"/>
        <portSpacing port="sink_performance 1" spacing="0"/>
        <portSpacing port="sink_performance 2" spacing="0"/>
      </process>
    </operator>

    <!-- Recall the "perf" app object, removing it from the app
         (remove_from_app is true), and deliver it as "result 1". -->
    <operator activated="true" class="recall_from_app" compatibility="7.3.001" expanded="true" height="103" name="Recall from App (2)" width="90" x="380" y="136">
      <parameter key="name" value="perf"/>
      <parameter key="remove_from_app" value="true"/>
    </operator>

    <connect from_op="Retrieve Golf" from_port="output" to_op="Cross Validation" to_port="example set"/>
    <!-- The validation's performance is wired into the recall's
         "through 1" port; presumably this makes the recall run only
         after the validation has finished (TODO confirm). -->
    <connect from_op="Cross Validation" from_port="performance 1" to_op="Recall from App (2)" to_port="through 1"/>
    <connect from_op="Recall from App (2)" from_port="result" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
0 -
Hi,
Problem solved; it now creates the results folder and stores all the values. What if I need other performance metrics, such as precision or specificity?
Thank you
0