🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

How to plot train accuracy vs. test accuracy?

User: "Fred12"
New Altair Community Member
Updated by Jocelyn

hi,

 

Currently I am testing the configuration parameters C and gamma of an SVM that sits inside a cross-validation (X-Val), which in turn sits inside an Optimize Parameters (Grid) operator...

the process looks like this:

<?xml version="1.0" encoding="UTF-8"?><process version="7.2.000">
<!-- RapidMiner process export (process version 7.2.000). The context below declares
     no process inputs, outputs or macros. -->
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root operator: its inner <process> contains every operator and connection of the workflow. -->
<operator activated="true" class="process" compatibility="7.2.000" expanded="true" name="Process">
<process expanded="true">
<!-- Data preparation chain; all four operators are deactivated (activated="false").
     Per the top-level connections: Retrieve feeds Split Data, partition 1 goes to
     "Write Excel" (trainData.xlsx) and partition 2 to "Write Excel (2)" (testData.xlsx). -->
<operator activated="false" class="retrieve" compatibility="7.2.000" expanded="true"
          height="68" name="Retrieve" width="90" x="45" y="238">
  <parameter key="repository_entry" value="//RapidMiner_Nils/Nils/Master/Data/Master Excelliste_Gefügebezeichnung_3 klassen"/>
</operator>
<!-- Two partitions with ratio 0.5 each, stratified sampling, local random seed enabled. -->
<operator activated="false" class="split_data" compatibility="7.2.000" expanded="true"
          height="103" name="Split Data" width="90" x="179" y="238">
  <enumeration key="partitions">
    <parameter key="ratio" value="0.5"/>
    <parameter key="ratio" value="0.5"/>
  </enumeration>
  <parameter key="sampling_type" value="stratified sampling"/>
  <parameter key="use_local_random_seed" value="true"/>
</operator>
<!-- Writes the second partition (test half) to a local Excel file. -->
<operator activated="false" class="write_excel" compatibility="7.2.000" expanded="true"
          height="82" name="Write Excel (2)" width="90" x="45" y="442">
  <parameter key="excel_file" value="C:\Users\Admin\Desktop\testData.xlsx"/>
</operator>
<!-- Writes the first partition (training half) to a local Excel file. -->
<operator activated="false" class="write_excel" compatibility="7.2.000" expanded="true"
          height="82" name="Write Excel" width="90" x="45" y="136">
  <parameter key="excel_file" value="C:\Users\Admin\Desktop\trainData.xlsx"/>
</operator>
<!-- Test branch: load the stored test set from the repository and normalize it with
     the operator's default parameters (none are set here). -->
<operator activated="true" class="retrieve" compatibility="7.2.000" expanded="true"
          height="68" name="Retrieve testData" width="90" x="179" y="391">
  <parameter key="repository_entry" value="//RapidMiner_Nils/repositories/Local Repository/data/test und training/testData"/>
</operator>
<!-- NOTE(review): this is a Normalize operator separate from the one on the training
     branch, so the test set is presumably scaled with its own statistics rather than
     with the training set's. Confirm whether the training Normalize's preprocessing
     model should be applied to the test data instead. -->
<operator activated="true" class="normalize" compatibility="7.2.000" expanded="true"
          height="103" name="Normalize Test Data" width="90" x="313" y="391"/>
<!-- Training branch: load the stored training set, normalize it, log the Normalize
     settings, then fan the data out to several consumers. -->
<operator activated="true" class="retrieve" compatibility="7.2.000" expanded="true"
          height="68" name="Retrieve trainData" width="90" x="179" y="136">
  <parameter key="repository_entry" value="//RapidMiner_Nils/repositories/Local Repository/data/test und training/trainData"/>
</operator>
<!-- No parameters set, so the operator's defaults apply. -->
<operator activated="true" class="normalize" compatibility="7.2.000" expanded="true"
          height="103" name="Normalize" width="90" x="45" y="34"/>
<!-- Records two parameters of the Normalize operator ("attributes" and "value_type"). -->
<operator activated="true" class="log" compatibility="7.2.000" expanded="true"
          height="82" name="Log Normaize Parameter" width="90" x="179" y="34">
  <list key="log">
    <parameter key="attributes" value="operator.Normalize.parameter.attributes"/>
    <parameter key="value type" value="operator.Normalize.parameter.value_type"/>
  </list>
</operator>
<!-- Per the top-level connections, its outputs 1 to 3 feed "SVM Trainings Data",
     "Apply Model (2)" and "Optimize Parameters (Grid)". -->
<operator activated="true" class="multiply" compatibility="7.2.000" expanded="true"
          height="124" name="Multiply Trainings Data" width="90" x="313" y="34"/>
<!-- Grid search over the parameters C and gamma of the inner operator named "SVM".
     Each grid point is evaluated by the nested cross-validation; a Log operator then
     records the parameters and performance values before the performance is handed
     to this operator's "performance" sink. -->
<operator activated="true" class="optimize_parameters_grid" compatibility="7.2.000" expanded="true"
          height="103" name="Optimize Parameters (Grid)" width="90" x="648" y="34">
  <list key="parameters">
    <parameter key="SVM.C" value="[1000;300000;10;linear]"/>
    <parameter key="SVM.gamma" value="[0.001;1;10;linear]"/>
  </list>
  <process expanded="true">
    <!-- Cross-validation with number_of_validations = 5. -->
    <operator activated="true" class="x_validation" compatibility="7.2.000" expanded="true"
              height="124" name="Validation" width="90" x="313" y="34">
      <parameter key="number_of_validations" value="5"/>
      <!-- Training subprocess: fit the LibSVM learner on the training input. -->
      <process expanded="true">
        <!-- The gamma and C stored here are the tuned parameters listed in the grid above. -->
        <operator activated="true" class="support_vector_machine_libsvm" compatibility="7.2.000" expanded="true"
                  height="82" name="SVM" width="90" x="246" y="34">
          <parameter key="gamma" value="1.0"/>
          <parameter key="C" value="300000.0"/>
          <list key="class_weights"/>
        </operator>
        <connect from_port="training" to_op="SVM" to_port="training set"/>
        <connect from_op="SVM" from_port="model" to_port="model"/>
        <portSpacing port="source_training" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>
      <!-- Testing subprocess: apply the model to the test set and measure
           classification performance. -->
      <process expanded="true">
        <operator activated="true" class="apply_model" compatibility="7.2.000" expanded="true"
                  height="82" name="Apply Model" width="90" x="112" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator activated="true" class="performance_classification" compatibility="7.2.000" expanded="true"
                  height="82" name="Performance" width="90" x="313" y="34">
          <list key="class_weights"/>
        </operator>
        <connect from_port="model" to_op="Apply Model" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
        <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
        <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_averagable 1" spacing="0"/>
        <portSpacing port="sink_averagable 2" spacing="0"/>
      </process>
    </operator>
    <!-- Log entries: the SVM parameters plus values of "Validation" and "Performance".
         NOTE(review): the TESTPERF_* and TRAINPERF_* entries reference operators that
         are defined outside this operator and later in the document; presumably they
         have not been executed for the current grid point when this Log runs, so
         those columns may not hold meaningful values. Confirm. -->
    <operator activated="true" class="log" compatibility="7.2.000" expanded="true"
              height="82" name="Log" width="90" x="648" y="85">
      <list key="log">
        <parameter key="C" value="operator.SVM.parameter.C"/>
        <parameter key="gamma" value="operator.SVM.parameter.gamma"/>
        <parameter key="XVAL_Performance" value="operator.Validation.value.performance"/>
        <parameter key="XVAL_iteration" value="operator.Validation.value.iteration"/>
        <parameter key="XVAL_time" value="operator.Validation.value.time"/>
        <parameter key="Perf_acc" value="operator.Performance.value.accuracy"/>
        <parameter key="Perf_kappa" value="operator.Performance.value.kappa"/>
        <parameter key="Perf_time" value="operator.Performance.value.cpu-time"/>
        <parameter key="TESTPERF_acc" value="operator.TESTPERF.value.accuracy"/>
        <parameter key="TESTPERF_kappa" value="operator.TESTPERF.value.kappa"/>
        <parameter key="TRAINPERF_acc" value="operator.TRAINPERF.value.accuracy"/>
        <parameter key="TRAINPERF_kappa" value="operator.TRAINPERF.value.kappa"/>
        <parameter key="TESTPERF_time" value="operator.TESTPERF.value.time"/>
        <parameter key="TRAINPERF_time" value="operator.TRAINPERF.value.time"/>
      </list>
    </operator>
    <!-- Wiring: input 1 goes to Validation, its averaged performance passes through
         Log and on to the "performance" sink. -->
    <connect from_port="input 1" to_op="Validation" to_port="training"/>
    <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
    <connect from_op="Log" from_port="through 1" to_port="performance"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_performance" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
  </process>
</operator>
<!-- Receives the parameter set produced by the optimizer (see the top-level
     connections); the name map redirects parameters of "SVM" to the operator
     "SVM Trainings Data". -->
<operator activated="true" class="set_parameters" compatibility="7.2.000" expanded="true"
          height="82" name="Set Parameters" width="90" x="849" y="85">
  <list key="name_map">
    <parameter key="SVM" value="SVM Trainings Data"/>
  </list>
</operator>
<!-- Final LibSVM learner, trained on the training data outside the grid search.
     Presumably the stored gamma and C below are replaced at run time by
     "Set Parameters"; confirm against the execution order. -->
<operator activated="true" class="support_vector_machine_libsvm" compatibility="7.2.000" expanded="true"
          height="82" name="SVM Trainings Data" width="90" x="581" y="187">
  <parameter key="gamma" value="0.001"/>
  <parameter key="C" value="270100.0"/>
  <list key="class_weights"/>
</operator>
<!-- Evaluation of the final model. Per the top-level connections, the model is
     duplicated and applied once to the training data (Apply Model (2), then TRAINPERF)
     and once to the normalized test data (Apply Model (3), then TESTPERF). -->
<operator activated="true" class="multiply" compatibility="7.2.000" expanded="true"
          height="103" name="Multiply Model" width="90" x="782" y="187"/>
<operator activated="true" class="apply_model" compatibility="7.2.000" expanded="true"
          height="82" name="Apply Model (2)" width="90" x="447" y="289">
  <list key="application_parameters"/>
</operator>
<!-- Training-set performance; classification_error is enabled explicitly. -->
<operator activated="true" class="performance_classification" compatibility="7.2.000" expanded="true"
          height="82" name="TRAINPERF" width="90" x="715" y="340">
  <parameter key="classification_error" value="true"/>
  <list key="class_weights"/>
</operator>
<operator activated="true" class="apply_model" compatibility="7.2.000" expanded="true"
          height="82" name="Apply Model (3)" width="90" x="581" y="442">
  <list key="application_parameters"/>
</operator>
<!-- Test-set performance; classification_error is enabled explicitly. -->
<operator activated="true" class="performance_classification" compatibility="7.2.000" expanded="true"
          height="82" name="TESTPERF" width="90" x="715" y="442">
  <parameter key="classification_error" value="true"/>
  <list key="class_weights"/>
</operator>
<!-- Logs the performance of the final model on the training and the test data.
     This operator is wired to TRAINPERF (through 1) and TESTPERF (through 2), so it
     logs the values of those two operators. The earlier entries pointed at
     "Performance", the operator inside the cross-validation, which does not enable
     classification_error; TRAINPERF and TESTPERF both do. -->
<operator activated="true" class="log" compatibility="7.2.000" expanded="true"
          height="103" name="LOG ALL" width="90" x="849" y="340">
  <list key="log">
    <parameter key="train accuracy" value="operator.TRAINPERF.value.accuracy"/>
    <parameter key="train classification error" value="operator.TRAINPERF.value.classification_error"/>
    <parameter key="test accuracy" value="operator.TESTPERF.value.accuracy"/>
    <parameter key="test classification error" value="operator.TESTPERF.value.classification_error"/>
  </list>
</operator>
<!-- Deactivated data preparation: Retrieve, Split Data, then the two Excel writers. -->
<connect from_op="Retrieve" from_port="output" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Write Excel" to_port="input"/>
<connect from_op="Split Data" from_port="partition 2" to_op="Write Excel (2)" to_port="input"/>
<!-- Test branch: the normalized test data is scored by Apply Model (3). -->
<connect from_op="Retrieve testData" from_port="output" to_op="Normalize Test Data" to_port="example set input"/>
<connect from_op="Normalize Test Data" from_port="example set output" to_op="Apply Model (3)" to_port="unlabelled data"/>
<!-- Training branch: normalize, log, then fan out to the final SVM, the training-set
     scoring and the grid search. -->
<connect from_op="Retrieve trainData" from_port="output" to_op="Normalize" to_port="example set input"/>
<connect from_op="Normalize" from_port="example set output" to_op="Log Normaize Parameter" to_port="through 1"/>
<connect from_op="Log Normaize Parameter" from_port="through 1" to_op="Multiply Trainings Data" to_port="input"/>
<connect from_op="Multiply Trainings Data" from_port="output 1" to_op="SVM Trainings Data" to_port="training set"/>
<connect from_op="Multiply Trainings Data" from_port="output 2" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Multiply Trainings Data" from_port="output 3" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<!-- Grid-search outputs: performance goes to result 1; the parameter set goes
     through Set Parameters to result 3. -->
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_op="Set Parameters" to_port="parameter set"/>
<connect from_op="Set Parameters" from_port="parameter set" to_port="result 3"/>
<!-- Final model: duplicated, applied to the training and test data, and evaluated. -->
<connect from_op="SVM Trainings Data" from_port="model" to_op="Multiply Model" to_port="input"/>
<connect from_op="Multiply Model" from_port="output 1" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Multiply Model" from_port="output 2" to_op="Apply Model (3)" to_port="model"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="TRAINPERF" to_port="labelled data"/>
<connect from_op="TRAINPERF" from_port="performance" to_op="LOG ALL" to_port="through 1"/>
<connect from_op="Apply Model (3)" from_port="labelled data" to_op="TESTPERF" to_port="labelled data"/>
<connect from_op="TESTPERF" from_port="performance" to_op="LOG ALL" to_port="through 2"/>
<!-- Only "through 1" of LOG ALL (the TRAINPERF result) is delivered as result 2;
     no connection from "through 2" (the TESTPERF result) appears in this list. -->
<connect from_op="LOG ALL" from_port="through 1" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>

What I would really like to achieve is to go through the different C and gamma values with their respective performance, BOTH FOR TESTING AND TRAINING VALIDATION (+performance), and plot them together with C, gamma and their performance in a single graph. A series graph would probably be best.

The thing is, I want to see when OVERFITTING occurs, i.e. the point at which training accuracy keeps going up while testing performance starts to decrease, and at which C and gamma configuration that happens.

if I could also plot X-Val performance together with the others in the graph, that would be perfect..

Is this somehow realisable in RapidMiner? Maybe with a Series Multiple plot. My problem is that, in my current configuration, only the best C and gamma are set for the testing and training model. How can I apply C and gamma for every possible grid combination to the test/training run, so that the test/train validation and performance are also executed for each configuration?

Find more posts tagged with