🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Parameter Optimization - Why do you optimize on test data

User: "SebastianB12"
New Altair Community Member
Updated by Jocelyn

Hi all,

I'm wondering why, in the tutorial process and also in other processes that were pasted here in the forum, parameters are optimized on the test data and not on the training data. If I understand the Optimize Parameters (Grid) operator correctly, it optimizes the performance vector connected at the sink. To me, optimizing on out-of-sample test data is kind of cheating. Sometimes this results in better test performance than training performance (not in this case), which is kind of strange ;).

Could you clarify that for me?

I changed the tutorial process a bit to show you how I would connect everything. But perhaps I just misunderstand the optimization operator.

<?xml version="1.0" encoding="UTF-8"?><process version="7.2.002">
<!--
  Modified copy of the Optimize Parameters (Grid) tutorial process, posted to
  illustrate the question in the surrounding text.
  Overall shape: the Weighting sample data is fed into a grid search over
  SVM.C and SVM.gamma. Inside the grid search the data is split 50/50; one
  partition trains the SVM and the other is held out. The in-sample
  (training) performance is wired to the inner "performance" sink, while the
  out-of-sample performance is only passed along on "result 1".
-->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<!-- Process-level seed. NOTE(review): presumably this is what Split Data draws
     from, since that operator has use_local_random_seed="false"; confirm. -->
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<!-- Loads the repository entry //Samples/data/Weighting; its output is
     connected to "input 1" of the grid search further down. -->
<operator activated="true" class="retrieve" compatibility="7.2.002" expanded="true" height="68" name="Weighting" width="90" x="112" y="30">
<parameter key="repository_entry" value="//Samples/data/Weighting"/>
</operator>
<!-- Grid search wrapping the train/apply/evaluate subprocess below. -->
<operator activated="true" class="optimize_parameters_grid" compatibility="6.0.003" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="313" y="30">
<!-- Each key is "operator name.parameter name" and targets the SVM operator in
     the subprocess. NOTE(review): the value looks like
     [min;max;steps;scale]; confirm against the operator documentation. -->
<list key="parameters">
<parameter key="SVM.C" value="[0.001;100000;10;logarithmic]"/>
<parameter key="SVM.gamma" value="[0.001;1.5;10;logarithmic]"/>
</list>
<parameter key="error_handling" value="fail on error"/>
<process expanded="true">
<!-- Two partitions with ratio 0.5 each, shuffled sampling. According to the
     connect elements below, partition 1 is the held-out set and partition 2
     is the training set. local_random_seed is set but
     use_local_random_seed is false. -->
<operator activated="true" class="split_data" compatibility="7.2.002" expanded="true" height="103" name="Split Data" width="90" x="45" y="30">
<enumeration key="partitions">
<parameter key="ratio" value="0.5"/>
<parameter key="ratio" value="0.5"/>
</enumeration>
<parameter key="sampling_type" value="shuffled sampling"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
</operator>
<!-- C-SVC with an RBF kernel, trained on partition 2. The stored gamma (1.5)
     and C (100000.0) equal the upper bounds of the grid above; presumably
     they are overwritten by the grid search at run time (TODO confirm). -->
<operator activated="true" class="support_vector_machine_libsvm" compatibility="7.2.002" expanded="true" height="82" name="SVM" width="90" x="179" y="255">
<parameter key="svm_type" value="C-SVC"/>
<parameter key="kernel_type" value="rbf"/>
<parameter key="degree" value="3"/>
<parameter key="gamma" value="1.5"/>
<parameter key="coef0" value="0.0"/>
<parameter key="C" value="100000.0"/>
<parameter key="nu" value="0.5"/>
<parameter key="cache_size" value="80"/>
<parameter key="epsilon" value="0.001"/>
<parameter key="p" value="0.1"/>
<list key="class_weights"/>
<parameter key="shrinking" value="true"/>
<parameter key="calculate_confidences" value="false"/>
<parameter key="confidence_for_multiclass" value="true"/>
</operator>
<!-- Duplicates the trained model so it can be applied to both partitions. -->
<operator activated="true" class="multiply" compatibility="7.2.002" expanded="true" height="103" name="Multiply" width="90" x="313" y="120"/>
<!-- Applies the model to partition 1 (the data the SVM was not trained on). -->
<operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="447" y="30">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<!-- Applies the model to the example set coming out of the SVM operator,
     i.e. the training data. -->
<operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="447" y="255">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<!-- Accuracy on the training data (only the accuracy criterion is enabled).
     This is the vector connected to the subprocess "performance" sink. -->
<operator activated="true" class="performance_classification" compatibility="7.2.002" expanded="true" height="82" name="Performance In Sample" width="90" x="581" y="238">
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="false"/>
<parameter key="weighted_mean_recall" value="false"/>
<parameter key="weighted_mean_precision" value="false"/>
<parameter key="spearman_rho" value="false"/>
<parameter key="kendall_tau" value="false"/>
<parameter key="absolute_error" value="false"/>
<parameter key="relative_error" value="false"/>
<parameter key="relative_error_lenient" value="false"/>
<parameter key="relative_error_strict" value="false"/>
<parameter key="normalized_absolute_error" value="false"/>
<parameter key="root_mean_squared_error" value="false"/>
<parameter key="root_relative_squared_error" value="false"/>
<parameter key="squared_error" value="false"/>
<parameter key="correlation" value="false"/>
<parameter key="squared_correlation" value="false"/>
<parameter key="cross-entropy" value="false"/>
<parameter key="margin" value="false"/>
<parameter key="soft_margin_loss" value="false"/>
<parameter key="logistic_loss" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
<list key="class_weights"/>
</operator>
<!-- Accuracy on the held-out partition, configured identically to the
     in-sample operator. It is connected to "result 1" only, not to the
     "performance" sink. -->
<operator activated="true" class="performance_classification" compatibility="7.2.002" expanded="true" height="82" name="Performance Out of Sample" width="90" x="581" y="30">
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="false"/>
<parameter key="weighted_mean_recall" value="false"/>
<parameter key="weighted_mean_precision" value="false"/>
<parameter key="spearman_rho" value="false"/>
<parameter key="kendall_tau" value="false"/>
<parameter key="absolute_error" value="false"/>
<parameter key="relative_error" value="false"/>
<parameter key="relative_error_lenient" value="false"/>
<parameter key="relative_error_strict" value="false"/>
<parameter key="normalized_absolute_error" value="false"/>
<parameter key="root_mean_squared_error" value="false"/>
<parameter key="root_relative_squared_error" value="false"/>
<parameter key="squared_error" value="false"/>
<parameter key="correlation" value="false"/>
<parameter key="squared_correlation" value="false"/>
<parameter key="cross-entropy" value="false"/>
<parameter key="margin" value="false"/>
<parameter key="soft_margin_loss" value="false"/>
<parameter key="logistic_loss" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
<list key="class_weights"/>
</operator>
<!-- Subprocess wiring:
       input 1 goes to Split Data;
       partition 1 goes to Apply Model (held-out data);
       partition 2 goes to SVM (training data);
       the SVM model is duplicated by Multiply for both Apply Model operators;
       the SVM example set output goes to Apply Model (2);
       each Apply Model feeds its own Performance operator. -->
<connect from_port="input 1" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Split Data" from_port="partition 2" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_op="Multiply" to_port="input"/>
<connect from_op="SVM" from_port="exampleSet" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Multiply" from_port="output 1" to_op="Apply Model" to_port="model"/>
<connect from_op="Multiply" from_port="output 2" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance Out of Sample" to_port="labelled data"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance In Sample" to_port="labelled data"/>
<!-- The point of the post: the training (in-sample) performance is what reaches
     the "performance" sink, which the author understands to be the value the
     grid search optimizes. The out-of-sample performance is passed through on
     "result 1" for inspection. -->
<connect from_op="Performance In Sample" from_port="performance" to_port="performance"/>
<connect from_op="Performance Out of Sample" from_port="performance" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
<!-- Deactivated (activated="false") and not referenced by any connect element
     in this process. -->
<operator activated="false" class="legacy:write_parameters" compatibility="7.2.002" expanded="true" height="68" name="Write Parameters" width="90" x="514" y="30">
<parameter key="parameter_file" value="D:\parameters.txt"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
<!-- Top-level wiring: Weighting feeds the grid search. Process results are
     result 1 = the grid search "performance" output, result 2 = its
     "parameter" output, result 3 = its "result 1" output (presumably the
     out-of-sample performance passed through from the subprocess; confirm). -->
<connect from_op="Weighting" from_port="output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 2"/>
<connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_port="result 3"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>

Thanks a lot!

Cheers

Sebastian

Find more posts tagged with