🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

[SOLVED] PCA problem

User: "marcin_blachnik"
New Altair Community Member
Updated by Jocelyn
Hallo

I have a problem with PCA used as a preprocessing operator. In the second iteration of cross-validation (in non-parallel X_validation) the PCA operator freezes. If I use parallel X_validation, it freezes when a given thread calculates this operator for the second time.

Thanks for all answers and help
Marcin B.

Below is the code:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.1.017">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.1.017" expanded="true" name="Process">
   <parameter key="logverbosity" value="init"/>
   <parameter key="random_seed" value="2001"/>
   <parameter key="send_mail" value="never"/>
   <parameter key="notification_email" value=""/>
   <parameter key="process_duration_for_mail" value="30"/>
   <parameter key="encoding" value="SYSTEM"/>
   <parameter key="parallelize_main_process" value="false"/>
   <process expanded="true" height="408" width="472">
     <!-- Loads the example set stored at repository entry //Samples/data/Iris;
          its output feeds "Replace Missing Values". -->
     <operator activated="true" class="retrieve" compatibility="5.1.017" expanded="true" height="60" name="Retrieve" width="90" x="41" y="66">
       <parameter key="repository_entry" value="//Samples/data/Iris"/>
     </operator>
     <!-- Replaces missing values with zero (default="zero") on all attributes
          (attribute_filter_type="all"); special attributes are not included
          and the per-column override list "columns" is empty.
          The result is the training input of the outer "Validation". -->
     <operator activated="true" class="replace_missing_values" compatibility="5.1.017" expanded="true" height="94" name="Replace Missing Values" width="90" x="64" y="186">
       <parameter key="return_preprocessing_model" value="false"/>
       <parameter key="create_view" value="false"/>
       <parameter key="attribute_filter_type" value="all"/>
       <parameter key="attribute" value=""/>
       <parameter key="attributes" value=""/>
       <parameter key="use_except_expression" value="false"/>
       <parameter key="value_type" value="attribute_value"/>
       <parameter key="use_value_type_exception" value="false"/>
       <parameter key="except_value_type" value="time"/>
       <parameter key="block_type" value="attribute_block"/>
       <parameter key="use_block_type_exception" value="false"/>
       <parameter key="except_block_type" value="value_matrix_row_start"/>
       <parameter key="invert_selection" value="false"/>
       <parameter key="include_special_attributes" value="false"/>
       <parameter key="default" value="zero"/>
       <list key="columns"/>
     </operator>
     <operator activated="true" class="x_validation" compatibility="5.1.017" expanded="true" height="112" name="Validation" width="90" x="247" y="30">
       <parameter key="create_complete_model" value="false"/>
       <parameter key="average_performances_only" value="true"/>
       <parameter key="leave_one_out" value="false"/>
       <parameter key="number_of_validations" value="10"/>
       <parameter key="sampling_type" value="stratified sampling"/>
       <parameter key="use_local_random_seed" value="false"/>
       <parameter key="local_random_seed" value="1992"/>
       <parameter key="parallelize_training" value="false"/>
       <parameter key="parallelize_testing" value="false"/>
       <process expanded="true">
         <operator activated="true" class="multiply" compatibility="5.1.017" expanded="true" height="94" name="Multiply" width="90" x="40" y="153"/>
         <operator activated="true" class="optimize_parameters_grid" compatibility="5.1.017" expanded="true" height="94" name="Optimize Parameters (Grid)" width="90" x="145" y="23">
           <list key="parameters">
             <parameter key="SVM Opti.C" value="[0.01;10000;5;logarithmic]"/>
             <parameter key="SVM Opti.gamma" value="[0.01;1;5;linear]"/>
             <parameter key="PCA Opti.variance_threshold" value="[0.95;0.99;5;linear]"/>
           </list>
           <parameter key="parallelize_optimization_process" value="true"/>
           <process expanded="true" height="362" width="487">
             <operator activated="true" class="x_validation" compatibility="5.1.017" expanded="true" height="112" name="Validation Opti" width="90" x="45" y="30">
               <parameter key="create_complete_model" value="false"/>
               <parameter key="average_performances_only" value="true"/>
               <parameter key="leave_one_out" value="false"/>
               <parameter key="number_of_validations" value="10"/>
               <parameter key="sampling_type" value="stratified sampling"/>
               <parameter key="use_local_random_seed" value="false"/>
               <parameter key="local_random_seed" value="1992"/>
               <parameter key="parallelize_training" value="false"/>
               <parameter key="parallelize_testing" value="false"/>
               <process expanded="true">
                 <operator activated="true" class="principal_component_analysis" compatibility="5.1.017" expanded="true" height="94" name="PCA Opti" width="90" x="44" y="101">
                   <parameter key="dimensionality_reduction" value="fixed number"/>
                   <parameter key="variance_threshold" value="0.95"/>
                   <parameter key="number_of_components" value="1"/>
                 </operator>
                 <!-- LibSVM learner (C-SVC, RBF kernel) trained on the PCA output.
                      "C" and "gamma" are the two parameters varied by the enclosing
                      "Optimize Parameters (Grid)" (keys "SVM Opti.C" and
                      "SVM Opti.gamma"); the values written here are presumably just
                      the stored start values. -->
                 <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.1.017" expanded="true" height="76" name="SVM Opti" width="90" x="249" y="73">
                   <parameter key="svm_type" value="C-SVC"/>
                   <parameter key="kernel_type" value="rbf"/>
                   <parameter key="degree" value="3"/>
                   <parameter key="gamma" value="0.01"/>
                   <parameter key="coef0" value="0.0"/>
                   <parameter key="C" value="0.01"/>
                   <parameter key="nu" value="0.5"/>
                   <parameter key="cache_size" value="80"/>
                   <parameter key="epsilon" value="0.0010"/>
                   <parameter key="p" value="0.1"/>
                   <list key="class_weights"/>
                   <parameter key="shrinking" value="true"/>
                   <parameter key="calculate_confidences" value="false"/>
                   <parameter key="confidence_for_multiclass" value="true"/>
                 </operator>
                 <operator activated="true" class="group_models" compatibility="5.1.017" expanded="true" height="94" name="Group Models" width="90" x="190" y="243"/>
                 <connect from_port="training" to_op="PCA Opti" to_port="example set input"/>
                 <connect from_op="PCA Opti" from_port="example set output" to_op="SVM Opti" to_port="training set"/>
                 <connect from_op="PCA Opti" from_port="preprocessing model" to_op="Group Models" to_port="models in 1"/>
                 <connect from_op="SVM Opti" from_port="model" to_op="Group Models" to_port="models in 2"/>
                 <connect from_op="Group Models" from_port="model out" to_port="model"/>
                 <portSpacing port="source_training" spacing="0"/>
                 <portSpacing port="sink_model" spacing="0"/>
                 <portSpacing port="sink_through 1" spacing="0"/>
               </process>
               <process expanded="true">
                 <!-- Applies the grouped model (PCA preprocessing model + SVM) coming
                      from the training side to the test fold of "Validation Opti". -->
                 <operator activated="true" class="apply_model" compatibility="5.1.017" expanded="true" height="76" name="Apply Opti" width="90" x="45" y="30">
                   <list key="application_parameters"/>
                   <parameter key="create_view" value="false"/>
                 </operator>
                 <!-- Evaluates the labelled data produced by "Apply Opti"; the resulting
                      performance is handed to port "averagable 1" of "Validation Opti". -->
                 <operator activated="true" class="performance" compatibility="5.1.017" expanded="true" height="76" name="Performance Opti" width="90" x="201" y="30">
                   <parameter key="use_example_weights" value="true"/>
                 </operator>
                 <connect from_port="model" to_op="Apply Opti" to_port="model"/>
                 <connect from_port="test set" to_op="Apply Opti" to_port="unlabelled data"/>
                 <connect from_op="Apply Opti" from_port="labelled data" to_op="Performance Opti" to_port="labelled data"/>
                 <connect from_op="Performance Opti" from_port="performance" to_port="averagable 1"/>
                 <portSpacing port="source_model" spacing="0"/>
                 <portSpacing port="source_test set" spacing="0"/>
                 <portSpacing port="source_through 1" spacing="0"/>
                 <portSpacing port="sink_averagable 1" spacing="0"/>
                 <portSpacing port="sink_averagable 2" spacing="0"/>
               </process>
             </operator>
             <!-- Logs one row per grid point: the SVM parameters C and gamma, the
                  performance and deviation reported by "Validation Opti", and the
                  PCA variance threshold. The performance itself is passed through
                  unchanged to the optimizer.
                  NOTE(review): "filename" is an absolute path on a Windows
                  Administrator desktop; it presumably has to be adapted before the
                  process is run on another machine. -->
             <operator activated="true" class="log" compatibility="5.1.017" expanded="true" height="76" name="Log Opti" width="90" x="387" y="96">
               <parameter key="filename" value="C:\Users\Administrator\Desktop\SVM_PCA_Opti_Sub"/>
               <list key="log">
                 <parameter key="C" value="operator.SVM Opti.parameter.C"/>
                 <parameter key="gamma" value="operator.SVM Opti.parameter.gamma"/>
                 <parameter key="Acc" value="operator.Validation Opti.value.performance"/>
                 <parameter key="Std" value="operator.Validation Opti.value.deviation"/>
                 <parameter key="PCA_th" value="operator.PCA Opti.parameter.variance_threshold"/>
               </list>
               <parameter key="sorting_type" value="none"/>
               <parameter key="sorting_k" value="100"/>
               <parameter key="persistent" value="false"/>
             </operator>
             <connect from_port="input 1" to_op="Validation Opti" to_port="training"/>
             <connect from_op="Validation Opti" from_port="averagable 1" to_op="Log Opti" to_port="through 1"/>
             <connect from_op="Log Opti" from_port="through 1" to_port="performance"/>
             <portSpacing port="source_input 1" spacing="0"/>
             <portSpacing port="source_input 2" spacing="0"/>
             <portSpacing port="sink_performance" spacing="0"/>
             <portSpacing port="sink_result 1" spacing="0"/>
           </process>
         </operator>
         <!-- Receives the parameter set found by "Optimize Parameters (Grid)" and
              maps it onto the final operators: values recorded for "SVM Opti" go to
              "SVM Final", values recorded for "PCA Opti" go to "PCA Final". -->
         <operator activated="true" class="set_parameters" compatibility="5.1.017" expanded="true" height="60" name="Set Parameters" width="90" x="272" y="37">
           <list key="name_map">
             <parameter key="SVM Opti" value="SVM Final"/>
             <parameter key="PCA Opti" value="PCA Final"/>
           </list>
         </operator>
         <!-- PCA used for the final model of the outer fold, in "keep variance" mode.
              It is the target of the "PCA Opti" entry in the "Set Parameters" name
              map, so variance_threshold below is presumably replaced by the
              optimized value at run time. Its preprocessing model is grouped with
              the SVM model in "Group Models (2)". -->
         <operator activated="true" class="principal_component_analysis" compatibility="5.1.017" expanded="true" height="94" name="PCA Final" width="90" x="83" y="314">
           <parameter key="dimensionality_reduction" value="keep variance"/>
           <parameter key="variance_threshold" value="0.95"/>
           <parameter key="number_of_components" value="1"/>
         </operator>
         <!-- LibSVM learner (C-SVC, RBF kernel) for the final model of the outer
              fold, trained on the output of "PCA Final". It is the target of the
              "SVM Opti" entry in the "Set Parameters" name map.
              NOTE(review): the stored gamma and C look like values left over from
              an earlier run; presumably they are overwritten by "Set Parameters"
              before training. Confirm. -->
         <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.1.017" expanded="true" height="76" name="SVM Final" width="90" x="318" y="199">
           <parameter key="svm_type" value="C-SVC"/>
           <parameter key="kernel_type" value="rbf"/>
           <parameter key="degree" value="3"/>
           <parameter key="gamma" value="0.802"/>
           <parameter key="coef0" value="0.0"/>
           <parameter key="C" value="630.9573444801937"/>
           <parameter key="nu" value="0.5"/>
           <parameter key="cache_size" value="80"/>
           <parameter key="epsilon" value="0.0010"/>
           <parameter key="p" value="0.1"/>
           <list key="class_weights"/>
           <parameter key="shrinking" value="true"/>
           <parameter key="calculate_confidences" value="false"/>
           <parameter key="confidence_for_multiclass" value="true"/>
         </operator>
         <operator activated="true" class="group_models" compatibility="5.1.017" expanded="true" height="94" name="Group Models (2)" width="90" x="442" y="291"/>
         <connect from_port="training" to_op="Multiply" to_port="input"/>
         <connect from_op="Multiply" from_port="output 1" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
         <connect from_op="Multiply" from_port="output 2" to_op="PCA Final" to_port="example set input"/>
         <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_op="Set Parameters" to_port="parameter set"/>
         <connect from_op="PCA Final" from_port="example set output" to_op="SVM Final" to_port="training set"/>
         <connect from_op="PCA Final" from_port="preprocessing model" to_op="Group Models (2)" to_port="models in 1"/>
         <connect from_op="SVM Final" from_port="model" to_op="Group Models (2)" to_port="models in 2"/>
         <connect from_op="Group Models (2)" from_port="model out" to_port="model"/>
         <portSpacing port="source_training" spacing="0"/>
         <portSpacing port="sink_model" spacing="0"/>
         <portSpacing port="sink_through 1" spacing="0"/>
       </process>
       <process expanded="true">
         <!-- Applies the grouped final model (PCA Final + SVM Final) to the test
              fold of the outer "Validation". -->
         <operator activated="true" class="apply_model" compatibility="5.1.017" expanded="true" height="76" name="Apply Final" width="90" x="66" y="37">
           <list key="application_parameters"/>
           <parameter key="create_view" value="false"/>
         </operator>
         <!-- Evaluates the labelled data produced by "Apply Final"; its result is
              passed on through "Log Final" and "Free Memory" to port
              "averagable 1" of the outer "Validation". -->
         <operator activated="true" class="performance" compatibility="5.1.017" expanded="true" height="76" name="Performance Final" width="90" x="198" y="36">
           <parameter key="use_example_weights" value="true"/>
         </operator>
         <!-- Logs, for each outer fold, the parameters in effect on "SVM Final"
              (C, gamma) and "PCA Final" (variance_threshold) together with the
              performance of "Performance Final".
              NOTE(review): "filename" is an absolute path on a Windows
              Administrator desktop; it presumably has to be adapted before the
              process is run on another machine. -->
         <operator activated="true" class="log" compatibility="5.1.017" expanded="true" height="76" name="Log Final" width="90" x="320" y="37">
           <parameter key="filename" value="C:\Users\Administrator\Desktop\svm_log_2"/>
           <list key="log">
             <parameter key="C" value="operator.SVM Final.parameter.C"/>
             <parameter key="Gamma" value="operator.SVM Final.parameter.gamma"/>
             <parameter key="Acc" value="operator.Performance Final.value.performance"/>
             <parameter key="PCA_th" value="operator.PCA Final.parameter.variance_threshold"/>
           </list>
           <parameter key="sorting_type" value="none"/>
           <parameter key="sorting_k" value="100"/>
           <parameter key="persistent" value="false"/>
         </operator>
         <operator activated="true" class="free_memory" compatibility="5.1.017" expanded="true" height="76" name="Free Memory" width="90" x="326" y="180"/>
         <connect from_port="model" to_op="Apply Final" to_port="model"/>
         <connect from_port="test set" to_op="Apply Final" to_port="unlabelled data"/>
         <connect from_op="Apply Final" from_port="labelled data" to_op="Performance Final" to_port="labelled data"/>
         <connect from_op="Performance Final" from_port="performance" to_op="Log Final" to_port="through 1"/>
         <connect from_op="Log Final" from_port="through 1" to_op="Free Memory" to_port="through 1"/>
         <connect from_op="Free Memory" from_port="through 1" to_port="averagable 1"/>
         <portSpacing port="source_model" spacing="0"/>
         <portSpacing port="source_test set" spacing="0"/>
         <portSpacing port="source_through 1" spacing="0"/>
         <portSpacing port="sink_averagable 1" spacing="0"/>
         <portSpacing port="sink_averagable 2" spacing="0"/>
       </process>
     </operator>
     <!-- Runs after the outer "Validation" and passes its averaged result through
          to "result 1".
          NOTE(review): the logged value is read from "Performance Final", not from
          "Validation"; presumably this records only the value of the last outer
          fold rather than the cross-validation average. Confirm this is intended.
          NOTE(review): "filename" is an absolute path on a Windows Administrator
          desktop. -->
     <operator activated="true" class="log" compatibility="5.1.017" expanded="true" height="76" name="Log ACC" width="90" x="372" y="166">
       <parameter key="filename" value="C:\Users\Administrator\Desktop\svm_pca.res"/>
       <list key="log">
         <parameter key="acc" value="operator.Performance Final.value.performance"/>
       </list>
       <parameter key="sorting_type" value="none"/>
       <parameter key="sorting_k" value="100"/>
       <parameter key="persistent" value="false"/>
     </operator>
     <connect from_op="Retrieve" from_port="output" to_op="Replace Missing Values" to_port="example set input"/>
     <connect from_op="Replace Missing Values" from_port="example set output" to_op="Validation" to_port="training"/>
     <connect from_op="Validation" from_port="averagable 1" to_op="Log ACC" to_port="through 1"/>
     <connect from_op="Log ACC" from_port="through 1" to_port="result 1"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
   </process>
 </operator>
</process>