"Loop SubSet (Brute Force) for SVM models"

User: "ltor"
New Altair Community Member
Updated by Jocelyn
Hello,

First, I am a newbie in machine learning, and RapidMiner is fantastic for quickly learning the methods without having to learn to code!
I tried scikit-learn and R, but I prefer RapidMiner because it leaves more time for real work...
In my case, I got a good model (SVM, RBF) for a set of 26 samples (13 in each class, 2 classes) with 15 features by choosing only two features. How did I choose the two good ones? By running a naive Bayes classification, looking at the probability curves of the features, and keeping only those with the most separated curves.
To be more academic, I wanted to try brute force and log some values of the SVM models, such as precision, accuracy, recall and class precision, but the only values I can get are performance1, performance2 and variance from this workflow:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="6.5.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true">
      <!-- Read Excel: loads the raw data from sheet_number 2, cell range A1:IV27, of the
           workbook given in excel_file (an absolute, machine-specific path).
           first_row_as_names is off; instead the annotations list marks row 0 as "Name",
           so that row supplies the attribute names.
           NOTE(review): 27 rows would fit one header row plus the 26 samples described
           in the post; confirm against the workbook. -->
      <operator activated="true" class="read_excel" compatibility="6.5.002" expanded="true" height="60" name="Read Excel" width="90" x="45" y="30">
        <parameter key="excel_file" value="/home/toto/Bureau/Modelisation/Travail/BETA-IL.xls"/>
        <parameter key="sheet_number" value="2"/>
        <parameter key="imported_cell_range" value="A1:IV27"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="date_format" value=""/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="French"/>
        <!-- Per-column metadata, keyed by column index. Each value has four dot-separated
             fields; they appear to be name, a selected/import flag, value type and role.
             TODO confirm the meaning of the second field.
             NOTE(review): column 11 (D-PHON-RH-44) is typed "numeric" while its sibling
             measurement columns are "real"; verify this is intended. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="ID.false.integer.attribute"/>
          <parameter key="1" value="NOM.false.polynominal.attribute"/>
          <parameter key="2" value="Malade.true.polynominal.attribute"/>
          <parameter key="3" value="Age.false.integer.attribute"/>
          <parameter key="4" value="LM-HDMB.false.polynominal.attribute"/>
          <parameter key="5" value="LM.true.polynominal.attribute"/>
          <parameter key="6" value="sexe.false.polynominal.attribute"/>
          <parameter key="7" value="Statut.false.polynominal.attribute"/>
          <parameter key="8" value="D-PHON-LH-22.true.real.attribute"/>
          <parameter key="9" value="D-PHON-RH-22.true.real.attribute"/>
          <parameter key="10" value="D-PHON-LH-44.true.real.attribute"/>
          <parameter key="11" value="D-PHON-RH-44.true.numeric.attribute"/>
          <parameter key="12" value="D-PHON-LH-45.true.real.attribute"/>
          <parameter key="13" value="D-PHON-RH-45.true.real.attribute"/>
          <parameter key="14" value="D-SEM-LH-21.true.real.attribute"/>
          <parameter key="15" value="D-SEM-RH-21.true.real.attribute"/>
          <parameter key="16" value="D-SEM-LH-47.true.real.attribute"/>
          <parameter key="17" value="D-SEM-RH-47.true.real.attribute"/>
          <parameter key="18" value="IL025-44.true.real.attribute"/>
          <parameter key="19" value="IL025-45.true.real.attribute"/>
          <parameter key="20" value="IL025-47.true.real.attribute"/>
          <parameter key="21" value="IL025-22.true.real.attribute"/>
          <parameter key="22" value="IL025-21.true.real.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="true"/>
        <parameter key="datamanagement" value="double_array"/>
      </operator>
      <!-- Select Attributes: keeps an explicit subset (attribute_filter_type = subset):
           the class column "Malade" plus the 15 measurement columns (5 IL025-*,
           4 D-SEM-*, 6 D-PHON-*). invert_selection is false, so the listed names are
           the ones retained. The value_type / block_type parameters below are not the
           active filter because the filter type is "subset". -->
      <operator activated="true" class="select_attributes" compatibility="6.5.002" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="30">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="Malade|IL025-47|IL025-45|IL025-44|IL025-22|IL025-21|D-SEM-RH-47|D-SEM-RH-21|D-SEM-LH-47|D-SEM-LH-21|D-PHON-RH-45|D-PHON-RH-44|D-PHON-RH-22|D-PHON-LH-45|D-PHON-LH-44|D-PHON-LH-22"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Set Role: marks the "Malade" column as the label (the class to predict).
           No additional roles are assigned. -->
      <operator activated="true" class="set_role" compatibility="6.5.002" expanded="true" height="76" name="Set Role (3)" width="90" x="313" y="30">
        <parameter key="attribute_name" value="Malade"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Normalize: range transformation of the attributes to [-1.0, 1.0]
           (method / min / max below). attribute_filter_type is "all" and
           include_special_attributes is false.
           NOTE(review): the "attributes" value "|IL025-45|IL025-44" looks like a leftover
           from an earlier subset selection and is presumably unused while the filter
           type is "all"; confirm.
           NOTE(review): this step runs on the full data set before the cross-validation
           inside Loop Subsets, so the min/max used for scaling also come from the rows
           later held out for testing; consider whether that is acceptable. -->
      <operator activated="true" class="normalize" compatibility="6.5.002" expanded="true" height="94" name="Normalize" width="90" x="447" y="30">
        <parameter key="return_preprocessing_model" value="false"/>
        <parameter key="create_view" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="|IL025-45|IL025-44"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="method" value="range transformation"/>
        <parameter key="min" value="-1.0"/>
        <parameter key="max" value="1.0"/>
      </operator>
      <operator activated="true" class="loop_attribute_subsets" compatibility="6.5.002" expanded="true" height="60" name="Loop Subsets" width="90" x="581" y="30">
        <parameter key="use_exact_number" value="false"/>
        <parameter key="exact_number_of_attributes" value="-1"/>
        <parameter key="min_number_of_attributes" value="2"/>
        <parameter key="limit_max_number" value="false"/>
        <parameter key="max_number_of_attributes" value="-1"/>
        <parameter key="parallelize_subprocess" value="true"/>
        <process expanded="true">
          <operator activated="true" class="x_validation" compatibility="6.5.002" expanded="true" height="112" name="Validation (2)" width="90" x="313" y="30">
            <parameter key="create_complete_model" value="false"/>
            <parameter key="average_performances_only" value="true"/>
            <parameter key="leave_one_out" value="true"/>
            <parameter key="number_of_validations" value="3"/>
            <parameter key="sampling_type" value="stratified sampling"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <parameter key="parallelize_training" value="true"/>
            <parameter key="parallelize_testing" value="true"/>
            <process expanded="true">
              <operator activated="true" class="polynomial_by_binomial_classification" compatibility="6.5.002" expanded="true" height="76" name="Polynominal by Binominal Classification (2)" width="90" x="184" y="30">
                <parameter key="classification_strategies" value="1 against all"/>
                <parameter key="random_code_multiplicator" value="2.0"/>
                <parameter key="use_local_random_seed" value="false"/>
                <parameter key="local_random_seed" value="1992"/>
                <parameter key="parallelize_learning_process" value="true"/>
                <process expanded="true">
                  <operator activated="true" class="support_vector_machine_libsvm" compatibility="6.5.002" expanded="true" height="76" name="SVM" width="90" x="447" y="30">
                    <parameter key="svm_type" value="C-SVC"/>
                    <parameter key="kernel_type" value="rbf"/>
                    <parameter key="degree" value="3"/>
                    <parameter key="gamma" value="0.5"/>
                    <parameter key="coef0" value="0.0"/>
                    <parameter key="C" value="1000.0"/>
                    <parameter key="nu" value="0.5"/>
                    <parameter key="cache_size" value="80"/>
                    <parameter key="epsilon" value="0.001"/>
                    <parameter key="p" value="0.1"/>
                    <list key="class_weights"/>
                    <parameter key="shrinking" value="true"/>
                    <parameter key="calculate_confidences" value="false"/>
                    <parameter key="confidence_for_multiclass" value="true"/>
                  </operator>
                  <connect from_port="training set" to_op="SVM" to_port="training set"/>
                  <connect from_op="SVM" from_port="model" to_port="model"/>
                  <portSpacing port="source_training set" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                </process>
              </operator>
              <connect from_port="training" to_op="Polynominal by Binominal Classification (2)" to_port="training set"/>
              <connect from_op="Polynominal by Binominal Classification (2)" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="6.5.002" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <operator activated="true" class="performance" compatibility="6.5.002" expanded="true" height="76" name="Performance (2)" width="90" x="246" y="30">
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
              <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
              <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="log" compatibility="6.5.002" expanded="true" height="112" name="Log" width="90" x="715" y="30">
            <parameter key="filename" value="/home/toto/Bureau/Modelisation/Travail/subset.log"/>
            <list key="log">
              <parameter key="Iteration" value="operator.Loop Subsets.value.iteration"/>
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
              <parameter key="Performance" value="operator.Performance (2).value.performance"/>
              <parameter key="Performance1" value="operator.Validation (2).value.performance1"/>
              <parameter key="Performance2" value="operator.Validation (2).value.performance2"/>
              <parameter key="Variance" value="operator.Validation (2).value.variance"/>
            </list>
            <parameter key="sorting_type" value="none"/>
            <parameter key="sorting_k" value="100"/>
            <parameter key="persistent" value="false"/>
          </operator>
          <connect from_port="example set" to_op="Validation (2)" to_port="training"/>
          <connect from_op="Validation (2)" from_port="model" to_op="Log" to_port="through 1"/>
          <connect from_op="Validation (2)" from_port="training" to_op="Log" to_port="through 3"/>
          <connect from_op="Validation (2)" from_port="averagable 1" to_op="Log" to_port="through 2"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: Read Excel, then Select Attributes, then Set Role (3), then
           Normalize, then Loop Subsets; the example set leaving Loop Subsets is
           delivered to process output "result 1". -->
      <connect from_op="Read Excel" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Loop Subsets" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Any ideas? First, how can I get what I wanted, and second, what are performance1, performance2 and variance on the output of the validation process?

Thanks in advance,

L.T

Find more posts tagged with