Loop Subsets of attributes and log result of each loop

JonesFox
JonesFox New Altair Community Member
edited November 2024 in Community Q&A
Hi,
I have a bunch of attributes / features in my dataset and want to find out which combination of features will train the best model.

Therefore I am using the Loop Subsets operator. I set "min number of attributes" to 2, so that a subset containing the label also includes at least one feature. I use the Branch operator to check whether the current combination of attributes contains my label, and if so I proceed in the "Then" branch to train a model and log its performance using the "Log" operator.

The results I get don't match the results I get when training the model separately on a subset of the features; the absolute error differs by a factor of 10. I suspect that either the "Loop Subsets" operator does something I don't expect, or the Log operator inside the loop doesn't work the way it would outside of a loop.

How would you log the results (performance of the model or entire prediction example set) inside a loop?


This is my log. I would expect absolute errors < 0.1:


This is my process:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: evaluate a linear regression on every attribute subset.

  Flow (as wired by the connect elements below):
    1. Retrieve the training and test example sets.
    2. Z-transform the training features and apply the resulting preprocessing
       model to the test set; the target weld_factor is left in its original units.
    3. Append both sets and keep the five attributes of interest.
    4. Loop Subsets iterates over attribute subsets; for each subset that contains
       weld_factor a linear regression is trained on the first 80 % of the rows,
       applied to the remaining 20 %, and RMSE / absolute error are logged.
-->
<process version="9.10.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.10.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.10.001" expanded="true" height="68" name="Retrieve df_90_Test_PVC" width="90" x="45" y="340">
        <parameter key="repository_entry" value="df_90_Test_PVC"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="9.10.001" expanded="true" height="68" name="Retrieve df_90_Training_PVC" width="90" x="45" y="136">
        <parameter key="repository_entry" value="df_90_Training_PVC"/>
      </operator>
      <!--
        Normalize every attribute EXCEPT the target weld_factor (single attribute
        selected, selection inverted). weld_factor only receives the label role
        inside the loop, so with the previous filter type "all" it was Z-transformed
        too and the logged errors were in standard-deviation units instead of
        weld_factor units.
        NOTE(review): presumably this is the cause of the mismatch against the
        separately trained model; confirm that model was trained on an
        un-normalized target.
      -->
      <operator activated="true" class="normalize" compatibility="9.10.001" expanded="true" height="103" name="Normalize" width="90" x="246" y="136">
        <parameter key="return_preprocessing_model" value="false"/>
        <parameter key="create_view" value="false"/>
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="weld_factor"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="method" value="Z-transformation"/>
        <parameter key="min" value="0.0"/>
        <parameter key="max" value="1.0"/>
        <parameter key="allow_negative_values" value="false"/>
      </operator>
      <!-- Applies the preprocessing model produced by Normalize to the test set. -->
      <operator activated="true" class="apply_model" compatibility="9.10.001" expanded="true" height="82" name="Apply Model (5)" width="90" x="246" y="340">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <!-- Training rows come first (example set 1), test rows second (example set 2). -->
      <operator activated="true" class="append" compatibility="9.10.001" expanded="true" height="103" name="Append" width="90" x="447" y="136">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <!-- Keep the four candidate features plus the target weld_factor. -->
      <operator activated="true" class="select_attributes" compatibility="9.10.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="581" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="att1"/>
        <parameter key="attributes" value="F_angl[N]|F_anw[N]|T_Heiz[°C]|weld_factor|WS_Int[µVs]"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!--
        Iterates over attribute subsets of size 2 or more. weld_factor has no label
        role yet at this point, so it is presumably treated like any other attribute
        and some subsets will not contain it; the Branch below filters those out.
      -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="9.10.001" expanded="true" height="68" name="Loop Subsets" width="90" x="715" y="136">
        <parameter key="use_exact_number" value="false"/>
        <parameter key="exact_number_of_attributes" value="-1"/>
        <parameter key="min_number_of_attributes" value="2"/>
        <parameter key="limit_max_number" value="false"/>
        <parameter key="max_number_of_attributes" value="-1"/>
        <process expanded="true">
          <!-- One copy feeds the Branch condition, the other is the data passed into the branch. -->
          <operator activated="true" class="multiply" compatibility="9.10.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="34"/>
          <!-- "Then" runs only when the current subset contains weld_factor; "Else" does no modelling. -->
          <operator activated="true" class="branch" compatibility="9.10.001" expanded="true" height="103" name="Branch" width="90" x="313" y="34">
            <parameter key="condition_type" value="attribute_available"/>
            <parameter key="condition_value" value="weld_factor"/>
            <parameter key="expression" value=""/>
            <parameter key="io_object" value="ANOVAMatrix"/>
            <parameter key="return_inner_output" value="true"/>
            <process expanded="true">
              <operator activated="true" class="set_role" compatibility="9.10.001" expanded="true" height="82" name="Set Role" width="90" x="112" y="85">
                <parameter key="attribute_name" value="weld_factor"/>
                <parameter key="target_role" value="label"/>
                <list key="set_additional_roles"/>
              </operator>
              <!--
                Linear sampling: partition 1 is the leading 80 % of the rows, partition 2
                the trailing 20 %.
                NOTE(review): this reproduces the original training/test split only if the
                training set makes up exactly 80 % of the appended rows; confirm.
              -->
              <operator activated="true" class="split_data" compatibility="9.10.001" expanded="true" height="103" name="Split Data" origin="GENERATED_TUTORIAL" width="90" x="246" y="85">
                <enumeration key="partitions">
                  <parameter key="ratio" value="0.8"/>
                  <parameter key="ratio" value="0.2"/>
                </enumeration>
                <parameter key="sampling_type" value="linear sampling"/>
                <parameter key="use_local_random_seed" value="false"/>
                <parameter key="local_random_seed" value="1992"/>
              </operator>
              <!--
                NOTE(review): feature_selection "M5 prime" lets the learner drop attributes
                on its own, so the model may use fewer features than the subset that is
                logged. Consider "none" if each subset should be evaluated as given.
              -->
              <operator activated="true" class="linear_regression" compatibility="9.10.001" expanded="true" height="103" name="Linear Regression" width="90" x="447" y="85">
                <parameter key="feature_selection" value="M5 prime"/>
                <parameter key="alpha" value="0.05"/>
                <parameter key="max_iterations" value="10"/>
                <parameter key="forward_alpha" value="0.05"/>
                <parameter key="backward_alpha" value="0.05"/>
                <parameter key="eliminate_colinear_features" value="true"/>
                <parameter key="min_tolerance" value="0.05"/>
                <parameter key="use_bias" value="true"/>
                <parameter key="ridge" value="1.0E-8"/>
              </operator>
              <operator activated="true" class="apply_model" compatibility="9.10.001" expanded="true" height="82" name="Apply Model" origin="GENERATED_TUTORIAL" width="90" x="514" y="340">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <operator activated="true" class="performance_regression" compatibility="9.10.001" expanded="true" height="82" name="Performance" width="90" x="648" y="340">
                <parameter key="main_criterion" value="root_mean_squared_error"/>
                <parameter key="root_mean_squared_error" value="true"/>
                <parameter key="absolute_error" value="true"/>
                <parameter key="relative_error" value="false"/>
                <parameter key="relative_error_lenient" value="false"/>
                <parameter key="relative_error_strict" value="false"/>
                <parameter key="normalized_absolute_error" value="false"/>
                <parameter key="root_relative_squared_error" value="false"/>
                <parameter key="squared_error" value="false"/>
                <parameter key="correlation" value="false"/>
                <parameter key="squared_correlation" value="false"/>
                <parameter key="prediction_average" value="false"/>
                <parameter key="spearman_rho" value="false"/>
                <parameter key="kendall_tau" value="false"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <!--
                One log row per evaluated subset. The second column holds the ROOT mean
                squared error, so it is named RMSE (it was previously mislabeled "MSE").
              -->
              <operator activated="true" class="log" compatibility="9.10.001" expanded="true" height="82" name="Log" origin="GENERATED_TUTORIAL" width="90" x="648" y="187">
                <list key="log">
                  <parameter key="Features" value="operator.Loop Subsets.value.feature_names"/>
                  <parameter key="Testing RMSE" value="operator.Performance.value.root_mean_squared_error"/>
                  <parameter key="Testing absolute Error" value="operator.Performance.value.absolute_error"/>
                </list>
                <parameter key="sorting_type" value="none"/>
                <parameter key="sorting_k" value="100"/>
                <parameter key="persistent" value="false"/>
              </operator>
              <connect from_port="input 1" to_op="Set Role" to_port="example set input"/>
              <connect from_op="Set Role" from_port="example set output" to_op="Split Data" to_port="example set"/>
              <connect from_op="Split Data" from_port="partition 1" to_op="Linear Regression" to_port="training set"/>
              <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Linear Regression" from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
              <connect from_op="Performance" from_port="example set" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
            <process expanded="true">
              <connect from_port="condition" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Branch" to_port="condition"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Branch" to_port="input 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve df_90_Test_PVC" from_port="output" to_op="Apply Model (5)" to_port="unlabelled data"/>
      <connect from_op="Retrieve df_90_Training_PVC" from_port="output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Append" to_port="example set 1"/>
      <connect from_op="Normalize" from_port="preprocessing model" to_op="Apply Model (5)" to_port="model"/>
      <connect from_op="Apply Model (5)" from_port="labelled data" to_op="Append" to_port="example set 2"/>
      <connect from_op="Append" from_port="merged set" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Loop Subsets" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Welcome!

It looks like you're new here. Sign in or register to get started.

Best Answer

  • MartinLiebig
    MartinLiebig
    Altair Employee
    Answer ✓
    Hi,
    Did you check the Automatic Feature Engineering operator? That should be very close to what you want to do.

    BR,
    Martin

Answers

  • MartinLiebig
    MartinLiebig
    Altair Employee
    Answer ✓
    Hi,
    Did you check the Automatic Feature Engineering operator? That should be very close to what you want to do.

    BR,
    Martin
  • JonesFox
    JonesFox New Altair Community Member
    The Automatic Feature Engineering Operator seems good for my purpose, thanks! However, it's not available in the free version...
  • MartinLiebig
    MartinLiebig
    Altair Employee
    Did you check for our educational offers? Feels like you are eligible for it.

Welcome!

It looks like you're new here. Sign in or register to get started.

Welcome!

It looks like you're new here. Sign in or register to get started.