🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

"Learners not working inside missing values"

User: "simon"
New Altair Community Member
Updated by Jocelyn
Hi,
I have been trying to use the "impute missing values" operator with different learners, and I haven't been successful. In particular, tree type learners always give me the following error: Does not have sufficient capabilities to deal with example sets with only one label.
This doesn't make much sense to me, as I know you can learn one attribute only with decision trees. Moreover, I tried using Decision tree on its own with the same input data as it gets when used as inner operator of "impute missing values", and it works in that case!
Do you have any idea where that problem come from, or any suggestions as to which learners would be better to use with this operator? here are the processes I'm working on :
With impute missing values operator:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <context>
    <input>
      <location/>
    </input>
    <output>
      <location/>
      <location/>
    </output>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Process">
    <parameter key="logverbosity" value="3"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="1"/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true" height="428" width="639">
      <operator activated="true" class="retrieve" expanded="true" height="60" name="Retrieve" width="90" x="45" y="75">
        <parameter key="repository_entry" value="//NewLocalRepository/Preprocessing_I"/>
      </operator>
      <operator activated="true" class="sample" expanded="true" height="76" name="Sample" width="90" x="179" y="75">
        <parameter key="sample" value="0"/>
        <parameter key="sample_size" value="5000"/>
        <parameter key="sample_ratio" value="0.1"/>
        <parameter key="sample_probability" value="0.1"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="work_on_subset" expanded="true" height="76" name="Work on Subset" width="90" x="380" y="75">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="RFA_2R|RFA_2F|RFA_2A|MSA"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="11"/>
        <parameter key="block_type" value="0"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="8"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="keep_subset_only" value="false"/>
        <parameter key="deliver_inner_results" value="false"/>
        <process expanded="true" height="428" width="639">
          <operator activated="true" class="impute_missing_values" expanded="true" height="60" name="Impute Missing Values" width="90" x="246" y="30">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="attribute" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="11"/>
            <parameter key="block_type" value="0"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="8"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="iterate" value="true"/>
            <parameter key="learn_on_complete_cases" value="true"/>
            <parameter key="order" value="0"/>
            <parameter key="sort" value="0"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <process expanded="true">
              <operator activated="true" class="decision_tree" expanded="true" height="76" name="Decision Tree" width="90" x="274" y="30">
                <parameter key="criterion" value="gain_ratio"/>
                <parameter key="minimal_size_for_split" value="4"/>
                <parameter key="minimal_leaf_size" value="2"/>
                <parameter key="minimal_gain" value="0.1"/>
                <parameter key="maximal_depth" value="20"/>
                <parameter key="confidence" value="0.25"/>
                <parameter key="number_of_prepruning_alternatives" value="3"/>
                <parameter key="no_pre_pruning" value="false"/>
                <parameter key="no_pruning" value="false"/>
              </operator>
              <connect from_port="example set source" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_port="model sink"/>
              <portSpacing port="source_example set source" spacing="0"/>
              <portSpacing port="sink_model sink" spacing="0"/>
            </process>
          </operator>
          <connect from_port="exampleSet" to_op="Impute Missing Values" to_port="example set in"/>
          <connect from_op="Impute Missing Values" from_port="example set out" to_port="example set"/>
          <portSpacing port="source_exampleSet" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Sample" to_port="example set input"/>
      <connect from_op="Sample" from_port="example set output" to_op="Work on Subset" to_port="example set"/>
      <connect from_op="Work on Subset" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
With simply Decision Tree operator:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <context>
    <input>
      <location/>
    </input>
    <output>
      <location/>
      <location/>
      <location/>
    </output>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Process">
    <parameter key="logverbosity" value="3"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="1"/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true" height="428" width="639">
      <operator activated="true" class="retrieve" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//NewLocalRepository/Preprocessing_I"/>
      </operator>
      <operator activated="true" class="sample" expanded="true" height="76" name="Sample" width="90" x="45" y="165">
        <parameter key="sample" value="0"/>
        <parameter key="sample_size" value="5000"/>
        <parameter key="sample_ratio" value="0.1"/>
        <parameter key="sample_probability" value="0.1"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="179" y="165">
        <parameter key="name" value="MSA"/>
        <parameter key="target_role" value="label"/>
      </operator>
      <operator activated="true" class="select_attributes" expanded="true" height="76" name="Select Attributes" width="90" x="313" y="165">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="RFA_2R|RFA_2F|RFA_2A|MSA"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="11"/>
        <parameter key="block_type" value="0"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="8"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <operator activated="true" class="decision_tree" expanded="true" height="76" name="Decision Tree" width="90" x="447" y="75">
        <parameter key="criterion" value="gain_ratio"/>
        <parameter key="minimal_size_for_split" value="4"/>
        <parameter key="minimal_leaf_size" value="1"/>
        <parameter key="minimal_gain" value="0.1"/>
        <parameter key="maximal_depth" value="20"/>
        <parameter key="confidence" value="0.25"/>
        <parameter key="number_of_prepruning_alternatives" value="3"/>
        <parameter key="no_pre_pruning" value="false"/>
        <parameter key="no_pruning" value="false"/>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Sample" to_port="example set input"/>
      <connect from_op="Sample" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Decision Tree" to_port="training set"/>
      <connect from_op="Decision Tree" from_port="model" to_port="result 1"/>
      <connect from_op="Decision Tree" from_port="exampleSet" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Thanks in advance

Simon

Find more posts tagged with