🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

How to aggregate attribute values by percentage

User: "MarceloC"
New Altair Community Member
Updated by Jocelyn
Hello there!  :)

I have a data set that I need to transform from nominal to binominal for subsequent mining. However, in order to reduce the number of columns after the transformation, I want to take each attribute and count the total occurrences of each of its values across all my examples (the same thing that RapidMiner shows in the Results -> Statistics tab). That way I could take all the values whose frequency is below a certain threshold and aggregate them into a single value called "other_values".
I tried to figure this out using the Loop Attributes and Loop Values operators, but I could not find a solution. Has anyone done something like this before, or does anyone have an idea of how this can be accomplished?

Best regards  ;)

Marcelo.

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "MarceloC"
    New Altair Community Member
    OP
    Hey guys, I believe I found a solution, but maybe there is an easier way. Hope this helps someone one day.  :D
    <!--
      Aggregate Attribute Values by Percentage

      For every attribute, and for every value of that attribute, this subprocess
      takes the examples carrying that value and compares their count with the
      total example count. If the share is below the "threshold" macro, the value
      is rewritten to "other_values"; otherwise the examples pass through
      unchanged. The per-value subsets are then appended back into one example set.

      Input : "in 1"  (example set)
      Output: "out 1" (example set with rare values replaced)
    -->
    <operator activated="true" class="subprocess" compatibility="6.5.000" expanded="true" height="76" name="Aggregate Attribute Values by Percentage" width="90" x="648" y="300">
            <process expanded="true">
              <!-- Stores a value in the macro "total_number_of_examples"; the expression in
                   "Generate Macro" uses it as the denominator. No macro type is set here, so
                   this presumably relies on the operator default (number of examples): confirm. -->
              <operator activated="true" class="extract_macro" compatibility="6.5.000" expanded="true" height="60" name="Extract Macro (2)" width="90" x="45" y="30">
                <parameter key="macro" value="total_number_of_examples"/>
                <list key="additional_macros"/>
              </operator>
              <!-- Outer loop over attributes. The inner operators read the current attribute
                   name from the macro "loop_attribute" (presumably the operator's default
                   iteration macro name, since none is configured here). -->
              <operator activated="true" class="loop_attributes" compatibility="6.5.000" expanded="true" height="76" name="Loop Attributes" width="90" x="246" y="30">
                <process expanded="true">
                  <!-- Inner loop over the values of the current attribute; the current value
                       is read below from the macro "loop_value". -->
                  <operator activated="true" class="loop_values" compatibility="6.5.000" expanded="true" height="76" name="Loop Values" width="90" x="112" y="30">
                    <parameter key="attribute" value="%{loop_attribute}"/>
                    <process expanded="true">
                      <!-- Frequency threshold as a fraction: 0.1 means values occurring in fewer
                           than 10% of the examples are aggregated. Change the value here to tune. -->
                      <operator activated="true" class="set_macro" compatibility="6.5.000" expanded="true" height="76" name="Set Macro" width="90" x="45" y="120">
                        <parameter key="macro" value="threshold"/>
                        <parameter key="value" value="0.1"/>
                      </operator>
                      <!-- Keep only the examples whose current attribute equals the current value. -->
                      <operator activated="true" class="filter_examples" compatibility="6.5.000" expanded="true" height="94" name="Filter Examples" width="90" x="179" y="120">
                        <parameter key="parameter_string" value="%{loop_attribute} = %{loop_value}"/>
                        <parameter key="parameter_expression" value="%{loop_attribute} = %{loop_value}"/>
                        <parameter key="condition_class" value="attribute_value_filter"/>
                        <list key="filters_list">
                          <parameter key="filters_entry_key" value="%{loop_attribute}.equals.%{loop_value}"/>
                        </list>
                      </operator>
                      <!-- Stores a value for the filtered subset in "number_of_examples"; used as
                           the numerator below (presumably the example count, as above: confirm). -->
                      <operator activated="true" class="extract_macro" compatibility="6.5.000" expanded="true" height="60" name="Extract Macro" width="90" x="313" y="120">
                        <parameter key="macro" value="number_of_examples"/>
                        <list key="additional_macros"/>
                      </operator>
                      <!-- rename_value = "1" when the value's share is below the threshold,
                           "2" otherwise. It is consumed by "Select Subprocess" below. -->
                      <operator activated="true" class="generate_macro" compatibility="6.5.000" expanded="true" height="76" name="Generate Macro" width="90" x="447" y="120">
                        <list key="function_descriptions">
                          <parameter key="rename_value" value="if((eval(%{number_of_examples}) / eval(%{total_number_of_examples})) &lt; eval(%{threshold}), str(1), str(2))"/>
                        </list>
                      </operator>
                      <!-- Branch on rename_value: the first inner process replaces the value,
                           the second passes the examples through untouched. -->
                      <operator activated="true" class="select_subprocess" compatibility="6.5.000" expanded="true" height="76" name="Select Subprocess" width="90" x="581" y="120">
                        <parameter key="select_which" value="%{rename_value}"/>
                        <!-- Branch 1 (rare value): the regex ".+" matches the whole value, so every
                             value of the current attribute in this subset becomes "other_values". -->
                        <process expanded="true">
                          <operator activated="true" class="replace" compatibility="6.5.000" expanded="true" height="76" name="Replace (3)" width="90" x="112" y="30">
                            <parameter key="attribute_filter_type" value="single"/>
                            <parameter key="attribute" value="%{loop_attribute}"/>
                            <parameter key="replace_what" value=".+"/>
                            <parameter key="replace_by" value="other_values"/>
                          </operator>
                          <connect from_port="input 1" to_op="Replace (3)" to_port="example set input"/>
                          <connect from_op="Replace (3)" from_port="example set output" to_port="output 1"/>
                          <portSpacing port="source_input 1" spacing="0"/>
                          <portSpacing port="source_input 2" spacing="0"/>
                          <portSpacing port="sink_output 1" spacing="0"/>
                          <portSpacing port="sink_output 2" spacing="0"/>
                        </process>
                        <!-- Branch 2 (frequent value): pass-through, nothing is changed. -->
                        <process expanded="true">
                          <connect from_port="input 1" to_port="output 1"/>
                          <portSpacing port="source_input 1" spacing="0"/>
                          <portSpacing port="source_input 2" spacing="0"/>
                          <portSpacing port="sink_output 1" spacing="0"/>
                          <portSpacing port="sink_output 2" spacing="0"/>
                        </process>
                      </operator>
                      <connect from_port="example set" to_op="Set Macro" to_port="through 1"/>
                      <connect from_op="Set Macro" from_port="through 1" to_op="Filter Examples" to_port="example set input"/>
                      <connect from_op="Filter Examples" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
                      <connect from_op="Extract Macro" from_port="example set" to_op="Generate Macro" to_port="through 1"/>
                      <connect from_op="Generate Macro" from_port="through 1" to_op="Select Subprocess" to_port="input 1"/>
                      <connect from_op="Select Subprocess" from_port="output 1" to_port="out 1"/>
                      <portSpacing port="source_example set" spacing="0"/>
                      <portSpacing port="sink_out 1" spacing="0"/>
                      <portSpacing port="sink_out 2" spacing="0"/>
                    </process>
                  </operator>
                  <!-- Merge the per-value subsets produced by "Loop Values" back into a single
                       example set. No breakpoint is set on this operator, so the loop runs
                       through all attributes without pausing. -->
                  <operator activated="true" class="append" compatibility="6.5.000" expanded="true" height="76" name="Append" width="90" x="313" y="30"/>
                  <connect from_port="example set" to_op="Loop Values" to_port="example set"/>
                  <connect from_op="Loop Values" from_port="out 1" to_op="Append" to_port="example set 1"/>
                  <connect from_op="Append" from_port="merged set" to_port="example set"/>
                  <portSpacing port="source_example set" spacing="0"/>
                  <portSpacing port="sink_example set" spacing="0"/>
                  <portSpacing port="sink_result 1" spacing="0"/>
                </process>
              </operator>
              <connect from_port="in 1" to_op="Extract Macro (2)" to_port="example set"/>
              <connect from_op="Extract Macro (2)" from_port="example set" to_op="Loop Attributes" to_port="example set"/>
              <connect from_op="Loop Attributes" from_port="example set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
    Best regards!

    Marcelo.