"[Solved] Set Macro dynamically based on Dataset"

aryan_hosseinza
aryan_hosseinza New Altair Community Member
edited November 2024 in Community Q&A
Hi ,

I am down-sampling by means of clustering. The dataset is imbalanced: the number of examples with the 'f' label is about 6 times the number of examples with the 't' label.

I want to set k in the clustering operator equal to the number of 't' examples in the dataset.

How can I do that ?

Thanks

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process (version 5.2.008) from the original question.
     Splits the data into event=f and event=t examples, clusters the event=f
     examples with k-means, reduces them via Remove Duplicates on the "cluster"
     attribute, and unions the result with the event=t examples.
     The number of clusters is hard-coded (k = 4); the question asks how to
     derive it from the number of event=t examples instead. -->
<process version="5.2.008">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
   <process expanded="true" height="539" width="2225">
     <!-- Load the example set from the local repository. -->
     <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="75">
       <parameter key="repository_entry" value="//NewLocalRepository/temp_5000sampled_MI4"/>
     </operator>
     <!-- Stratified sampling in "relative" mode; no ratio is given here, so the operator default applies. -->
     <operator activated="true" class="sample_stratified" compatibility="5.2.008" expanded="true" height="76" name="Sample (Stratified)" width="90" x="112" y="210">
       <parameter key="sample" value="relative"/>
     </operator>
     <!-- Convert the single nominal attribute "sex" to a numerical one. -->
     <operator activated="true" class="nominal_to_numerical" compatibility="5.2.008" expanded="true" height="94" name="Nominal to Numerical" width="90" x="246" y="75">
       <parameter key="attribute_filter_type" value="single"/>
       <parameter key="attribute" value="sex"/>
       <list key="comparison_groups"/>
     </operator>
     <!-- Normalize with the operator's default settings (no parameters overridden). -->
     <operator activated="true" class="normalize" compatibility="5.2.008" expanded="true" height="94" name="Normalize" width="90" x="380" y="75"/>
     <!-- Disabled (activated="false") and not referenced by any connection below. -->
     <operator activated="false" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="345">
       <parameter key="attribute_filter_type" value="single"/>
       <parameter key="attribute" value="event"/>
       <parameter key="invert_selection" value="true"/>
     </operator>
     <!-- Duplicate the normalized data: output 1 feeds the event=f branch, output 2 the event=t branch. -->
     <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply" width="90" x="581" y="75"/>
     <!-- event=t branch: these examples go straight to the Union (example set 2). -->
     <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples (2)" width="90" x="782" y="255">
       <parameter key="condition_class" value="attribute_value_filter"/>
       <parameter key="parameter_string" value="event=t"/>
     </operator>
     <!-- event=f branch: these examples are clustered below. -->
     <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples" width="90" x="782" y="30">
       <parameter key="condition_class" value="attribute_value_filter"/>
       <parameter key="parameter_string" value="event=f"/>
     </operator>
     <!-- Duplicate the event=f examples: one copy trains the clustering, the other is passed to Apply Model. -->
     <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="916" y="30"/>
     <!-- k-means on the event=f examples; k is the fixed value the question wants to make dynamic. -->
     <operator activated="true" class="k_means" compatibility="5.2.008" expanded="true" height="76" name="Clustering" width="90" x="1117" y="30">
       <parameter key="k" value="4"/>
       <parameter key="max_runs" value="100"/>
       <parameter key="measure_types" value="MixedMeasures"/>
     </operator>
     <!-- Apply the cluster model to the second copy of the event=f examples. -->
     <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model" width="90" x="1117" y="165">
       <list key="application_parameters"/>
     </operator>
     <!-- De-duplicate on the "cluster" attribute only (special attributes included);
          presumably this leaves one example per cluster. TODO confirm against the operator docs. -->
     <operator activated="true" class="remove_duplicates" compatibility="5.2.008" expanded="true" height="76" name="Remove Duplicates" width="90" x="1452" y="165">
       <parameter key="attribute_filter_type" value="single"/>
       <parameter key="attribute" value="cluster"/>
       <parameter key="include_special_attributes" value="true"/>
     </operator>
     <!-- Combine the reduced event=f examples (example set 1) with the event=t examples (example set 2). -->
     <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="1720" y="210"/>
     <!-- Inverted single-attribute selection on "cluster" (special attributes included),
          i.e. the intent appears to be dropping the cluster attribute from the result. -->
     <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (3)" width="90" x="1921" y="210">
       <parameter key="attribute_filter_type" value="single"/>
       <parameter key="attribute" value="cluster"/>
       <parameter key="invert_selection" value="true"/>
       <parameter key="include_special_attributes" value="true"/>
     </operator>
     <!-- Shuffle the combined example set before delivering it as result 1. -->
     <operator activated="true" class="shuffle" compatibility="5.2.008" expanded="true" height="76" name="Shuffle" width="90" x="2055" y="210"/>
     <!-- Wiring: preprocessing chain, then the two filter branches, then clustering and merge. -->
     <connect from_op="Retrieve" from_port="output" to_op="Sample (Stratified)" to_port="example set input"/>
     <connect from_op="Sample (Stratified)" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
     <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Normalize" to_port="example set input"/>
     <connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
     <connect from_op="Multiply" from_port="output 1" to_op="Filter Examples" to_port="example set input"/>
     <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples (2)" to_port="example set input"/>
     <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Union" to_port="example set 2"/>
     <connect from_op="Filter Examples" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
     <connect from_op="Multiply (2)" from_port="output 1" to_op="Clustering" to_port="example set"/>
     <connect from_op="Multiply (2)" from_port="output 2" to_op="Apply Model" to_port="unlabelled data"/>
     <connect from_op="Clustering" from_port="cluster model" to_op="Apply Model" to_port="model"/>
     <connect from_op="Apply Model" from_port="labelled data" to_op="Remove Duplicates" to_port="example set input"/>
     <connect from_op="Remove Duplicates" from_port="example set output" to_op="Union" to_port="example set 1"/>
     <connect from_op="Union" from_port="union" to_op="Select Attributes (3)" to_port="example set input"/>
     <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Shuffle" to_port="example set input"/>
     <connect from_op="Shuffle" from_port="example set output" to_port="result 1"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
   </process>
 </operator>
</process>

Welcome!

It looks like you're new here. Sign in or register to get started.

Answers

  • Andrew2
    Andrew2 New Altair Community Member
    Hello,

    One approach is to set a macro equal to the number of rows where the label is 't'. This can be done using the "Extract Macro" operator.

    Then you need to use this macro as a parameter to the k-means operator.

    It is very important that the extraction happens before the k-means operator runs; otherwise you will get an error.

    I don't have your data so I can't test it but here's an example
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- Suggested variant of the asker's process (posted untested by its author).
         Differences from the question's process: an Extract Macro operator is inserted
         on the event=t branch to define the macro "k", and the Clustering operator's
         k parameter references it as %{k} instead of the fixed value.
         Extract Macro is declared before Clustering in the operator list below. -->
    <process version="5.2.008">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
        <process expanded="true" height="661" width="1030">
          <!-- Load the example set from the local repository. -->
          <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
            <parameter key="repository_entry" value="//NewLocalRepository/temp_5000sampled_MI4"/>
          </operator>
          <!-- Stratified sampling in "relative" mode; no ratio is given here, so the operator default applies. -->
          <operator activated="true" class="sample_stratified" compatibility="5.2.008" expanded="true" height="76" name="Sample (Stratified)" width="90" x="45" y="120">
            <parameter key="sample" value="relative"/>
          </operator>
          <!-- Convert the single nominal attribute "sex" to a numerical one. -->
          <operator activated="true" class="nominal_to_numerical" compatibility="5.2.008" expanded="true" height="94" name="Nominal to Numerical" width="90" x="179" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="sex"/>
            <list key="comparison_groups"/>
          </operator>
          <!-- Normalize with the operator's default settings (no parameters overridden). -->
          <operator activated="true" class="normalize" compatibility="5.2.008" expanded="true" height="94" name="Normalize" width="90" x="179" y="165"/>
          <!-- Disabled (activated="false") and not referenced by any connection below. -->
          <operator activated="false" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes" width="90" x="45" y="300">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="event"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Duplicate the normalized data: output 1 feeds the event=f branch, output 2 the event=t branch. -->
          <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply" width="90" x="179" y="300"/>
          <!-- event=t branch: its output is the example set the macro is extracted from. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples (2)" width="90" x="313" y="300">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=t"/>
          </operator>
          <!-- Defines the macro "k" from the event=t examples. Only the macro name is set,
               so the operator's default macro type applies; the accompanying answer intends
               this to be the number of rows. TODO confirm the default in the operator docs. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.008" expanded="true" height="60" name="Extract Macro" width="90" x="313" y="390">
            <parameter key="macro" value="k"/>
          </operator>
          <!-- event=f branch: these examples are clustered below. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples" width="90" x="313" y="30">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=f"/>
          </operator>
          <!-- Duplicate the event=f examples: one copy trains the clustering, the other is passed to Apply Model. -->
          <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="313" y="120"/>
          <!-- k-means on the event=f examples; k is taken from the macro "k" via %{k}.
               Per the accompanying answer, the macro must be extracted before this operator runs. -->
          <operator activated="true" class="k_means" compatibility="5.2.008" expanded="true" height="76" name="Clustering" width="90" x="447" y="30">
            <parameter key="k" value="%{k}"/>
            <parameter key="max_runs" value="100"/>
            <parameter key="measure_types" value="MixedMeasures"/>
          </operator>
          <!-- Apply the cluster model to the second copy of the event=f examples. -->
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model" width="90" x="447" y="120">
            <list key="application_parameters"/>
          </operator>
          <!-- De-duplicate on the "cluster" attribute only (special attributes included);
               presumably this leaves one example per cluster. TODO confirm against the operator docs. -->
          <operator activated="true" class="remove_duplicates" compatibility="5.2.008" expanded="true" height="76" name="Remove Duplicates" width="90" x="447" y="210">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="cluster"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Combine the reduced event=f examples (example set 1) with the event=t examples (example set 2). -->
          <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="581" y="255"/>
          <!-- Inverted single-attribute selection on "cluster" (special attributes included),
               i.e. the intent appears to be dropping the cluster attribute from the result. -->
          <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (3)" width="90" x="581" y="345">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="cluster"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Shuffle the combined example set before delivering it as result 1. -->
          <operator activated="true" class="shuffle" compatibility="5.2.008" expanded="true" height="76" name="Shuffle" width="90" x="581" y="435"/>
          <!-- Wiring: the event=t examples pass through Extract Macro on their way to the Union. -->
          <connect from_op="Retrieve" from_port="output" to_op="Sample (Stratified)" to_port="example set input"/>
          <connect from_op="Sample (Stratified)" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
          <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Normalize" to_port="example set input"/>
          <connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Union" to_port="example set 2"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
          <connect from_op="Multiply (2)" from_port="output 1" to_op="Clustering" to_port="example set"/>
          <connect from_op="Multiply (2)" from_port="output 2" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Clustering" from_port="cluster model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Union" to_port="example set 1"/>
          <connect from_op="Union" from_port="union" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Shuffle" to_port="example set input"/>
          <connect from_op="Shuffle" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    regards

    Andrew
  • aryan_hosseinza
    aryan_hosseinza New Altair Community Member
    It returns an error: "A value for the parameter 'k' must be specified", but I already set 'k' before reaching the clustering. Should we refer to the macro with %{}?


    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- The asker's failing attempt, kept as posted because it reproduces the reported error
         "A value for the parameter 'k' must be specified".
         NOTE(review): two things are visible in this XML:
           1. The Clustering operator below has no "k" parameter entry at all.
           2. Extract Macro is declared after Clustering, Apply Model and Remove Duplicates
              in the operator list; the reply in this thread attributes the error to the macro
              being calculated after it is used.
         Likely fix (per the thread): make Extract Macro run before Clustering and set the
         Clustering k parameter to %{k}. -->
    <process version="5.2.008">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
        <process expanded="true" height="566" width="2225">
          <!-- Load the example set from the local repository. -->
          <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="75">
            <parameter key="repository_entry" value="//NewLocalRepository/temp_5000sampled_MI4"/>
          </operator>
          <!-- Disabled in this version (activated="false"); Retrieve is wired directly to Nominal to Numerical below. -->
          <operator activated="false" class="sample_stratified" compatibility="5.2.008" expanded="true" height="76" name="Sample (Stratified)" width="90" x="179" y="210">
            <parameter key="sample" value="relative"/>
          </operator>
          <!-- Convert the single nominal attribute "sex" to a numerical one. -->
          <operator activated="true" class="nominal_to_numerical" compatibility="5.2.008" expanded="true" height="94" name="Nominal to Numerical" width="90" x="179" y="75">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="sex"/>
            <list key="comparison_groups"/>
          </operator>
          <!-- Normalize with the operator's default settings (no parameters overridden). -->
          <operator activated="true" class="normalize" compatibility="5.2.008" expanded="true" height="94" name="Normalize" width="90" x="313" y="75"/>
          <!-- Disabled (activated="false") and not referenced by any connection below. -->
          <operator activated="false" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="345">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="event"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- First split: output 1 goes to the macro-extraction branch, output 2 to the main pipeline. -->
          <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (3)" width="90" x="447" y="75"/>
          <!-- event=t examples used only as input to Extract Macro. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples (3)" width="90" x="581" y="30">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=t"/>
          </operator>
          <!-- Second split: output 1 feeds the event=f branch, output 2 the event=t branch. -->
          <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply" width="90" x="715" y="165"/>
          <!-- event=t examples that go straight to the Union (example set 2); same condition as Filter Examples (3). -->
          <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples (4)" width="90" x="849" y="300">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=t"/>
          </operator>
          <!-- Disabled (activated="false") and not referenced by any connection below. -->
          <operator activated="false" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples (2)" width="90" x="648" y="480">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=t"/>
          </operator>
          <!-- event=f branch: these examples are clustered below. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.008" expanded="true" height="76" name="Filter Examples" width="90" x="849" y="30">
            <parameter key="condition_class" value="attribute_value_filter"/>
            <parameter key="parameter_string" value="event=f"/>
          </operator>
          <!-- Duplicate the event=f examples: one copy trains the clustering, the other is passed to Apply Model. -->
          <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="983" y="165"/>
          <!-- NOTE(review): no "k" parameter is present here, and nothing in this operator references %{k}. -->
          <operator activated="true" class="k_means" compatibility="5.2.008" expanded="true" height="76" name="Clustering" width="90" x="1117" y="30">
            <parameter key="max_runs" value="100"/>
            <parameter key="measure_types" value="MixedMeasures"/>
          </operator>
          <!-- Apply the cluster model to the second copy of the event=f examples. -->
          <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model" width="90" x="1251" y="165">
            <list key="application_parameters"/>
          </operator>
          <!-- De-duplicate on the "cluster" attribute only (special attributes included);
               presumably this leaves one example per cluster. TODO confirm against the operator docs. -->
          <operator activated="true" class="remove_duplicates" compatibility="5.2.008" expanded="true" height="76" name="Remove Duplicates" width="90" x="1452" y="165">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="cluster"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Defines the macro "k" from the output of Filter Examples (3).
               NOTE(review): this operator is declared after Clustering, and its output port
               is not connected to any other operator in the wiring below.
               The attribute_name parameter is set to "event"; whether it is used depends on the
               macro type, which is left at the operator default. TODO confirm. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.008" expanded="true" height="60" name="Extract Macro" width="90" x="715" y="30">
            <parameter key="macro" value="k"/>
            <parameter key="attribute_name" value="event"/>
          </operator>
          <!-- Combine the reduced event=f examples (example set 1) with the event=t examples (example set 2). -->
          <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="1854" y="255"/>
          <!-- Inverted single-attribute selection on "cluster" (special attributes included),
               i.e. the intent appears to be dropping the cluster attribute from the result. -->
          <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (3)" width="90" x="1988" y="255">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="cluster"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Shuffle the combined example set before delivering it as result 1. -->
          <operator activated="true" class="shuffle" compatibility="5.2.008" expanded="true" height="76" name="Shuffle" width="90" x="2122" y="255"/>
          <!-- Wiring: Extract Macro sits on a side branch that does not rejoin the main pipeline. -->
          <connect from_op="Retrieve" from_port="output" to_op="Nominal to Numerical" to_port="example set input"/>
          <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Normalize" to_port="example set input"/>
          <connect from_op="Normalize" from_port="example set output" to_op="Multiply (3)" to_port="input"/>
          <connect from_op="Multiply (3)" from_port="output 1" to_op="Filter Examples (3)" to_port="example set input"/>
          <connect from_op="Multiply (3)" from_port="output 2" to_op="Multiply" to_port="input"/>
          <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples (4)" to_port="example set input"/>
          <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Union" to_port="example set 2"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
          <connect from_op="Multiply (2)" from_port="output 1" to_op="Clustering" to_port="example set"/>
          <connect from_op="Multiply (2)" from_port="output 2" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Clustering" from_port="cluster model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Union" to_port="example set 1"/>
          <connect from_op="Union" from_port="union" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Shuffle" to_port="example set input"/>
          <connect from_op="Shuffle" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • Andrew2
    Andrew2 New Altair Community Member
    Hello

    The macro is being calculated after it is used. Change the ordering from the GUI using Process -> Operator Execution Order -> Order Execution, so that Extract Macro runs before Clustering.

    regards

    Andrew
  • aryan_hosseinza
    aryan_hosseinza New Altair Community Member
    Thanks for your help, it works.

Welcome!

It looks like you're new here. Sign in or register to get started.

Welcome!

It looks like you're new here. Sign in or register to get started.