transaction data, can not aggregate binominal values

caceter
caceter New Altair Community Member
edited November 2024 in Community Q&A

Hello all,
I have a dataset that looks like:

User | Item
-------------
1 | Cheese
1 | Bread
2 | Milk

I'd like to mine the frequent item sets from this data. The first thing I did was feed this to "Nominal to Binomial", which seems to work as expected, e.g.:

User | Cheese | Bread | Milk
------------------------------------------------------------
1 | true | false | false
1 | false | true | false
2 | false | false | true

What I now need to do is aggregate by user ID to generate:

User | Cheese | Bread | Milk
------------------------------------------------------------
1 | true | true | false
2 | false | false | true

I thought I could do this with the Aggregate operator, but that operator seems completely blind to the binomial columns; I can't find any way of selecting them.

What should I be doing here?

Thank you!

Welcome!

It looks like you're new here. Sign in or register to get started.

Answers

  • sgenzer
    sgenzer
    Altair Employee

    Hi.  I would Pivot by User ID.  You can choose which attributes to aggregate.  Put the User ID in the "Group By" section.

     

    Scott

     

  • YYH
    YYH
    Altair Employee

    Hi Caceter,

     

    You can use 0/1 to represent the false/true values and then aggregate by user ID.

     

    Here is a sample process. There are many ways to solve your problem. If you prefer aggregation, here are two examples:

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- RapidMiner process holding two independent demo subprocesses ("Example 1" and
         "Example 2"). Each one collapses the item rows to one row per User by taking the
         maximum of numeric item columns, then converts the result back to binominal. -->
    <process version="7.3.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.3.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Example 1: the input already has one true/false column per item
               (Cheese, Bread, Milk), one row per purchase. -->
          <operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Example 1" width="90" x="112" y="34">
            <process expanded="true">
              <!-- Three single-row example sets that together form the demo table. -->
              <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
                <list key="attribute_values">
                  <parameter key="User" value="1"/>
                  <parameter key="Cheese" value="true"/>
                  <parameter key="Bread" value="false"/>
                  <parameter key="Milk" value="false"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
                <list key="attribute_values">
                  <parameter key="User" value="1"/>
                  <parameter key="Cheese" value="false"/>
                  <parameter key="Bread" value="true"/>
                  <parameter key="Milk" value="false"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
                <list key="attribute_values">
                  <parameter key="User" value="2"/>
                  <parameter key="Cheese" value="false"/>
                  <parameter key="Bread" value="false"/>
                  <parameter key="Milk" value="true"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Stacks the three sets into one table. breakpoints="after" is set here,
                   so execution presumably pauses after this operator for inspection. -->
              <operator activated="true" breakpoints="after" class="append" compatibility="7.3.000" expanded="true" height="124" name="Append" width="90" x="246" y="34"/>
              <!-- User gets the id role before the item columns are converted. -->
              <operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role" width="90" x="380" y="34">
                <parameter key="attribute_name" value="User"/>
                <parameter key="target_role" value="id"/>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Only the three item columns are converted to numeric. -->
              <operator activated="true" class="nominal_to_numerical" compatibility="7.3.000" expanded="true" height="103" name="Nominal to Numerical" width="90" x="514" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Milk|Cheese|Bread"/>
                <list key="comparison_groups"/>
              </operator>
              <!-- Keeps the columns whose names match ".* = true". -->
              <operator activated="true" class="select_attributes" compatibility="7.3.000" expanded="true" height="82" name="Select Attributes" width="90" x="648" y="34">
                <parameter key="attribute_filter_type" value="regular_expression"/>
                <parameter key="regular_expression" value=".* = true"/>
              </operator>
              <!-- Strips "= true" from the names (no replace_by is given, so the match is
                   presumably removed). The space before "=" is not part of the pattern,
                   which is why the Aggregate keys below end in a space. -->
              <operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="782" y="34">
                <parameter key="replace_what" value="= true"/>
              </operator>
              <!-- Per-User maximum of each item column. -->
              <operator activated="true" class="aggregate" compatibility="7.3.000" expanded="true" height="82" name="Aggregate" width="90" x="916" y="34">
                <list key="aggregation_attributes">
                  <parameter key="Bread " value="maximum"/>
                  <parameter key="Cheese " value="maximum"/>
                  <parameter key="Milk " value="maximum"/>
                </list>
                <parameter key="group_by_attributes" value="User"/>
                <parameter key="ignore_missings" value="false"/>
              </operator>
              <!-- Strips "maximum(" and " )" from the aggregated column names. -->
              <operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing (2)" width="90" x="1050" y="34">
                <parameter key="replace_what" value="maximum\(| \)"/>
              </operator>
              <!-- User is given the id role again after aggregation. -->
              <operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role (2)" width="90" x="1184" y="34">
                <parameter key="attribute_name" value="User"/>
                <parameter key="target_role" value="id"/>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Converts the numeric item columns back to binominal (no parameters set). -->
              <operator activated="true" class="numerical_to_binominal" compatibility="7.3.000" expanded="true" height="82" name="Example1" width="90" x="1318" y="34"/>
              <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
              <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
              <connect from_op="Append" from_port="merged set" to_op="Set Role" to_port="example set input"/>
              <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
              <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
              <connect from_op="Rename by Replacing" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
              <connect from_op="Aggregate" from_port="example set output" to_op="Rename by Replacing (2)" to_port="example set input"/>
              <connect from_op="Rename by Replacing (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
              <connect from_op="Set Role (2)" from_port="example set output" to_op="Example1" to_port="example set input"/>
              <connect from_op="Example1" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Example 2: the input is the raw User | Item transaction list,
               one row per purchased item. -->
          <operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Example 2" width="90" x="112" y="187">
            <process expanded="true">
              <!-- Three single-row example sets, one per transaction line. -->
              <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="34">
                <list key="attribute_values">
                  <parameter key="User" value="1"/>
                  <parameter key="Item" value="&quot;Cheese&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="45" y="136">
                <list key="attribute_values">
                  <parameter key="User" value="1"/>
                  <parameter key="Item" value="&quot;Bread&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (6)" width="90" x="45" y="238">
                <list key="attribute_values">
                  <parameter key="User" value="2"/>
                  <parameter key="Item" value="&quot;Milk&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Stacks the three sets into one table. breakpoints="after" is set here as well. -->
              <operator activated="true" breakpoints="after" class="append" compatibility="7.3.000" expanded="true" height="124" name="Append (2)" width="90" x="179" y="34"/>
              <!-- User gets the id role before Item is converted. -->
              <operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role (3)" width="90" x="313" y="34">
                <parameter key="attribute_name" value="User"/>
                <parameter key="target_role" value="id"/>
                <list key="set_additional_roles"/>
              </operator>
              <!-- No attribute filter is configured on this conversion. -->
              <operator activated="true" class="nominal_to_numerical" compatibility="7.3.000" expanded="true" height="103" name="Nominal to Numerical (2)" width="90" x="447" y="34">
                <list key="comparison_groups"/>
              </operator>
              <!-- Strips the "Item = " prefix so the columns carry the plain item names
                   used as Aggregate keys below. -->
              <operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing (3)" width="90" x="581" y="34">
                <parameter key="replace_what" value="Item = "/>
              </operator>
              <!-- Per-User maximum of each item column. -->
              <operator activated="true" class="aggregate" compatibility="7.3.000" expanded="true" height="82" name="Aggregate (2)" width="90" x="715" y="34">
                <list key="aggregation_attributes">
                  <parameter key="Cheese" value="maximum"/>
                  <parameter key="Bread" value="maximum"/>
                  <parameter key="Milk" value="maximum"/>
                </list>
                <parameter key="group_by_attributes" value="User"/>
                <parameter key="ignore_missings" value="false"/>
              </operator>
              <!-- Strips "maximum(" and ")" from the aggregated column names. -->
              <operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing (4)" width="90" x="849" y="34">
                <parameter key="replace_what" value="maximum\(|\)"/>
              </operator>
              <!-- User is given the id role again after aggregation. -->
              <operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role (4)" width="90" x="983" y="34">
                <parameter key="attribute_name" value="User"/>
                <parameter key="target_role" value="id"/>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Converts the numeric item columns back to binominal (no parameters set). -->
              <operator activated="true" class="numerical_to_binominal" compatibility="7.3.000" expanded="true" height="82" name="Example2" width="90" x="1117" y="34"/>
              <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append (2)" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append (2)" to_port="example set 2"/>
              <connect from_op="Generate Data by User Specification (6)" from_port="output" to_op="Append (2)" to_port="example set 3"/>
              <connect from_op="Append (2)" from_port="merged set" to_op="Set Role (3)" to_port="example set input"/>
              <connect from_op="Set Role (3)" from_port="example set output" to_op="Nominal to Numerical (2)" to_port="example set input"/>
              <connect from_op="Nominal to Numerical (2)" from_port="example set output" to_op="Rename by Replacing (3)" to_port="example set input"/>
              <connect from_op="Rename by Replacing (3)" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
              <connect from_op="Aggregate (2)" from_port="example set output" to_op="Rename by Replacing (4)" to_port="example set input"/>
              <connect from_op="Rename by Replacing (4)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
              <connect from_op="Set Role (4)" from_port="example set output" to_op="Example2" to_port="example set input"/>
              <connect from_op="Example2" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Each example feeds its own result port of the top-level process. -->
          <connect from_op="Example 1" from_port="out 1" to_port="result 1"/>
          <connect from_op="Example 2" from_port="out 1" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>

    HTH,

    YY

  • andkuo_7
    andkuo_7 New Altair Community Member

    Two years later and I have exactly the same problem as OP and yyhuang's answer solves it perfectly (I took inspiration from your example 1). Thank you both!

Welcome!

It looks like you're new here. Sign in or register to get started.

Welcome!

It looks like you're new here. Sign in or register to get started.