transaction data, can not aggregate binominal values
Hello all,
I have a dataset that looks like:
User | Item
-------------
1 | Cheese
1 | Bread
2 | Milk
I'd like to mine the frequent item sets from this data. The first thing I did was feed this to "Nominal to Binominal", which seems to work as expected, e.g.:
User | Cheese | Bread | Milk
--------------------------------------------------
1 | true | false | false
1 | false | true | false
2 | false | false | true
What I now need to do is aggregate by user ID to generate:
User | Cheese | Bread | Milk
--------------------------------------------------
1 | true | true | false
2 | false | false | true
I thought I could do this with the Aggregate operator, but that operator seems completely blind to the binomial columns; I can't find any way of selecting them.
What should I be doing here?
Thank you!
Answers
-
Hi. I would Pivot by User ID. You can choose which attributes to aggregate. Put the User ID in the "Group By" section.
Scott
0 -
Hi Caceter,
You can use the 0/1 to represent the false/true values and aggregate by user ID.
Here is a sample process. There are many ways to solve your problem; if you prefer the Aggregate operator, here are two examples:
<?xml version="1.0" encoding="UTF-8"?><process version="7.3.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.3.000" expanded="true" name="Process">
<process expanded="true">
<!-- Example 1: starts from data that already has one true/false column per item
     (Cheese, Bread, Milk) and collapses it to one row per User. The true/false
     columns are turned into numeric columns, aggregated with "maximum" grouped by
     User, and finally converted back to binominal. -->
<operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Example 1" width="90" x="112" y="34">
<process expanded="true">
<!-- Sample data, first set: User=1, Cheese=true, Bread=false, Milk=false. -->
<operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
<list key="attribute_values">
<parameter key="User" value="1"/>
<parameter key="Cheese" value="true"/>
<parameter key="Bread" value="false"/>
<parameter key="Milk" value="false"/>
</list>
<list key="set_additional_roles"/>
</operator>
<!-- Sample data, second set: User=1, Cheese=false, Bread=true, Milk=false. -->
<operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
<list key="attribute_values">
<parameter key="User" value="1"/>
<parameter key="Cheese" value="false"/>
<parameter key="Bread" value="true"/>
<parameter key="Milk" value="false"/>
</list>
<list key="set_additional_roles"/>
</operator>
<!-- Sample data, third set: User=2, Cheese=false, Bread=false, Milk=true. -->
<operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
<list key="attribute_values">
<parameter key="User" value="2"/>
<parameter key="Cheese" value="false"/>
<parameter key="Bread" value="false"/>
<parameter key="Milk" value="true"/>
</list>
<list key="set_additional_roles"/>
</operator>
<!-- Merges the three generated sets into one example set.
     NOTE(review): breakpoints="after" is set here; presumably execution pauses
     after this operator so the merged data can be inspected. Confirm before
     running unattended. -->
<operator activated="true" breakpoints="after" class="append" compatibility="7.3.000" expanded="true" height="124" name="Append" width="90" x="246" y="34"/>
<!-- Gives the User attribute the "id" role. -->
<operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role" width="90" x="380" y="34">
<parameter key="attribute_name" value="User"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<!-- Converts only the Milk, Cheese and Bread attributes to numerical.
     The regular expression used by the next operator implies that the result
     contains columns named like "Cheese = true"; verify against the operator's
     coding type default. -->
<operator activated="true" class="nominal_to_numerical" compatibility="7.3.000" expanded="true" height="103" name="Nominal to Numerical" width="90" x="514" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Milk|Cheese|Bread"/>
<list key="comparison_groups"/>
</operator>
<!-- Keeps only the attributes whose name matches ".* = true". -->
<operator activated="true" class="select_attributes" compatibility="7.3.000" expanded="true" height="82" name="Select Attributes" width="90" x="648" y="34">
<parameter key="attribute_filter_type" value="regular_expression"/>
<parameter key="regular_expression" value=".* = true"/>
</operator>
<!-- Replaces "= true" in the attribute names. No replacement text is given here,
     so presumably the match is simply removed. The space before "=" is not part
     of the pattern, which is why the Aggregate keys below end in a space. -->
<operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="782" y="34">
<parameter key="replace_what" value="= true"/>
</operator>
<!-- Takes the maximum of each item column, grouped by User. The attribute keys
     carry a trailing space ("Bread ", "Cheese ", "Milk ") and must stay that way
     to match the names left by the rename step above. -->
<operator activated="true" class="aggregate" compatibility="7.3.000" expanded="true" height="82" name="Aggregate" width="90" x="916" y="34">
<list key="aggregation_attributes">
<parameter key="Bread " value="maximum"/>
<parameter key="Cheese " value="maximum"/>
<parameter key="Milk " value="maximum"/>
</list>
<parameter key="group_by_attributes" value="User"/>
<parameter key="ignore_missings" value="false"/>
</operator>
<!-- Replaces "maximum(" and " )" in the attribute names; the pattern includes the
     space before ")" so the trailing space from the earlier rename goes too. -->
<operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing (2)" width="90" x="1050" y="34">
<parameter key="replace_what" value="maximum\(| \)"/>
</operator>
<!-- Sets the "id" role on User again, now on the aggregated example set. -->
<operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role (2)" width="90" x="1184" y="34">
<parameter key="attribute_name" value="User"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<!-- Converts the aggregated numeric columns back to binominal, using the
     operator's default parameters. -->
<operator activated="true" class="numerical_to_binominal" compatibility="7.3.000" expanded="true" height="82" name="Example1" width="90" x="1318" y="34"/>
<!-- Wiring: the three generators feed Append, then the operators run in the order
     they are declared above; the final result leaves through port "out 1". -->
<connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
<connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
<connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
<connect from_op="Append" from_port="merged set" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
<connect from_op="Nominal to Numerical" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
<connect from_op="Rename by Replacing" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
<connect from_op="Aggregate" from_port="example set output" to_op="Rename by Replacing (2)" to_port="example set input"/>
<connect from_op="Rename by Replacing (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Example1" to_port="example set input"/>
<connect from_op="Example1" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Example 2: starts from the raw transaction layout (one User/Item pair per row)
     and produces one row per User with one true/false column per item. The Item
     attribute is converted to numerical columns, the "Item = " prefix is stripped
     from their names, the columns are aggregated with "maximum" grouped by User,
     and the result is converted back to binominal. -->
<operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Example 2" width="90" x="112" y="187">
<process expanded="true">
<!-- Sample data, first set: User=1, Item="Cheese". The item value is a quoted
     string, so its double quotes must be written as &quot; inside the XML
     attribute value. -->
<operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="34">
<list key="attribute_values">
<parameter key="User" value="1"/>
<parameter key="Item" value="&quot;Cheese&quot;"/>
</list>
<list key="set_additional_roles"/>
</operator>
<!-- Sample data, second set: User=1, Item="Bread". -->
<operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="45" y="136">
<list key="attribute_values">
<parameter key="User" value="1"/>
<parameter key="Item" value="&quot;Bread&quot;"/>
</list>
<list key="set_additional_roles"/>
</operator>
<!-- Sample data, third set: User=2, Item="Milk". -->
<operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (6)" width="90" x="45" y="238">
<list key="attribute_values">
<parameter key="User" value="2"/>
<parameter key="Item" value="&quot;Milk&quot;"/>
</list>
<list key="set_additional_roles"/>
</operator>
<!-- Merges the three generated sets into one example set.
     NOTE(review): breakpoints="after" is set here; presumably execution pauses
     after this operator so the merged data can be inspected. Confirm before
     running unattended. -->
<operator activated="true" breakpoints="after" class="append" compatibility="7.3.000" expanded="true" height="124" name="Append (2)" width="90" x="179" y="34"/>
<!-- Gives the User attribute the "id" role. -->
<operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role (3)" width="90" x="313" y="34">
<parameter key="attribute_name" value="User"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<!-- Converts nominal attributes to numerical with no attribute filter configured.
     The rename pattern in the next operator implies that the result contains
     columns named like "Item = Cheese"; verify against the operator's coding
     type default. -->
<operator activated="true" class="nominal_to_numerical" compatibility="7.3.000" expanded="true" height="103" name="Nominal to Numerical (2)" width="90" x="447" y="34">
<list key="comparison_groups"/>
</operator>
<!-- Replaces the "Item = " prefix in the attribute names. No replacement text is
     given, so presumably the prefix is simply removed, leaving the bare item
     names that the Aggregate keys below refer to. -->
<operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing (3)" width="90" x="581" y="34">
<parameter key="replace_what" value="Item = "/>
</operator>
<!-- Takes the maximum of each item column, grouped by User. -->
<operator activated="true" class="aggregate" compatibility="7.3.000" expanded="true" height="82" name="Aggregate (2)" width="90" x="715" y="34">
<list key="aggregation_attributes">
<parameter key="Cheese" value="maximum"/>
<parameter key="Bread" value="maximum"/>
<parameter key="Milk" value="maximum"/>
</list>
<parameter key="group_by_attributes" value="User"/>
<parameter key="ignore_missings" value="false"/>
</operator>
<!-- Replaces "maximum(" and ")" in the attribute names produced by Aggregate. -->
<operator activated="true" class="rename_by_replacing" compatibility="7.3.000" expanded="true" height="82" name="Rename by Replacing (4)" width="90" x="849" y="34">
<parameter key="replace_what" value="maximum\(|\)"/>
</operator>
<!-- Sets the "id" role on User again, now on the aggregated example set. -->
<operator activated="true" class="set_role" compatibility="7.3.000" expanded="true" height="82" name="Set Role (4)" width="90" x="983" y="34">
<parameter key="attribute_name" value="User"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<!-- Converts the aggregated numeric columns back to binominal, using the
     operator's default parameters. -->
<operator activated="true" class="numerical_to_binominal" compatibility="7.3.000" expanded="true" height="82" name="Example2" width="90" x="1117" y="34"/>
<!-- Wiring: the three generators feed Append (2), then the operators run in the
     order they are declared above; the final result leaves through port "out 1". -->
<connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append (2)" to_port="example set 1"/>
<connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append (2)" to_port="example set 2"/>
<connect from_op="Generate Data by User Specification (6)" from_port="output" to_op="Append (2)" to_port="example set 3"/>
<connect from_op="Append (2)" from_port="merged set" to_op="Set Role (3)" to_port="example set input"/>
<connect from_op="Set Role (3)" from_port="example set output" to_op="Nominal to Numerical (2)" to_port="example set input"/>
<connect from_op="Nominal to Numerical (2)" from_port="example set output" to_op="Rename by Replacing (3)" to_port="example set input"/>
<connect from_op="Rename by Replacing (3)" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
<connect from_op="Aggregate (2)" from_port="example set output" to_op="Rename by Replacing (4)" to_port="example set input"/>
<connect from_op="Rename by Replacing (4)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
<connect from_op="Set Role (4)" from_port="example set output" to_op="Example2" to_port="example set input"/>
<connect from_op="Example2" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<connect from_op="Example 1" from_port="out 1" to_port="result 1"/>
<connect from_op="Example 2" from_port="out 1" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
HTH,
YY
1 -
Two years later and I have exactly the same problem as OP and yyhuang's answer solves it perfectly (I took inspiration from your example 1). Thank you both!
2