[SOLVED] Transactional2Basket Problem
npiani
New Altair Community Member
Hello there friends!
I'm learning about Market Basket Analysis, and I came across the Transactional2Basket Preprocessing sample process. The first thing I noticed when I ran it (with the default data set) was the following line:
WARNING: FPGrowth: Removed 1 non-binominal attribute, frequent item set mining is only supported for the positive values of binominal attributes.
I ignored it for the time being, since I assumed the data was correct because it came with the software.
To make things clear, the default data (Market-Data) looks like this:
Row TID ITEM
1 1.0 1.0
2 1.0 2.0
3 1.0 3.0
4 2.0 1.0
5 3.0 4.0
6 3.0 5.0
7 3.0 6.0
Where TID is Transaction ID, and ITEM is the Item ID
Now my problem is the following:
According to the results, Items 2 and 3 have a support of 0.667, and Item 1 has only 0.333.
Now correct me if I am wrong, but looking at the data, it is clear that Item 1 has the higher support, whereas Items 2 and 3 have a support of only 0.333.
Thank you for your time. Any input would be appreciated.
~Dr. Chen
I'm learning about Market Basket Analysis, and I came across the Transactional2Basket Preprocessing sample process. The first thing I noticed when I ran it (with the default data set) was the following line:
WARNING: FPGrowth: Removed 1 non-binominal attribute, frequent item set mining is only supported for the positive values of binominal attributes.
I ignored it for the time being, since I assumed the data was correct because it came with the software.
To make things clear, the default data (Market-Data) looks like this:
Row TID ITEM
1 1.0 1.0
2 1.0 2.0
3 1.0 3.0
4 2.0 1.0
5 3.0 4.0
6 3.0 5.0
7 3.0 6.0
Where TID is Transaction ID, and ITEM is the Item ID
Now my problem is the following:
According to the results, Items 2 and 3 have a support of 0.667, and Item 1 has only 0.333.
Now correct me if I am wrong, but looking at the data, it is clear that Item 1 has the higher support, whereas Items 2 and 3 have a support of only 0.333.
Thank you for your time. Any input would be appreciated.
~Dr. Chen
Tagged:
0
Answers
-
Hi,
Thanks for the hint; I've fixed it in the current SVN version. The example process wasn't entirely correct: the FPGrowth operator was missing a value for the 'positive value' parameter.
Here is the correct process:
Best,
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Transactional2Basket preprocessing.
  Operator chain (see the <connect> elements of the Root process):
    Retrieve, IdTagging, IdToRegular, Example2AttributePivoting,
    Numerical2Polynominal, AttributeSubsetPreprocessing (wrapping Mapping),
    FPGrowth.
  The x/y/width/height attributes and the <portSpacing> elements appear to be
  GUI layout data only (assumption; confirm against the RapidMiner docs).
-->
<process version="5.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.0.000"
            expanded="true"
            name="Root">
    <process expanded="true" height="558" width="570">

      <!-- Load the transactional input (columns TID and ITEM are referenced by the pivot below). -->
      <operator activated="true"
                class="retrieve"
                compatibility="5.0.000"
                expanded="true"
                height="60"
                name="Retrieve"
                width="90"
                x="45"
                y="30">
        <parameter key="repository_entry" value="../../data/Market-Data"/>
      </operator>

      <!-- Generate an id for each example. -->
      <operator activated="true"
                class="generate_id"
                compatibility="5.0.000"
                expanded="true"
                height="76"
                name="IdTagging"
                width="90"
                x="179"
                y="30"/>

      <!--
        set_role on the attribute "id"; no target role is given here, so the
        operator default applies (presumably "regular", as the operator name
        suggests; confirm).
      -->
      <operator activated="true"
                class="set_role"
                compatibility="5.0.000"
                expanded="true"
                height="76"
                name="IdToRegular"
                width="90"
                x="315"
                y="30">
        <parameter key="name" value="id"/>
        <list key="set_additional_roles"/>
      </operator>

      <!--
        Pivot grouped by TID and indexed by ITEM.
        breakpoints="after" is set on this operator, so execution is expected
        to pause once the pivot has run.
      -->
      <operator activated="true"
                breakpoints="after"
                class="pivot"
                compatibility="5.0.000"
                expanded="true"
                height="76"
                name="Example2AttributePivoting"
                width="90"
                x="450"
                y="30">
        <parameter key="group_attribute" value="TID"/>
        <parameter key="index_attribute" value="ITEM"/>
      </operator>

      <!-- Convert numerical attributes to polynominal ones (all defaults). -->
      <operator activated="true"
                class="numerical_to_polynominal"
                compatibility="5.0.000"
                expanded="true"
                height="76"
                name="Numerical2Polynominal"
                width="90"
                x="179"
                y="210"/>

      <!--
        Run the inner process on an attribute subset: the filter is the regular
        expression "TID" with invert_selection="true", i.e. presumably every
        attribute except TID (confirm).
      -->
      <operator activated="true"
                class="work_on_subset"
                compatibility="5.0.000"
                expanded="true"
                height="76"
                name="AttributeSubsetPreprocessing"
                width="90"
                x="313"
                y="210">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="regular_expression" value="TID"/>
        <parameter key="invert_selection" value="true"/>
        <process expanded="true" height="558" width="165">

          <!--
            On all attributes matching ".*": replace "?" by "false"; with
            add_default_mapping enabled, any other value becomes "true".
            NOTE(review): "?" presumably stands for the missing cells left by
            the pivot; confirm.
          -->
          <operator activated="true"
                    class="map"
                    compatibility="5.0.000"
                    expanded="true"
                    height="76"
                    name="Mapping"
                    width="90"
                    x="45"
                    y="30">
            <parameter key="attribute_filter_type" value="regular_expression"/>
            <parameter key="regular_expression" value=".*"/>
            <list key="value_mappings"/>
            <parameter key="replace_what" value="?"/>
            <parameter key="replace_by" value="false"/>
            <parameter key="add_default_mapping" value="true"/>
            <parameter key="default_value" value="true"/>
          </operator>

          <connect from_port="exampleSet" to_op="Mapping" to_port="example set input"/>
          <connect from_op="Mapping" from_port="example set output" to_port="example set"/>
          <portSpacing port="source_exampleSet" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
      </operator>

      <!--
        Frequent item set mining. positive_value is "true", which matches the
        default value written by the Mapping operator above.
      -->
      <operator activated="true"
                class="fp_growth"
                compatibility="5.0.000"
                expanded="true"
                height="76"
                name="FPGrowth"
                width="90"
                x="447"
                y="210">
        <parameter key="positive_value" value="true"/>
      </operator>

      <!-- Wiring of the operator chain. -->
      <connect from_op="Retrieve" from_port="output" to_op="IdTagging" to_port="example set input"/>
      <connect from_op="IdTagging" from_port="example set output" to_op="IdToRegular" to_port="example set input"/>
      <connect from_op="IdToRegular" from_port="example set output" to_op="Example2AttributePivoting" to_port="example set input"/>
      <connect from_op="Example2AttributePivoting" from_port="example set output" to_op="Numerical2Polynominal" to_port="example set input"/>
      <connect from_op="Numerical2Polynominal" from_port="example set output" to_op="AttributeSubsetPreprocessing" to_port="example set"/>
      <connect from_op="AttributeSubsetPreprocessing" from_port="example set" to_op="FPGrowth" to_port="example set"/>

      <!-- Process results: the example set (result 1) and the frequent item sets (result 2). -->
      <connect from_op="FPGrowth" from_port="example set" to_port="result 1"/>
      <connect from_op="FPGrowth" from_port="frequent sets" to_port="result 2"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="180"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Nils0 -
Thank you very much for your time.
Now the answer makes a lot more sense.
~Dr. Chen0