"How to create an association matrix instead of the rules?"
Hello everyone,
the example set which I have contains the transitions of customers between different hotels over four years. I already did the basket analysis but am not satisfied with the result due to its lack of visualization.
What I want to achieve is kind of an association matrix for example:
Product A was bought again 80 times.
Product B was bought again 100 times.
20 Customers who bought Product B also bought Product A.
The Matrix (in percentage) would look like this:
A B
A 1 0,2
B 0,25 1
So an asymmetric matrix is created, which could then be visualised by an xy-scatter plot with different circle sizes.
The problem is I don't know how to get to this matrix. My starting point would be to pivot and aggregate the data so that I get to the matrix-format.
Thank you
Answers
-
Did you use the Association Rules to Exampleset operator?
I was just faced with creating a similar type of matrix this morning but I haven't solved it yet.
0 -
Thank you Thomas,
yes, I already used it, but is the association rule really suitable for that? Isn't it just a matter of aggregating or counting?
Best
Philipp
0 -
Off the top of my head this morning I don't know how the matrix would look for more products than your example.
For your example, there is an operator in the Statistics extension that does exactly this, so you can loop it to produce one for each product.
See the rather rushed example below.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: builds a binominal invoice-by-product table from the
  "Market Basket Analysis" sample transactions and extracts a cross table for
  one pair of products ("Product 10" vs "Product 11").

  The annotation texts in the <description> elements carry HTML line breaks.
  They are escaped here (&lt;br&gt;) so that the document is well-formed XML;
  a bare, unclosed <br> inside <description> makes the file unparseable.
-->
<process version="7.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Read the sample transaction data from the repository. -->
      <operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Load Transactions" width="90" x="112" y="187">
        <parameter key="repository_entry" value="//Samples/Templates/Market Basket Analysis/Transactions"/>
      </operator>
      <!-- Sum "Orders" per (Invoice, product 1) combination. -->
      <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="Aggregate" width="90" x="112" y="336">
        <list key="aggregation_attributes">
          <parameter key="Orders" value="sum"/>
        </list>
        <parameter key="group_by_attributes" value="Invoice|product 1"/>
      </operator>
      <!-- Pivot with Invoice as the group attribute and "product 1" as the index attribute. -->
      <operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="246" y="336">
        <parameter key="group_attribute" value="Invoice"/>
        <parameter key="index_attribute" value="product 1"/>
      </operator>
      <!-- Match the "sum(Orders)_" prefix in attribute names; no replacement is given,
           so presumably the prefix is simply removed (confirm against the operator default). -->
      <operator activated="true" class="rename_by_replacing" compatibility="7.4.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="380" y="336">
        <parameter key="attribute" value="Invoice"/>
        <parameter key="replace_what" value="sum\(Orders\)_"/>
      </operator>
      <!-- Missing values are replaced by zero. -->
      <operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="112" y="442">
        <parameter key="default" value="zero"/>
        <list key="columns"/>
      </operator>
      <!-- Convert the numerical attributes to binominal ones (all parameters at their defaults). -->
      <operator activated="true" class="numerical_to_binominal" compatibility="6.0.003" expanded="true" height="82" name="Numerical to Binominal" width="90" x="246" y="442"/>
      <!-- Give Invoice the id role. -->
      <operator activated="true" class="set_role" compatibility="7.4.000" expanded="true" height="82" name="Set Role" width="90" x="380" y="442">
        <parameter key="attribute_name" value="Invoice"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Cross table for one hard-coded product pair; change group_attribute_a/b
           (or loop over them) to cover other pairs. -->
      <operator activated="true" class="rmx_stat:cross_table" compatibility="1.3.004" expanded="true" height="82" name="Extract Cross Table" width="90" x="514" y="340">
        <parameter key="group_attribute_a" value="Product 10"/>
        <parameter key="group_attribute_b" value="Product 11"/>
        <parameter key="generate_totals" value="true"/>
        <parameter key="show_relative_total_counts" value="true"/>
      </operator>
      <connect from_op="Load Transactions" from_port="output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Pivot" to_port="example set input"/>
      <connect from_op="Pivot" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
      <connect from_op="Numerical to Binominal" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Extract Cross Table" to_port="example set input"/>
      <connect from_op="Extract Cross Table" from_port="cross table output" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="147"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="42"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <!-- NOTE(review): the annotations below describe Steps 3 and 4 (FP-Growth and
           association rules), but this process contains no such operators. -->
      <description align="left" color="yellow" colored="false" height="70" resized="false" width="850" x="20" y="25">MARKET BASKET ANALYSIS&lt;br&gt;Model associations between products by determining sets of items frequently purchased together and building association rules to derive recommendations.</description>
      <description align="left" color="blue" colored="true" height="185" resized="true" width="550" x="20" y="105">Step 1:&lt;br/&gt;Load transaction data containing a transaction id, a product id and a quantifier. The data denotes how many times a certain product has been purchased as part of a transactions.</description>
      <description align="left" color="purple" colored="true" height="341" resized="true" width="549" x="20" y="300">&lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; Step 2:&lt;br&gt;Edit, transform &amp; load (ETL) - Aggregate transaction data to account for multiple occurrences of the same product in a transaction. Pivot the data so that each transaction is represented by a row. Transform purchase amounts to binary &quot;product purchased yes/no &quot; indicators.&lt;br&gt;</description>
      <description align="left" color="green" colored="true" height="310" resized="true" width="290" x="580" y="105">Step 3:&lt;br/&gt;Using FP-Growth, determine frequent item sets. A frequent item sets denotes that the items (products) in the set have been purchased together frequently, i.e. in a certain ratio of transactions. This ratio is given by the support of the item set.</description>
      <description align="left" color="green" colored="true" height="215" resized="true" width="286" x="579" y="425">&lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; &lt;br&gt; Step 4:&lt;br/&gt;Create association rules which can be used for product recommendations depending on the confidences of the rules.&lt;br&gt;</description>
      <description align="left" color="yellow" colored="false" height="35" resized="true" width="849" x="20" y="655">Outputs: association rules, frequent item set&lt;br&gt;</description>
    </process>
  </operator>
</process>
2 -
Thank you Edward,
but as you said, the operator just works for two attributes, thus two products.
I found a website which explains exactly what I want to achieve, in Excel. Would this also be possible in RapidMiner somehow? The photo shows this "influence matrix".
Thank you
0 -
Sure,
it's just a Pivot and a Replace Missing Values operator.
~Martin
1 -
Not as easy, though?
My example set contains 4 attributes (different years), and the examples represent the different products which were bought each year by each customer. What I have done now (since time doesn't play a role in the association matrix) is remove duplicates within each row (customer). So if a customer has bought products A, B, C, A within four years, it is now reduced to A, B, C, since the only important information is that these products were bought together.
Now something like counting every combination has to happen. But I'm stuck with this problem now. Because the normal association rule to example set doesn't allow me to convert it to a matrix.
Thank you
0 -
Do you mean something like this, then?
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: turns association rules mined from the Iris sample data
  into a matrix of confidences, pivoted with Conclusion as the group attribute
  and Premises as the index attribute.
-->
<process version="7.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Read the Iris sample data from the repository. -->
      <operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Iris" width="90" x="45" y="120">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Discretize into 5 bins by frequency, using short range names. -->
      <operator activated="true" class="discretize_by_frequency" compatibility="7.1.001" expanded="true" height="103" name="Discretize by Frequency" width="90" x="179" y="120">
        <parameter key="number_of_bins" value="5"/>
        <parameter key="range_name_type" value="short"/>
      </operator>
      <!-- Convert the nominal attributes to binominal ones as input for FP-Growth. -->
      <operator activated="true" class="nominal_to_binominal" compatibility="7.1.001" expanded="true" height="103" name="Nominal to Binominal" width="90" x="313" y="120">
        <parameter key="transform_binominal" value="true"/>
        <parameter key="use_underscore_in_name" value="true"/>
      </operator>
      <!-- Mine frequent item sets with a fixed minimum support of 0.1. -->
      <operator activated="true" class="fp_growth" compatibility="7.4.000" expanded="true" height="82" name="FPGrowth" width="90" x="447" y="120">
        <parameter key="find_min_number_of_itemsets" value="false"/>
        <parameter key="min_number_of_itemsets" value="1"/>
        <parameter key="min_support" value="0.1"/>
      </operator>
      <!-- Minimum confidence is 0.0, so no rule is filtered out by confidence. -->
      <operator activated="true" class="create_association_rules" compatibility="7.4.000" expanded="true" height="82" name="Create Association Rules" width="90" x="581" y="136">
        <parameter key="min_confidence" value="0.0"/>
      </operator>
      <!-- Convert the rules object to an example set (operator from the "converters" extension). -->
      <operator activated="true" class="converters:rules_2_example_set" compatibility="0.2.000" expanded="true" height="82" name="Association Rules to ExampleSet" width="90" x="715" y="136"/>
      <!-- Keep only the three attributes needed for the matrix. -->
      <operator activated="true" class="select_attributes" compatibility="7.4.000" expanded="true" height="82" name="Select Attributes" width="90" x="849" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Premises|Conclusion|Confidence"/>
      </operator>
      <!-- Pivot with Conclusion as the group attribute and Premises as the index attribute. -->
      <operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="782" y="238">
        <parameter key="group_attribute" value="Conclusion"/>
        <parameter key="index_attribute" value="Premises"/>
        <parameter key="consider_weights" value="false"/>
        <parameter key="skip_constant_attributes" value="false"/>
      </operator>
      <!-- Match the "Confidence_" prefix in attribute names; no replacement is given,
           so presumably the prefix is simply removed (confirm against the operator default). -->
      <operator activated="true" class="rename_by_replacing" compatibility="7.4.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="916" y="391">
        <parameter key="replace_what" value="Confidence_"/>
      </operator>
      <!-- Missing values (presumably pairs without a rule) are replaced by zero. -->
      <operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="950" y="289">
        <parameter key="default" value="zero"/>
        <list key="columns"/>
      </operator>
      <connect from_op="Iris" from_port="output" to_op="Discretize by Frequency" to_port="example set input"/>
      <connect from_op="Discretize by Frequency" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
      <connect from_op="Nominal to Binominal" from_port="example set output" to_op="FPGrowth" to_port="example set"/>
      <connect from_op="FPGrowth" from_port="frequent sets" to_op="Create Association Rules" to_port="item sets"/>
      <connect from_op="Create Association Rules" from_port="rules" to_op="Association Rules to ExampleSet" to_port="rules input"/>
      <connect from_op="Association Rules to ExampleSet" from_port="example set" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Pivot" to_port="example set input"/>
      <connect from_op="Pivot" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="108"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0 -
Thank you Edward, that's the kind of influence matrix I was searching for, but is it also possible to have just single attributes in the columns and examples?
Best
Philipp
1