Hello, i was testing the results of pivot in Excel and Rapidminer and i found out that they are completely different, whith those of RM being wrong! Any ideas?
Here is the dataset i used in both cases:
Student |
Item |
Amount |
Id 1 |
Item 1 |
1 |
Id 2 |
Item 2 |
8 |
Id 3 |
Item 1 |
1 |
Id 3 |
Item 1 |
1 |
Id 2 |
Item 2 |
1 |
Id 2 |
Item 1 |
1 |
Id 3 |
Item 1 |
1 |
Id 2 |
Item 1 |
1 |
Id 2 |
Item 2 |
5 |
Id 2 |
Item 1 |
1 |
and here is the result from Pivot operation in Excel (the correct one)
Row Labels |
Item 1 |
Item 2 |
Id 1 |
1 |
|
Id 2 |
3 |
14 |
Id 3 |
3 |
|
and here is the result from the Pivot operator in RM. i selected the student attr as ID and Itema s Index and SUM as weight aggregation.
Row No |
Item1 |
Item 2 |
1 |
1.0 |
? |
2 |
1.0 |
5.0 |
3 |
1.0 |
? |
here is the process XML
<?xml version="1.0" encoding="UTF-8"?><process version="7.4.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="generate_transaction_data" compatibility="7.4.000" expanded="true" height="68" name="Generate Transaction Data" width="90" x="45" y="340">
<parameter key="number_transactions" value="10"/>
<parameter key="number_customers" value="3"/>
<parameter key="number_items" value="2"/>
<parameter key="number_clusters" value="2"/>
</operator>
<operator activated="true" class="multiply" compatibility="7.4.000" expanded="true" height="103" name="Multiply" width="90" x="246" y="289"/>
<operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="246" y="85">
<parameter key="group_attribute" value="Id"/>
<parameter key="index_attribute" value="Item"/>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="7.4.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="447" y="34">
<parameter key="replace_what" value="Amount_"/>
</operator>
<connect from_op="Generate Transaction Data" from_port="output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Pivot" to_port="example set input"/>
<connect from_op="Multiply" from_port="output 2" to_port="result 2"/>
<connect from_op="Pivot" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
<connect from_op="Rename by Replacing" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>