Somehow the Pivot operator still confuses me
Edit:
Sorry for this long thread, but I was able to solve my problem this way:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process chain: Read CSV, then Aggregate, then Pivot; the Pivot output is wired to result port 1. -->
<process version="5.0">
  <context>
    <input/>
    <output>
      <location>Model</location>
    </output>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Process">
    <process expanded="true" height="430" width="418">
      <!-- Step 1: load the test data from D:\test.csv. -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.0.8"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="45"
                y="300">
        <parameter key="file_name" value="D:\test.csv"/>
      </operator>
      <!-- Step 2: aggregate Amount with "sum", using Customer and Item as the group-by attributes. -->
      <operator activated="true"
                class="aggregate"
                compatibility="5.0.8"
                expanded="true"
                height="76"
                name="Aggregate"
                width="90"
                x="45"
                y="75">
        <list key="aggregation_attributes">
          <parameter key="Amount" value="sum"/>
        </list>
        <parameter key="group_by_attributes" value="Customer|Item"/>
      </operator>
      <!-- Step 3: pivot the aggregated data with Customer as group attribute and Item as index attribute. -->
      <operator activated="true"
                class="pivot"
                compatibility="5.0.0"
                expanded="true"
                height="76"
                name="Pivot"
                width="90"
                x="246"
                y="75">
        <parameter key="group_attribute" value="Customer"/>
        <parameter key="index_attribute" value="Item"/>
        <parameter key="consider_weights" value="false"/>
        <!-- NOTE(review): presumably has no effect while consider_weights is "false"; confirm against the operator docs. -->
        <parameter key="weight_aggregation" value="standard_deviation"/>
      </operator>
      <!-- Wiring between the operators and to the process result port. -->
      <connect from_op="Read CSV"
               from_port="output"
               to_op="Aggregate"
               to_port="example set input"/>
      <connect from_op="Aggregate"
               from_port="example set output"
               to_op="Pivot"
               to_port="example set input"/>
      <connect from_op="Pivot"
               from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
This thread can be closed.
Somehow that operator does not do what it should, and it confuses me a lot.
My mind tells me that it should work like this:
Customer | Item | Amount |
C1 | Item1 | 3 |
C2 | Item1 | 3 |
C1 | Item2 | 5 |
C1 | Item1 | 1 |
If I use the Pivot operator with Customer as the group attribute and Item as the index attribute, it should normally create something like this when I use "sum" as the weight aggregation:
Customer | Item1_Amount | Item2_Amount |
C1 | 4 | 5 |
C2 | 3 | ? |
For some reason, the output looks like this:
Amount_Item1 Amount_Item2
1 5
3 ?
That's what is in my CSV for the test:
Customer;Item;Amount
C1;Item1;3
C2;Item1;3
C1;Item2;5
C1;Item1;1
And that's the XML:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process chain: Read CSV, then Set Role (2), then Pivot; the Pivot output is wired to result port 1. -->
<!-- There is no aggregation operator in front of Pivot in this process. -->
<process version="5.0">
  <context>
    <input/>
    <output>
      <location>Model</location>
    </output>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Process">
    <process expanded="true" height="594" width="835">
      <!-- Step 1: load the test data from D:\test.csv. -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.0.8"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="112"
                y="255">
        <parameter key="file_name" value="D:\test.csv"/>
      </operator>
      <!-- Step 2: give the Customer attribute the target role "label". -->
      <operator activated="true"
                class="set_role"
                compatibility="5.0.8"
                expanded="true"
                height="76"
                name="Set Role (2)"
                width="90"
                x="265"
                y="126">
        <parameter key="name" value="Customer"/>
        <parameter key="target_role" value="label"/>
      </operator>
      <!-- Step 3: pivot with Customer as group attribute and Item as index attribute; no other parameters are set here. -->
      <operator activated="true"
                class="pivot"
                compatibility="5.0.0"
                expanded="true"
                height="76"
                name="Pivot"
                width="90"
                x="313"
                y="30">
        <parameter key="group_attribute" value="Customer"/>
        <parameter key="index_attribute" value="Item"/>
      </operator>
      <!-- Wiring between the operators and to the process result port. -->
      <connect from_op="Read CSV"
               from_port="output"
               to_op="Set Role (2)"
               to_port="example set input"/>
      <connect from_op="Set Role (2)"
               from_port="example set output"
               to_op="Pivot"
               to_port="example set input"/>
      <connect from_op="Pivot"
               from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0