[SOLVED]How to transpose/de-pivot a Utility Matrix
Chiko
New Altair Community Member
How do I turn a matrix such as this one:
into a new transposed one that looks like this:
Thank you. Apologies if this has been asked before, but my search couldn't bring the results I was looking for.
Customer ID | Product1 | Product2 | Product3 | Product4 | |
1 | 1 | 0 | 3 | 0 | 1 |
2 | 0 | 1 | 2 | 0 | 1 |
3 | 1 | 0 | 0 | 0 | 4 |
into a new transposed one that looks like this:
CustomerID | ProductID | Qty |
1 | Product1 | 1 |
1 | Product2 | 0 |
1 | Product3 | 3 |
1 | Product4 | 0 |
1 | Product5 | 1 |
2 | Product1 | 0 |
2 | Product2 | 1 |
2 | Product3 | 2 |
2 | Product4 | 0 |
2 | Product5 | 1 |
3 | Product1 | 1 |
3 | Product2 | 0 |
3 | Product3 | 0 |
3 | Product4 | 0 |
3 | Product5 | 4 |
Tagged:
0
Answers
-
Have you had a look on De-Pivot?
~Martin0 -
Hi Martin,
Yes thanks I did look at De-Pivot and as far as I can see, it is failing to turn each column name/attribute name into a row instance. I have mucked around with transpose as well with no success. May I also add that in my real data set the product attributes range from Product1....Product1001.
Regards,
Chiko0 -
Hi,
Have a look at this process. looks like it does what you like to have.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="7.0.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess" width="90" x="45" y="85">
<process expanded="true">
<operator activated="true" class="loop" compatibility="7.0.001" expanded="true" height="68" name="Loop" width="90" x="45" y="34">
<parameter key="set_iteration_macro" value="true"/>
<parameter key="iterations" value="5"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
<parameter key="repository_entry" value="//Samples/data/Iris"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
<list key="function_descriptions">
<parameter key="Product%{iteration}" value="round(rand()*50)"/>
</list>
</operator>
<operator activated="true" class="generate_id" compatibility="7.0.001" expanded="true" height="82" name="Generate ID" width="90" x="447" y="34"/>
<operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="581" y="34">
<process expanded="true">
<operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
<parameter key="name" value="data"/>
</operator>
<operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="246" y="34">
<list key="key_attributes"/>
</operator>
<operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="380" y="34">
<parameter key="name" value="data"/>
</operator>
<connect from_port="in 1" to_op="Join" to_port="left"/>
<connect from_op="Recall" from_port="result" to_op="Join" to_port="right"/>
<connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="45" y="34">
<parameter key="name" value="data"/>
</operator>
<connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Iris" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="179" y="34">
<parameter key="name" value="data"/>
</operator>
<operator activated="false" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<connect from_op="Recall (2)" from_port="result" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="order_attributes" compatibility="7.0.001" expanded="true" height="82" name="Reorder Attributes" width="90" x="179" y="85">
<parameter key="sort_mode" value="alphabetically"/>
<description align="center" color="transparent" colored="false" width="126">Make sure that it's the same order. Might be not needed</description>
</operator>
<operator activated="true" class="de_pivot" compatibility="7.0.001" expanded="true" height="82" name="De-Pivot" width="90" x="313" y="85">
<list key="attribute_name">
<parameter key="Quantity" value="Product.*"/>
</list>
<parameter key="index_attribute" value="Product"/>
</operator>
<operator activated="true" class="numerical_to_polynominal" compatibility="7.0.001" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="447" y="85"/>
<operator activated="true" class="replace" compatibility="7.0.001" expanded="true" height="82" name="Replace" width="90" x="581" y="85">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Product"/>
<parameter key="replace_what" value="(\d)"/>
<parameter key="replace_by" value="Product_$1"/>
</operator>
<connect from_op="Subprocess" from_port="out 1" to_op="Reorder Attributes" to_port="example set input"/>
<connect from_op="Reorder Attributes" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
<connect from_op="De-Pivot" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
<connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Replace" to_port="example set input"/>
<connect from_op="Replace" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<description align="center" color="yellow" colored="false" height="202" resized="true" width="282" x="431" y="13">Make the id generated by depivot more readable</description>
</process>
</operator>
</process>0 -
Thanks Martin, having looked at your process, I have managed to sort of get insight into how to properly set the parameters for the de-pivot operator. However for some reason instead of getting my attribute names as instances, I am now getting a serial identifier that 1-n, where n is the number of attributes, which would work if I didn't care about the attribute names. However in the business domain each column/attribute is a Unique Product Code(UPC) or SKU(in the USA) and SKUs are of the form 1002635, 4567889, 1282739, etc. So although I had simplified my problem in Product1-Product5, in reality I have got SKUs instead of the string literals Product1-Product5. Thus with my new de-pivot I am getting the rows created ok, but instead of picking the SKUs as they are in the column header, serial numbers are being used. I hope I am making sense...
In short where you are prefixing the serial with Product_ in the above process is where I am getting stuck, I simply need to read the column header(SKU) as it is, rather than generate a serial and then use regular expressions. In my case, I must take the column names verbatim.
Regards,
Chiko0 -
so what you need is a table mapping the attribute names to it's positions and join it back on.
That should work:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="7.0.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess" width="90" x="45" y="85">
<process expanded="true">
<operator activated="true" class="loop" compatibility="7.0.001" expanded="true" height="68" name="Loop" width="90" x="45" y="34">
<parameter key="set_iteration_macro" value="true"/>
<parameter key="iterations" value="5"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
<parameter key="repository_entry" value="//Samples/data/Iris"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
<list key="function_descriptions">
<parameter key="Product%{iteration}" value="round(rand()*50)"/>
</list>
</operator>
<operator activated="true" class="generate_id" compatibility="7.0.001" expanded="true" height="82" name="Generate ID" width="90" x="447" y="34"/>
<operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="581" y="34">
<process expanded="true">
<operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
<parameter key="name" value="data"/>
</operator>
<operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="246" y="34">
<list key="key_attributes"/>
</operator>
<operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="380" y="34">
<parameter key="name" value="data"/>
</operator>
<connect from_port="in 1" to_op="Join" to_port="left"/>
<connect from_op="Recall" from_port="result" to_op="Join" to_port="right"/>
<connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="45" y="34">
<parameter key="name" value="data"/>
</operator>
<connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Iris" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="179" y="34">
<parameter key="name" value="data"/>
</operator>
<operator activated="false" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<connect from_op="Recall (2)" from_port="result" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="order_attributes" compatibility="7.0.001" expanded="true" height="82" name="Reorder Attributes" width="90" x="179" y="85">
<parameter key="sort_mode" value="alphabetically"/>
<description align="center" color="transparent" colored="false" width="126">Make sure that it's the same order. Might be not needed</description>
</operator>
<operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="313" y="85"/>
<operator activated="true" class="transpose" compatibility="7.0.001" expanded="true" height="82" name="Transpose" width="90" x="447" y="238"/>
<operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="581" y="238">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
</operator>
<operator activated="true" class="rename" compatibility="7.0.001" expanded="true" height="82" name="Rename" width="90" x="715" y="238">
<parameter key="old_name" value="id"/>
<parameter key="new_name" value="ProductName"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="de_pivot" compatibility="7.0.001" expanded="true" height="82" name="De-Pivot" width="90" x="715" y="85">
<list key="attribute_name">
<parameter key="Quantity" value="Product.*"/>
</list>
<parameter key="index_attribute" value="ProductPosition"/>
</operator>
<operator activated="true" class="set_role" compatibility="7.0.001" expanded="true" height="82" name="Set Role" width="90" x="849" y="238">
<parameter key="attribute_name" value="ProductName"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_id" compatibility="7.0.001" expanded="true" height="82" name="Generate ID (2)" width="90" x="983" y="238"/>
<operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join (2)" width="90" x="1117" y="85">
<parameter key="use_id_attribute_as_key" value="false"/>
<list key="key_attributes">
<parameter key="ProductPosition" value="id"/>
</list>
</operator>
<connect from_op="Subprocess" from_port="out 1" to_op="Reorder Attributes" to_port="example set input"/>
<connect from_op="Reorder Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="De-Pivot" to_port="example set input"/>
<connect from_op="Multiply" from_port="output 2" to_op="Transpose" to_port="example set input"/>
<connect from_op="Transpose" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
<connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="De-Pivot" from_port="example set output" to_op="Join (2)" to_port="left"/>
<connect from_op="Set Role" from_port="example set output" to_op="Generate ID (2)" to_port="example set input"/>
<connect from_op="Generate ID (2)" from_port="example set output" to_op="Join (2)" to_port="right"/>
<connect from_op="Join (2)" from_port="join" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<description align="center" color="yellow" colored="false" height="237" resized="true" width="686" x="427" y="183">Generate a table with position and name of the header</description>
</process>
</operator>
</process>0 -
Hi Martin,
You are a genius! The mapping table worked a treat.
Regards,
Chiko0