Generate combination for all values in example

I'm currently trying to get a list of attributes to be two-columned.
Currently, the data looks like this
synonym1;synonym2;synonym3;synonymX
tv;television;led-tv;plasma-tv;
cable;cord;energy cable
So there isn't necessarily the same number of attributes / synonyms in each example.
What I want is:
synonym1;synonym2
tv;television
tv;led-tv
tv;plasma-tv
television;led-tv
television;plasma-tv
...
cable;cord
cord;energy cable
...
I guess it is a combination of Loops and Aggregations.
Can anyone please help me?
Thank you very much in advance!
Best Answer
-
Hey onlin360,
Can I assume that tv is always in the first attribute for all tv synonyms?
If yes, you can do loop values, transpose, cartesian product and filter out the duplicates - I think.
~Martin
0
Answers
-
Hey onlin360,
Can I assume that tv is always in the first attribute for all tv synonyms?
If yes, you can do loop values, transpose, cartesian product and filter out the duplicates - I think.
~Martin
0 -
Hi Martin!
Well, there is always a word in the first attribute, but there are about 950 rows. And each of them is a different pair of synonyms, so not always "tv".
Regards,
Steven
0 -
Hi,
have a look at the attached process. That should work.
~Martin
<?xml version="1.0" encoding="UTF-8"?>
<!-- RapidMiner process: loops over the values of syn1 and, for each one, pairs up the synonyms found in the matching row(s). -->
<process version="7.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.3.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Demo input: two rows with the attributes syn1..syn4. Swap this subprocess for the real data source. -->
      <operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
        <process expanded="true">
          <!-- The values are expressions, so string literals carry their own quotes, escaped as &quot; inside the XML attribute. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;tv&quot;"/>
              <parameter key="syn2" value="&quot;television&quot;"/>
              <parameter key="syn3" value="&quot;led-tv&quot;"/>
              <parameter key="syn4" value="&quot;plasma-tv&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;cable&quot;"/>
              <parameter key="syn2" value="&quot;cord&quot;"/>
              <parameter key="syn3" value="&quot;hawser&quot;"/>
              <parameter key="syn4" value="&quot;lead&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.3.000" expanded="true" height="103" name="Append" width="90" x="179" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Get a data set</description>
      </operator>
      <!-- One iteration per value of syn1; the current value is read through the loop_value macro below. -->
      <operator activated="true" class="loop_values" compatibility="7.3.000" expanded="true" height="82" name="Loop Values" width="90" x="179" y="34">
        <parameter key="attribute" value="syn1"/>
        <process expanded="true">
          <!-- Keep only the examples whose syn1 equals the current loop value (condition_class is "expression", so parameter_expression is the active condition). -->
          <operator activated="true" class="filter_examples" compatibility="7.3.000" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="34">
            <parameter key="parameter_expression" value="syn1 == %{loop_value}"/>
            <parameter key="condition_class" value="expression"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="syn1.equals.%{loop_attribute}"/>
            </list>
          </operator>
          <!-- Turn the synonym columns of the selected row into rows. -->
          <operator activated="true" class="transpose" compatibility="7.3.000" expanded="true" height="82" name="Transpose" width="90" x="246" y="34"/>
          <!-- Inverted single selection: drops the "id" attribute and keeps everything else. -->
          <operator activated="true" class="select_attributes" compatibility="7.3.000" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Cross the synonym list with a copy of itself to obtain every ordered pair. -->
          <operator activated="true" class="multiply" compatibility="7.3.000" expanded="true" height="103" name="Multiply" width="90" x="514" y="34"/>
          <operator activated="true" class="cartesian_product" compatibility="7.3.000" expanded="true" height="82" name="Cartesian" width="90" x="648" y="34">
            <parameter key="remove_double_attributes" value="false"/>
          </operator>
          <operator activated="true" class="rename" compatibility="7.3.000" expanded="true" height="82" name="Rename" width="90" x="782" y="34">
            <parameter key="old_name" value="att_1"/>
            <parameter key="new_name" value="att1"/>
            <list key="rename_additional_attributes">
              <parameter key="att_1_from_ES2" value="att2"/>
            </list>
          </operator>
          <!-- dup1 is an order-independent key: the lexicographically smaller value always comes first, so (a,b) and (b,a) share one key. -->
          <!-- The comparison against 0 is explicit so that the if-condition is a boolean and not the raw numeric result of compare(). -->
          <!-- NOTE(review): pairs of a value with itself (e.g. tv;tv) appear to survive Remove Duplicates; the deactivated "Filter Examples (2)" looks intended to drop them but is not connected. Confirm before relying on the output. -->
          <operator activated="true" class="generate_attributes" compatibility="7.3.000" expanded="true" height="82" name="Generate Attributes" width="90" x="916" y="34">
            <list key="function_descriptions">
              <parameter key="dup1" value="if(compare(att1,att2)&lt;0,concat(att1,att2),concat(att2,att1))"/>
            </list>
          </operator>
          <operator activated="true" class="remove_duplicates" compatibility="7.3.000" expanded="true" height="82" name="Remove Duplicates" width="90" x="1050" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="dup1"/>
          </operator>
          <!-- Deactivated and unconnected; it refers to the attribute names as they are before the Rename step. -->
          <operator activated="false" class="filter_examples" compatibility="7.3.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="782" y="187">
            <parameter key="parameter_expression" value="att_1==att_1_from_ES2"/>
            <parameter key="condition_class" value="expression"/>
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list"/>
            <description align="center" color="transparent" colored="false" width="126">Remove duplicates</description>
          </operator>
          <!-- Drop the helper key so only att1 and att2 are returned. -->
          <operator activated="true" class="select_attributes" compatibility="7.3.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="1184" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="dup1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Cartesian" to_port="left"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Cartesian" to_port="right"/>
          <connect from_op="Cartesian" from_port="join" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stack the per-iteration pair tables into one result. -->
      <operator activated="true" class="append" compatibility="7.3.000" expanded="true" height="82" name="Append (2)" width="90" x="380" y="34"/>
      <connect from_op="Subprocess" from_port="out 1" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 1" to_op="Append (2)" to_port="example set 1"/>
      <connect from_op="Append (2)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0 -
@BalazsBarany educated me that there is a much easier way to do this using De-Pivot. Attached is a process demonstrating it.
~Martin
<?xml version="1.0" encoding="UTF-8"?>
<!-- RapidMiner process: renames syn1 to id and de-pivots the remaining syn* attributes into one column. -->
<process version="7.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.3.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Demo input: two rows with the attributes syn1..syn4. Swap this subprocess for the real data source. -->
      <operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
        <process expanded="true">
          <!-- The values are expressions, so string literals carry their own quotes, escaped as &quot; inside the XML attribute. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;tv&quot;"/>
              <parameter key="syn2" value="&quot;television&quot;"/>
              <parameter key="syn3" value="&quot;led-tv&quot;"/>
              <parameter key="syn4" value="&quot;plasma-tv&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;cable&quot;"/>
              <parameter key="syn2" value="&quot;cord&quot;"/>
              <parameter key="syn3" value="&quot;hawser&quot;"/>
              <parameter key="syn4" value="&quot;lead&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.3.000" expanded="true" height="103" name="Append" width="90" x="179" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Get a data set</description>
      </operator>
      <!-- Rename syn1 to id so that it no longer matches the syn.* pattern used by De-Pivot. -->
      <operator activated="true" class="rename" compatibility="7.3.000" expanded="true" height="82" name="Rename (2)" width="90" x="246" y="34">
        <parameter key="old_name" value="syn1"/>
        <parameter key="new_name" value="id"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Collapse every attribute matching syn.* into a single attribute named "attribute"; "xx" is the index attribute. -->
      <operator activated="true" class="de_pivot" compatibility="7.3.000" expanded="true" height="82" name="De-Pivot" width="90" x="380" y="34">
        <list key="attribute_name">
          <parameter key="attribute" value="syn.*"/>
        </list>
        <parameter key="index_attribute" value="xx"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Rename (2)" to_port="example set input"/>
      <connect from_op="Rename (2)" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
      <connect from_op="De-Pivot" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
1 -
If you have your data in the described format (value;value1;value2 etc.), the easiest way to get Martin's data structure is with the Split operator.
0