A program to recognize and reward our most engaged community members
Hey onlin360,
Can I assume that "tv" is always in the first attribute for all tv synonyms?
If yes, I think you can use Loop Values, Transpose, and Cartesian Product, and then filter out the duplicates.
~Martin
Hi Martin! Well, there is always a word in the first attribute, but there are about 950 rows.
And each of them is a different pair of synonyms, so not always "tv".
Regards,
Steven
Hi,
have a look at the attached process. That should work.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample process: loops over the values of syn1, transposes the matching rows,
     and pairs the resulting values with each other through a cartesian product. -->
<process version="7.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.3.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Builds the sample data: two synonym groups (syn1..syn4) appended into one example set. -->
      <operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <!-- The values are quoted string literals; the inner quotes must be escaped
                 as &quot; so that the XML attribute stays well-formed. -->
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;tv&quot;"/>
              <parameter key="syn2" value="&quot;television&quot;"/>
              <parameter key="syn3" value="&quot;led-tv&quot;"/>
              <parameter key="syn4" value="&quot;plasma-tv&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;cable&quot;"/>
              <parameter key="syn2" value="&quot;cord&quot;"/>
              <parameter key="syn3" value="&quot;hawser&quot;"/>
              <parameter key="syn4" value="&quot;lead&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.3.000" expanded="true" height="103" name="Append" width="90" x="179" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Get a data set</description>
      </operator>
      <!-- Iterates over the values of syn1; the inner process runs once per value. -->
      <operator activated="true" class="loop_values" compatibility="7.3.000" expanded="true" height="82" name="Loop Values" width="90" x="179" y="34">
        <parameter key="attribute" value="syn1"/>
        <process expanded="true">
          <!-- NOTE(review): the expression compares syn1 with the unquoted loop_value macro,
               while filters_list refers to loop_attribute. Confirm which one is evaluated
               and whether the macro needs surrounding quotes. -->
          <operator activated="true" class="filter_examples" compatibility="7.3.000" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="34">
            <parameter key="parameter_expression" value="syn1 == %{loop_value}"/>
            <parameter key="condition_class" value="expression"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="syn1.equals.%{loop_attribute}"/>
            </list>
          </operator>
          <operator activated="true" class="transpose" compatibility="7.3.000" expanded="true" height="82" name="Transpose" width="90" x="246" y="34"/>
          <!-- Inverted single-attribute selection: keeps everything except "id". -->
          <operator activated="true" class="select_attributes" compatibility="7.3.000" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="id"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- The same example set is fed to both inputs of the cartesian product. -->
          <operator activated="true" class="multiply" compatibility="7.3.000" expanded="true" height="103" name="Multiply" width="90" x="514" y="34"/>
          <operator activated="true" class="cartesian_product" compatibility="7.3.000" expanded="true" height="82" name="Cartesian" width="90" x="648" y="34">
            <parameter key="remove_double_attributes" value="false"/>
          </operator>
          <operator activated="true" class="rename" compatibility="7.3.000" expanded="true" height="82" name="Rename" width="90" x="782" y="34">
            <parameter key="old_name" value="att_1"/>
            <parameter key="new_name" value="att1"/>
            <list key="rename_additional_attributes">
              <parameter key="att_1_from_ES2" value="att2"/>
            </list>
          </operator>
          <!-- dup1 is a helper key built from att1 and att2; it is used by Remove Duplicates
               and dropped again by Select Attributes (2). -->
          <operator activated="true" class="generate_attributes" compatibility="7.3.000" expanded="true" height="82" name="Generate Attributes" width="90" x="916" y="34">
            <list key="function_descriptions">
              <parameter key="dup1" value="if(compare(att1,att2),concat(att1,att2),concat(att2,att1))"/>
            </list>
          </operator>
          <operator activated="true" class="remove_duplicates" compatibility="7.3.000" expanded="true" height="82" name="Remove Duplicates" width="90" x="1050" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="dup1"/>
          </operator>
          <!-- Deactivated and not connected; kept from the original process. -->
          <operator activated="false" class="filter_examples" compatibility="7.3.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="782" y="187">
            <parameter key="parameter_expression" value="att_1==att_1_from_ES2"/>
            <parameter key="condition_class" value="expression"/>
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list"/>
            <description align="center" color="transparent" colored="false" width="126">Remove duplicates</description>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="7.3.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="1184" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="dup1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Cartesian" to_port="left"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Cartesian" to_port="right"/>
          <connect from_op="Cartesian" from_port="join" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Collects the per-iteration outputs of Loop Values into a single result. -->
      <operator activated="true" class="append" compatibility="7.3.000" expanded="true" height="82" name="Append (2)" width="90" x="380" y="34"/>
      <connect from_op="Subprocess" from_port="out 1" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 1" to_op="Append (2)" to_port="example set 1"/>
      <connect from_op="Append (2)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
@BalazsBarany pointed out to me that there is a much easier way to do this using De-Pivot. Attached is a process demonstrating it.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample process: renames syn1 to id and de-pivots the remaining syn* attributes. -->
<process version="7.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.3.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Builds the sample data: two synonym groups (syn1..syn4) appended into one example set. -->
      <operator activated="true" class="subprocess" compatibility="7.3.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <!-- The values are quoted string literals; the inner quotes must be escaped
                 as &quot; so that the XML attribute stays well-formed. -->
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;tv&quot;"/>
              <parameter key="syn2" value="&quot;television&quot;"/>
              <parameter key="syn3" value="&quot;led-tv&quot;"/>
              <parameter key="syn4" value="&quot;plasma-tv&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="syn1" value="&quot;cable&quot;"/>
              <parameter key="syn2" value="&quot;cord&quot;"/>
              <parameter key="syn3" value="&quot;hawser&quot;"/>
              <parameter key="syn4" value="&quot;lead&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.3.000" expanded="true" height="103" name="Append" width="90" x="179" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Get a data set</description>
      </operator>
      <!-- After this rename the first synonym is called "id", so the syn.* pattern
           used by De-Pivot no longer matches it. -->
      <operator activated="true" class="rename" compatibility="7.3.000" expanded="true" height="82" name="Rename (2)" width="90" x="246" y="34">
        <parameter key="old_name" value="syn1"/>
        <parameter key="new_name" value="id"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- De-pivots every attribute matching syn.* into a single attribute named "attribute";
           "xx" is the name given to the index attribute. -->
      <operator activated="true" class="de_pivot" compatibility="7.3.000" expanded="true" height="82" name="De-Pivot" width="90" x="380" y="34">
        <list key="attribute_name">
          <parameter key="attribute" value="syn.*"/>
        </list>
        <parameter key="index_attribute" value="xx"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Rename (2)" to_port="example set input"/>
      <connect from_op="Rename (2)" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
      <connect from_op="De-Pivot" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
If you have your data in the described format (value;value1;value2 etc.), the easiest way to get Martin's data structure is with the Split operator.