Concatenation duplication

User: "asn4293"
New Altair Community Member
Updated by Jocelyn

I want to remove duplication of the data after using Concat operator 

Original Data 

Id name skills department language experience
1 John x Fin FR 3
2 Mickeal y HR ENG 5
3 Sean y HR ENG 1
1 John x Fin ENG 3


Should Look Like

Id name skills department language experience
1 John x Fin FR|ENG 3
2 Mickeal y HR ENG 5
3 Sean y HR ENG 1


 But After using Concat it is looking like 

Id name skills department language experience
1 John|John x|x Fin|Fin FR|ENG 3
2 Mickeal y HR ENG 5
3 Sean y HR ENG 1

 

<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve" width="90" x="112" y="85">
<parameter key="repository_entry" value="//Local Repository/data/Employees"/>
</operator>
<operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
<parameter key="attribute_name" value="Id"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate" width="90" x="447" y="85">
<list key="aggregation_attributes">
<parameter key="department" value="concatenation"/>
<parameter key="language" value="concatenation"/>
<parameter key="name" value="concatenation"/>
<parameter key="skills" value="concatenation"/>
</list>
<parameter key="group_by_attributes" value="Id"/>
</operator>
<connect from_op="Retrieve" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
<connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>

This is file you can work on 
https://drive.google.com/open?id=1evx7JGzVmx0f6VO1fNMFG5tBGmB-xKVN

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "Edin_Klapic"
    New Altair Community Member
    Accepted Answer

    Hi @asn4293,

     

    For my tests, I created an Excel file only with the last table you posted which is the equivalent of your dataset but without the ID.

    Below you find the XML for a working process with your dataset.

     

    By using the Operator Work on Subset and only selecting the Mobility Region for that you can overcome the issue with the User Sys ID.

     

    Happy Mining,

    Edin

     

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
    <parameter key="excel_file" value="C:\Users\EdinKlapic\Desktop\MobilityConcat.xlsx"/>
    <parameter key="imported_cell_range" value="A1:B14065"/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="User Sys ID.true.integer.id"/>
    <parameter key="1" value="Mobility Region.true.polynominal.attribute"/>
    </list>
    </operator>
    <operator activated="true" class="loop_examples" compatibility="8.1.003" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
    <process expanded="true">
    <operator activated="false" breakpoints="after" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates" width="90" x="581" y="595"/>
    <operator activated="false" class="numerical_to_polynominal" compatibility="8.1.003" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="45" y="187">
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <operator activated="false" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range" width="90" x="380" y="136">
    <parameter key="first_example" value="%{example}"/>
    <parameter key="last_example" value="%{example}"/>
    </operator>
    <operator activated="false" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split" width="90" x="514" y="136">
    <parameter key="split_pattern" value="\|"/>
    </operator>
    <operator activated="false" class="de_pivot" compatibility="8.1.003" expanded="true" height="82" name="De-Pivot" width="90" x="648" y="136">
    <list key="attribute_name">
    <parameter key="Region" value=".*"/>
    </list>
    <parameter key="index_attribute" value="ID"/>
    <parameter key="create_nominal_index" value="true"/>
    </operator>
    <operator activated="false" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates (2)" width="90" x="313" y="289">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Region"/>
    </operator>
    <operator activated="false" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="289">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Region"/>
    </operator>
    <operator activated="false" breakpoints="after" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose" width="90" x="581" y="289"/>
    <operator activated="false" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="715" y="289">
    <list key="aggregation_attributes">
    <parameter key="att_1" value="concatenation"/>
    </list>
    <parameter key="only_distinct" value="true"/>
    </operator>
    <operator activated="true" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range (3)" width="90" x="45" y="34">
    <parameter key="first_example" value="%{example}"/>
    <parameter key="last_example" value="%{example}"/>
    </operator>
    <operator activated="true" class="work_on_subset" compatibility="8.1.003" expanded="true" height="82" name="Work on Subset" width="90" x="179" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Mobility Region"/>
    <parameter key="deliver_inner_results" value="true"/>
    <process expanded="true">
    <operator activated="true" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split (2)" width="90" x="45" y="34">
    <parameter key="split_pattern" value="\|"/>
    </operator>
    <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose (2)" width="90" x="179" y="34"/>
    <operator activated="true" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates (3)" width="90" x="313" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="att_1"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="447" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="id"/>
    <parameter key="invert_selection" value="true"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate (2)" width="90" x="581" y="34">
    <list key="aggregation_attributes">
    <parameter key="att_1" value="concatenation"/>
    </list>
    <parameter key="only_distinct" value="true"/>
    </operator>
    <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
    <parameter key="old_name" value="concat(att_1)"/>
    <parameter key="new_name" value="Mobility Region"/>
    <list key="rename_additional_attributes"/>
    </operator>
    <connect from_port="exampleSet" to_op="Split (2)" to_port="example set input"/>
    <connect from_op="Split (2)" from_port="example set output" to_op="Transpose (2)" to_port="example set input"/>
    <connect from_op="Transpose (2)" from_port="example set output" to_op="Remove Duplicates (3)" to_port="example set input"/>
    <connect from_op="Remove Duplicates (3)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
    <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
    <connect from_op="Aggregate (2)" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_port="example set"/>
    <portSpacing port="source_exampleSet" spacing="0"/>
    <portSpacing port="sink_example set" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    </operator>
    <connect from_port="example set" to_op="Filter Example Range (3)" to_port="example set input"/>
    <connect from_op="Filter Example Range" from_port="example set output" to_op="Split" to_port="example set input"/>
    <connect from_op="Split" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
    <connect from_op="De-Pivot" from_port="example set output" to_op="Remove Duplicates (2)" to_port="example set input"/>
    <connect from_op="Remove Duplicates (2)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Transpose" to_port="example set input"/>
    <connect from_op="Transpose" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
    <connect from_op="Filter Example Range (3)" from_port="example set output" to_op="Work on Subset" to_port="example set"/>
    <connect from_op="Work on Subset" from_port="example set" to_port="output 1"/>
    <portSpacing port="source_example set" spacing="0"/>
    <portSpacing port="sink_example set" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="447" y="34"/>
    <connect from_op="Read Excel" from_port="output" to_op="Loop Examples" to_port="example set"/>
    <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
    <connect from_op="Append" from_port="merged set" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>