🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

How to remove duplicates from rows

User: "sgnarkhede2016"
New Altair Community Member
Updated by Jocelyn
Hello,
My data looks like this:
RISK HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH

D_CHANNEL  DIGITAL|DIGITAL
DIR OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|INWARD|INWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|INWARD|INWARD|OUTWARD|OUTWARD|INWARD|INWARD|OUTWARD|OUTWARD|OUTWARD|SALE|PURCHASE|OUTWARD|OUTBOUND|OUTBOUND|PURCHASE|PURCHASE|PURCHASE|PURCHASE|SALE|SALE|SALE|SALE|PURCHASE|PURCHASE|SALE|SALE|INWARD|PURCHASE;SALE|PURCHASE;SALE|INWARD;OUTWARD|OUTWARD|INWARD|SALE|PURCHASE|PURCHASE;SALE|PURCHASE;SALE|PURCHASE;SALE;INWARD;OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|SALE|SALE|SALE|OUTWARD|OUTWARD|OUTWARD
I want this result:
RISK  HIGH
D_CHANNEL  DIGITAL
DIR    INWARD|OUTWARD|SALE|PURCHASE

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "BalazsBaranyRM"
    New Altair Community Member
    Accepted Answer
    Hi,

    you can do it with a few operators.

    1. Split the data attribute on \| (the | must be escaped with a backslash because Split works with regular expressions)
    2. De-pivot the resulting data_.+ attributes, which gives a narrow table with one row per value from the split parts
    3. Aggregate, grouping by category and data, to remove duplicates
    4. Aggregate again, grouping by category and concatenating the data values

    Example process:
    <?xml version="1.0" encoding="UTF-8"?><process version="9.8.001">
      <!--
        Example process: remove duplicate values from pipe-delimited lists.
        Operator chain (see the connect elements at the end):
          Create ExampleSet, Split, De-Pivot, Aggregate, Aggregate (2), result 1
        The first Aggregate de-duplicates the (data, category) pairs; the second
        one concatenates the remaining data values per category.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.8.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="-1"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!--
            Sample input, supplied inline as comma separated text.
            input_csv_text holds a header row (category,data) followed by three
            rows: dir, risk and d_channel. Rows are separated by the character
            reference &#10; (a line feed); the data column holds the
            pipe-delimited value lists from the question.
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.8.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="112" y="34">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="category,data&#10;dir,OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|INWARD|INWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|INWARD|INWARD|OUTWARD|OUTWARD|INWARD|INWARD|OUTWARD|OUTWARD|OUTWARD|SALE|PURCHASE|OUTWARD|OUTBOUND|OUTBOUND|PURCHASE|PURCHASE|PURCHASE|PURCHASE|SALE|SALE|SALE|SALE|PURCHASE|PURCHASE|SALE|SALE|INWARD|PURCHASE;SALE|PURCHASE;SALE|INWARD;OUTWARD|OUTWARD|INWARD|SALE|PURCHASE|PURCHASE;SALE|PURCHASE;SALE|PURCHASE;SALE;INWARD;OUTWARD|OUTWARD|OUTWARD|OUTWARD|INWARD|INWARD|SALE|SALE|SALE|OUTWARD|OUTWARD|OUTWARD&#10;risk,HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH|HIGH&#10;d_channel,DIGITAL|DIGITAL"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!--
            Step 1: split the single attribute "data" on the pipe character.
            split_pattern is a regular expression, so the pipe is escaped with
            a backslash. The split parts are the attributes that the De-Pivot
            step below selects with data_.+
            NOTE(review): the sample data also contains semicolon-joined tokens
            such as PURCHASE;SALE. With this pattern they stay as single
            values. If they should be broken up too, a pattern like [|;] would
            presumably be needed; confirm against the desired output.
          -->
          <operator activated="true" class="split" compatibility="9.8.001" expanded="true" height="82" name="Split" width="90" x="246" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="data"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="split_pattern" value="\|"/>
            <parameter key="split_mode" value="ordered_split"/>
          </operator>
          <!--
            Step 2: de-pivot all attributes matching the regular expression
            data_.+ into one attribute named "data", giving a narrow table with
            the values from the split parts. The index attribute is named
            category_nr. keep_missings is false; presumably this drops the
            empty cells of categories with fewer split parts (TODO confirm).
          -->
          <operator activated="true" class="de_pivot" compatibility="9.8.001" expanded="true" height="82" name="De-Pivot" width="90" x="380" y="34">
            <list key="attribute_name">
              <parameter key="data" value="data_.+"/>
            </list>
            <parameter key="index_attribute" value="category_nr"/>
            <parameter key="create_nominal_index" value="false"/>
            <parameter key="keep_missings" value="false"/>
          </operator>
          <!--
            Step 3: remove duplicates. No aggregation attributes are
            configured (the aggregation_attributes list is empty); grouping by
            data and category alone is what collapses repeated values.
          -->
          <operator activated="true" class="aggregate" compatibility="9.8.001" expanded="true" height="82" name="Aggregate" width="90" x="514" y="34">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes"/>
            <parameter key="group_by_attributes" value="data|category"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <!--
            Step 4: rebuild one row per category by applying the
            "concatenation" aggregation to the de-duplicated data values,
            grouped by category.
          -->
          <operator activated="true" class="aggregate" compatibility="9.8.001" expanded="true" height="82" name="Aggregate (2)" width="90" x="648" y="34">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="data" value="concatenation"/>
            </list>
            <parameter key="group_by_attributes" value="category"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <!-- Wiring: each operator feeds the next; Aggregate (2) delivers the process result. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
          <connect from_op="De-Pivot" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
          <connect from_op="Aggregate (2)" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    



    Best regards,
    Balázs