Merge of multiple data tables with overlapping attributes

StefanRei
StefanRei New Altair Community Member
edited November 2024 in Community Q&A
Hello everyone,

i've got multiple .csv-files containing one row of data. The Attributes are overlapping but not fully equal.
I would like to have a data table with all possibles Attributes containing all rows and NAs for the missing values.

Do you've got any suggestions?
Thanks!

Best Answer

  • varunm1
    varunm1 New Altair Community Member
    Answer ✓
    Hello @StefanRei

    You can use, loop files and connect it to append (Superset). That will do what you need and will show missing values. Sample code below.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.3.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.3.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="concurrency:loop_files" compatibility="9.3.001" expanded="true" height="82" name="Loop Files" width="90" x="179" y="136">
            <parameter key="directory" value="F:\RM\Union"/>
            <parameter key="filter_type" value="glob"/>
            <parameter key="recursive" value="false"/>
            <parameter key="enable_macros" value="false"/>
            <parameter key="macro_for_file_name" value="file_name"/>
            <parameter key="macro_for_file_type" value="file_type"/>
            <parameter key="macro_for_folder_name" value="folder_name"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <operator activated="true" class="read_csv" compatibility="9.3.001" expanded="true" height="68" name="Read CSV" width="90" x="380" y="34">
                <parameter key="column_separators" value=","/>
                <parameter key="trim_lines" value="false"/>
                <parameter key="use_quotes" value="true"/>
                <parameter key="quotes_character" value="&quot;"/>
                <parameter key="escape_character" value="\"/>
                <parameter key="skip_comments" value="false"/>
                <parameter key="comment_characters" value="#"/>
                <parameter key="starting_row" value="1"/>
                <parameter key="parse_numbers" value="true"/>
                <parameter key="decimal_character" value="."/>
                <parameter key="grouped_digits" value="false"/>
                <parameter key="grouping_character" value=","/>
                <parameter key="infinity_representation" value=""/>
                <parameter key="date_format" value=""/>
                <parameter key="first_row_as_names" value="true"/>
                <list key="annotations"/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
                <parameter key="encoding" value="SYSTEM"/>
                <parameter key="read_all_values_as_polynominal" value="false"/>
                <list key="data_set_meta_data_information"/>
                <parameter key="read_not_matching_values_as_missings" value="true"/>
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
              </operator>
              <connect from_port="file object" to_op="Read CSV" to_port="file"/>
              <connect from_op="Read CSV" from_port="output" to_port="output 1"/>
              <portSpacing port="source_file object" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.0.001" expanded="true" height="82" name="Append (Superset)" width="90" x="447" y="136"/>
          <connect from_op="Loop Files" from_port="output 1" to_op="Append (Superset)" to_port="example set 1"/>
          <connect from_op="Append (Superset)" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    
    Hope this helps.

Answers

  • varunm1
    varunm1 New Altair Community Member
    Answer ✓
    Hello @StefanRei

    You can use, loop files and connect it to append (Superset). That will do what you need and will show missing values. Sample code below.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.3.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.3.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="concurrency:loop_files" compatibility="9.3.001" expanded="true" height="82" name="Loop Files" width="90" x="179" y="136">
            <parameter key="directory" value="F:\RM\Union"/>
            <parameter key="filter_type" value="glob"/>
            <parameter key="recursive" value="false"/>
            <parameter key="enable_macros" value="false"/>
            <parameter key="macro_for_file_name" value="file_name"/>
            <parameter key="macro_for_file_type" value="file_type"/>
            <parameter key="macro_for_folder_name" value="folder_name"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <operator activated="true" class="read_csv" compatibility="9.3.001" expanded="true" height="68" name="Read CSV" width="90" x="380" y="34">
                <parameter key="column_separators" value=","/>
                <parameter key="trim_lines" value="false"/>
                <parameter key="use_quotes" value="true"/>
                <parameter key="quotes_character" value="&quot;"/>
                <parameter key="escape_character" value="\"/>
                <parameter key="skip_comments" value="false"/>
                <parameter key="comment_characters" value="#"/>
                <parameter key="starting_row" value="1"/>
                <parameter key="parse_numbers" value="true"/>
                <parameter key="decimal_character" value="."/>
                <parameter key="grouped_digits" value="false"/>
                <parameter key="grouping_character" value=","/>
                <parameter key="infinity_representation" value=""/>
                <parameter key="date_format" value=""/>
                <parameter key="first_row_as_names" value="true"/>
                <list key="annotations"/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
                <parameter key="encoding" value="SYSTEM"/>
                <parameter key="read_all_values_as_polynominal" value="false"/>
                <list key="data_set_meta_data_information"/>
                <parameter key="read_not_matching_values_as_missings" value="true"/>
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
              </operator>
              <connect from_port="file object" to_op="Read CSV" to_port="file"/>
              <connect from_op="Read CSV" from_port="output" to_port="output 1"/>
              <portSpacing port="source_file object" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.0.001" expanded="true" height="82" name="Append (Superset)" width="90" x="447" y="136"/>
          <connect from_op="Loop Files" from_port="output 1" to_op="Append (Superset)" to_port="example set 1"/>
          <connect from_op="Append (Superset)" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    
    Hope this helps.

  • StefanRei
    StefanRei New Altair Community Member
    Thank you very much, works totally fine!

Welcome!

It looks like you're new here. Sign in or register to get started.

Welcome!

It looks like you're new here. Sign in or register to get started.