Altair RISE

A program to recognize and reward our most engaged community members
Nominate Yourself Now!

compare csv files by ID column

User: "giorogo"
New Altair Community Member
Updated by Jocelyn

Hi, I'm new with this excellent program; I need help to perform the following task: I have two csv files two common columns (ID and emotion) I would like to create a task to compare these two files and get as a result two documents in which are shown in one all the ids with the same emotion and in another the ids with different emotions; for example id 001 file A felicity emotion, B emotion sadness file will be placed in the file with different emotions. Could you tell me step by step how should I do? Thank you

 

 

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "lionelderkrikor"
    New Altair Community Member
    Accepted Answer

    Hi again @giorogo,

     

    You can find here the complete process of what you want to do, based on the @mschmitz ' s idea :

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
    <parameter key="csv_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Compare_csv_files\crowdflower_emozionidominanti.csv"/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="encoding" value="windows-1252"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="text.true.polynominal.attribute"/>
    <parameter key="1" value="annotation.true.polynominal.attribute"/>
    <parameter key="2" value="id.true.integer.attribute"/>
    </list>
    </operator>
    <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="246" y="34">
    <parameter key="attribute_name" value="id"/>
    <parameter key="target_role" value="id"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV (2)" width="90" x="45" y="187">
    <parameter key="csv_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Compare_csv_files\video_progettoTSI.csv"/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="encoding" value="windows-1252"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="id.true.integer.attribute"/>
    <parameter key="1" value="video.true.polynominal.attribute"/>
    <parameter key="2" value="text.true.polynominal.attribute"/>
    <parameter key="3" value="faccia.true.polynominal.attribute"/>
    <parameter key="4" value="emotion.true.polynominal.attribute"/>
    <parameter key="5" value="emozioneSecondaria.true.polynominal.attribute"/>
    </list>
    </operator>
    <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (2)" width="90" x="246" y="187">
    <parameter key="attribute_name" value="id"/>
    <parameter key="target_role" value="id"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="concurrency:join" compatibility="8.1.000" expanded="true" height="82" name="Join" width="90" x="447" y="85">
    <list key="key_attributes"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="8.1.000" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="85">
    <parameter key="parameter_expression" value="[annotation]==[emotion]"/>
    <parameter key="condition_class" value="expression"/>
    <list key="filters_list">
    <parameter key="filters_entry_key" value="annotation.equals.[emotion]"/>
    </list>
    </operator>
    <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Join" to_port="left"/>
    <connect from_op="Read CSV (2)" from_port="output" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Join" to_port="right"/>
    <connect from_op="Join" from_port="join" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_port="result 1"/>
    <connect from_op="Filter Examples" from_port="unmatched example set" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    </process>
    </operator>
    </process>

    I hope it helps,

     

    Regards,

     

    Lionel