Merging Two Tables and Calculating Difference of Two Separate Columns

Yasir
Yasir New Altair Community Member
edited November 5 in Community Q&A
I have generated Performance results for two datasets. Now I want to merge the two generated tables and calculate the difference in accuracy, precision, and recall. How can I do that?

Answers

  • varunm1
    varunm1 New Altair Community Member
    edited April 2019
    Hello @Yasir,

    Please see the sample XML code below (click Show to expand it). I used the same Titanic dataset to train and test two models in RapidMiner, then converted the performance of each model into data using the Performance to Data operator. I joined the two performance tables and applied the Generate Attributes operator to compute the difference in performance. The new attribute "Difference in Performance" will hold your required result. To run the XML code below, go to View --> Show Panel --> XML, paste the code into the XML window, and press the green tick mark in that window to load the process.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.001">
    <!--
      RapidMiner process: compare the cross-validated performance of two learners.
      Flow (see the connect elements near the end of the root process):
        Retrieve Titanic Training, then Multiply, which feeds two branches:
          output 1: Cross Validation     (Decision Tree)       then Performance to Data
          output 2: Cross Validation (2) (Logistic Regression) then Performance to Data (2)
        Both performance tables are joined on "Criterion", and Generate Attributes
        adds the attribute "Difference in Performance".
    -->
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
    <!-- Load the sample data set from the repository entry given below. -->
    <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Titanic Training" width="90" x="45" y="85">
    <parameter key="repository_entry" value="//Samples/data/Titanic Training"/>
    </operator>
    <!-- Duplicate the data so that both cross validations receive the same example set. -->
    <operator activated="true" class="multiply" compatibility="9.2.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="85"/>
    <!-- Branch 2: cross validation (number_of_folds = 10) around the Logistic Regression learner. -->
    <operator activated="true" class="concurrency:cross_validation" compatibility="9.2.001" expanded="true" height="145" name="Cross Validation (2)" width="90" x="313" y="289">
    <parameter key="split_on_batch_attribute" value="false"/>
    <parameter key="leave_one_out" value="false"/>
    <parameter key="number_of_folds" value="10"/>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
    <parameter key="enable_parallel_execution" value="true"/>
    <!-- First subprocess: takes the "training set" port and delivers a "model". -->
    <process expanded="true">
    <operator activated="true" class="h2o:logistic_regression" compatibility="9.2.000" expanded="true" height="124" name="Logistic Regression" width="90" x="112" y="34">
    <parameter key="solver" value="AUTO"/>
    <parameter key="reproducible" value="false"/>
    <parameter key="maximum_number_of_threads" value="4"/>
    <parameter key="use_regularization" value="false"/>
    <parameter key="lambda_search" value="false"/>
    <parameter key="number_of_lambdas" value="0"/>
    <parameter key="lambda_min_ratio" value="0.0"/>
    <parameter key="early_stopping" value="true"/>
    <parameter key="stopping_rounds" value="3"/>
    <parameter key="stopping_tolerance" value="0.001"/>
    <parameter key="standardize" value="true"/>
    <parameter key="non-negative_coefficients" value="false"/>
    <parameter key="add_intercept" value="true"/>
    <parameter key="compute_p-values" value="true"/>
    <parameter key="remove_collinear_columns" value="true"/>
    <parameter key="missing_values_handling" value="MeanImputation"/>
    <parameter key="max_iterations" value="0"/>
    <parameter key="max_runtime_seconds" value="0"/>
    </operator>
    <connect from_port="training set" to_op="Logistic Regression" to_port="training set"/>
    <connect from_op="Logistic Regression" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <!-- Second subprocess: applies the model to the "test set" port and delivers "performance 1". -->
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
    </operator>
    <operator activated="true" class="performance" compatibility="9.2.001" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="85">
    <parameter key="use_example_weights" value="true"/>
    </operator>
    <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
    <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
    </operator>
    <!-- Convert the performance vector of Cross Validation (2) into an example set (wired to the Join "right" port). -->
    <operator activated="true" class="performance_to_data" compatibility="9.2.001" expanded="true" height="82" name="Performance to Data (2)" width="90" x="447" y="340"/>
    <!-- Branch 1: cross validation (number_of_folds = 10) around the Decision Tree learner. -->
    <operator activated="true" class="concurrency:cross_validation" compatibility="9.2.001" expanded="true" height="145" name="Cross Validation" width="90" x="313" y="85">
    <parameter key="split_on_batch_attribute" value="false"/>
    <parameter key="leave_one_out" value="false"/>
    <parameter key="number_of_folds" value="10"/>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
    <parameter key="enable_parallel_execution" value="true"/>
    <!-- First subprocess: takes the "training set" port and delivers a "model". -->
    <process expanded="true">
    <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.2.001" expanded="true" height="103" name="Decision Tree" width="90" x="112" y="34">
    <parameter key="criterion" value="gain_ratio"/>
    <parameter key="maximal_depth" value="10"/>
    <parameter key="apply_pruning" value="true"/>
    <parameter key="confidence" value="0.1"/>
    <parameter key="apply_prepruning" value="true"/>
    <parameter key="minimal_gain" value="0.01"/>
    <parameter key="minimal_leaf_size" value="2"/>
    <parameter key="minimal_size_for_split" value="4"/>
    <parameter key="number_of_prepruning_alternatives" value="3"/>
    </operator>
    <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
    <connect from_op="Decision Tree" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <!-- Second subprocess: applies the model to the "test set" port and delivers "performance 1". -->
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
    </operator>
    <operator activated="true" class="performance" compatibility="9.2.001" expanded="true" height="82" name="Performance" width="90" x="179" y="85">
    <parameter key="use_example_weights" value="true"/>
    </operator>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
    </operator>
    <!-- Convert the performance vector of Cross Validation into an example set (wired to the Join "left" port). -->
    <operator activated="true" class="performance_to_data" compatibility="9.2.001" expanded="true" height="82" name="Performance to Data" width="90" x="447" y="187"/>
    <!--
      Outer join of the two performance tables, keyed on the "Criterion" attribute of both inputs.
      remove_double_attributes is false, so same-named attributes from both inputs are kept.
    -->
    <operator activated="true" class="concurrency:join" compatibility="9.2.001" expanded="true" height="82" name="Join" width="90" x="581" y="289">
    <parameter key="remove_double_attributes" value="false"/>
    <parameter key="join_type" value="outer"/>
    <parameter key="use_id_attribute_as_key" value="false"/>
    <list key="key_attributes">
    <parameter key="Criterion" value="Criterion"/>
    </list>
    <parameter key="keep_both_join_attributes" value="false"/>
    </operator>
    <!--
      Adds the attribute "Difference in Performance", computed as Value minus Value_from_ES2;
      keep_all is true, so the existing attributes are retained.
      NOTE(review): Value_from_ES2 is presumably the "Value" column of the right-hand Join input
      (Logistic Regression) after Join renames the duplicate, which would make the result
      Decision Tree minus Logistic Regression. Confirm against the Join output.
    -->
    <operator activated="true" class="generate_attributes" compatibility="9.2.001" expanded="true" height="82" name="Generate Attributes" width="90" x="648" y="136">
    <list key="function_descriptions">
    <parameter key="Difference in Performance" value="Value-Value_from_ES2"/>
    </list>
    <parameter key="keep_all" value="true"/>
    </operator>
    <!-- Top-level wiring: the two branches meet at Join; the Generate Attributes output is delivered on "result 1". -->
    <connect from_op="Retrieve Titanic Training" from_port="output" to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1" to_op="Cross Validation" to_port="example set"/>
    <connect from_op="Multiply" from_port="output 2" to_op="Cross Validation (2)" to_port="example set"/>
    <connect from_op="Cross Validation (2)" from_port="performance 1" to_op="Performance to Data (2)" to_port="performance vector"/>
    <connect from_op="Performance to Data (2)" from_port="example set" to_op="Join" to_port="right"/>
    <connect from_op="Cross Validation" from_port="performance 1" to_op="Performance to Data" to_port="performance vector"/>
    <connect from_op="Performance to Data" from_port="example set" to_op="Join" to_port="left"/>
    <connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>

    Image for understanding.


    Result: Values from two performances and final difference between them.


    Hope this helps.