Merging Two Tables and Calculating Difference of Two Separate Columns
Yasir
New Altair Community Member
I have generated PERFORMANCE of two datasets. Now I want to merge those two generated tables and calculate difference between accuracy,precision and recall. How to do that?
Tagged:
0
Answers
-
Hello @Yasir,
Please see below sample XML code by clicking show, I used the same titanic dataset to train and test two models using rapidminer, then I converted the performances of both models into data using performance to data operator. I joined two performances and applied generate attribute operator to find a difference in performance. The new attribute "Difference in performance" will have your required result. You can run the below XML code by going to View --> Show Panel --> XML. Here you should paste this code in XML window and press green tick mark on the window which will show you the process.<?xml version="1.0" encoding="UTF-8"?><process version="9.2.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Titanic Training" width="90" x="45" y="85">
<parameter key="repository_entry" value="//Samples/data/Titanic Training"/>
</operator>
<operator activated="true" class="multiply" compatibility="9.2.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="85"/>
<operator activated="true" class="concurrency:cross_validation" compatibility="9.2.001" expanded="true" height="145" name="Cross Validation (2)" width="90" x="313" y="289">
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="10"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="h2o:logistic_regression" compatibility="9.2.000" expanded="true" height="124" name="Logistic Regression" width="90" x="112" y="34">
<parameter key="solver" value="AUTO"/>
<parameter key="reproducible" value="false"/>
<parameter key="maximum_number_of_threads" value="4"/>
<parameter key="use_regularization" value="false"/>
<parameter key="lambda_search" value="false"/>
<parameter key="number_of_lambdas" value="0"/>
<parameter key="lambda_min_ratio" value="0.0"/>
<parameter key="early_stopping" value="true"/>
<parameter key="stopping_rounds" value="3"/>
<parameter key="stopping_tolerance" value="0.001"/>
<parameter key="standardize" value="true"/>
<parameter key="non-negative_coefficients" value="false"/>
<parameter key="add_intercept" value="true"/>
<parameter key="compute_p-values" value="true"/>
<parameter key="remove_collinear_columns" value="true"/>
<parameter key="missing_values_handling" value="MeanImputation"/>
<parameter key="max_iterations" value="0"/>
<parameter key="max_runtime_seconds" value="0"/>
</operator>
<connect from_port="training set" to_op="Logistic Regression" to_port="training set"/>
<connect from_op="Logistic Regression" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance" compatibility="9.2.001" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="85">
<parameter key="use_example_weights" value="true"/>
</operator>
<connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
<connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="performance_to_data" compatibility="9.2.001" expanded="true" height="82" name="Performance to Data (2)" width="90" x="447" y="340"/>
<operator activated="true" class="concurrency:cross_validation" compatibility="9.2.001" expanded="true" height="145" name="Cross Validation" width="90" x="313" y="85">
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="10"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.2.001" expanded="true" height="103" name="Decision Tree" width="90" x="112" y="34">
<parameter key="criterion" value="gain_ratio"/>
<parameter key="maximal_depth" value="10"/>
<parameter key="apply_pruning" value="true"/>
<parameter key="confidence" value="0.1"/>
<parameter key="apply_prepruning" value="true"/>
<parameter key="minimal_gain" value="0.01"/>
<parameter key="minimal_leaf_size" value="2"/>
<parameter key="minimal_size_for_split" value="4"/>
<parameter key="number_of_prepruning_alternatives" value="3"/>
</operator>
<connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Decision Tree" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance" compatibility="9.2.001" expanded="true" height="82" name="Performance" width="90" x="179" y="85">
<parameter key="use_example_weights" value="true"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="performance 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="performance_to_data" compatibility="9.2.001" expanded="true" height="82" name="Performance to Data" width="90" x="447" y="187"/>
<operator activated="true" class="concurrency:join" compatibility="9.2.001" expanded="true" height="82" name="Join" width="90" x="581" y="289">
<parameter key="remove_double_attributes" value="false"/>
<parameter key="join_type" value="outer"/>
<parameter key="use_id_attribute_as_key" value="false"/>
<list key="key_attributes">
<parameter key="Criterion" value="Criterion"/>
</list>
<parameter key="keep_both_join_attributes" value="false"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="9.2.001" expanded="true" height="82" name="Generate Attributes" width="90" x="648" y="136">
<list key="function_descriptions">
<parameter key="Difference in Performance" value="Value-Value_from_ES2"/>
</list>
<parameter key="keep_all" value="true"/>
</operator>
<connect from_op="Retrieve Titanic Training" from_port="output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Multiply" from_port="output 2" to_op="Cross Validation (2)" to_port="example set"/>
<connect from_op="Cross Validation (2)" from_port="performance 1" to_op="Performance to Data (2)" to_port="performance vector"/>
<connect from_op="Performance to Data (2)" from_port="example set" to_op="Join" to_port="right"/>
<connect from_op="Cross Validation" from_port="performance 1" to_op="Performance to Data" to_port="performance vector"/>
<connect from_op="Performance to Data" from_port="example set" to_op="Join" to_port="left"/>
<connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>Image for understanding.
Result: Values from two performances and final difference between them.
Hope this helps.2