Can someone share the steps for calculating the MCC value?
indu
New Altair Community Member
Can someone share the steps for calculating the MCC value, or the average MCC value, in RapidMiner?
Tagged:
0
Answers
-
Hi @indu,
If Varun is right, you can use:
- a Confusion Matrix to ExampleSet operator,
- 4 Extract Macro operators (to extract TP, TN, FP and FN),
- a Generate Macro operator to calculate the MCC from TP, TN, FP and FN,
- a Create ExampleSet operator to display the MCC as an ExampleSet in the Results panel.
Here is a process implementing this method (to be adapted to your own data):
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: computes the Matthews correlation coefficient (MCC)
  from the confusion matrix produced by a cross-validation.

  Flow (see the <connect> elements of the top-level subprocess):
    Retrieve Golf -> Validation -> Confusion Matrix to ExampleSet
      -> Extract Macro TP -> TN -> FN -> FP -> Generate Macro (MCC)
    Create ExampleSet publishes the %{MCC} macro as a result table.
-->
<process version="9.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Sample data; replace repository_entry with your own data set. -->
      <operator activated="true" class="retrieve" compatibility="9.5.000" expanded="true" height="68" name="Retrieve Golf" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Golf"/>
      </operator>
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" origin="GENERATED_TUTORIAL" width="90" x="447" y="34">
        <parameter key="split_on_batch_attribute" value="false"/>
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_folds" value="10"/>
        <parameter key="sampling_type" value="stratified sampling"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <!-- Training subprocess -->
        <process expanded="true">
          <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.4.000" expanded="true" height="103" name="Decision Tree" origin="GENERATED_TUTORIAL" width="90" x="179" y="34">
            <parameter key="criterion" value="gain_ratio"/>
            <parameter key="maximal_depth" value="10"/>
            <parameter key="apply_pruning" value="true"/>
            <parameter key="confidence" value="0.1"/>
            <parameter key="apply_prepruning" value="true"/>
            <parameter key="minimal_gain" value="0.01"/>
            <parameter key="minimal_leaf_size" value="2"/>
            <parameter key="minimal_size_for_split" value="4"/>
            <parameter key="number_of_prepruning_alternatives" value="3"/>
          </operator>
          <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.5.000" expanded="true" height="82" name="Apply Model" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.5.000" expanded="true" height="82" name="Performance (2)" origin="GENERATED_TUTORIAL" width="90" x="313" y="34">
            <parameter key="manually_set_positive_class" value="false"/>
            <parameter key="main_criterion" value="first"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="false"/>
            <parameter key="kappa" value="false"/>
            <parameter key="AUC (optimistic)" value="false"/>
            <parameter key="AUC" value="true"/>
            <parameter key="AUC (pessimistic)" value="true"/>
            <parameter key="precision" value="false"/>
            <parameter key="recall" value="false"/>
            <parameter key="lift" value="false"/>
            <parameter key="fallout" value="false"/>
            <parameter key="f_measure" value="false"/>
            <parameter key="false_positive" value="false"/>
            <parameter key="false_negative" value="false"/>
            <parameter key="true_positive" value="false"/>
            <parameter key="true_negative" value="false"/>
            <parameter key="sensitivity" value="false"/>
            <parameter key="specificity" value="false"/>
            <parameter key="youden" value="false"/>
            <parameter key="positive_predictive_value" value="false"/>
            <parameter key="negative_predictive_value" value="false"/>
            <parameter key="psep" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance (2)" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <!-- The line break inside the note is stored as escaped text, not as a child element. -->
          <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (10 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a decision tree model.</description>
      </operator>
      <!--
        Converts the performance vector into an ExampleSet (port "exa").
        breakpoints="after" pauses the run after this operator, presumably so
        the matrix layout can be inspected; remove the attribute to run
        without pausing.
      -->
      <operator activated="true" breakpoints="after" class="converters:confusionmatrix_2_example_set" compatibility="0.6.000" expanded="true" height="82" name="Confusion Matrix to ExampleSet" origin="GENERATED_TUTORIAL" width="90" x="648" y="136"/>
      <!--
        Each Extract Macro reads one cell of the matrix: attribute_name picks
        the column, example_index picks the row.
        NOTE(review): the indices below assume row 1 = predicted "no" and
        row 2 = predicted "yes" for the Golf label. Check the breakpoint
        output and adjust attribute_name / example_index for your own classes.
      -->
      <operator activated="true" class="extract_macro" compatibility="9.5.000" expanded="true" height="68" name="Extract Macro TP" width="90" x="782" y="136">
        <parameter key="macro" value="TP"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value="true yes"/>
        <parameter key="example_index" value="2"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="9.5.000" expanded="true" height="68" name="Extract Macro TN" width="90" x="916" y="136">
        <parameter key="macro" value="TN"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value="true no"/>
        <parameter key="example_index" value="1"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="9.5.000" expanded="true" height="68" name="Extract Macro FN" width="90" x="1050" y="136">
        <parameter key="macro" value="FN"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value="true yes"/>
        <parameter key="example_index" value="1"/>
        <list key="additional_macros"/>
      </operator>
      <!-- breakpoints="after" pauses the run once all four macros are set. -->
      <operator activated="true" breakpoints="after" class="extract_macro" compatibility="9.5.000" expanded="true" height="68" name="Extract Macro FP" width="90" x="1184" y="136">
        <parameter key="macro" value="FP"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value="true no"/>
        <parameter key="example_index" value="2"/>
        <list key="additional_macros"/>
      </operator>
      <!--
        MCC = (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
        NOTE(review): the expression has no guard for a zero denominator,
        which occurs when a whole row or column of the matrix sums to zero.
      -->
      <operator activated="true" class="generate_macro" compatibility="9.5.000" expanded="true" height="82" name="Generate Macro" width="90" x="1318" y="136">
        <list key="function_descriptions">
          <parameter key="MCC" value="(eval(%{TP})*eval(%{TN})-eval(%{FP})*eval(%{FN}))/(sqrt((eval(%{TP})+eval(%{FP}))*(eval(%{TP})+eval(%{FN}))*(eval(%{TN})+eval(%{FP}))*(eval(%{TN})+eval(%{FN}))))"/>
        </list>
      </operator>
      <!--
        Publishes the macro as a one-column, one-row table. The header "MCC"
        and the value must be on separate lines of input_csv_text, hence the
        encoded line break between them.
        NOTE(review): this operator has no incoming connection; it is listed
        after Generate Macro so that %{MCC} is presumably already defined when
        it runs. Confirm the execution order in the Process panel.
      -->
      <operator activated="true" class="utility:create_exampleset" compatibility="9.5.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="1117" y="238">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="input_csv_text" value="MCC&#10;%{MCC}"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <connect from_op="Retrieve Golf" from_port="output" to_op="Validation" to_port="example set"/>
      <connect from_op="Validation" from_port="performance 1" to_op="Confusion Matrix to ExampleSet" to_port="per"/>
      <connect from_op="Confusion Matrix to ExampleSet" from_port="exa" to_op="Extract Macro TP" to_port="example set"/>
      <connect from_op="Confusion Matrix to ExampleSet" from_port="ori" to_port="result 2"/>
      <connect from_op="Extract Macro TP" from_port="example set" to_op="Extract Macro TN" to_port="example set"/>
      <connect from_op="Extract Macro TN" from_port="example set" to_op="Extract Macro FN" to_port="example set"/>
      <connect from_op="Extract Macro FN" from_port="example set" to_op="Extract Macro FP" to_port="example set"/>
      <connect from_op="Extract Macro FP" from_port="example set" to_op="Generate Macro" to_port="through 1"/>
      <connect from_op="Generate Macro" from_port="through 1" to_port="result 1"/>
      <connect from_op="Create ExampleSet" from_port="output" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
Hope this helps,
Regards,
Lionel
3