A program to recognize and reward our most engaged community members
Yes, that is exactly right. The terms "support" and "contradict" are always relative to the model's prediction, regardless of whether that prediction is correct or wrong. This way, the explanations can be created even when the true class is not known at all.
Hope this helps, Ingo
* Calculate the correlation between the given attribute and the predictions. Make sure that
* the predictions are set in a one-vs-all fashion for multiclass problems. It uses the confidence
* for the class to correlate with.
*
* For nominal attributes we just map the values into 1 (same value as the one predicted) vs. 0 (different value).
*
* Please note that this method artificially sets the standard deviation to a small value in case
* of all labels being the same (which can happen if the model is really confident in certain
* areas). First, we artificially change one random label in case they are all the same.
* Then we also handle the case where the standard deviation is still 0 by replacing it
* with a small value.
*
* These small changes prevent the correlations for all attributes from being NaN.
* Because of those changes, this method should not be used for calculating
* regular correlations.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Process definition (operator XML, version 9.2.000).

  Data flow, as wired by the <connect> elements below:
    1. "Retrieve Titanic Training" loads //Samples/data/Titanic Training.
    2. "Split Data" partitions it with ratios 0.7 / 0.3.
    3. "Decision Tree" is trained on partition 1.
    4. "Explain Predictions" receives the model, the tree's exampleSet output
       as training data, and partition 2 as test data.
    5. "Filter Examples" is applied to the explained example set with
       condition_class = correct_predictions (invert_filter = false) and its
       output is delivered to result 1.

  NOTE: the forum rendering had appended an HTML line-break tag after every
  element; those tags were never closed and made the document ill-formed.
  They have been removed. No operator, parameter or connection was changed.
-->
<process version="9.2.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.2.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <!-- Load the sample data set. -->
      <operator activated="true" class="retrieve" compatibility="9.2.000" expanded="true" height="68" name="Retrieve Titanic Training" width="90" x="45" y="187">
        <parameter key="repository_entry" value="//Samples/data/Titanic Training"/>
      </operator>
      <!-- Two partitions: 0.7 (wired to the learner) and 0.3 (wired as test data). -->
      <operator activated="true" class="split_data" compatibility="9.2.000" expanded="true" height="103" name="Split Data" width="90" x="179" y="187">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.2.000" expanded="true" height="103" name="Decision Tree" width="90" x="313" y="34">
        <parameter key="criterion" value="gain_ratio"/>
        <parameter key="maximal_depth" value="10"/>
        <parameter key="apply_pruning" value="true"/>
        <parameter key="confidence" value="0.1"/>
        <parameter key="apply_prepruning" value="true"/>
        <parameter key="minimal_gain" value="0.01"/>
        <parameter key="minimal_leaf_size" value="2"/>
        <parameter key="minimal_size_for_split" value="4"/>
        <parameter key="number_of_prepruning_alternatives" value="3"/>
      </operator>
      <operator activated="true" class="model_simulator:explain_predictions" compatibility="9.2.000" expanded="true" height="103" name="Explain Predictions" width="90" x="514" y="187">
        <parameter key="maximal explaining attributes" value="3"/>
        <parameter key="local sample size" value="500"/>
        <parameter key="only create predictions" value="false"/>
      </operator>
      <!-- Filter on the correct_predictions condition class; the filter is not inverted. -->
      <operator activated="true" class="filter_examples" compatibility="9.2.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="187">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="correct_predictions"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list"/>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <connect from_op="Retrieve Titanic Training" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Decision Tree" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Explain Predictions" to_port="test data"/>
      <connect from_op="Decision Tree" from_port="model" to_op="Explain Predictions" to_port="model"/>
      <connect from_op="Decision Tree" from_port="exampleSet" to_op="Explain Predictions" to_port="training data"/>
      <connect from_op="Explain Predictions" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Process definition (operator XML, version 9.2.000).

  Data flow, as wired by the <connect> elements below:
    1. "Retrieve Titanic Training" loads //Samples/data/Titanic Training;
       "Split Data" partitions it 0.7 / 0.3; "Decision Tree" is trained on
       partition 1.
    2. "Explain Predictions" receives the model, the tree's exampleSet output
       as training data, and partition 2 as test data.
    3. Its example set output goes through "Generate ID", then
       "Filter Examples" (condition_class = correct_predictions, not
       inverted), then "Select Attributes" (single attribute: id).
    4. "Join" (left join, key id = Row No) takes that id list on its left
       port and the importances output of "Explain Predictions" on its right.
    5. "Select Attributes (2)" keeps id|Name|Importance; "Pivot" groups by id,
       uses Name as the column grouping attribute and averages Importance.
    6. "Rename by Replacing" rewrites names matching
       average\(Importance\)_(.*) to the captured group ($1).
    7. "Loop Attributes" runs Aggregate (average) / Rename / Transpose once per
       selected attribute (selection: single attribute id, inverted);
       "Append" merges the loop output.
    8. "Rename (2)" renames id to Attribute and att_1 to Avg Importance;
       "Sort" orders by Avg Importance, decreasing, and delivers result 1.

  NOTE: the forum rendering had appended an HTML line-break tag after every
  element; those tags were never closed and made the document ill-formed.
  They have been removed, and the from_port value "importances output", which
  had been wrapped across two lines, has been rejoined. No operator,
  parameter or connection was changed.
-->
<process version="9.2.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.2.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <!-- Load the sample data set. -->
      <operator activated="true" class="retrieve" compatibility="9.2.000" expanded="true" height="68" name="Retrieve Titanic Training" width="90" x="45" y="187">
        <parameter key="repository_entry" value="//Samples/data/Titanic Training"/>
      </operator>
      <!-- Two partitions: 0.7 (wired to the learner) and 0.3 (wired as test data). -->
      <operator activated="true" class="split_data" compatibility="9.2.000" expanded="true" height="103" name="Split Data" width="90" x="179" y="187">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.2.000" expanded="true" height="103" name="Decision Tree" width="90" x="313" y="34">
        <parameter key="criterion" value="gain_ratio"/>
        <parameter key="maximal_depth" value="10"/>
        <parameter key="apply_pruning" value="true"/>
        <parameter key="confidence" value="0.1"/>
        <parameter key="apply_prepruning" value="true"/>
        <parameter key="minimal_gain" value="0.01"/>
        <parameter key="minimal_leaf_size" value="2"/>
        <parameter key="minimal_size_for_split" value="4"/>
        <parameter key="number_of_prepruning_alternatives" value="3"/>
      </operator>
      <operator activated="true" class="model_simulator:explain_predictions" compatibility="9.2.000" expanded="true" height="103" name="Explain Predictions" width="90" x="514" y="238">
        <parameter key="maximal explaining attributes" value="3"/>
        <parameter key="local sample size" value="500"/>
        <parameter key="only create predictions" value="false"/>
      </operator>
      <!-- Adds the id attribute that is used further down as the join key and pivot group. -->
      <operator activated="true" class="generate_id" compatibility="9.2.000" expanded="true" height="82" name="Generate ID" width="90" x="648" y="136">
        <parameter key="create_nominal_ids" value="false"/>
        <parameter key="offset" value="0"/>
      </operator>
      <!-- Filter on the correct_predictions condition class; the filter is not inverted. -->
      <operator activated="true" class="filter_examples" compatibility="9.2.000" expanded="true" height="103" name="Filter Examples" width="90" x="782" y="136">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="correct_predictions"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list"/>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <!-- Keep only the id attribute (special attributes included in the selection). -->
      <operator activated="true" class="select_attributes" compatibility="9.2.000" expanded="true" height="82" name="Select Attributes" width="90" x="916" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="id"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Left join: filtered ids (left port) against the importances table (right port), id = Row No. -->
      <operator activated="true" class="concurrency:join" compatibility="9.2.000" expanded="true" height="82" name="Join" width="90" x="1050" y="238">
        <parameter key="remove_double_attributes" value="true"/>
        <parameter key="join_type" value="left"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="id" value="Row No"/>
        </list>
        <parameter key="keep_both_join_attributes" value="false"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.2.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="1184" y="238">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="id|Name|Importance"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Group by id, one column per Name value, aggregating Importance with average. -->
      <operator activated="true" class="blending:pivot" compatibility="9.2.000" expanded="true" height="82" name="Pivot" width="90" x="1318" y="238">
        <parameter key="group_by_attributes" value="id"/>
        <parameter key="column_grouping_attribute" value="Name"/>
        <list key="aggregation_attributes">
          <parameter key="Importance" value="average"/>
        </list>
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="default_aggregation_function" value="first"/>
      </operator>
      <!-- Selection is "single attribute id", inverted; matching names are replaced by regex group 1. -->
      <operator activated="true" class="rename_by_replacing" compatibility="9.2.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="1452" y="238">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="id"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="replace_what" value="average\(Importance\)_(.*)"/>
        <parameter key="replace_by" value="$1"/>
      </operator>
      <!-- Selection is "single attribute id", inverted; the current attribute name is exposed as macro loop_attribute. -->
      <operator activated="true" class="concurrency:loop_attributes" compatibility="9.2.000" expanded="true" height="82" name="Loop Attributes" width="90" x="1586" y="238">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="id"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="attribute_name_macro" value="loop_attribute"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <!-- Average of the current loop attribute, with no grouping attributes. -->
          <operator activated="true" class="aggregate" compatibility="9.2.000" expanded="true" height="82" name="Aggregate" width="90" x="45" y="34">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="%{loop_attribute}" value="average"/>
            </list>
            <parameter key="group_by_attributes" value=""/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <!-- Rename average(<attribute>) back to the plain attribute name. -->
          <operator activated="true" class="rename" compatibility="9.2.000" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
            <parameter key="old_name" value="average(%{loop_attribute})"/>
            <parameter key="new_name" value="%{loop_attribute}"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="transpose" compatibility="9.2.000" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
          <connect from_port="input 1" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="append" compatibility="9.2.000" expanded="true" height="82" name="Append" width="90" x="1720" y="238">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <!-- Final column names: id becomes Attribute, att_1 becomes Avg Importance. -->
      <operator activated="true" class="rename" compatibility="9.2.000" expanded="true" height="82" name="Rename (2)" width="90" x="1854" y="238">
        <parameter key="old_name" value="id"/>
        <parameter key="new_name" value="Attribute"/>
        <list key="rename_additional_attributes">
          <parameter key="att_1" value="Avg Importance"/>
        </list>
      </operator>
      <operator activated="true" class="sort" compatibility="9.2.000" expanded="true" height="82" name="Sort" width="90" x="1988" y="238">
        <parameter key="attribute_name" value="Avg Importance"/>
        <parameter key="sorting_direction" value="decreasing"/>
      </operator>
      <connect from_op="Retrieve Titanic Training" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Decision Tree" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Explain Predictions" to_port="test data"/>
      <connect from_op="Decision Tree" from_port="model" to_op="Explain Predictions" to_port="model"/>
      <connect from_op="Decision Tree" from_port="exampleSet" to_op="Explain Predictions" to_port="training data"/>
      <connect from_op="Explain Predictions" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Explain Predictions" from_port="importances output" to_op="Join" to_port="right"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Join" from_port="join" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Pivot" to_port="input"/>
      <connect from_op="Pivot" from_port="output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Loop Attributes" to_port="input 1"/>
      <connect from_op="Loop Attributes" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Rename (2)" to_port="example set input"/>
      <connect from_op="Rename (2)" from_port="example set output" to_op="Sort" to_port="example set input"/>
      <connect from_op="Sort" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>