🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Calculations for the Pos and Neg Predictive Values and the PSEP in binomial performance

User: "kdafoe"
New Altair Community Member
Updated by Jocelyn
Hi. I am trying to understand a few calculations in the Binomial Performance Classification operator. I am using the Titanic dataset, and a decision tree operator within the nominal cross-validation building block. I switched the performance operator for the binomial classification performance operator so that I could get more criteria. Everything looks great (meaning I can verify the values provided), except for three values.

The value RapidMiner shows for the Positive Predictive Value is 82.87%, which matches my calculation for the Negative Predictive Value. The reverse is also true: the Negative Predictive Value of 75.49% matches my Positive Predictive Value. Is there a labeling mismatch? There are more negative (no) survival values in the Titanic dataset than positive (yes) values, so I think my values are correct.

Also, how do you calculate the PSEP or Positive Satisfactory Error Probability value of 0.584? The equation I use is FPR + FNR * (1 - Acceptable Error Rate), but after substituting the FPR and FNR values, the only Acceptable Error Rate that reproduces your score is -0.532. It doesn't make sense for an Acceptable Error Rate to be negative, nor do I understand how you would arrive at 53.2%.

Can someone help explain these differences to me?

Thanks for your time.



Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "kdafoe"
    New Altair Community Member
    OP
    Here is the XML code:

    <?xml version="1.0" encoding="UTF-8"?><process version="10.1.002">
    <!-- Process context: the input, output and macros sections are all empty. -->
    <context>
      <input/>
      <output/>
      <macros/>
    </context>
    <operator activated="true" class="process" compatibility="10.1.002" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
    <!-- Reads the repository entry //Samples/data/Titanic. -->
    <operator activated="true"
              class="retrieve"
              compatibility="10.1.002"
              expanded="true"
              height="68"
              name="Retrieve Titanic"
              width="90"
              x="179"
              y="187">
      <parameter key="repository_entry" value="//Samples/data/Titanic"/>
    </operator>
    <!--
      Attribute selection in "exclude attributes" mode over a named subset:
      Cabin, Life Boat, Name, Port of Embarkation and Ticket Number.
      The subset value is a single string whose entries are separated by the
      U+241E character; it must be kept exactly as written.
    -->
    <operator activated="true"
              class="blending:select_attributes"
              compatibility="10.1.002"
              expanded="true"
              height="82"
              name="Select Attributes"
              width="90"
              x="313"
              y="187">
      <parameter key="type" value="exclude attributes"/>
      <parameter key="attribute_filter_type" value="a subset"/>
      <parameter key="select_attribute" value=""/>
      <parameter key="select_subset" value="Cabin␞Life Boat␞Name␞Port of Embarkation␞Ticket Number"/>
      <parameter key="also_apply_to_special_attributes_(id,_label..)" value="false"/>
    </operator>
    <!-- Assigns the role "label" to the attribute Survived. -->
    <operator activated="true"
              class="blending:set_role"
              compatibility="10.1.002"
              expanded="true"
              height="82"
              name="Set Role"
              width="90"
              x="447"
              y="187">
      <list key="set_roles">
        <parameter key="Survived" value="label"/>
      </list>
    </operator>
    <!--
      Missing-value imputation driven by the learner in the nested subprocess
      (a k-NN operator with k = 2).
      NOTE(review): attribute_filter_type is "all" while attribute is set to
      "Age"; presumably the single-attribute setting is unused with this
      filter type. Confirm against the operator documentation.
    -->
    <operator activated="true"
              class="impute_missing_values"
              compatibility="10.1.002"
              expanded="true"
              height="68"
              name="Impute Missing Values"
              width="90"
              x="581"
              y="187">
      <!-- Attribute filter settings. -->
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value="Age"/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <!-- Imputation settings. -->
      <parameter key="iterate" value="true"/>
      <parameter key="learn_on_complete_cases" value="true"/>
      <parameter key="order" value="chronological"/>
      <parameter key="sort" value="ascending"/>
      <parameter key="use_local_random_seed" value="false"/>
      <parameter key="local_random_seed" value="1992"/>
      <!-- Nested subprocess: training set in, model out. -->
      <process expanded="true">
        <operator activated="true"
                  class="k_nn"
                  compatibility="10.1.002"
                  expanded="true"
                  height="82"
                  name="k-NN"
                  width="90"
                  x="112"
                  y="136">
          <parameter key="k" value="2"/>
          <parameter key="weighted_vote" value="false"/>
          <parameter key="measure_types" value="MixedMeasures"/>
          <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
          <parameter key="nominal_measure" value="NominalDistance"/>
          <parameter key="numerical_measure" value="EuclideanDistance"/>
          <parameter key="divergence" value="GeneralizedIDivergence"/>
          <parameter key="kernel_type" value="radial"/>
          <parameter key="kernel_gamma" value="1.0"/>
          <parameter key="kernel_sigma1" value="1.0"/>
          <parameter key="kernel_sigma2" value="0.0"/>
          <parameter key="kernel_sigma3" value="2.0"/>
          <parameter key="kernel_degree" value="3.0"/>
          <parameter key="kernel_shift" value="1.0"/>
          <parameter key="kernel_a" value="1.0"/>
          <parameter key="kernel_b" value="0.0"/>
        </operator>
        <connect from_port="example set source"
                 to_op="k-NN" to_port="training set"/>
        <connect from_op="k-NN" from_port="model"
                 to_port="model sink"/>
        <portSpacing port="source_example set source" spacing="0"/>
        <portSpacing port="sink_model sink" spacing="0"/>
      </process>
    </operator>
    <!--
      Cross-validation with 10 folds and stratified sampling.
      The first nested subprocess trains a decision tree; the second applies
      the model to the test set and measures binominal classification
      performance.
    -->
    <operator activated="true"
              class="concurrency:cross_validation"
              compatibility="10.0.000"
              expanded="true"
              height="145"
              name="Validation"
              width="90"
              x="782"
              y="238">
      <parameter key="split_on_batch_attribute" value="false"/>
      <parameter key="leave_one_out" value="false"/>
      <parameter key="number_of_folds" value="10"/>
      <parameter key="sampling_type" value="stratified sampling"/>
      <parameter key="use_local_random_seed" value="false"/>
      <parameter key="local_random_seed" value="1992"/>
      <parameter key="enable_parallel_execution" value="true"/>

      <!-- Training subprocess: training set in, decision tree model out. -->
      <process expanded="true">
        <operator activated="true"
                  class="concurrency:parallel_decision_tree"
                  compatibility="10.1.002"
                  expanded="true"
                  height="103"
                  name="Decision Tree (2)"
                  width="90"
                  x="45"
                  y="34">
          <parameter key="criterion" value="gain_ratio"/>
          <parameter key="maximal_depth" value="10"/>
          <parameter key="apply_pruning" value="true"/>
          <parameter key="confidence" value="0.1"/>
          <parameter key="apply_prepruning" value="true"/>
          <parameter key="minimal_gain" value="0.01"/>
          <parameter key="minimal_leaf_size" value="2"/>
          <parameter key="minimal_size_for_split" value="4"/>
          <parameter key="number_of_prepruning_alternatives" value="3"/>
        </operator>
        <connect from_port="training set"
                 to_op="Decision Tree (2)" to_port="training set"/>
        <connect from_op="Decision Tree (2)" from_port="model"
                 to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
        <description align="left"
                     color="green"
                     colored="true"
                     height="80"
                     resized="true"
                     width="248"
                     x="37"
                     y="158">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
      </process>

      <!-- Testing subprocess: model and test set in, performance and scored test set out. -->
      <process expanded="true">
        <operator activated="true"
                  class="apply_model"
                  compatibility="10.1.002"
                  expanded="true"
                  height="82"
                  name="Apply Model"
                  width="90"
                  x="45"
                  y="34">
          <list key="application_parameters"/>
        </operator>
        <!-- Generic performance operator: deactivated and not referenced by any connection below. -->
        <operator activated="false"
                  class="performance"
                  compatibility="10.1.002"
                  expanded="true"
                  height="82"
                  name="Performance"
                  width="90"
                  x="112"
                  y="340">
          <parameter key="use_example_weights" value="true"/>
        </operator>
        <!--
          Binominal classification performance; this is the operator wired to
          the performance output.
          NOTE(review): manually_set_positive_class is false, so the positive
          class is whatever the operator picks by default. If that is not the
          class the reader expects, PPV and NPV would appear swapped. Confirm
          against the operator documentation.
          NOTE(review): the values quoted in the thread satisfy
          0.8287 + 0.7549 - 1 = 0.5836, i.e. the reported psep of 0.584 equals
          PPV + NPV - 1. Confirm that this is the operator's definition.
        -->
        <operator activated="true"
                  class="performance_binominal_classification"
                  compatibility="10.1.002"
                  expanded="true"
                  height="82"
                  name="Performance (2)"
                  width="90"
                  x="179"
                  y="34">
          <parameter key="manually_set_positive_class" value="false"/>
          <parameter key="main_criterion" value="first"/>
          <parameter key="accuracy" value="true"/>
          <parameter key="classification_error" value="true"/>
          <parameter key="kappa" value="true"/>
          <parameter key="AUC (optimistic)" value="false"/>
          <parameter key="AUC" value="true"/>
          <parameter key="AUC (pessimistic)" value="false"/>
          <parameter key="precision" value="true"/>
          <parameter key="recall" value="true"/>
          <parameter key="lift" value="false"/>
          <parameter key="fallout" value="false"/>
          <parameter key="f_measure" value="true"/>
          <parameter key="false_positive" value="true"/>
          <parameter key="false_negative" value="true"/>
          <parameter key="true_positive" value="true"/>
          <parameter key="true_negative" value="true"/>
          <parameter key="sensitivity" value="true"/>
          <parameter key="specificity" value="true"/>
          <parameter key="youden" value="true"/>
          <parameter key="positive_predictive_value" value="true"/>
          <parameter key="negative_predictive_value" value="true"/>
          <parameter key="psep" value="true"/>
          <parameter key="skip_undefined_labels" value="true"/>
          <parameter key="use_example_weights" value="true"/>
        </operator>
        <connect from_port="model"
                 to_op="Apply Model" to_port="model"/>
        <connect from_port="test set"
                 to_op="Apply Model" to_port="unlabelled data"/>
        <connect from_op="Apply Model" from_port="labelled data"
                 to_op="Performance (2)" to_port="labelled data"/>
        <connect from_op="Performance (2)" from_port="performance"
                 to_port="performance 1"/>
        <connect from_op="Performance (2)" from_port="example set"
                 to_port="test set results"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_test set results" spacing="0"/>
        <portSpacing port="sink_performance 1" spacing="0"/>
        <portSpacing port="sink_performance 2" spacing="0"/>
        <description align="left"
                     color="blue"
                     colored="true"
                     height="103"
                     resized="true"
                     width="315"
                     x="38"
                     y="158">The model created in the Training step is applied to the current test set (10 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
      </process>
      <description align="center"
                   color="transparent"
                   colored="false"
                   width="126">A cross-validation evaluating a decision tree model.</description>
    </operator>
    <!-- Model Simulator with no explicit parameters; its inputs are wired from the Validation operator. -->
    <operator activated="true"
              class="model_simulator:model_simulator"
              compatibility="10.1.000"
              expanded="true"
              height="103"
              name="Model Simulator"
              width="90"
              x="983"
              y="391"/>
    <!-- Data preparation chain: Retrieve, Select Attributes, Set Role, Impute Missing Values, Validation. -->
    <connect from_op="Retrieve Titanic" from_port="output"
             to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output"
             to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output"
             to_op="Impute Missing Values" to_port="example set in"/>
    <connect from_op="Impute Missing Values" from_port="example set out"
             to_op="Validation" to_port="example set"/>
    <!-- Validation outputs feed the Model Simulator (model, training data, test data). -->
    <connect from_op="Validation" from_port="model"
             to_op="Model Simulator" to_port="model"/>
    <connect from_op="Validation" from_port="example set"
             to_op="Model Simulator" to_port="training data"/>
    <connect from_op="Validation" from_port="test result set"
             to_op="Model Simulator" to_port="test data"/>
    <!-- Process results: 1 = simulator output, 2 = model output, 3 = cross-validation performance. -->
    <connect from_op="Validation" from_port="performance 1"
             to_port="result 3"/>
    <connect from_op="Model Simulator" from_port="simulator output"
             to_port="result 1"/>
    <connect from_op="Model Simulator" from_port="model output"
             to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    <description align="center"
                 color="yellow"
                 colored="false"
                 height="81"
                 resized="true"
                 width="422"
                 x="503"
                 y="33">Simple Titanic Decision Tree Model</description>
    </process>
    </operator>
    </process>