[SOLVED] How to get roc curve data?
Find more posts tagged with
Sort by:
1 - 7 of
71
Thank you for your reply, Marius.
Marius wrote:
Hi,
does your model create confidences? Then you can apply your model outside of RapidMiner, save the processed data including original label, predicted label and confidences in a csv file and import it into RapidMiner. Then make sure that attribute roles and attribute names comply with the naming scheme of classified data sets in RapidMiner (make use of Rename and Set Role operators). If you then use the Performance operator and push your data through it, you will get a ROC curve.
For doing the same outside RapidMiner, sort your examples by confidence, calculate True and False positive rate for each distinct confidence and create a data point from this.
Cheers,
Marius
Is there any way we can directly get the point data of the ROC curve?
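Hi, without any warranties, the following process might do what you want. Replace the first operator in the process (Generate Data) with your own data source and set the correct attribute roles. Note that the process sorts by confidence(positive), so it expects the class of interest to be named "positive".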
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.1 process that exports the points of a ROC curve for the class
  named "positive".

  Results:
    result 1: performance vector of the applied model
    result 2: the scored example set
    result 3: counts per distinct confidence(positive) value
    result 4: ROC points with the columns
              positive_count, example_count, confidence, TPr, FPr

  Each row of result 4 describes the classifier "predict positive when
  confidence(positive) is strictly greater than `confidence`":
    positive_count = positive labels with confidence(positive) at most `confidence`
    example_count  = all examples   with confidence(positive) at most `confidence`
    TPr = 1 - positive_count / P
    FPr = 1 - (example_count - positive_count) / N
  where P and N are the total numbers of positive and negative labels.
  The trivial point (FPr=1, TPr=1) is not emitted.

  To use your own data, replace "Generate Data" and make sure the label values
  are "positive" and "negative". Data containing only one class makes P or N
  zero, so the rate formulas divide by zero.
-->
<process version="5.1.014">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.014" expanded="true" name="Process">
    <process expanded="true" height="836" width="1284">
      <!-- Demo data, model training and scoring. -->
      <operator activated="true" class="generate_data" compatibility="5.1.014" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="target_function" value="random classification"/>
        <parameter key="number_examples" value="1000"/>
        <parameter key="number_of_attributes" value="2"/>
      </operator>
      <operator activated="true" class="naive_bayes" compatibility="5.1.014" expanded="true" height="76" name="Naive Bayes" width="90" x="179" y="30"/>
      <operator activated="true" class="apply_model" compatibility="5.1.014" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
        <list key="application_parameters"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.1.014" expanded="true" height="94" name="Multiply" width="90" x="447" y="30"/>
      <operator activated="true" class="performance" compatibility="5.1.014" expanded="true" height="76" name="Performance" width="90" x="581" y="30"/>

      <!-- Order the scored examples by confidence(positive); no direction is set, so the operator default applies. -->
      <operator activated="true" class="sort" compatibility="5.1.014" expanded="true" height="76" name="Sort" width="90" x="45" y="390">
        <parameter key="attribute_name" value="confidence(positive)"/>
      </operator>
      <!-- Replace parentheses in attribute names by "_" so the names can be used in expressions:
           confidence(positive) becomes confidence_positive_. -->
      <operator activated="true" class="rename_by_replacing" compatibility="5.1.014" expanded="true" height="76" name="Rename by Replacing" width="90" x="179" y="390">
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="replace_what" value="\(|\)"/>
        <parameter key="replace_by" value="_"/>
      </operator>
      <!-- Flag the examples whose TRUE label is positive. A ROC curve counts labels,
           not whether the prediction happened to be correct. -->
      <operator activated="true" class="generate_attributes" compatibility="5.1.014" expanded="true" height="76" name="Generate Attributes" width="90" x="313" y="390">
        <list key="function_descriptions">
          <parameter key="is_positive" value="label==&quot;positive&quot;"/>
        </list>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.1.014" expanded="true" height="112" name="Multiply (2)" width="90" x="447" y="390"/>

      <!-- N: number of examples labelled negative (denominator of FPr). -->
      <operator activated="true" class="filter_examples" compatibility="5.1.014" expanded="true" height="76" name="Filter Examples (2)" width="90" x="648" y="615">
        <parameter key="condition_class" value="attribute_value_filter"/>
        <parameter key="parameter_string" value="label=negative"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="5.1.014" expanded="true" height="60" name="Extract Macro (4)" width="90" x="782" y="615">
        <parameter key="macro" value="negative_label_count"/>
      </operator>
      <!-- P: number of examples labelled positive (denominator of TPr). -->
      <operator activated="true" class="filter_examples" compatibility="5.1.014" expanded="true" height="76" name="Filter Examples" width="90" x="648" y="525">
        <parameter key="condition_class" value="attribute_value_filter"/>
        <parameter key="parameter_string" value="label=positive"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="5.1.014" expanded="true" height="60" name="Extract Macro (3)" width="90" x="782" y="525">
        <parameter key="macro" value="positive_label_count"/>
      </operator>

      <!-- One row per distinct confidence value: number of examples and number of positives.
           NOTE(review): the loop below assumes these rows come out in the sorted
           (ascending confidence) order of the input; confirm for your RapidMiner version. -->
      <operator activated="true" class="aggregate" compatibility="5.1.014" expanded="true" height="76" name="Aggregate" width="90" x="648" y="390">
        <list key="aggregation_attributes">
          <parameter key="is_positive" value="count"/>
          <parameter key="is_positive" value="sum"/>
        </list>
        <parameter key="group_by_attributes" value="|confidence_positive_"/>
      </operator>

      <!-- For every row k: accumulate the counts of rows 1..k and emit them as one ROC data row. -->
      <operator activated="true" class="loop_examples" compatibility="5.1.014" expanded="true" height="94" name="Loop Examples" width="90" x="782" y="390">
        <process expanded="true" height="553" width="765">
          <operator activated="true" class="materialize_data" compatibility="5.1.014" expanded="true" height="76" name="Materialize Data" width="90" x="45" y="30"/>
          <operator activated="true" class="filter_example_range" compatibility="5.1.014" expanded="true" height="76" name="Filter Example Range" width="90" x="112" y="120">
            <parameter key="first_example" value="1"/>
            <parameter key="last_example" value="%{example}"/>
          </operator>
          <!-- Confidence value of the current (last kept) row. -->
          <operator activated="true" class="extract_macro" compatibility="5.1.014" expanded="true" height="60" name="Extract Macro (5)" width="90" x="246" y="255">
            <parameter key="macro" value="confidence"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="confidence_positive_"/>
            <parameter key="example_index" value="%{example}"/>
          </operator>
          <!-- Cumulative positives and cumulative examples over rows 1..k.
               Summing the per-row counts (instead of using the loop index) keeps
               the totals right when several examples share one confidence value. -->
          <operator activated="true" class="aggregate" compatibility="5.1.014" expanded="true" height="76" name="Aggregate (2)" width="90" x="313" y="120">
            <list key="aggregation_attributes">
              <parameter key="sum(is_positive)" value="sum"/>
              <parameter key="count(is_positive)" value="sum"/>
            </list>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="5.1.014" expanded="true" height="60" name="Extract Macro" width="90" x="447" y="120">
            <parameter key="macro" value="positives_seen"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="sum(sum(is_positive))"/>
            <parameter key="example_index" value="1"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="5.1.014" expanded="true" height="60" name="Extract Macro (6)" width="90" x="447" y="255">
            <parameter key="macro" value="examples_seen"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="sum(count(is_positive))"/>
            <parameter key="example_index" value="1"/>
          </operator>
          <!-- Not wired to the operators above; it must stay listed after them
               so the macros are set before it reads them. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="5.1.014" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="581" y="120">
            <list key="attribute_values">
              <parameter key="positive_count" value="%{positives_seen}"/>
              <parameter key="example_count" value="%{examples_seen}"/>
              <parameter key="confidence" value="%{confidence}"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <connect from_port="example set" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="original" to_port="example set"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Extract Macro (5)" to_port="example set"/>
          <connect from_op="Extract Macro (5)" from_port="example set" to_op="Aggregate (2)" to_port="example set input"/>
          <connect from_op="Aggregate (2)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Extract Macro (6)" to_port="example set"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>

      <!-- Stack the per-iteration rows and turn the cumulative counts into rates.
           The counts cover confidences at or below the row's value, so the rates
           for "confidence above the value" are the complements. -->
      <operator activated="true" class="append" compatibility="5.1.014" expanded="true" height="76" name="Append" width="90" x="45" y="750"/>
      <operator activated="true" class="generate_attributes" compatibility="5.1.014" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="447" y="750">
        <list key="function_descriptions">
          <parameter key="TPr" value="1-positive_count/%{positive_label_count}"/>
          <parameter key="FPr" value="1-(example_count-positive_count)/%{negative_label_count}"/>
        </list>
      </operator>

      <connect from_op="Generate Data" from_port="output" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Naive Bayes" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Performance" to_port="labelled data"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Sort" to_port="example set input"/>
      <connect from_op="Performance" from_port="performance" to_port="result 1"/>
      <connect from_op="Performance" from_port="example set" to_port="result 2"/>
      <connect from_op="Sort" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Extract Macro (4)" to_port="example set"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Extract Macro (3)" to_port="example set"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 3"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="144"/>
      <portSpacing port="sink_result 4" spacing="324"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
does your model create confidences? Then you can apply your model outside of RapidMiner, save the processed data including original label, predicted label and confidences in a csv file and import it into RapidMiner. Then make sure that attribute roles and attribute names comply with the naming scheme of classified data sets in RapidMiner (make use of Rename and Set Role operators). If you then use the Performance operator and push your data through it, you will get a ROC curve.
For doing the same outside RapidMiner, sort your examples by confidence, calculate True and False positive rate for each distinct confidence and create a data point from this.
Cheers,
Marius