"Evaluating Anomaly detection methods"
cazzi123
New Altair Community Member
Hi,
I am trying to evaluate anomaly detection methods using ROC curves and the F-measure, but I am not sure how to do this. I have attached the XML of the process.
I would appreciate any assistance with this, as I am still learning RapidMiner.
Thanks
I am trying to evaluate anomaly detection methods using ROC curves and the F-measure, but I am not sure how to do this. I have attached the XML of the process.
I would appreciate any assistance with this, as I am still learning RapidMiner.
Thanks
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="5.3.015" expanded="true" height="60" name="Retrieve Test1" width="90" x="45" y="75">
<parameter key="repository_entry" value="Red wine ten outliers"/>
</operator>
<operator activated="true" class="normalize" compatibility="5.3.015" expanded="true" height="94" name="Normalize" width="90" x="45" y="165"/>
<operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="130" name="Multiply" width="90" x="45" y="300"/>
<operator activated="true" class="anomalydetection:Connectivity-Based Outlier Factor (COF)" compatibility="2.1.002" expanded="true" height="94" name="Connectivity-Based Outlier Factor (COF)" width="90" x="179" y="570"/>
<operator activated="false" class="anomalydetection:Local Correlation Integeral (LOCI)" compatibility="2.1.002" expanded="true" height="76" name="Local Correlation Integeral (LOCI)" width="90" x="179" y="390"/>
<operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.1.002" expanded="true" height="94" name="Local Outlier Factor (2)" width="90" x="179" y="210"/>
<operator activated="true" class="anomalydetection:k-NN Global Anomaly Score" compatibility="2.1.002" expanded="true" height="94" name="k-NN Global Anomaly Score" width="90" x="179" y="30"/>
<operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (3)" width="90" x="313" y="210">
  <!-- Receives the example set from "Local Outlier Factor (2)" and adds the attribute
       Outlier_Label: "outlier" when the outlier attribute exceeds 3.5, otherwise "normal".
       The quotes and ">" of the expression are written as entities so that the value
       attribute stays well-formed XML. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="313" y="30">
  <!-- Receives the example set from "k-NN Global Anomaly Score" and adds the attribute
       Outlier_Label: "outlier" when the outlier attribute exceeds 3.5, otherwise "normal".
       Embedded quotes and ">" are entity-escaped; raw double quotes would terminate the
       value attribute early. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role" width="90" x="447" y="30">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (2)" width="90" x="581" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC (2)" width="90" x="715" y="30">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role (2)" width="90" x="447" y="210">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (3)" width="90" x="581" y="210">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC" width="90" x="715" y="210">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<operator activated="false" class="compare_rocs" compatibility="5.3.015" expanded="true" height="76" name="Compare ROCs" width="90" x="899" y="435">
<process expanded="true">
<portSpacing port="source_train 1" spacing="0"/>
<portSpacing port="sink_model 1" spacing="0"/>
</process>
</operator>
<operator activated="false" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (4)" width="90" x="313" y="390">
  <!-- Deactivated operator. When enabled it adds the attribute Outlier_Label: "outlier"
       when the outlier attribute exceeds 3.5, otherwise "normal".
       Embedded quotes and ">" are entity-escaped to keep the value attribute well-formed. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="false" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role (3)" width="90" x="447" y="390">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="false" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (4)" width="90" x="581" y="390">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="false" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC (3)" width="90" x="715" y="390">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (5)" width="90" x="313" y="570">
  <!-- Receives the example set from "Connectivity-Based Outlier Factor (COF)" and adds the
       attribute Outlier_Label: "outlier" when the outlier attribute exceeds 3.5, otherwise
       "normal". Embedded quotes and ">" are entity-escaped to keep the value attribute
       well-formed. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role (4)" width="90" x="447" y="570">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (5)" width="90" x="581" y="570">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC (4)" width="90" x="715" y="570">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<connect from_op="Retrieve Test1" from_port="output" to_op="Normalize" to_port="example set input"/>
<connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="k-NN Global Anomaly Score" to_port="example set"/>
<connect from_op="Multiply" from_port="output 2" to_op="Local Outlier Factor (2)" to_port="example set"/>
<connect from_op="Multiply" from_port="output 3" to_op="Connectivity-Based Outlier Factor (COF)" to_port="example set"/>
<connect from_op="Connectivity-Based Outlier Factor (COF)" from_port="example set" to_op="Generate Attributes (5)" to_port="example set input"/>
<connect from_op="Local Outlier Factor (2)" from_port="example set" to_op="Generate Attributes (3)" to_port="example set input"/>
<connect from_op="k-NN Global Anomaly Score" from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate ROC (2)" to_port="example set"/>
<connect from_op="Generate ROC (2)" from_port="example set" to_port="result 1"/>
<connect from_op="Generate ROC (2)" from_port="roc set" to_port="result 2"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
<connect from_op="Select Attributes (3)" from_port="example set output" to_op="Generate ROC" to_port="example set"/>
<connect from_op="Generate ROC" from_port="example set" to_port="result 3"/>
<connect from_op="Generate ROC" from_port="roc set" to_port="result 5"/>
<connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
<connect from_op="Set Role (3)" from_port="example set output" to_op="Select Attributes (4)" to_port="example set input"/>
<connect from_op="Select Attributes (4)" from_port="example set output" to_op="Generate ROC (3)" to_port="example set"/>
<connect from_op="Generate Attributes (5)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
<connect from_op="Set Role (4)" from_port="example set output" to_op="Select Attributes (5)" to_port="example set input"/>
<connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate ROC (4)" to_port="example set"/>
<connect from_op="Generate ROC (4)" from_port="example set" to_port="result 4"/>
<connect from_op="Generate ROC (4)" from_port="roc set" to_port="result 6"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
<portSpacing port="sink_result 5" spacing="0"/>
<portSpacing port="sink_result 6" spacing="0"/>
<portSpacing port="sink_result 7" spacing="0"/>
</process>
</operator>
</process>
[/code]
Tagged:
0
Answers
-
Your threshold value of "3.5" produces no "outlier" values in the
label attribute "Outlier_Label". I added a macro "threshold" to make this easier to adjust
(and lowered it to "1"):
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="6.0.003">
<context>
<input/>
<output/>
<macros>
<macro>
<key>threshold</key>
<value>1</value>
</macro>
</macros>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="6.0.003" expanded="true" height="60" name="Retrieve Test1" width="90" x="45" y="75">
<parameter key="repository_entry" value="//Local Repository/data/red-wine"/>
</operator>
<operator activated="true" class="normalize" compatibility="6.0.003" expanded="true" height="94" name="Normalize" width="90" x="45" y="165"/>
<operator activated="true" class="multiply" compatibility="6.0.003" expanded="true" height="112" name="Multiply" width="90" x="45" y="300"/>
<operator activated="true" class="anomalydetection:Connectivity-Based Outlier Factor (COF)" compatibility="2.2.000" expanded="true" height="94" name="Connectivity-Based Outlier Factor (COF)" width="90" x="179" y="570"/>
<operator activated="false" class="anomalydetection:Local Correlation Integeral (LOCI)" compatibility="2.2.000" expanded="true" height="76" name="Local Correlation Integeral (LOCI)" width="90" x="179" y="390"/>
<operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.2.000" expanded="true" height="94" name="Local Outlier Factor (2)" width="90" x="179" y="210"/>
<operator activated="true" class="anomalydetection:k-NN Global Anomaly Score" compatibility="2.2.000" expanded="true" height="94" name="k-NN Global Anomaly Score" width="90" x="179" y="30"/>
<operator activated="true" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (3)" width="90" x="313" y="210">
  <!-- Receives the example set from "Local Outlier Factor (2)" and adds the attribute
       Outlier_Label: "outlier" when the outlier attribute exceeds the process macro
       "threshold" (referenced as %{threshold}), otherwise "normal".
       The quotes and ">" of the expression are written as entities so that the value
       attribute stays well-formed XML. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="true" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="313" y="30">
  <!-- Receives the example set from "k-NN Global Anomaly Score" and adds the attribute
       Outlier_Label: "outlier" when the outlier attribute exceeds the process macro
       "threshold" (referenced as %{threshold}), otherwise "normal".
       Embedded quotes and ">" are entity-escaped; raw double quotes would terminate the
       value attribute early. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="true" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role" width="90" x="447" y="30">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (2)" width="90" x="581" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC (2)" width="90" x="715" y="30">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<operator activated="true" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role (2)" width="90" x="447" y="210">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (3)" width="90" x="581" y="210">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC" width="90" x="715" y="210">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<operator activated="false" class="compare_rocs" compatibility="6.0.003" expanded="true" height="76" name="Compare ROCs" width="90" x="899" y="435">
<process expanded="true">
<portSpacing port="source_train 1" spacing="0"/>
<portSpacing port="sink_model 1" spacing="0"/>
</process>
</operator>
<operator activated="false" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (4)" width="90" x="313" y="390">
  <!-- Deactivated operator. When enabled it adds the attribute Outlier_Label: "outlier"
       when the outlier attribute exceeds the process macro "threshold", otherwise "normal".
       Uses %{threshold} like the other Generate Attributes operators in this process instead
       of a hard-coded 3.5, so re-enabling this branch picks up the same cut-off.
       Embedded quotes and ">" are entity-escaped to keep the value attribute well-formed. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="false" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role (3)" width="90" x="447" y="390">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="false" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (4)" width="90" x="581" y="390">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="false" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC (3)" width="90" x="715" y="390">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<operator activated="true" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (5)" width="90" x="313" y="570">
  <!-- Receives the example set from "Connectivity-Based Outlier Factor (COF)" and adds the
       attribute Outlier_Label: "outlier" when the outlier attribute exceeds the process
       macro "threshold" (referenced as %{threshold}), otherwise "normal".
       Embedded quotes and ">" are entity-escaped to keep the value attribute well-formed. -->
  <list key="function_descriptions">
    <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
  </list>
</operator>
<operator activated="true" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role (4)" width="90" x="447" y="570">
<parameter key="attribute_name" value="Outlier_Label"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (5)" width="90" x="581" y="570">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="outlier"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC (4)" width="90" x="715" y="570">
<parameter key="label value for outliers" value="outlier"/>
</operator>
<connect from_op="Retrieve Test1" from_port="output" to_op="Normalize" to_port="example set input"/>
<connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="k-NN Global Anomaly Score" to_port="example set"/>
<connect from_op="Multiply" from_port="output 2" to_op="Local Outlier Factor (2)" to_port="example set"/>
<connect from_op="Multiply" from_port="output 3" to_op="Connectivity-Based Outlier Factor (COF)" to_port="example set"/>
<connect from_op="Connectivity-Based Outlier Factor (COF)" from_port="example set" to_op="Generate Attributes (5)" to_port="example set input"/>
<connect from_op="Local Outlier Factor (2)" from_port="example set" to_op="Generate Attributes (3)" to_port="example set input"/>
<connect from_op="k-NN Global Anomaly Score" from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate ROC (2)" to_port="example set"/>
<connect from_op="Generate ROC (2)" from_port="example set" to_port="result 1"/>
<connect from_op="Generate ROC (2)" from_port="roc set" to_port="result 2"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
<connect from_op="Select Attributes (3)" from_port="example set output" to_op="Generate ROC" to_port="example set"/>
<connect from_op="Generate ROC" from_port="example set" to_port="result 3"/>
<connect from_op="Generate ROC" from_port="roc set" to_port="result 5"/>
<connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
<connect from_op="Set Role (3)" from_port="example set output" to_op="Select Attributes (4)" to_port="example set input"/>
<connect from_op="Select Attributes (4)" from_port="example set output" to_op="Generate ROC (3)" to_port="example set"/>
<connect from_op="Generate Attributes (5)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
<connect from_op="Set Role (4)" from_port="example set output" to_op="Select Attributes (5)" to_port="example set input"/>
<connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate ROC (4)" to_port="example set"/>
<connect from_op="Generate ROC (4)" from_port="example set" to_port="result 4"/>
<connect from_op="Generate ROC (4)" from_port="roc set" to_port="result 6"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
<portSpacing port="sink_result 5" spacing="0"/>
<portSpacing port="sink_result 6" spacing="0"/>
<portSpacing port="sink_result 7" spacing="0"/>
</process>
</operator>
</process>
0