"Evaluating Anomaly detection methods"

cazzi123
cazzi123 New Altair Community Member
edited November 5 in Community Q&A
Hi,

I am trying to evaluate anomaly detection methods using ROC and F-measure, but I am not sure how to do this. I have attached the XML of the process.

I would appreciate any assistance on this, as I am still learning RapidMiner.

Thanks :)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
  <!--
    Purpose: score one example set with several operators of the
    "anomalydetection" extension and hand each scored copy to its own
    "Generate ROC" operator.

    Active branches (one per Multiply output):
      output 1: k-NN Global Anomaly Score, Generate Attributes (2), Set Role, Select Attributes (2), Generate ROC (2)
      output 2: Local Outlier Factor (2), Generate Attributes (3), Set Role (2), Select Attributes (3), Generate ROC
      output 3: Connectivity-Based Outlier Factor (COF), Generate Attributes (5), Set Role (4), Select Attributes (5), Generate ROC (4)

    Deactivated (activated="false"): the LOCI operator, its follow-up chain
    (Generate Attributes (4), Set Role (3), Select Attributes (4), Generate ROC (3))
    and Compare ROCs.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!-- Input stage: load the repository entry, normalize it, then duplicate it for the detectors. -->
      <operator activated="true" class="retrieve" compatibility="5.3.015" expanded="true" height="60" name="Retrieve Test1" width="90" x="45" y="75">
        <parameter key="repository_entry" value="Red wine ten outliers"/>
      </operator>
      <operator activated="true" class="normalize" compatibility="5.3.015" expanded="true" height="94" name="Normalize" width="90" x="45" y="165"/>
      <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="130" name="Multiply" width="90" x="45" y="300"/>
      <!-- Detectors: no parameters are set on any of them in this file. LOCI is deactivated and has no connections. -->
      <operator activated="true" class="anomalydetection:Connectivity-Based Outlier Factor (COF)" compatibility="2.1.002" expanded="true" height="94" name="Connectivity-Based Outlier Factor (COF)" width="90" x="179" y="570"/>
      <operator activated="false" class="anomalydetection:Local Correlation Integeral (LOCI)" compatibility="2.1.002" expanded="true" height="76" name="Local Correlation Integeral (LOCI)" width="90" x="179" y="390"/>
      <operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.1.002" expanded="true" height="94" name="Local Outlier Factor (2)" width="90" x="179" y="210"/>
      <operator activated="true" class="anomalydetection:k-NN Global Anomaly Score" compatibility="2.1.002" expanded="true" height="94" name="k-NN Global Anomaly Score" width="90" x="179" y="30"/>
      <!--
        LOF branch, labelling step: defines Outlier_Label as "outlier" when the
        attribute named outlier is greater than 3.5, otherwise "normal".
        The same fixed cut-off 3.5 is repeated in every Generate Attributes
        operator of this process.
        NOTE(review): if no value of outlier exceeds 3.5, Outlier_Label contains
        only "normal"; verify the cut-off against the actual score range.
      -->
      <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (3)" width="90" x="313" y="210">
        <list key="function_descriptions">
          <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
        </list>
      </operator>
      <!--
        k-NN branch: derive Outlier_Label with the same expression, give it the
        role "label", apply an inverted single-attribute filter on outlier, then
        run Generate ROC with "outlier" as the label value for outliers.
        NOTE(review): Generate ROC directly follows the inverted filter on
        outlier; confirm the input it needs is still present at that point.
      -->
      <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="313" y="30">
        <list key="function_descriptions">
          <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role" width="90" x="447" y="30">
        <parameter key="attribute_name" value="Outlier_Label"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (2)" width="90" x="581" y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="outlier"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC (2)" width="90" x="715" y="30">
        <parameter key="label value for outliers" value="outlier"/>
      </operator>
      <!-- LOF branch, remaining steps: same Set Role, Select Attributes and Generate ROC settings as the k-NN branch. -->
      <operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role (2)" width="90" x="447" y="210">
        <parameter key="attribute_name" value="Outlier_Label"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (3)" width="90" x="581" y="210">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="outlier"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC" width="90" x="715" y="210">
        <parameter key="label value for outliers" value="outlier"/>
      </operator>
      <!-- Compare ROCs is deactivated, has an empty inner process and no connections. -->
      <operator activated="false" class="compare_rocs" compatibility="5.3.015" expanded="true" height="76" name="Compare ROCs" width="90" x="899" y="435">
        <process expanded="true">
          <portSpacing port="source_train 1" spacing="0"/>
          <portSpacing port="sink_model 1" spacing="0"/>
        </process>
      </operator>
      <!--
        Deactivated chain laid out next to the LOCI operator. The four operators
        are connected to each other, but nothing feeds Generate Attributes (4)
        and Generate ROC (3) is not connected to any result port.
      -->
      <operator activated="false" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (4)" width="90" x="313" y="390">
        <list key="function_descriptions">
          <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
        </list>
      </operator>
      <operator activated="false" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role (3)" width="90" x="447" y="390">
        <parameter key="attribute_name" value="Outlier_Label"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="false" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (4)" width="90" x="581" y="390">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="outlier"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <operator activated="false" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC (3)" width="90" x="715" y="390">
        <parameter key="label value for outliers" value="outlier"/>
      </operator>
      <!-- COF branch: same four steps and settings as the k-NN and LOF branches. -->
      <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (5)" width="90" x="313" y="570">
        <list key="function_descriptions">
          <parameter key="Outlier_Label" value="if(outlier&gt;3.5,&quot;outlier&quot;,&quot;normal&quot;)"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role (4)" width="90" x="447" y="570">
        <parameter key="attribute_name" value="Outlier_Label"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.3.015" expanded="true" height="76" name="Select Attributes (5)" width="90" x="581" y="570">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="outlier"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.1.002" expanded="true" height="130" name="Generate ROC (4)" width="90" x="715" y="570">
        <parameter key="label value for outliers" value="outlier"/>
      </operator>
      <!--
        Wiring. Result ports used by the active branches:
          Generate ROC (2): "example set" to result 1, "roc set" to result 2
          Generate ROC:     "example set" to result 3, "roc set" to result 5
          Generate ROC (4): "example set" to result 4, "roc set" to result 6
        Result 7 has a port spacing entry below but no connection.
      -->
      <connect from_op="Retrieve Test1" from_port="output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="k-NN Global Anomaly Score" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Local Outlier Factor (2)" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 3" to_op="Connectivity-Based Outlier Factor (COF)" to_port="example set"/>
      <connect from_op="Connectivity-Based Outlier Factor (COF)" from_port="example set" to_op="Generate Attributes (5)" to_port="example set input"/>
      <connect from_op="Local Outlier Factor (2)" from_port="example set" to_op="Generate Attributes (3)" to_port="example set input"/>
      <connect from_op="k-NN Global Anomaly Score" from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate ROC (2)" to_port="example set"/>
      <connect from_op="Generate ROC (2)" from_port="example set" to_port="result 1"/>
      <connect from_op="Generate ROC (2)" from_port="roc set" to_port="result 2"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Generate ROC" to_port="example set"/>
      <connect from_op="Generate ROC" from_port="example set" to_port="result 3"/>
      <connect from_op="Generate ROC" from_port="roc set" to_port="result 5"/>
      <connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Generate ROC (3)" to_port="example set"/>
      <connect from_op="Generate Attributes (5)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
      <connect from_op="Set Role (4)" from_port="example set output" to_op="Select Attributes (5)" to_port="example set input"/>
      <connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate ROC (4)" to_port="example set"/>
      <connect from_op="Generate ROC (4)" from_port="example set" to_port="result 4"/>
      <connect from_op="Generate ROC (4)" from_port="roc set" to_port="result 6"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>

[\code]

Answers

  • fras
    fras New Altair Community Member
    Your threshold value of "3.5" produces no "outlier" values in the
    label attribute "Outlier_Label". I added a macro "threshold" to make this easier to adjust
    (and lowered it to "1"):

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="6.0.003">
      <!--
        Purpose: score one example set with several operators of the
        "anomalydetection" extension and hand each scored copy to its own
        "Generate ROC" operator.

        The cut-off that turns the attribute named outlier into the
        "outlier" / "normal" label is held in the process macro "threshold"
        (value 1, defined in the context below). Every Generate Attributes
        operator reads it as %{threshold}, so it is changed in one place only.

        Active branches (one per Multiply output):
          output 1: k-NN Global Anomaly Score, Generate Attributes (2), Set Role, Select Attributes (2), Generate ROC (2)
          output 2: Local Outlier Factor (2), Generate Attributes (3), Set Role (2), Select Attributes (3), Generate ROC
          output 3: Connectivity-Based Outlier Factor (COF), Generate Attributes (5), Set Role (4), Select Attributes (5), Generate ROC (4)

        Deactivated (activated="false"): the LOCI operator, its follow-up chain
        (Generate Attributes (4), Set Role (3), Select Attributes (4), Generate ROC (3))
        and Compare ROCs.
      -->
      <context>
        <input/>
        <output/>
        <macros>
          <macro>
            <key>threshold</key>
            <value>1</value>
          </macro>
        </macros>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
        <process expanded="true">
          <!-- Input stage: load the repository entry, normalize it, then duplicate it for the detectors. -->
          <operator activated="true" class="retrieve" compatibility="6.0.003" expanded="true" height="60" name="Retrieve Test1" width="90" x="45" y="75">
            <parameter key="repository_entry" value="//Local Repository/data/red-wine"/>
          </operator>
          <operator activated="true" class="normalize" compatibility="6.0.003" expanded="true" height="94" name="Normalize" width="90" x="45" y="165"/>
          <operator activated="true" class="multiply" compatibility="6.0.003" expanded="true" height="112" name="Multiply" width="90" x="45" y="300"/>
          <!-- Detectors: no parameters are set on any of them in this file. LOCI is deactivated and has no connections. -->
          <operator activated="true" class="anomalydetection:Connectivity-Based Outlier Factor (COF)" compatibility="2.2.000" expanded="true" height="94" name="Connectivity-Based Outlier Factor (COF)" width="90" x="179" y="570"/>
          <operator activated="false" class="anomalydetection:Local Correlation Integeral (LOCI)" compatibility="2.2.000" expanded="true" height="76" name="Local Correlation Integeral (LOCI)" width="90" x="179" y="390"/>
          <operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.2.000" expanded="true" height="94" name="Local Outlier Factor (2)" width="90" x="179" y="210"/>
          <operator activated="true" class="anomalydetection:k-NN Global Anomaly Score" compatibility="2.2.000" expanded="true" height="94" name="k-NN Global Anomaly Score" width="90" x="179" y="30"/>
          <!--
            LOF branch, labelling step: defines Outlier_Label as "outlier" when the
            attribute named outlier is greater than the threshold macro, otherwise "normal".
          -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (3)" width="90" x="313" y="210">
            <list key="function_descriptions">
              <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
            </list>
          </operator>
          <!--
            k-NN branch: derive Outlier_Label with the same expression, give it the
            role "label", apply an inverted single-attribute filter on outlier, then
            run Generate ROC with "outlier" as the label value for outliers.
            NOTE(review): Generate ROC directly follows the inverted filter on
            outlier; confirm the input it needs is still present at that point.
          -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="313" y="30">
            <list key="function_descriptions">
              <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role" width="90" x="447" y="30">
            <parameter key="attribute_name" value="Outlier_Label"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (2)" width="90" x="581" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="outlier"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC (2)" width="90" x="715" y="30">
            <parameter key="label value for outliers" value="outlier"/>
          </operator>
          <!-- LOF branch, remaining steps: same Set Role, Select Attributes and Generate ROC settings as the k-NN branch. -->
          <operator activated="true" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role (2)" width="90" x="447" y="210">
            <parameter key="attribute_name" value="Outlier_Label"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (3)" width="90" x="581" y="210">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="outlier"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC" width="90" x="715" y="210">
            <parameter key="label value for outliers" value="outlier"/>
          </operator>
          <!-- Compare ROCs is deactivated, has an empty inner process and no connections. -->
          <operator activated="false" class="compare_rocs" compatibility="6.0.003" expanded="true" height="76" name="Compare ROCs" width="90" x="899" y="435">
            <process expanded="true">
              <portSpacing port="source_train 1" spacing="0"/>
              <portSpacing port="sink_model 1" spacing="0"/>
            </process>
          </operator>
          <!--
            Deactivated chain laid out next to the LOCI operator. The four operators
            are connected to each other, but nothing feeds Generate Attributes (4)
            and Generate ROC (3) is not connected to any result port.
            Generate Attributes (4) reads the threshold macro like the active
            branches, so re-activating this chain does not bring back a separate
            fixed cut-off.
          -->
          <operator activated="false" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (4)" width="90" x="313" y="390">
            <list key="function_descriptions">
              <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
            </list>
          </operator>
          <operator activated="false" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role (3)" width="90" x="447" y="390">
            <parameter key="attribute_name" value="Outlier_Label"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="false" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (4)" width="90" x="581" y="390">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="outlier"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <operator activated="false" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC (3)" width="90" x="715" y="390">
            <parameter key="label value for outliers" value="outlier"/>
          </operator>
          <!-- COF branch: same four steps and settings as the k-NN and LOF branches. -->
          <operator activated="true" class="generate_attributes" compatibility="6.0.003" expanded="true" height="76" name="Generate Attributes (5)" width="90" x="313" y="570">
            <list key="function_descriptions">
              <parameter key="Outlier_Label" value="if(outlier&gt;%{threshold},&quot;outlier&quot;,&quot;normal&quot;)"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="6.0.003" expanded="true" height="76" name="Set Role (4)" width="90" x="447" y="570">
            <parameter key="attribute_name" value="Outlier_Label"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="6.0.003" expanded="true" height="76" name="Select Attributes (5)" width="90" x="581" y="570">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="outlier"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <operator activated="true" class="anomalydetection:Generate ROC" compatibility="2.2.000" expanded="true" height="130" name="Generate ROC (4)" width="90" x="715" y="570">
            <parameter key="label value for outliers" value="outlier"/>
          </operator>
          <!--
            Wiring. Result ports used by the active branches:
              Generate ROC (2): "example set" to result 1, "roc set" to result 2
              Generate ROC:     "example set" to result 3, "roc set" to result 5
              Generate ROC (4): "example set" to result 4, "roc set" to result 6
            Result 7 has a port spacing entry below but no connection.
          -->
          <connect from_op="Retrieve Test1" from_port="output" to_op="Normalize" to_port="example set input"/>
          <connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="k-NN Global Anomaly Score" to_port="example set"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Local Outlier Factor (2)" to_port="example set"/>
          <connect from_op="Multiply" from_port="output 3" to_op="Connectivity-Based Outlier Factor (COF)" to_port="example set"/>
          <connect from_op="Connectivity-Based Outlier Factor (COF)" from_port="example set" to_op="Generate Attributes (5)" to_port="example set input"/>
          <connect from_op="Local Outlier Factor (2)" from_port="example set" to_op="Generate Attributes (3)" to_port="example set input"/>
          <connect from_op="k-NN Global Anomaly Score" from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate ROC (2)" to_port="example set"/>
          <connect from_op="Generate ROC (2)" from_port="example set" to_port="result 1"/>
          <connect from_op="Generate ROC (2)" from_port="roc set" to_port="result 2"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Generate ROC" to_port="example set"/>
          <connect from_op="Generate ROC" from_port="example set" to_port="result 3"/>
          <connect from_op="Generate ROC" from_port="roc set" to_port="result 5"/>
          <connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
          <connect from_op="Set Role (3)" from_port="example set output" to_op="Select Attributes (4)" to_port="example set input"/>
          <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Generate ROC (3)" to_port="example set"/>
          <connect from_op="Generate Attributes (5)" from_port="example set output" to_op="Set Role (4)" to_port="example set input"/>
          <connect from_op="Set Role (4)" from_port="example set output" to_op="Select Attributes (5)" to_port="example set input"/>
          <connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate ROC (4)" to_port="example set"/>
          <connect from_op="Generate ROC (4)" from_port="example set" to_port="result 4"/>
          <connect from_op="Generate ROC (4)" from_port="roc set" to_port="result 6"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
          <portSpacing port="sink_result 5" spacing="0"/>
          <portSpacing port="sink_result 6" spacing="0"/>
          <portSpacing port="sink_result 7" spacing="0"/>
        </process>
      </operator>
    </process>