hi,
I did 2 (3) different outlier detection techniques and want to join my rows by ID, and add the outlier column from the 3 detectors into 1 dataset by doing a JOIN by ID...
<?xml version="1.0" encoding="UTF-8"?><process version="7.2.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve Master3Klassen_nominal" width="90" x="45" y="30">
<parameter key="repository_entry" value="../../../data/Master3Klassen_nominal"/>
</operator>
<operator activated="true" class="generate_id" compatibility="7.2.001" expanded="true" height="82" name="Generate ID" width="90" x="112" y="165"/>
<operator activated="true" class="normalize" compatibility="7.1.001" expanded="true" height="103" name="Normalize" width="90" x="246" y="165">
<parameter key="method" value="range transformation"/>
<parameter key="min" value="-1.0"/>
</operator>
<operator activated="true" class="multiply" compatibility="7.2.001" expanded="true" height="103" name="Multiply" width="90" x="380" y="136"/>
<operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.3.002" expanded="true" height="103" name="Local Outlier Factor (LOF)" width="90" x="581" y="255">
<parameter key="k_min (MinPtsLB)" value="3"/>
<parameter key="k_max (MinPtsUB)" value="30"/>
<parameter key="numerical_measure" value="CamberraDistance"/>
<parameter key="parallelize evaluation process" value="true"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="581" y="391">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value="outlier"/>
<parameter key="attributes" value="id|outlier"/>
</operator>
<operator activated="true" class="anomalydetection:k-NN Global Anomaly Score" compatibility="2.3.002" expanded="true" height="103" name="k-NN Global Anomaly Score (2)" width="90" x="514" y="75">
<parameter key="k" value="5"/>
<parameter key="use k-th neighbor distance only (no average)" value="true"/>
<parameter key="numerical_measure" value="CamberraDistance"/>
<parameter key="parallelize evaluation process" value="true"/>
</operator>
<operator activated="true" class="rename" compatibility="7.2.001" expanded="true" height="82" name="Rename" width="90" x="380" y="391">
<parameter key="old_name" value="outlier"/>
<parameter key="new_name" value="outlier2"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="join" compatibility="7.2.001" expanded="true" height="82" name="Join" width="90" x="514" y="544">
<parameter key="remove_double_attributes" value="false"/>
<list key="key_attributes"/>
</operator>
<connect from_op="Retrieve Master3Klassen_nominal" from_port="output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Normalize" to_port="example set input"/>
<connect from_op="Normalize" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="k-NN Global Anomaly Score (2)" to_port="example set"/>
<connect from_op="Multiply" from_port="output 2" to_op="Local Outlier Factor (LOF)" to_port="example set"/>
<connect from_op="Local Outlier Factor (LOF)" from_port="example set" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Local Outlier Factor (LOF)" from_port="model" to_port="result 2"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Join" to_port="left"/>
<connect from_op="k-NN Global Anomaly Score (2)" from_port="example set" to_op="Rename" to_port="example set input"/>
<connect from_op="k-NN Global Anomaly Score (2)" from_port="model" to_port="result 1"/>
<connect from_op="Rename" from_port="example set output" to_op="Join" to_port="right"/>
<connect from_op="Join" from_port="join" to_port="result 3"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
<description align="center" color="yellow" colored="false" height="105" resized="false" width="180" x="236" y="425">Type your comment</description>
</process>
</operator>
</process>
however, there is always only 1 outlier column produced... I don't understand why, I want them joined by ID but it doesnt work..