Using RapidMiner 5.3.015.
I am trying to process missing values.
After retrieving the data I used a Multiply operator. One of the Multiply operator's outputs is connected to the input of the Impute Missing Values operator, and a second output is connected directly to one of the process's result (res) ports.
After running the process, the missing values had been replaced in both results: the example set taken before Impute Missing Values as well as the one taken after it!
This is strange, because the original data should not be changed!
(Edit: same results with RapidMiner Studio 6.0.3.)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Impute the missing values of the Labor-Negotiations sample data while
  keeping an untouched copy of the original data for comparison.

  Results:
    result 1: example set after Impute Missing Values
    result 2: original example set as retrieved

  Without the Materialize Data step below, both results showed the imputed
  values. Multiply appears to hand out references to the same underlying
  data rather than independent copies, so an operator that writes into the
  data is visible on every branch (confirm against the Multiply operator
  documentation for your version).
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the sample data set that contains missing values. -->
      <operator activated="true" class="retrieve" compatibility="5.3.015" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Labor-Negotiations"/>
      </operator>
      <!-- Branch the data: output 1 goes to imputation, output 2 straight to the results. -->
      <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="94" name="Multiply" width="90" x="179" y="165"/>
      <!-- Create a fresh copy of the data for the imputation branch so that
           the branch wired to result 2 is not modified. -->
      <operator activated="true" class="materialize_data" compatibility="5.3.015" expanded="true" height="76" name="Materialize Data" width="90" x="313" y="255"/>
      <!-- Fill in missing values using a model learned by the inner k-NN (k = 5) subprocess. -->
      <operator activated="true" class="impute_missing_values" compatibility="5.3.015" expanded="true" height="60" name="Impute Missing Values" width="90" x="447" y="255">
        <parameter key="attribute" value="class"/>
        <process expanded="true">
          <operator activated="true" class="k_nn" compatibility="5.3.015" expanded="true" height="76" name="k-NN" width="90" x="601" y="30">
            <parameter key="k" value="5"/>
          </operator>
          <connect from_port="example set source" to_op="k-NN" to_port="training set"/>
          <connect from_op="k-NN" from_port="model" to_port="model sink"/>
          <portSpacing port="source_example set source" spacing="0"/>
          <portSpacing port="sink_model sink" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Materialize Data" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_port="result 2"/>
      <connect from_op="Materialize Data" from_port="example set output" to_op="Impute Missing Values" to_port="example set in"/>
      <connect from_op="Impute Missing Values" from_port="example set out" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>