[SOLVED] Replace Missing Values effects can propagate backwards?
tennenrishin
New Altair Community Member
In this process, the Replace Missing Values operator affects even the output that never passed through it, if its "create view" parameter is unchecked. Is this the intended (and even default) behavior?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.2.008) illustrating the question.
  Two user-specified example sets are combined by Union and passed to Generate Attributes.
  Generate Attributes feeds result 1 from its "example set output" port and feeds
  Replace Missing Values from its "original" port; that operator's output is result 2.
  Replace Missing Values does not set "create_view" in this version of the process.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="654" width="1015">
      <!-- First generator: defines attributes a and b only. -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="179" y="30">
        <list key="attribute_values">
          <parameter key="a" value="1"/>
          <parameter key="b" value="2"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Second generator: defines a and b plus the extra attributes z and x. -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="179" y="120">
        <list key="attribute_values">
          <parameter key="a" value="1"/>
          <parameter key="b" value="2"/>
          <!-- The value itself contains double quotes, so they are written as entity references. -->
          <parameter key="z" value="&quot;hi&quot;"/>
          <parameter key="x" value="1"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="380" y="30"/>
      <!-- Adds attribute c, defined by the expression a+b. -->
      <operator activated="true" class="generate_attributes" compatibility="5.2.008" expanded="true" height="76" name="Generate Attributes" width="90" x="648" y="30">
        <list key="function_descriptions">
          <parameter key="c" value="a+b"/>
        </list>
      </operator>
      <!-- Restricted to nominal attributes; missing values are replaced by the fixed string below. -->
      <operator activated="true" class="replace_missing_values" compatibility="5.2.008" expanded="true" height="94" name="Replace Missing Values" width="90" x="648" y="120">
        <parameter key="attribute_filter_type" value="value_type"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="default" value="value"/>
        <list key="columns"/>
        <parameter key="replenishment_value" value="???MISSING???"/>
      </operator>
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Union" to_port="example set 1"/>
      <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Union" to_port="example set 2"/>
      <connect from_op="Union" from_port="union" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
      <connect from_op="Generate Attributes" from_port="original" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="72"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
-
Hi,
If you do not check "Create view", the missing value replacement works directly on the data table that your two example sets are based on.
It replaces all missing values for every example in that example table. Because both of your example sets are based on the same example table,
it looks as if the changes are being propagated backwards. If you want to avoid this, check "Create view" and materialize the data afterwards.
This creates a separate example table for the second example set. But be aware that this will also increase RapidMiner's memory use!
Best,
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.2.008) illustrating the suggested workaround.
  Same operator chain as in the question, with two changes on the result 2 branch:
  Replace Missing Values sets "create_view" to true, and its output passes through
  Materialize Data before reaching result 2.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="654" width="1015">
      <!-- First generator: defines attributes a and b only. -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="179" y="30">
        <list key="attribute_values">
          <parameter key="a" value="1"/>
          <parameter key="b" value="2"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Second generator: defines a and b plus the extra attributes z and x. -->
      <operator activated="true" class="generate_data_user_specification" compatibility="5.2.008" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="179" y="120">
        <list key="attribute_values">
          <parameter key="a" value="1"/>
          <parameter key="b" value="2"/>
          <!-- The value itself contains double quotes, so they are written as entity references. -->
          <parameter key="z" value="&quot;hi&quot;"/>
          <parameter key="x" value="1"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="union" compatibility="5.2.008" expanded="true" height="76" name="Union" width="90" x="380" y="30"/>
      <!-- Adds attribute c, defined by the expression a+b. A breakpoint is set before this operator. -->
      <operator activated="true" breakpoints="before" class="generate_attributes" compatibility="5.2.008" expanded="true" height="76" name="Generate Attributes" width="90" x="648" y="30">
        <list key="function_descriptions">
          <parameter key="c" value="a+b"/>
        </list>
      </operator>
      <!-- Restricted to nominal attributes; "create_view" is enabled so the replacement is applied as a view. -->
      <operator activated="true" class="replace_missing_values" compatibility="5.2.008" expanded="true" height="94" name="Replace Missing Values" width="90" x="648" y="120">
        <parameter key="create_view" value="true"/>
        <parameter key="attribute_filter_type" value="value_type"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="default" value="value"/>
        <list key="columns"/>
        <parameter key="replenishment_value" value="???MISSING???"/>
      </operator>
      <!-- Materializes the view produced above before it is delivered as result 2. -->
      <operator activated="true" class="materialize_data" compatibility="5.2.008" expanded="true" height="76" name="Materialize Data" width="90" x="782" y="120"/>
      <connect from_op="Generate Data by User Specification" from_port="output" to_op="Union" to_port="example set 1"/>
      <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Union" to_port="example set 2"/>
      <connect from_op="Union" from_port="union" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
      <connect from_op="Generate Attributes" from_port="original" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_op="Materialize Data" to_port="example set input"/>
      <connect from_op="Materialize Data" from_port="example set output" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="72"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Nils
0