Hello,
I am new to rapidminer studio, and have little experience with data science. I am trying to predict the value of one certain sensor by using the associated timestamp. At some point I pass the data into a Filter Examples to only keep the non missing values, so I can send the data into a Linear Regression. I check at the end of the Filter, and the beginning of the Linear Regression, and there is no missing values. But when I launch the process, I get a pop-up error from the Linear Regressions saying:
SpoilerMissing values
The date contains missing values which is not allowed for Linear Regression.
Some operators cannot work on data sets with missing values. You should use one of the preprocessing operators like Replace Missing Values before applying this operator in order to replace the missing values by some valid values.
Here is my .rmp
Spoiler<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve energy_data" width="90" x="45" y="34">
<parameter key="repository_entry" value="../data/energy_data"/>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="8.0.001" expanded="true" height="82" name="Date to Numerical" width="90" x="112" y="136">
<parameter key="attribute_name" value="data_timestamp"/>
<parameter key="time_unit" value="second"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="day"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="era"/>
<parameter key="keep_old_attribute" value="false"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value="data_timestamp|sensor_value|id"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role" width="90" x="313" y="136">
<parameter key="attribute_name" value="sensor_value"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles">
<parameter key="data_timestamp" value="regular"/>
<parameter key="id" value="id"/>
</list>
</operator>
<operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="85"/>
<operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="34">
<parameter key="parameter_expression" value=""/>
<parameter key="condition_class" value="no_missing_attributes"/>
<parameter key="invert_filter" value="false"/>
<list key="filters_list"/>
<parameter key="filters_logic_and" value="true"/>
<parameter key="filters_check_metadata" value="true"/>
</operator>
<operator activated="true" class="linear_regression" compatibility="8.0.001" expanded="true" height="103" name="Linear Regression" width="90" x="715" y="34">
<parameter key="feature_selection" value="M5 prime"/>
<parameter key="alpha" value="0.05"/>
<parameter key="max_iterations" value="10"/>
<parameter key="forward_alpha" value="0.05"/>
<parameter key="backward_alpha" value="0.05"/>
<parameter key="eliminate_colinear_features" value="true"/>
<parameter key="min_tolerance" value="0.05"/>
<parameter key="use_bias" value="true"/>
<parameter key="ridge" value="1.0E-8"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples (2)" width="90" x="581" y="187">
<parameter key="parameter_expression" value=""/>
<parameter key="condition_class" value="missing_attributes"/>
<parameter key="invert_filter" value="false"/>
<list key="filters_list"/>
<parameter key="filters_logic_and" value="true"/>
<parameter key="filters_check_metadata" value="true"/>
</operator>
<operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model" width="90" x="782" y="238">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<connect from_op="Retrieve energy_data" from_port="output" to_op="Date to Numerical" to_port="example set input"/>
<connect from_op="Date to Numerical" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Multiply" from_port="output 2" to_op="Filter Examples (2)" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Linear Regression" to_port="training set"/>
<connect from_op="Linear Regression" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Filter Examples (2)" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
<connect from_op="Apply Model" from_port="model" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
Thanks,
Quentin