[SOLVED] Polynomial Regression - Wrong results
Hi there,
I'm new to RapidMiner, and to data mining in general. My task right now is to compare different tools for later use with large data sets and predictive analytics.
I started with some very simple examples and checked the results against R. It worked fine for linear regression with only one attribute.
Now, I want RapidMiner to calculate the results for a Polynomial Regression for a data set with two columns, a numeric label (y) and one numeric attribute (x), 300 entries (x is a sequence from 0 to 30 with steps of 0.1)
The result should be
process attached:
Regards,
Daniela
I'm new to RapidMiner, and to data mining in general. My task right now is to compare different tools for later use with large data sets and predictive analytics.
I started with some very simple examples and checked the results against R. It worked fine for linear regression with only one attribute.
Now, I want RapidMiner to calculate the results for a Polynomial Regression for a data set with two columns, a numeric label (y) and one numeric attribute (x), 300 entries (x is a sequence from 0 to 30 with steps of 0.1)
The result should be
y = 0.15 x^2 - 7.34 x + 106.38
But it is:
87.714 * x ^ 1.000
- 90.314 * x ^ 1.000
+ 79.563
I must be missing something very obvious, but I still can't figure it out.
process attached:
Thank you so much for your help
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition.
  Pipeline: Read CSV, then Split Data (two partitions), then Polynomial
  Regression trained on partition 1, then Apply Model on partition 2.
  Results delivered: the labelled data (result 1) and the model (result 2).
-->
<process version="6.0.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="6.0.003"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!--
        Step 1: load decay_RapidMiner.txt.
        The meta data declares column 0 as numeric attribute "x" and column 1
        as numeric label "y". first_row_as_names is false, while row 0 is
        annotated as "Name".
      -->
      <operator activated="true"
                class="read_csv"
                compatibility="6.0.003"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="112"
                y="75">
        <parameter key="csv_file" value="decay_RapidMiner.txt"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="x.true.numeric.attribute"/>
          <parameter key="1" value="y.true.numeric.label"/>
        </list>
      </operator>

      <!--
        Step 2: split the example set into two partitions with ratios 0.7 and 0.3.
        NOTE(review): sampling_type is "stratified sampling" but the label "y" is
        declared numeric above; stratification normally presupposes a nominal
        label. Confirm how this RapidMiner version handles that combination.
      -->
      <operator activated="true"
                class="split_data"
                compatibility="6.0.003"
                expanded="true"
                height="94"
                name="Split Data"
                width="90"
                x="313"
                y="75">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
      </operator>

      <!--
        Step 3: fit the polynomial model on partition 1.
        Only replication_factor is set explicitly (2); every other parameter of
        the operator is left at whatever its default is.
        NOTE(review): the expected model is quadratic with an intercept of about
        106. Verify that the operator defaults (maximum degree, coefficient
        bounds, iteration count) actually permit such a solution. TODO confirm
        against the operator documentation.
      -->
      <operator activated="true"
                class="polynomial_regression"
                compatibility="6.0.003"
                expanded="true"
                height="76"
                name="Polynomial Regression"
                width="90"
                x="514"
                y="75">
        <parameter key="replication_factor" value="2"/>
      </operator>

      <!-- Step 4: apply the trained model to partition 2 (the 0.3 share). -->
      <operator activated="true"
                class="apply_model"
                compatibility="6.0.003"
                expanded="true"
                height="76"
                name="Apply Model"
                width="90"
                x="648"
                y="75">
        <list key="application_parameters"/>
      </operator>

      <!-- Wiring between the operators and the process result ports. -->
      <connect from_op="Read CSV" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Polynomial Regression" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Polynomial Regression" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 2"/>

      <!-- Layout hints for the process canvas ports. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Regards,
Daniela