Hi there,
I'm new to RapidMiner, and to data mining in general. My task right now is to compare different tools for later use with large data sets and predictive analytics.
I started with some very simple examples and checked the results against R. It worked fine for linear regression with only one attribute.
Now I want RapidMiner to fit a polynomial regression to a data set with two columns: a numeric label (y) and one numeric attribute (x). It has 300 entries (x is a sequence from 0 to 30 in steps of 0.1).
The result should be
y = 0.15 x^2 - 7.34 x + 106.38
But it is:
87.714 * x ^ 1.000
- 90.314 * x ^ 1.000
+ 79.563
I must be missing something very obvious, but I still can't figure it out.
process attached:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: read a two-column CSV (x = numeric attribute, y = numeric label),
  split it 70/30, train Polynomial Regression on partition 1 and apply the resulting
  model to partition 2. Delivered results: the labelled data (result 1) and the
  model (result 2).
-->
<process version="6.0.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="6.0.003"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Step 1: load the data; row 0 is annotated as the header ("Name") row. -->
      <operator activated="true"
                class="read_csv"
                compatibility="6.0.003"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="112"
                y="75">
        <parameter key="csv_file" value="decay_RapidMiner.txt"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <!-- Column 0 is the regular attribute x, column 1 is the label y. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="x.true.numeric.attribute"/>
          <parameter key="1" value="y.true.numeric.label"/>
        </list>
      </operator>

      <!--
        Step 2: partition the examples with ratios 0.7 and 0.3.
        NOTE(review): stratified sampling is requested although the label y is declared
        numeric above; confirm how Split Data handles stratification for a numeric label.
      -->
      <operator activated="true"
                class="split_data"
                compatibility="6.0.003"
                expanded="true"
                height="94"
                name="Split Data"
                width="90"
                x="313"
                y="75">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
      </operator>

      <!--
        Step 3: learn the polynomial model.
        NOTE(review): replication_factor is the only parameter set explicitly; every other
        setting of this operator is left at its default. If the fitted model looks wrong,
        those defaults (presumably including degree and coefficient limits) are the first
        thing to check. TODO confirm against the operator documentation.
      -->
      <operator activated="true"
                class="polynomial_regression"
                compatibility="6.0.003"
                expanded="true"
                height="76"
                name="Polynomial Regression"
                width="90"
                x="514"
                y="75">
        <parameter key="replication_factor" value="2"/>
      </operator>

      <!-- Step 4: score the held-out partition with the trained model. -->
      <operator activated="true"
                class="apply_model"
                compatibility="6.0.003"
                expanded="true"
                height="76"
                name="Apply Model"
                width="90"
                x="648"
                y="75">
        <list key="application_parameters"/>
      </operator>

      <!-- Wiring: data flow between the operators and out to the process results. -->
      <connect from_op="Read CSV" from_port="output"
               to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1"
               to_op="Polynomial Regression" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2"
               to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Polynomial Regression" from_port="model"
               to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 2"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Thank you so much for your help.
Regards,
Daniela