Polynomial Regression
josh321
New Altair Community Member
I don't think I understand this operator and I am unable to produce an acceptable result. This is a very basic sample of a process to test the operator.
I realize I could do this part in Excel or Scilab, but I figured RapidMiner must be capable and I must be doing something wrong. Any help would be greatly appreciated!
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
And this is the resulting plot.
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Main process: Read Excel output goes into Work on Subset; Apply Model is wired up outside that subset operator. -->
<operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
  <process expanded="true" height="620" width="1004">
    <!-- Spreadsheet import: cell range A2:BU557, first_row_as_names is false, 73 column declarations (A to BU). -->
    <operator activated="true" class="read_excel" compatibility="5.2.006" expanded="true"
              height="60" name="Read Excel" width="90" x="45" y="75">
      <parameter key="excel_file" value="C:\Files\1.xls"/>
      <parameter key="imported_cell_range" value="A2:BU557"/>
      <parameter key="first_row_as_names" value="false"/>
      <list key="annotations"/>
      <list key="data_set_meta_data_information">
        <parameter key="0" value="A.true.numeric.attribute"/>
        <parameter key="1" value="B.true.numeric.attribute"/>
        <parameter key="2" value="C.true.numeric.attribute"/>
        <parameter key="3" value="D.true.numeric.attribute"/>
        <parameter key="4" value="E.true.numeric.attribute"/>
        <parameter key="5" value="F.true.numeric.attribute"/>
        <parameter key="6" value="G.true.numeric.attribute"/>
        <parameter key="7" value="H.true.numeric.attribute"/>
        <parameter key="8" value="I.true.numeric.attribute"/>
        <parameter key="9" value="J.true.numeric.attribute"/>
        <parameter key="10" value="K.true.numeric.attribute"/>
        <parameter key="11" value="L.true.numeric.attribute"/>
        <parameter key="12" value="M.true.numeric.attribute"/>
        <parameter key="13" value="N.true.real.attribute"/>
        <parameter key="14" value="O.true.numeric.attribute"/>
        <parameter key="15" value="P.true.numeric.attribute"/>
        <parameter key="16" value="Q.true.numeric.attribute"/>
        <parameter key="17" value="R.true.numeric.attribute"/>
        <parameter key="18" value="S.true.numeric.attribute"/>
        <parameter key="19" value="T.true.numeric.attribute"/>
        <parameter key="20" value="U.true.numeric.attribute"/>
        <parameter key="21" value="V.true.numeric.attribute"/>
        <parameter key="22" value="W.true.numeric.attribute"/>
        <parameter key="23" value="X.true.numeric.attribute"/>
        <parameter key="24" value="Y.true.numeric.attribute"/>
        <parameter key="25" value="Z.true.real.attribute"/>
        <parameter key="26" value="AA.true.numeric.attribute"/>
        <parameter key="27" value="AB.true.numeric.attribute"/>
        <parameter key="28" value="AC.true.numeric.attribute"/>
        <parameter key="29" value="AD.true.numeric.attribute"/>
        <parameter key="30" value="AE.true.numeric.attribute"/>
        <parameter key="31" value="AF.true.numeric.attribute"/>
        <parameter key="32" value="AG.true.numeric.attribute"/>
        <parameter key="33" value="AH.true.real.attribute"/>
        <parameter key="34" value="AI.true.real.attribute"/>
        <parameter key="35" value="AJ.true.real.attribute"/>
        <parameter key="36" value="AK.true.numeric.attribute"/>
        <parameter key="37" value="AL.true.numeric.attribute"/>
        <parameter key="38" value="AM.true.numeric.attribute"/>
        <parameter key="39" value="AN.true.numeric.attribute"/>
        <parameter key="40" value="AO.true.numeric.attribute"/>
        <parameter key="41" value="AP.true.numeric.attribute"/>
        <parameter key="42" value="AQ.true.numeric.attribute"/>
        <parameter key="43" value="AR.true.numeric.attribute"/>
        <parameter key="44" value="AS.true.numeric.attribute"/>
        <parameter key="45" value="AT.true.numeric.attribute"/>
        <parameter key="46" value="AU.true.numeric.attribute"/>
        <parameter key="47" value="AV.true.numeric.attribute"/>
        <parameter key="48" value="AW.true.integer.attribute"/>
        <parameter key="49" value="AX.true.real.attribute"/>
        <parameter key="50" value="AY.true.numeric.attribute"/>
        <parameter key="51" value="AZ.true.numeric.attribute"/>
        <parameter key="52" value="BA.true.binominal.attribute"/>
        <parameter key="53" value="BB.true.real.attribute"/>
        <parameter key="54" value="BC.true.real.attribute"/>
        <parameter key="55" value="BD.true.real.attribute"/>
        <parameter key="56" value="BE.true.real.attribute"/>
        <parameter key="57" value="BF.true.real.attribute"/>
        <parameter key="58" value="BG.true.real.attribute"/>
        <parameter key="59" value="BH.true.real.attribute"/>
        <parameter key="60" value="BI.true.real.attribute"/>
        <parameter key="61" value="BJ.true.real.attribute"/>
        <parameter key="62" value="BK.true.real.attribute"/>
        <parameter key="63" value="BL.true.integer.attribute"/>
        <parameter key="64" value="BM.true.real.attribute"/>
        <parameter key="65" value="BN.true.real.attribute"/>
        <parameter key="66" value="BO.true.real.attribute"/>
        <parameter key="67" value="BP.true.real.attribute"/>
        <parameter key="68" value="BQ.true.real.attribute"/>
        <parameter key="69" value="BR.true.real.attribute"/>
        <parameter key="70" value="BS.true.integer.attribute"/>
        <parameter key="71" value="BT.true.real.attribute"/>
        <parameter key="72" value="BU.true.integer.attribute"/>
      </list>
    </operator>
    <!-- Nested process is configured for the attribute subset |AQ|Y. -->
    <operator activated="true" class="work_on_subset" compatibility="5.2.006" expanded="true"
              height="94" name="Work on Subset" width="90" x="246" y="75">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attributes" value="|AQ|Y"/>
      <process expanded="true" height="638" width="1022">
        <!-- Y is given the label role before training. -->
        <operator activated="true" class="set_role" compatibility="5.2.006" expanded="true"
                  height="76" name="Set Role" width="90" x="112" y="30">
          <parameter key="name" value="Y"/>
          <parameter key="target_role" value="label"/>
          <list key="set_additional_roles"/>
        </operator>
        <!-- Learner with max_degree 2 and a fixed local random seed of 500. -->
        <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true"
                  height="76" name="Polynomial Regression" width="90" x="313" y="30">
          <parameter key="max_degree" value="2"/>
          <parameter key="use_local_random_seed" value="true"/>
          <parameter key="local_random_seed" value="500"/>
        </operator>
        <!-- The trained model leaves the nested process through port "through 1". -->
        <connect from_port="exampleSet" to_op="Set Role" to_port="example set input"/>
        <connect from_op="Set Role" from_port="example set output" to_op="Polynomial Regression" to_port="training set"/>
        <connect from_op="Polynomial Regression" from_port="model" to_port="through 1"/>
        <connect from_op="Polynomial Regression" from_port="exampleSet" to_port="example set"/>
        <portSpacing port="source_exampleSet" spacing="0"/>
        <portSpacing port="sink_example set" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
        <portSpacing port="sink_through 2" spacing="0"/>
      </process>
    </operator>
    <!-- Receives the example set and the "through 1" model from Work on Subset (see the connections below). -->
    <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true"
              height="76" name="Apply Model" width="90" x="447" y="75">
      <list key="application_parameters"/>
    </operator>
    <connect from_op="Read Excel" from_port="output" to_op="Work on Subset" to_port="example set"/>
    <connect from_op="Work on Subset" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Work on Subset" from_port="through 1" to_op="Apply Model" to_port="model"/>
    <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
    <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
  </process>
</operator>
</process>
I realize I could do this part in Excel or Scilab, but I figured RapidMiner must be capable and I must be doing something wrong. Any help would be greatly appreciated!
Tagged:
0
Answers
-
You are applying the model to a dataset with different attributes than the training set, since Apply Model is outside of Work on Subset (which is kind of deprecated anyway). I tried the Polynomial Regression operator on random data, and it looks quite good. Please have a look at the attached process.
Best, Marius
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Example process on generated data: Generate Data, Add Noise, Polynomial Regression (2), then Apply Model. -->
<operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
  <process expanded="true" height="658" width="567">
    <!-- Generated data set: target_function "sum", number_of_attributes 1. -->
    <operator activated="true" class="generate_data" compatibility="5.2.006" expanded="true"
              height="60" name="Generate Data" width="90" x="45" y="30">
      <parameter key="target_function" value="sum"/>
      <parameter key="number_of_attributes" value="1"/>
    </operator>
    <operator activated="true" class="add_noise" compatibility="5.2.006" expanded="true"
              height="94" name="Add Noise" width="90" x="179" y="30">
      <list key="noise"/>
    </operator>
    <!-- Learner with max_degree 2 and a fixed local random seed of 500. -->
    <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true"
              height="76" name="Polynomial Regression (2)" width="90" x="313" y="30">
      <parameter key="max_degree" value="2"/>
      <parameter key="use_local_random_seed" value="true"/>
      <parameter key="local_random_seed" value="500"/>
    </operator>
    <!-- Fed directly by the learner: its model port and its exampleSet port (see the connections below). -->
    <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true"
              height="76" name="Apply Model" width="90" x="447" y="30">
      <list key="application_parameters"/>
    </operator>
    <connect from_op="Generate Data" from_port="output" to_op="Add Noise" to_port="example set input"/>
    <connect from_op="Add Noise" from_port="example set output" to_op="Polynomial Regression (2)" to_port="training set"/>
    <connect from_op="Polynomial Regression (2)" from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_op="Polynomial Regression (2)" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
    <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
  </process>
</operator>
</process>
0 -
Hmm.. Your process works for me as well. Yet when I try to structure mine the same, I get the same results as in my first post.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
Also, going on a bit of a tangent.. If I modify your process as below, to include more than one attribute and work on a subset of att1 and label, I get an error saying that no label attribute is found.
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Flat variant: Read Excel, Select Attributes, Set Role, Polynomial Regression and Apply Model in one chain. -->
<operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
  <process expanded="true" height="620" width="1004">
    <!-- Spreadsheet import: cell range A2:BU557, first_row_as_names is false, 73 column declarations (A to BU). -->
    <operator activated="true" class="read_excel" compatibility="5.2.006" expanded="true"
              height="60" name="Read Excel" width="90" x="45" y="75">
      <parameter key="excel_file" value="C:\Users\Josh\Desktop\ITADS\1.xls"/>
      <parameter key="imported_cell_range" value="A2:BU557"/>
      <parameter key="first_row_as_names" value="false"/>
      <list key="annotations"/>
      <list key="data_set_meta_data_information">
        <parameter key="0" value="A.true.numeric.attribute"/>
        <parameter key="1" value="B.true.numeric.attribute"/>
        <parameter key="2" value="C.true.numeric.attribute"/>
        <parameter key="3" value="D.true.numeric.attribute"/>
        <parameter key="4" value="E.true.numeric.attribute"/>
        <parameter key="5" value="F.true.numeric.attribute"/>
        <parameter key="6" value="G.true.numeric.attribute"/>
        <parameter key="7" value="H.true.numeric.attribute"/>
        <parameter key="8" value="I.true.numeric.attribute"/>
        <parameter key="9" value="J.true.numeric.attribute"/>
        <parameter key="10" value="K.true.numeric.attribute"/>
        <parameter key="11" value="L.true.numeric.attribute"/>
        <parameter key="12" value="M.true.numeric.attribute"/>
        <parameter key="13" value="N.true.real.attribute"/>
        <parameter key="14" value="O.true.numeric.attribute"/>
        <parameter key="15" value="P.true.numeric.attribute"/>
        <parameter key="16" value="Q.true.numeric.attribute"/>
        <parameter key="17" value="R.true.numeric.attribute"/>
        <parameter key="18" value="S.true.numeric.attribute"/>
        <parameter key="19" value="T.true.numeric.attribute"/>
        <parameter key="20" value="U.true.numeric.attribute"/>
        <parameter key="21" value="V.true.numeric.attribute"/>
        <parameter key="22" value="W.true.numeric.attribute"/>
        <parameter key="23" value="X.true.numeric.attribute"/>
        <parameter key="24" value="Y.true.numeric.attribute"/>
        <parameter key="25" value="Z.true.real.attribute"/>
        <parameter key="26" value="AA.true.numeric.attribute"/>
        <parameter key="27" value="AB.true.numeric.attribute"/>
        <parameter key="28" value="AC.true.numeric.attribute"/>
        <parameter key="29" value="AD.true.numeric.attribute"/>
        <parameter key="30" value="AE.true.numeric.attribute"/>
        <parameter key="31" value="AF.true.numeric.attribute"/>
        <parameter key="32" value="AG.true.numeric.attribute"/>
        <parameter key="33" value="AH.true.real.attribute"/>
        <parameter key="34" value="AI.true.real.attribute"/>
        <parameter key="35" value="AJ.true.real.attribute"/>
        <parameter key="36" value="AK.true.numeric.attribute"/>
        <parameter key="37" value="AL.true.numeric.attribute"/>
        <parameter key="38" value="AM.true.numeric.attribute"/>
        <parameter key="39" value="AN.true.numeric.attribute"/>
        <parameter key="40" value="AO.true.numeric.attribute"/>
        <parameter key="41" value="AP.true.numeric.attribute"/>
        <parameter key="42" value="AQ.true.numeric.attribute"/>
        <parameter key="43" value="AR.true.numeric.attribute"/>
        <parameter key="44" value="AS.true.numeric.attribute"/>
        <parameter key="45" value="AT.true.numeric.attribute"/>
        <parameter key="46" value="AU.true.numeric.attribute"/>
        <parameter key="47" value="AV.true.numeric.attribute"/>
        <parameter key="48" value="AW.true.integer.attribute"/>
        <parameter key="49" value="AX.true.real.attribute"/>
        <parameter key="50" value="AY.true.numeric.attribute"/>
        <parameter key="51" value="AZ.true.numeric.attribute"/>
        <parameter key="52" value="BA.true.binominal.attribute"/>
        <parameter key="53" value="BB.true.real.attribute"/>
        <parameter key="54" value="BC.true.real.attribute"/>
        <parameter key="55" value="BD.true.real.attribute"/>
        <parameter key="56" value="BE.true.real.attribute"/>
        <parameter key="57" value="BF.true.real.attribute"/>
        <parameter key="58" value="BG.true.real.attribute"/>
        <parameter key="59" value="BH.true.real.attribute"/>
        <parameter key="60" value="BI.true.real.attribute"/>
        <parameter key="61" value="BJ.true.real.attribute"/>
        <parameter key="62" value="BK.true.real.attribute"/>
        <parameter key="63" value="BL.true.integer.attribute"/>
        <parameter key="64" value="BM.true.real.attribute"/>
        <parameter key="65" value="BN.true.real.attribute"/>
        <parameter key="66" value="BO.true.real.attribute"/>
        <parameter key="67" value="BP.true.real.attribute"/>
        <parameter key="68" value="BQ.true.real.attribute"/>
        <parameter key="69" value="BR.true.real.attribute"/>
        <parameter key="70" value="BS.true.integer.attribute"/>
        <parameter key="71" value="BT.true.real.attribute"/>
        <parameter key="72" value="BU.true.integer.attribute"/>
      </list>
    </operator>
    <!-- Attribute filter configured with the subset |AQ|Y. -->
    <operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true"
              height="76" name="Select Attributes" width="90" x="246" y="75">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attributes" value="|AQ|Y"/>
    </operator>
    <!-- Y is given the label role before training. -->
    <operator activated="true" class="set_role" compatibility="5.2.006" expanded="true"
              height="76" name="Set Role" width="90" x="380" y="75">
      <parameter key="name" value="Y"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <!-- Learner with max_degree 2 and a fixed local random seed of 500. -->
    <operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true"
              height="76" name="Polynomial Regression" width="90" x="514" y="75">
      <parameter key="max_degree" value="2"/>
      <parameter key="use_local_random_seed" value="true"/>
      <parameter key="local_random_seed" value="500"/>
    </operator>
    <!-- Fed directly by the learner: its model port and its exampleSet port (see the connections below). -->
    <operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true"
              height="76" name="Apply Model" width="90" x="648" y="75">
      <list key="application_parameters"/>
    </operator>
    <connect from_op="Read Excel" from_port="output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Polynomial Regression" to_port="training set"/>
    <connect from_op="Polynomial Regression" from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_op="Polynomial Regression" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
    <connect from_op="Apply Model" from_port="model" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
  </process>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Stand-alone Polynomial Regression operator with every listed parameter spelled out explicitly. -->
<operator activated="true" class="polynomial_regression" compatibility="5.2.006" expanded="true"
          height="76" name="Polynomial Regression (2)" width="90" x="313" y="30">
  <parameter key="max_iterations" value="5000"/>
  <parameter key="replication_factor" value="1"/>
  <parameter key="max_degree" value="2"/>
  <!-- Coefficient bounds as posted: -100.0 to 100.0. -->
  <parameter key="min_coefficient" value="-100.0"/>
  <parameter key="max_coefficient" value="100.0"/>
  <parameter key="use_local_random_seed" value="true"/>
  <parameter key="local_random_seed" value="500"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Stand-alone Apply Model operator: no application parameters, create_view set to false. -->
<operator activated="true" class="apply_model" compatibility="5.2.006" expanded="true"
          height="76" name="Apply Model" width="90" x="447" y="30">
  <list key="application_parameters"/>
  <parameter key="create_view" value="false"/>
</operator>
</process>
0 -
Hm.. honestly I have no idea why you get such bad results. In your particular use case you have linear data anyway, so a linear regression would probably work better here. Maybe you could also try higher values for the max_iterations parameter.
The second process you posted is not valid; probably something went wrong with copy-paste.
Best, Marius
0