Running linear regression on each attribute X_i in example set and Age (also in example set)
Hi,
I have an example set with roughly 300 numerical attributes X_i and an attribute "age". I'd like to know whether any of the attributes X_i can be predicted from age. To test this, I'd like to run a simple linear regression with age as the independent variable (x-axis) and X_i as the dependent variable (y-axis), and repeat that regression for each attribute X_i in the example set using some loop operator.
I tried using the "Loop Attributes" operator on just the subset of X_i attributes, but I cannot find a way to "inject" the Age attribute as a fixed, unchanging second attribute inside the operator. Perhaps there's a way to do this with macros, but I'm not sure how. In each loop iteration I would like to access both attribute X_i and Age: X_i changes with the loop index, while Age stays the same.
Any help would be greatly appreciated!
Ralph
Best Answer
-
Hi Scott,
Thanks for the quick reply. I'll give it a try!
Ralph
1
Answers
-
Hi @ralph_brecheise - yes, Loop Attributes with macros will do this nicely. I am attaching a process for you to look at that uses the Sonar data set.
For what it's worth, you may just want to use the Correlation Matrix operator and look at the pairwise r values, if that's sufficient. That is much easier than building full linear regression models!
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root operator of the example process. It loads the Sonar sample data, drops the
     "class" attribute, and sends one copy of the data through Loop Attributes (one
     Linear Regression model per looped attribute, always against attribute_1) and a
     second copy to Correlation Matrix. -->
<operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
<process expanded="true">
<!-- Step 1: read the example set stored at //Samples/data/Sonar. -->
<operator activated="true" class="retrieve" compatibility="8.1.000" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="34">
<parameter key="repository_entry" value="//Samples/data/Sonar"/>
</operator>
<!-- Step 2: a "single" filter on "class" with invert_selection=true keeps every
     attribute except "class". include_special_attributes=true lets the filter act on
     special attributes as well; presumably required because "class" carries a special
     role in the sample data (TODO confirm). -->
<operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="class"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Step 3: duplicate the filtered data; "output 1" feeds Loop Attributes and
     "output 2" feeds Correlation Matrix (see the connections further down). -->
<operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply" width="90" x="313" y="136"/>
<!-- Alternative analysis branch: no parameters are set, and its "matrix" port is
     delivered as process result 2. -->
<operator activated="true" class="concurrency:correlation_matrix" compatibility="8.1.000" expanded="true" height="103" name="Correlation Matrix" width="90" x="514" y="187"/>
<!-- Main analysis branch: the inner process below is defined once and applied to the
     attributes named in the "attributes" subset. -->
<operator activated="true" class="concurrency:loop_attributes" compatibility="8.1.000" expanded="true" height="82" name="Loop Attributes" width="90" x="447" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<!-- The subset names attribute_2 through attribute_60. attribute_1 is not listed:
     the inner process uses it as the fixed second attribute in every iteration. -->
<parameter key="attributes" value="attribute_9|attribute_8|attribute_7|attribute_60|attribute_6|attribute_59|attribute_58|attribute_57|attribute_56|attribute_55|attribute_54|attribute_53|attribute_52|attribute_51|attribute_50|attribute_5|attribute_49|attribute_48|attribute_47|attribute_46|attribute_45|attribute_44|attribute_43|attribute_42|attribute_41|attribute_40|attribute_4|attribute_39|attribute_38|attribute_37|attribute_36|attribute_35|attribute_34|attribute_33|attribute_32|attribute_31|attribute_30|attribute_3|attribute_29|attribute_28|attribute_27|attribute_26|attribute_25|attribute_24|attribute_23|attribute_22|attribute_21|attribute_20|attribute_2|attribute_19|attribute_18|attribute_17|attribute_16|attribute_15|attribute_14|attribute_13|attribute_12|attribute_11|attribute_10"/>
<!-- Parallel execution of the iterations is switched off. -->
<parameter key="enable_parallel_execution" value="false"/>
<process expanded="true">
<!-- Inner step A: reduce the data to two attributes, the fixed attribute_1 and the
     attribute referenced by the %{loop_attribute} macro. No macro is defined
     explicitly in this process, so the macro is presumably set by Loop Attributes to
     the current attribute's name on each iteration (TODO confirm against the
     operator documentation). -->
<operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="45" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="attribute_1|%{loop_attribute}"/>
</operator>
<!-- Inner step B: give the looped attribute the "label" role, making it the
     regression target. attribute_1 is the only other attribute left after step A,
     so it is the sole predictor. -->
<operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="179" y="34">
<parameter key="attribute_name" value="%{loop_attribute}"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
<!-- Inner step C: fit the regression. No parameters are set here, so the operator
     runs with its defaults.
     NOTE(review): confirm that those defaults (for example any built-in feature
     selection) are appropriate for a single-predictor fit. -->
<operator activated="true" class="linear_regression" compatibility="8.1.000" expanded="true" height="103" name="Linear Regression" width="90" x="313" y="34"/>
<!-- Inner wiring: loop input 1 goes through Select Attributes (2), Set Role and
     Linear Regression in that order; only the "model" port is returned, on
     output 1. -->
<connect from_port="input 1" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Linear Regression" to_port="training set"/>
<connect from_op="Linear Regression" from_port="model" to_port="output 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<!-- Outer wiring: Retrieve Sonar feeds Select Attributes, which feeds Multiply.
     Multiply output 1 goes to Loop Attributes, whose output 1 becomes process
     result 1. Multiply output 2 goes to Correlation Matrix, whose matrix becomes
     process result 2. -->
<connect from_op="Retrieve Sonar" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Loop Attributes" to_port="input 1"/>
<connect from_op="Multiply" from_port="output 2" to_op="Correlation Matrix" to_port="example set"/>
<connect from_op="Correlation Matrix" from_port="matrix" to_port="result 2"/>
<connect from_op="Loop Attributes" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="126"/>
<portSpacing port="sink_result 3" spacing="84"/>
</process>
</operator>
</process>
Scott
0 -
Hi Scott,
Thanks for the quick reply. I'll give it a try!
Ralph
1