solved
Best Answer
-
danzgb,
you can simply copy the xml code and hit ctrl+v to put it into RM. I do not get the error so this is a bit confusing.
On the decision tree: decision trees are one of the standard ways to do classification. In fact, this is only a default comment. Since I treated the label as numerical (not really needed), I used an SVM in regression mode. That's why I did not need to say that 10-13 is enough. I simply try to predict the actual number.
~Martin
0
Answers
-
Hi danzgb,
have you tried something like this? I might have done something wrong, but this process gives an absolute error of 1.8, which sounds fine.
~Martin
Spoiler
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="7.1.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.1.000" expanded="true" name="Process">
<process expanded="true">
<!-- Read CSV: loads student-por.csv, annotates row 0 as the name row, and
     declares the 33 columns (index 0 to 32) with their names and value types. -->
<operator activated="true"
          class="read_csv"
          compatibility="7.1.000"
          expanded="true"
          height="68"
          name="Read CSV"
          width="90"
          x="112"
          y="34">
  <parameter key="csv_file" value="/Users/mschmitz/CODING/RM/student/student-por.csv"/>
  <parameter key="first_row_as_names" value="false"/>
  <list key="annotations">
    <parameter key="0" value="Name"/>
  </list>
  <parameter key="encoding" value="UTF-8"/>
  <!-- Each entry: column index, then name.selected.type.role -->
  <list key="data_set_meta_data_information">
    <parameter key="0" value="school.true.polynominal.attribute"/>
    <parameter key="1" value="sex.true.polynominal.attribute"/>
    <parameter key="2" value="age.true.integer.attribute"/>
    <parameter key="3" value="address.true.polynominal.attribute"/>
    <parameter key="4" value="famsize.true.polynominal.attribute"/>
    <parameter key="5" value="Pstatus.true.polynominal.attribute"/>
    <parameter key="6" value="Medu.true.integer.attribute"/>
    <parameter key="7" value="Fedu.true.integer.attribute"/>
    <parameter key="8" value="Mjob.true.polynominal.attribute"/>
    <parameter key="9" value="Fjob.true.polynominal.attribute"/>
    <parameter key="10" value="reason.true.polynominal.attribute"/>
    <parameter key="11" value="guardian.true.polynominal.attribute"/>
    <parameter key="12" value="traveltime.true.integer.attribute"/>
    <parameter key="13" value="studytime.true.integer.attribute"/>
    <parameter key="14" value="failures.true.integer.attribute"/>
    <parameter key="15" value="schoolsup.true.polynominal.attribute"/>
    <parameter key="16" value="famsup.true.polynominal.attribute"/>
    <parameter key="17" value="paid.true.polynominal.attribute"/>
    <parameter key="18" value="activities.true.polynominal.attribute"/>
    <parameter key="19" value="nursery.true.polynominal.attribute"/>
    <parameter key="20" value="higher.true.polynominal.attribute"/>
    <parameter key="21" value="internet.true.polynominal.attribute"/>
    <parameter key="22" value="romantic.true.polynominal.attribute"/>
    <parameter key="23" value="famrel.true.integer.attribute"/>
    <parameter key="24" value="freetime.true.integer.attribute"/>
    <parameter key="25" value="goout.true.integer.attribute"/>
    <parameter key="26" value="Dalc.true.integer.attribute"/>
    <parameter key="27" value="Walc.true.integer.attribute"/>
    <parameter key="28" value="health.true.integer.attribute"/>
    <parameter key="29" value="absences.true.integer.attribute"/>
    <parameter key="30" value="G1.true.integer.attribute"/>
    <parameter key="31" value="G2.true.integer.attribute"/>
    <parameter key="32" value="G3.true.integer.attribute"/>
  </list>
</operator>
<!-- Set Role: assigns the "label" role to attribute G3; no additional roles are set. -->
<operator activated="true"
          class="set_role"
          compatibility="7.1.000"
          expanded="true"
          height="82"
          name="Set Role"
          width="90"
          x="246"
          y="34">
  <parameter key="attribute_name" value="G3"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!-- Nominal to Numerical: no parameters are set here apart from an empty
     comparison_groups list. -->
<operator activated="true"
          class="nominal_to_numerical"
          compatibility="7.1.000"
          expanded="true"
          height="103"
          name="Nominal to Numerical"
          width="90"
          x="447"
          y="34">
  <list key="comparison_groups"/>
</operator>
<!-- Validation (x_validation, sampling_type 2): the first subprocess trains an
     SVM on the training port; the second applies the model to the test set and
     measures it with performance_regression (absolute_error and
     squared_correlation enabled). -->
<operator activated="true"
          class="x_validation"
          compatibility="5.0.000"
          expanded="true"
          height="124"
          name="Validation"
          width="90"
          x="648"
          y="34">
  <parameter key="sampling_type" value="2"/>
  <!-- Training subprocess: the SVM receives the training set and passes its model on. -->
  <process expanded="true">
    <operator activated="true" class="support_vector_machine" compatibility="7.1.000" expanded="true" height="124" name="SVM" width="90" x="45" y="34"/>
    <connect from_port="training" to_op="SVM" to_port="training set"/>
    <connect from_op="SVM" from_port="model" to_port="model"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
  </process>
  <!-- Testing subprocess: Apply Model feeds its labelled data into Performance,
       whose result leaves through "averagable 1". -->
  <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="5.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="30">
      <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_regression" compatibility="7.1.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
      <parameter key="absolute_error" value="true"/>
      <parameter key="squared_correlation" value="true"/>
    </operator>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
  </process>
  <!-- The description names the learner actually used in the training subprocess. -->
  <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM regression model.</description>
</operator>
<!-- Top-level wiring: Read CSV feeds Set Role, which feeds Nominal to Numerical,
     which feeds the training port of Validation. -->
<connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
<connect from_op="Nominal to Numerical" from_port="example set output" to_op="Validation" to_port="training"/>
<!-- Process results: the model goes to result 1, the performance ("averagable 1") to result 2. -->
<connect from_op="Validation" from_port="model" to_port="result 1"/>
<connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
0 -
Ok!
0 -
.
0 -
danzgb,
you can simply copy the xml code and hit ctrl+v to put it into RM. I do not get the error so this is a bit confusing.
On the decision tree: decision trees are one of the standard ways to do classification. In fact, this is only a default comment. Since I treated the label as numerical (not really needed), I used an SVM in regression mode. That's why I did not need to say that 10-13 is enough. I simply try to predict the actual number.
~Martin
0 -
Thanks again Martin, you're really helping me out!
0 -
.
0 -
Hi danzg,
the key operator is Aggregate. You can build all of those values with a combination of Generate Attributes, Aggregate, and Filter Examples. I built the process for the number of people below 10 and attached it.
Edit: Oops, the processes are obviously only counting how often you have them in the data. To forecast, you would simply take < 10 as a label and learn a model to predict it. You can then apply the model to the new data and use Aggregate to count on the prediction.
Best,
Martin
Spoiler
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="7.1.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.1.001" expanded="true" name="Process">
<process expanded="true">
<!-- Read CSV: loads student-por.csv, annotates row 0 as the name row, and
     declares the 33 columns (index 0 to 32) with their names and value types. -->
<operator activated="true"
          class="read_csv"
          compatibility="7.1.001"
          expanded="true"
          height="68"
          name="Read CSV"
          width="90"
          x="112"
          y="34">
  <parameter key="csv_file" value="C:\Users\Martin\Arbeit\Forum/student-por.csv"/>
  <parameter key="first_row_as_names" value="false"/>
  <list key="annotations">
    <parameter key="0" value="Name"/>
  </list>
  <parameter key="encoding" value="UTF-8"/>
  <!-- Each entry: column index, then name.selected.type.role -->
  <list key="data_set_meta_data_information">
    <parameter key="0" value="school.true.polynominal.attribute"/>
    <parameter key="1" value="sex.true.polynominal.attribute"/>
    <parameter key="2" value="age.true.integer.attribute"/>
    <parameter key="3" value="address.true.polynominal.attribute"/>
    <parameter key="4" value="famsize.true.polynominal.attribute"/>
    <parameter key="5" value="Pstatus.true.polynominal.attribute"/>
    <parameter key="6" value="Medu.true.integer.attribute"/>
    <parameter key="7" value="Fedu.true.integer.attribute"/>
    <parameter key="8" value="Mjob.true.polynominal.attribute"/>
    <parameter key="9" value="Fjob.true.polynominal.attribute"/>
    <parameter key="10" value="reason.true.polynominal.attribute"/>
    <parameter key="11" value="guardian.true.polynominal.attribute"/>
    <parameter key="12" value="traveltime.true.integer.attribute"/>
    <parameter key="13" value="studytime.true.integer.attribute"/>
    <parameter key="14" value="failures.true.integer.attribute"/>
    <parameter key="15" value="schoolsup.true.polynominal.attribute"/>
    <parameter key="16" value="famsup.true.polynominal.attribute"/>
    <parameter key="17" value="paid.true.polynominal.attribute"/>
    <parameter key="18" value="activities.true.polynominal.attribute"/>
    <parameter key="19" value="nursery.true.polynominal.attribute"/>
    <parameter key="20" value="higher.true.polynominal.attribute"/>
    <parameter key="21" value="internet.true.polynominal.attribute"/>
    <parameter key="22" value="romantic.true.polynominal.attribute"/>
    <parameter key="23" value="famrel.true.integer.attribute"/>
    <parameter key="24" value="freetime.true.integer.attribute"/>
    <parameter key="25" value="goout.true.integer.attribute"/>
    <parameter key="26" value="Dalc.true.integer.attribute"/>
    <parameter key="27" value="Walc.true.integer.attribute"/>
    <parameter key="28" value="health.true.integer.attribute"/>
    <parameter key="29" value="absences.true.integer.attribute"/>
    <parameter key="30" value="G1.true.integer.attribute"/>
    <parameter key="31" value="G2.true.integer.attribute"/>
    <parameter key="32" value="G3.true.integer.attribute"/>
  </list>
</operator>
<!-- Set Role: assigns the "label" role to attribute G3; no additional roles are set. -->
<operator activated="true"
          class="set_role"
          compatibility="7.1.001"
          expanded="true"
          height="82"
          name="Set Role"
          width="90"
          x="246"
          y="34">
  <parameter key="attribute_name" value="G3"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!-- Generate Attributes: adds an attribute named "G3 Below 10" defined by the
     expression G3 less-than 10. -->
<operator activated="true"
          class="generate_attributes"
          compatibility="7.1.001"
          expanded="true"
          height="82"
          name="Generate Attributes"
          width="90"
          x="380"
          y="34">
  <list key="function_descriptions">
    <!-- A literal less-than sign is not allowed inside an attribute value, so it
         is written as an entity; a parser still delivers the same expression. -->
    <parameter key="G3 Below 10" value="G3&lt;10"/>
  </list>
</operator>
<!-- Aggregate: groups by "G3 Below 10" and applies the count function to that
     same attribute. -->
<operator activated="true"
          class="aggregate"
          compatibility="7.1.001"
          expanded="true"
          height="82"
          name="Aggregate"
          width="90"
          x="514"
          y="34">
  <list key="aggregation_attributes">
    <parameter key="G3 Below 10" value="count"/>
  </list>
  <parameter key="group_by_attributes" value="G3 Below 10"/>
</operator>
<!-- Top-level wiring: Read CSV feeds Set Role, which feeds Generate Attributes,
     which feeds Aggregate; the aggregated example set is delivered to result 1. -->
<connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
<connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0