A program to recognize and reward our most engaged community members
Solved.
danzgb,
You can simply copy the XML code and press Ctrl+V to paste it into RM. I do not get the error, so this is a bit confusing.
On the decision tree: decision trees are one of the standard ways to do classification. In fact, this is only a default comment. Since I treated the label as numerical (not really needed), I used an SVM in regression mode. That's why I did not need to say 10-13 is enough; I simply try to predict the actual number.
~Martin
Hi danzgb,
Have you tried something like this? I might have done something wrong, but this process gives an absolute error of 1.8, which sounds fine.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: read the student CSV, mark G3 as the label, convert
  nominal attributes to numerical ones, and cross-validate an SVM learner
  with a regression performance measure.
-->
<process version="7.1.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.1.000" expanded="true" name="Process">
    <process expanded="true">

      <!-- Step 1: load the data. csv_file is an absolute path on the author's
           machine and has to be adjusted before the process can run elsewhere. -->
      <operator activated="true" class="read_csv" compatibility="7.1.000" expanded="true"
                height="68" name="Read CSV" width="90" x="112" y="34">
        <parameter key="csv_file" value="/Users/mschmitz/CODING/RM/student/student-por.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <!-- Row 0 is annotated as "Name" instead of using first_row_as_names. -->
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <!-- Column metadata for the 33 columns (keys 0 to 32). -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="school.true.polynominal.attribute"/>
          <parameter key="1" value="sex.true.polynominal.attribute"/>
          <parameter key="2" value="age.true.integer.attribute"/>
          <parameter key="3" value="address.true.polynominal.attribute"/>
          <parameter key="4" value="famsize.true.polynominal.attribute"/>
          <parameter key="5" value="Pstatus.true.polynominal.attribute"/>
          <parameter key="6" value="Medu.true.integer.attribute"/>
          <parameter key="7" value="Fedu.true.integer.attribute"/>
          <parameter key="8" value="Mjob.true.polynominal.attribute"/>
          <parameter key="9" value="Fjob.true.polynominal.attribute"/>
          <parameter key="10" value="reason.true.polynominal.attribute"/>
          <parameter key="11" value="guardian.true.polynominal.attribute"/>
          <parameter key="12" value="traveltime.true.integer.attribute"/>
          <parameter key="13" value="studytime.true.integer.attribute"/>
          <parameter key="14" value="failures.true.integer.attribute"/>
          <parameter key="15" value="schoolsup.true.polynominal.attribute"/>
          <parameter key="16" value="famsup.true.polynominal.attribute"/>
          <parameter key="17" value="paid.true.polynominal.attribute"/>
          <parameter key="18" value="activities.true.polynominal.attribute"/>
          <parameter key="19" value="nursery.true.polynominal.attribute"/>
          <parameter key="20" value="higher.true.polynominal.attribute"/>
          <parameter key="21" value="internet.true.polynominal.attribute"/>
          <parameter key="22" value="romantic.true.polynominal.attribute"/>
          <parameter key="23" value="famrel.true.integer.attribute"/>
          <parameter key="24" value="freetime.true.integer.attribute"/>
          <parameter key="25" value="goout.true.integer.attribute"/>
          <parameter key="26" value="Dalc.true.integer.attribute"/>
          <parameter key="27" value="Walc.true.integer.attribute"/>
          <parameter key="28" value="health.true.integer.attribute"/>
          <parameter key="29" value="absences.true.integer.attribute"/>
          <parameter key="30" value="G1.true.integer.attribute"/>
          <parameter key="31" value="G2.true.integer.attribute"/>
          <parameter key="32" value="G3.true.integer.attribute"/>
        </list>
      </operator>

      <!-- Step 2: give the G3 column the "label" role. -->
      <operator activated="true" class="set_role" compatibility="7.1.000" expanded="true"
                height="82" name="Set Role" width="90" x="246" y="34">
        <parameter key="attribute_name" value="G3"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>

      <!-- Step 3: nominal_to_numerical with no parameters overridden. -->
      <operator activated="true" class="nominal_to_numerical" compatibility="7.1.000" expanded="true"
                height="103" name="Nominal to Numerical" width="90" x="447" y="34">
        <list key="comparison_groups"/>
      </operator>

      <!-- Step 4: cross-validation. The first nested process trains the model,
           the second applies it and measures performance.
           NOTE(review): sampling_type is given as the index "2"; confirm which
           sampling mode that maps to in the target RapidMiner version. -->
      <operator activated="true" class="x_validation" compatibility="5.0.000" expanded="true"
                height="124" name="Validation" width="90" x="648" y="34">
        <parameter key="sampling_type" value="2"/>

        <!-- Training side: an SVM operator with no parameters overridden. -->
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="7.1.000" expanded="true"
                    height="124" name="SVM" width="90" x="45" y="34"/>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing side: apply the model, then compute regression performance
             with absolute_error and squared_correlation switched on. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.0.000" expanded="true"
                    height="82" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_regression" compatibility="7.1.000" expanded="true"
                    height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="absolute_error" value="true"/>
            <parameter key="squared_correlation" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>

        <!-- NOTE(review): this description mentions a decision tree, but the
             learner inside the validation is the SVM operator above. The text
             is kept unchanged here. -->
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a decision tree model.</description>
      </operator>

      <!-- Wiring of the top-level chain; the validation's model and averaged
           performance are delivered as results 1 and 2. -->
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
      <connect from_op="Nominal to Numerical" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
First of all, thanks a lot for your reply, @mschmitz. I'm a real newbie in RapidMiner. Where should I paste the code you gave me? In the "XML view"? I can't find where the XML view is...
Thanks one more time.
Ok !
.
Thanks again Martin, you're really helping me out!
Hi danzg,
The key operator is Aggregate. You can build all of those values with a combination of Generate Attributes, Aggregate, and Filter Examples. I built the process that counts the number of people below 10 and attached it.
Edit: Oops, the processes are obviously only counting how often these cases occur in the data. To forecast, you would simply take "< 10" as the label and learn a model to predict it. You can then apply the model to the new data and use Aggregate to count the predictions.
Best,
Martin
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: read the student CSV, mark G3 as the label, derive the
  attribute "G3 Below 10" from the expression G3 < 10, and count the rows per
  value of that attribute.
-->
<process version="7.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.1.001" expanded="true" name="Process">
    <process expanded="true">

      <!-- Step 1: load the data. csv_file is an absolute path on the author's
           machine and has to be adjusted before the process can run elsewhere. -->
      <operator activated="true" class="read_csv" compatibility="7.1.001" expanded="true"
                height="68" name="Read CSV" width="90" x="112" y="34">
        <parameter key="csv_file" value="C:\Users\Martin\Arbeit\Forum/student-por.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <!-- Row 0 is annotated as "Name" instead of using first_row_as_names. -->
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <!-- Column metadata for the 33 columns (keys 0 to 32). -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="school.true.polynominal.attribute"/>
          <parameter key="1" value="sex.true.polynominal.attribute"/>
          <parameter key="2" value="age.true.integer.attribute"/>
          <parameter key="3" value="address.true.polynominal.attribute"/>
          <parameter key="4" value="famsize.true.polynominal.attribute"/>
          <parameter key="5" value="Pstatus.true.polynominal.attribute"/>
          <parameter key="6" value="Medu.true.integer.attribute"/>
          <parameter key="7" value="Fedu.true.integer.attribute"/>
          <parameter key="8" value="Mjob.true.polynominal.attribute"/>
          <parameter key="9" value="Fjob.true.polynominal.attribute"/>
          <parameter key="10" value="reason.true.polynominal.attribute"/>
          <parameter key="11" value="guardian.true.polynominal.attribute"/>
          <parameter key="12" value="traveltime.true.integer.attribute"/>
          <parameter key="13" value="studytime.true.integer.attribute"/>
          <parameter key="14" value="failures.true.integer.attribute"/>
          <parameter key="15" value="schoolsup.true.polynominal.attribute"/>
          <parameter key="16" value="famsup.true.polynominal.attribute"/>
          <parameter key="17" value="paid.true.polynominal.attribute"/>
          <parameter key="18" value="activities.true.polynominal.attribute"/>
          <parameter key="19" value="nursery.true.polynominal.attribute"/>
          <parameter key="20" value="higher.true.polynominal.attribute"/>
          <parameter key="21" value="internet.true.polynominal.attribute"/>
          <parameter key="22" value="romantic.true.polynominal.attribute"/>
          <parameter key="23" value="famrel.true.integer.attribute"/>
          <parameter key="24" value="freetime.true.integer.attribute"/>
          <parameter key="25" value="goout.true.integer.attribute"/>
          <parameter key="26" value="Dalc.true.integer.attribute"/>
          <parameter key="27" value="Walc.true.integer.attribute"/>
          <parameter key="28" value="health.true.integer.attribute"/>
          <parameter key="29" value="absences.true.integer.attribute"/>
          <parameter key="30" value="G1.true.integer.attribute"/>
          <parameter key="31" value="G2.true.integer.attribute"/>
          <parameter key="32" value="G3.true.integer.attribute"/>
        </list>
      </operator>

      <!-- Step 2: give the G3 column the "label" role. -->
      <operator activated="true" class="set_role" compatibility="7.1.001" expanded="true"
                height="82" name="Set Role" width="90" x="246" y="34">
        <parameter key="attribute_name" value="G3"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>

      <!-- Step 3: create the attribute "G3 Below 10".
           The less-than sign must be written as &lt; because a literal one is
           not allowed inside an XML attribute value; the parser hands the
           decoded expression G3<10 to the operator. -->
      <operator activated="true" class="generate_attributes" compatibility="7.1.001" expanded="true"
                height="82" name="Generate Attributes" width="90" x="380" y="34">
        <list key="function_descriptions">
          <parameter key="G3 Below 10" value="G3&lt;10"/>
        </list>
      </operator>

      <!-- Step 4: group by "G3 Below 10" and count the rows in each group. -->
      <operator activated="true" class="aggregate" compatibility="7.1.001" expanded="true"
                height="82" name="Aggregate" width="90" x="514" y="34">
        <list key="aggregation_attributes">
          <parameter key="G3 Below 10" value="count"/>
        </list>
        <parameter key="group_by_attributes" value="G3 Below 10"/>
      </operator>

      <!-- Wiring of the chain; the aggregated example set is result 1. -->
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>