A program to recognize and reward our most engaged community members
Hi,
Good choice! I personally think that multi-objective (MO) feature selection is severely underrated, but it consistently delivers great results. You need to do a couple of things in RapidMiner to get this working:
These are a lot of things to consider, but it is totally worth it. I have attached a simple sample process below. You can copy the XML into RapidMiner by following the instructions here: http://community.rapidminer.com/t5/RapidMiner-Studio-Knowledge-Base/How-can-I-share-processes-without-RapidMiner-Server/ta-p/37047
Finally, if you want to learn more about this topic, I recommend some of my research papers, especially my PhD: http://www-ai.cs.uni-dortmund.de/auto?self=%24Publication_fz5hgy8b
Hope this helps,
Ingo
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample RapidMiner process: evolutionary feature selection on the Sonar
  sample data, using the "non dominated sorting" selection scheme. Each
  candidate is scored by a nested 5-fold cross validation followed by a
  Performance (Attribute Count) operator.
-->
<process version="7.5.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.003"
            expanded="true" name="Process">
    <process expanded="true">

      <!-- Load the Sonar sample data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="7.5.003"
                expanded="true" height="68" name="Retrieve Sonar"
                width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>

      <!-- Evolutionary selection: population of 20, at most 50 generations,
           population plotter enabled. -->
      <operator activated="true" class="optimize_selection_evolutionary" compatibility="7.5.003"
                expanded="true" height="103" name="Optimize Selection (Evolutionary)"
                width="90" x="179" y="34">
        <parameter key="population_size" value="20"/>
        <parameter key="maximum_number_of_generations" value="50"/>
        <parameter key="show_population_plotter" value="true"/>
        <parameter key="plot_generations" value="1"/>
        <parameter key="selection_scheme" value="non dominated sorting"/>
        <parameter key="keep_best_individual" value="true"/>

        <!-- Nested subprocess: takes an example set and hands back a
             performance to the selection operator. -->
        <process expanded="true">

          <!-- 5-fold cross validation with a fixed local random seed. -->
          <operator activated="true" class="concurrency:cross_validation" compatibility="7.5.003"
                    expanded="true" height="145" name="Cross Validation"
                    width="90" x="45" y="34">
            <parameter key="number_of_folds" value="5"/>
            <parameter key="use_local_random_seed" value="true"/>
            <parameter key="local_random_seed" value="1993"/>

            <!-- Training side: the learner's own feature selection is set
                 to "none". -->
            <process expanded="true">
              <operator activated="true" class="linear_regression" compatibility="7.5.003"
                        expanded="true" height="103" name="Linear Regression"
                        width="90" x="45" y="34">
                <parameter key="feature_selection" value="none"/>
              </operator>
              <connect from_port="training set" to_op="Linear Regression" to_port="training set"/>
              <connect from_op="Linear Regression" from_port="model" to_port="model"/>
              <portSpacing port="source_training set" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>

            <!-- Testing side: apply the model to the test set and measure
                 classification performance. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="7.5.003"
                        expanded="true" height="82" name="Apply Model"
                        width="90" x="45" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_classification" compatibility="7.5.003"
                        expanded="true" height="82" name="Performance"
                        width="90" x="179" y="34">
                <list key="class_weights"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_test set results" spacing="0"/>
              <portSpacing port="sink_performance 1" spacing="0"/>
              <portSpacing port="sink_performance 2" spacing="0"/>
            </process>
          </operator>

          <!-- Receives the cross-validation example set and performance;
               presumably adds the attribute count as a further criterion
               (confirm against the operator documentation). -->
          <operator activated="true" class="performance_attribute_count" compatibility="7.5.003"
                    expanded="true" height="82" name="Performance (2)"
                    width="90" x="179" y="34"/>
          <connect from_port="example set" to_op="Cross Validation" to_port="example set"/>
          <connect from_op="Cross Validation" from_port="example set" to_op="Performance (2)" to_port="example set"/>
          <connect from_op="Cross Validation" from_port="performance 1" to_op="Performance (2)" to_port="performance"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="performance"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
        </process>
      </operator>

      <!-- Process results: reduced example set, attribute weights and
           performance. -->
      <connect from_op="Retrieve Sonar" from_port="output" to_op="Optimize Selection (Evolutionary)" to_port="example set in"/>
      <connect from_op="Optimize Selection (Evolutionary)" from_port="example set out" to_port="result 1"/>
      <connect from_op="Optimize Selection (Evolutionary)" from_port="weights" to_port="result 2"/>
      <connect from_op="Optimize Selection (Evolutionary)" from_port="performance" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>