Process that worked under RapidMiner 4.4 is now giving JAVA.OUTOFMEMORY ERROR
Hello,
I'm trying to use a feature selection with embedded validation and JMySVMLearner to select relevant features in a dataset. The dataset is a CSV file with 28 examples, each containing 2000 attributes with values between 0 and 1, and a single label that can be either true or false. In the previous version of RapidMiner, I had no problem doing this... it just took lots of time. Now with 4.5, I'm getting an out-of-memory error from Java within 45 minutes of the run.
Here's my code:
<!-- RapidMiner process: wrapper cross-validation around a feature selection. -->
<operator name="Root" class="Process" expanded="yes">

  <!-- Data loading: column 1 is read as the id, column 2 as the label. -->
  <operator name="CSVExampleSource" class="CSVExampleSource">
    <parameter key="filename" value="D:\Lab Projects\Roberto Lleras\Machine Learning Algorithms\Survival with 3 yr followup top 2000- JUNE09.csv"/>
    <parameter key="label_column" value="2"/>
    <parameter key="id_column" value="1"/>
  </operator>

  <!-- Preprocessing: transpose, assign the label role, fill missing values with zero, convert nominal numbers. -->
  <operator name="ExampleSetTranspose" class="ExampleSetTranspose"/>
  <operator name="ChangeAttributeRole" class="ChangeAttributeRole">
    <parameter key="name" value="Survival Beyond 3 yrs"/>
    <parameter key="target_role" value="label"/>
  </operator>
  <operator name="MissingValueReplenishment" class="MissingValueReplenishment">
    <parameter key="default" value="zero"/>
    <list key="columns"/>
  </operator>
  <operator name="NominalNumbers2Numerical" class="NominalNumbers2Numerical"/>

  <!-- Outer wrapper validation with three children: feature selection, learner, evaluation chain. -->
  <operator name="WrapperXValidation" class="WrapperXValidation" expanded="yes">

    <!-- Child 1: feature selection scored by an inner leave-one-out cross-validation. -->
    <operator name="FeatureSelection" class="FeatureSelection" expanded="yes">
      <parameter key="show_stop_dialog" value="true"/>
      <parameter key="show_population_plotter" value="true"/>
      <parameter key="plot_generations" value="1"/>
      <parameter key="keep_best" value="25"/>
      <operator name="XValidation (2)" class="XValidation" expanded="yes">
        <parameter key="average_performances_only" value="false"/>
        <parameter key="leave_one_out" value="true"/>
        <operator name="JMySVMLearner" class="JMySVMLearner">
          <parameter key="calculate_weights" value="true"/>
        </operator>
        <operator name="OperatorChain" class="OperatorChain" expanded="yes">
          <operator name="ModelApplier" class="ModelApplier">
            <parameter key="keep_model" value="true"/>
            <list key="application_parameters"/>
            <parameter key="create_view" value="true"/>
          </operator>
          <operator name="ClassificationPerformance" class="ClassificationPerformance">
            <parameter key="keep_example_set" value="true"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="true"/>
            <parameter key="kappa" value="true"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <parameter key="spearman_rho" value="true"/>
            <parameter key="kendall_tau" value="true"/>
            <parameter key="absolute_error" value="true"/>
            <parameter key="relative_error" value="true"/>
            <parameter key="relative_error_lenient" value="true"/>
            <parameter key="relative_error_strict" value="true"/>
            <parameter key="correlation" value="true"/>
            <list key="class_weights"/>
          </operator>
          <operator name="MinMaxWrapper" class="MinMaxWrapper">
            <parameter key="minimum_weight" value="0.5"/>
          </operator>
        </operator>
      </operator>
    </operator>

    <!-- Child 2: learner used by the outer validation. -->
    <operator name="LibSVMLearner (2)" class="LibSVMLearner">
      <parameter key="keep_example_set" value="true"/>
      <parameter key="cache_size" value="5000"/>
      <list key="class_weights"/>
      <parameter key="calculate_confidences" value="true"/>
    </operator>

    <!-- Child 3: apply the model, measure performance, and write the model to disk. -->
    <operator name="OperatorChain (2)" class="OperatorChain" expanded="yes">
      <operator name="ModelApplier (2)" class="ModelApplier">
        <parameter key="keep_model" value="true"/>
        <list key="application_parameters"/>
        <parameter key="create_view" value="true"/>
      </operator>
      <operator name="ClassificationPerformance (2)" class="ClassificationPerformance">
        <parameter key="keep_example_set" value="true"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="true"/>
        <parameter key="kappa" value="true"/>
        <parameter key="weighted_mean_recall" value="true"/>
        <parameter key="weighted_mean_precision" value="true"/>
        <parameter key="spearman_rho" value="true"/>
        <parameter key="kendall_tau" value="true"/>
        <parameter key="absolute_error" value="true"/>
        <parameter key="relative_error" value="true"/>
        <parameter key="relative_error_lenient" value="true"/>
        <parameter key="relative_error_strict" value="true"/>
        <parameter key="correlation" value="true"/>
        <list key="class_weights"/>
      </operator>
      <operator name="ModelWriter" class="ModelWriter">
        <parameter key="model_file" value="D:\Lab Projects\Roberto Lleras\Machine Learning Algorithms\Methylation machine\3 yr survival OP with complete followup.mod"/>
      </operator>
    </operator>

  </operator>
</operator>
Any help would be appreciated! Thanks!
Roberto
...A second, less pertinent question: the wrapper validation takes forever to process. In the past I have used just a Weighted Feature Selection on the dataset after performing an SVMWeighting operator, in a process that was not nested like this, and got 100% accuracy within a couple of hours. Can I trust the results from that, or is the wrapper validation the way to go? Again, thanks so much!
I'm trying to use a feature selection with embedded validation and JMySVMLearner to select relevant features in a dataset. The dataset is a CSV file with 28 examples, each containing 2000 attributes with values between 0 and 1, and a single label that can be either true or false. In the previous version of RapidMiner, I had no problem doing this... it just took lots of time. Now with 4.5, I'm getting an out-of-memory error from Java within 45 minutes of the run.
Here's my code:
<!-- RapidMiner process: wrapper cross-validation around a feature selection. -->
<operator name="Root" class="Process" expanded="yes">

  <!-- Data loading: column 1 is read as the id, column 2 as the label. -->
  <operator name="CSVExampleSource" class="CSVExampleSource">
    <parameter key="filename" value="D:\Lab Projects\Roberto Lleras\Machine Learning Algorithms\Survival with 3 yr followup top 2000- JUNE09.csv"/>
    <parameter key="label_column" value="2"/>
    <parameter key="id_column" value="1"/>
  </operator>

  <!-- Preprocessing: transpose, assign the label role, fill missing values with zero, convert nominal numbers. -->
  <operator name="ExampleSetTranspose" class="ExampleSetTranspose"/>
  <operator name="ChangeAttributeRole" class="ChangeAttributeRole">
    <parameter key="name" value="Survival Beyond 3 yrs"/>
    <parameter key="target_role" value="label"/>
  </operator>
  <operator name="MissingValueReplenishment" class="MissingValueReplenishment">
    <parameter key="default" value="zero"/>
    <list key="columns"/>
  </operator>
  <operator name="NominalNumbers2Numerical" class="NominalNumbers2Numerical"/>

  <!-- Outer wrapper validation with three children: feature selection, learner, evaluation chain. -->
  <operator name="WrapperXValidation" class="WrapperXValidation" expanded="yes">

    <!-- Child 1: feature selection scored by an inner leave-one-out cross-validation. -->
    <operator name="FeatureSelection" class="FeatureSelection" expanded="yes">
      <parameter key="show_stop_dialog" value="true"/>
      <parameter key="show_population_plotter" value="true"/>
      <parameter key="plot_generations" value="1"/>
      <parameter key="keep_best" value="25"/>
      <operator name="XValidation (2)" class="XValidation" expanded="yes">
        <parameter key="average_performances_only" value="false"/>
        <parameter key="leave_one_out" value="true"/>
        <operator name="JMySVMLearner" class="JMySVMLearner">
          <parameter key="calculate_weights" value="true"/>
        </operator>
        <operator name="OperatorChain" class="OperatorChain" expanded="yes">
          <operator name="ModelApplier" class="ModelApplier">
            <parameter key="keep_model" value="true"/>
            <list key="application_parameters"/>
            <parameter key="create_view" value="true"/>
          </operator>
          <operator name="ClassificationPerformance" class="ClassificationPerformance">
            <parameter key="keep_example_set" value="true"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="true"/>
            <parameter key="kappa" value="true"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <parameter key="spearman_rho" value="true"/>
            <parameter key="kendall_tau" value="true"/>
            <parameter key="absolute_error" value="true"/>
            <parameter key="relative_error" value="true"/>
            <parameter key="relative_error_lenient" value="true"/>
            <parameter key="relative_error_strict" value="true"/>
            <parameter key="correlation" value="true"/>
            <list key="class_weights"/>
          </operator>
          <operator name="MinMaxWrapper" class="MinMaxWrapper">
            <parameter key="minimum_weight" value="0.5"/>
          </operator>
        </operator>
      </operator>
    </operator>

    <!-- Child 2: learner used by the outer validation. -->
    <operator name="LibSVMLearner (2)" class="LibSVMLearner">
      <parameter key="keep_example_set" value="true"/>
      <parameter key="cache_size" value="5000"/>
      <list key="class_weights"/>
      <parameter key="calculate_confidences" value="true"/>
    </operator>

    <!-- Child 3: apply the model, measure performance, and write the model to disk. -->
    <operator name="OperatorChain (2)" class="OperatorChain" expanded="yes">
      <operator name="ModelApplier (2)" class="ModelApplier">
        <parameter key="keep_model" value="true"/>
        <list key="application_parameters"/>
        <parameter key="create_view" value="true"/>
      </operator>
      <operator name="ClassificationPerformance (2)" class="ClassificationPerformance">
        <parameter key="keep_example_set" value="true"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="true"/>
        <parameter key="kappa" value="true"/>
        <parameter key="weighted_mean_recall" value="true"/>
        <parameter key="weighted_mean_precision" value="true"/>
        <parameter key="spearman_rho" value="true"/>
        <parameter key="kendall_tau" value="true"/>
        <parameter key="absolute_error" value="true"/>
        <parameter key="relative_error" value="true"/>
        <parameter key="relative_error_lenient" value="true"/>
        <parameter key="relative_error_strict" value="true"/>
        <parameter key="correlation" value="true"/>
        <list key="class_weights"/>
      </operator>
      <operator name="ModelWriter" class="ModelWriter">
        <parameter key="model_file" value="D:\Lab Projects\Roberto Lleras\Machine Learning Algorithms\Methylation machine\3 yr survival OP with complete followup.mod"/>
      </operator>
    </operator>

  </operator>
</operator>
Any help would be appreciated! Thanks!
Roberto
...A second, less pertinent question: the wrapper validation takes forever to process. In the past I have used just a Weighted Feature Selection on the dataset after performing an SVMWeighting operator, in a process that was not nested like this, and got 100% accuracy within a couple of hours. Can I trust the results from that, or is the wrapper validation the way to go? Again, thanks so much!