Hi,
I keep getting an NPE when I try to apply my model to my test set. The model works fine when I run it through a validator, for example to get performance info, but not otherwise.
It seems to happen when using meta-learners - I'm running 5.3.013 on Ubuntu 10.04 LTS.
I'm sure I've done something wrong - or I misunderstand, but I've lost all afternoon trying different combinations to get this to work.
Any help would be fantastic.
The log dump is:
Aug 19, 2013 8:38:41 PM INFO: Loading initial data.
Aug 19, 2013 8:38:45 PM SEVERE: Process failed: operator cannot be executed. Check the log messages...
Aug 19, 2013 8:38:45 PM SEVERE: Here: Process[1] (Process)
subprocess 'Main Process'
+- Training[1] (Retrieve)
+- Scrub Training[1] (Subprocess)
subprocess 'Nested Chain'
| +- SurvivedToBinominal[1] (Numerical to Binominal)
| +- Select Attributes[1] (Select Attributes)
| +- Set Roles (2)[1] (Set Role)
| +- Set Missing P Class[1] (Replace Missing Values)
| +- Replace Missing Values (3)[1] (Replace Missing Values)
+- Retrieve Titanic_Testing[1] (Retrieve)
+- Scrub Test[1] (Subprocess)
subprocess 'Nested Chain'
| +- Select Attributes (2)[1] (Select Attributes)
| +- Set Roles[1] (Set Role)
| +- Set Missing P Class (2)[1] (Replace Missing Values)
| +- Replace Missing Values (5)[1] (Replace Missing Values)
+- Bagging[1] (Bagging)
subprocess 'Learning Process'
| +- Stacking (2)[10] (Stacking)
subprocess 'Base Learner'
| | +- AdaBoost (3)[10] (AdaBoost)
subprocess 'Learning Process'
| | | +- Random Forest (2)[32] (Random Forest)
| | +- AdaBoost (4)[10] (AdaBoost)
subprocess 'Learning Process'
| | +- Naive Bayes (2)[58] (Naive Bayes (Kernel))
subprocess 'Stacking Model Learner'
| +- Naive Bayes (3)[10] (Naive Bayes)
==> +- Apply Model[1] (Apply Model)
Aug 19, 2013 8:38:45 PM SEVERE: java.lang.NullPointerException
And my process is:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.013">
<!-- Process context with empty input, output and macros entries. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
<process expanded="true">
<!-- Retrieve operator "Training": loads the repository entry
     //Local Repository/data/Titanic_Training (absolute repository path). -->
<operator activated="true" class="retrieve" compatibility="5.3.013" expanded="true" height="60" name="Training" width="90" x="45" y="75">
<parameter key="repository_entry" value="//Local Repository/data/Titanic_Training"/>
</operator>
<!-- Subprocess "Scrub Training": preprocessing chain for the training data.
     Steps, in connection order:
       1. SurvivedToBinominal: Numerical to Binominal on the single attribute Survived.
       2. Select Attributes: keeps the subset Age, PassengerId, Pclass, Sex, Survived.
       3. Set Roles (2): PassengerId gets role "id", Survived gets role "label".
       4. Set Missing P Class: Replace Missing Values on Pclass with default "minimum".
       5. Replace Missing Values (3): Replace Missing Values on Age.
     The result of step 5 is delivered on port "out 1". -->
<operator activated="true" class="subprocess" compatibility="5.3.013" expanded="true" height="76" name="Scrub Training" width="90" x="179" y="75">
<process expanded="true">
<operator activated="true" class="numerical_to_binominal" compatibility="5.3.013" expanded="true" height="76" name="SurvivedToBinominal" width="90" x="45" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Survived"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.3.013" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="30">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Age|PassengerId|Pclass|Sex|Survived"/>
</operator>
<operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="76" name="Set Roles (2)" width="90" x="313" y="30">
<parameter key="attribute_name" value="PassengerId"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles">
<parameter key="Survived" value="label"/>
</list>
</operator>
<operator activated="true" class="replace_missing_values" compatibility="5.3.013" expanded="true" height="94" name="Set Missing P Class" width="90" x="112" y="165">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Pclass"/>
<parameter key="default" value="minimum"/>
<list key="columns"/>
</operator>
<!-- NOTE(review): no "default" parameter is set on this step, yet a
     replenishment_value of "S" is stored. Presumably that value is only used
     when the default is set to a fixed value and is a leftover here; confirm. -->
<operator activated="true" class="replace_missing_values" compatibility="5.3.013" expanded="true" height="94" name="Replace Missing Values (3)" width="90" x="313" y="165">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Age"/>
<list key="columns"/>
<parameter key="replenishment_value" value="S"/>
</operator>
<connect from_port="in 1" to_op="SurvivedToBinominal" to_port="example set input"/>
<connect from_op="SurvivedToBinominal" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Set Roles (2)" to_port="example set input"/>
<connect from_op="Set Roles (2)" from_port="example set output" to_op="Set Missing P Class" to_port="example set input"/>
<connect from_op="Set Missing P Class" from_port="example set output" to_op="Replace Missing Values (3)" to_port="example set input"/>
<connect from_op="Replace Missing Values (3)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Retrieve operator "Retrieve Titanic_Testing": loads the repository entry
     ../data/Titanic_Testing. This is a relative path, whereas the "Training"
     operator uses an absolute one.
     NOTE(review): presumably resolved relative to where this process is stored;
     confirm it points at the intended repository. -->
<operator activated="true" class="retrieve" compatibility="5.3.013" expanded="true" height="60" name="Retrieve Titanic_Testing" width="90" x="45" y="390">
<parameter key="repository_entry" value="../data/Titanic_Testing"/>
</operator>
<!-- Subprocess "Scrub Test": preprocessing chain for the test data.
     Steps, in connection order:
       1. Select Attributes (2): keeps the subset Age, PassengerId, Pclass, Sex
          (Survived is not in the list).
       2. Set Roles: PassengerId gets role "id"; the additional-roles list is
          empty, so no attribute is assigned the label role in this chain.
       3. Set Missing P Class (2): Replace Missing Values on Pclass with default "minimum".
       4. Replace Missing Values (5): Replace Missing Values on Age.
     The result of step 4 is delivered on port "out 1". -->
<operator activated="true" class="subprocess" compatibility="5.3.013" expanded="true" height="76" name="Scrub Test" width="90" x="179" y="390">
<process expanded="true">
<operator activated="true" class="select_attributes" compatibility="5.3.013" expanded="true" height="76" name="Select Attributes (2)" width="90" x="45" y="30">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Age|PassengerId|Pclass|Sex"/>
</operator>
<operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="76" name="Set Roles" width="90" x="179" y="75">
<parameter key="attribute_name" value="PassengerId"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="replace_missing_values" compatibility="5.3.013" expanded="true" height="94" name="Set Missing P Class (2)" width="90" x="313" y="210">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Pclass"/>
<parameter key="default" value="minimum"/>
<list key="columns"/>
</operator>
<!-- NOTE(review): no "default" parameter is set on this step, yet a
     replenishment_value of "S" is stored; presumably an unused leftover. Confirm. -->
<operator activated="true" class="replace_missing_values" compatibility="5.3.013" expanded="true" height="94" name="Replace Missing Values (5)" width="90" x="447" y="165">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Age"/>
<list key="columns"/>
<parameter key="replenishment_value" value="S"/>
</operator>
<connect from_port="in 1" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Set Roles" to_port="example set input"/>
<connect from_op="Set Roles" from_port="example set output" to_op="Set Missing P Class (2)" to_port="example set input"/>
<connect from_op="Set Missing P Class (2)" from_port="example set output" to_op="Replace Missing Values (5)" to_port="example set input"/>
<connect from_op="Replace Missing Values (5)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Learner "Bagging": its inner learning process contains a single Stacking
     operator, whose model is passed straight to Bagging's "model" port.
     No parameters are set explicitly on any operator in this block. -->
<operator activated="true" class="bagging" compatibility="5.3.013" expanded="true" height="76" name="Bagging" width="90" x="112" y="255">
<process expanded="true">
<operator activated="true" class="stacking" compatibility="5.3.013" expanded="true" height="60" name="Stacking (2)" width="90" x="246" y="30">
<!-- First Stacking subprocess: two AdaBoost operators, one wrapping
     Random Forest and one wrapping Naive Bayes (Kernel). They are wired from
     "training set 1"/"training set 2" to "base model 1"/"base model 2"; the
     third port pair has spacing entries only and is not connected. -->
<process expanded="true">
<operator activated="true" class="adaboost" compatibility="5.3.013" expanded="true" name="AdaBoost (3)">
<process expanded="true">
<operator activated="true" class="random_forest" compatibility="5.3.013" expanded="true" name="Random Forest (2)"/>
<connect from_port="training set" to_op="Random Forest (2)" to_port="training set"/>
<connect from_op="Random Forest (2)" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
</process>
</operator>
<operator activated="true" class="adaboost" compatibility="5.3.013" expanded="true" name="AdaBoost (4)">
<process expanded="true">
<operator activated="true" class="naive_bayes_kernel" compatibility="5.3.013" expanded="true" name="Naive Bayes (2)"/>
<connect from_port="training set" to_op="Naive Bayes (2)" to_port="training set"/>
<connect from_op="Naive Bayes (2)" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
</process>
</operator>
<connect from_port="training set 1" to_op="AdaBoost (3)" to_port="training set"/>
<connect from_port="training set 2" to_op="AdaBoost (4)" to_port="training set"/>
<connect from_op="AdaBoost (3)" from_port="model" to_port="base model 1"/>
<connect from_op="AdaBoost (4)" from_port="model" to_port="base model 2"/>
<portSpacing port="source_training set 1" spacing="0"/>
<portSpacing port="source_training set 2" spacing="0"/>
<portSpacing port="source_training set 3" spacing="0"/>
<portSpacing port="sink_base model 1" spacing="0"/>
<portSpacing port="sink_base model 2" spacing="0"/>
<portSpacing port="sink_base model 3" spacing="0"/>
</process>
<!-- Second Stacking subprocess: a Naive Bayes learner fed from the
     "stacking examples" port; its model goes to the "stacking model" port. -->
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="5.3.013" expanded="true" name="Naive Bayes (3)"/>
<connect from_port="stacking examples" to_op="Naive Bayes (3)" to_port="training set"/>
<connect from_op="Naive Bayes (3)" from_port="model" to_port="stacking model"/>
<portSpacing port="source_stacking examples" spacing="0"/>
<portSpacing port="sink_stacking model" spacing="0"/>
</process>
</operator>
<connect from_port="training set" to_op="Stacking (2)" to_port="training set"/>
<connect from_op="Stacking (2)" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
</process>
</operator>
<!-- Apply Model: declared with an empty application_parameters list. In the
     main process wiring it receives the Bagging model on "model" and the
     Scrub Test output on "unlabelled data".
     NOTE(review): the accompanying log marks this operator as the point of
     failure (java.lang.NullPointerException). The test chain assigns no label
     role while the model is a bagged stacking ensemble; whether that
     combination is the cause is unconfirmed and should be verified. -->
<operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="380" y="300">
<list key="application_parameters"/>
</operator>
<!-- Main process wiring:
       Training to Scrub Training to Bagging ("training set");
       Retrieve Titanic_Testing to Scrub Test to Apply Model ("unlabelled data");
       Bagging "model" to Apply Model "model";
       Apply Model "labelled data" to process output "result 1". -->
<connect from_op="Training" from_port="output" to_op="Scrub Training" to_port="in 1"/>
<connect from_op="Scrub Training" from_port="out 1" to_op="Bagging" to_port="training set"/>
<connect from_op="Retrieve Titanic_Testing" from_port="output" to_op="Scrub Test" to_port="in 1"/>
<connect from_op="Scrub Test" from_port="out 1" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Bagging" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Thanks
Sam
PS - the data sets are from Kaggle's competition - if that helps. I can put links to these if necessary.