"Predictive analysis - problems with accuracy."

I have set the window size to 5 because I think information about 5 days is enough, but I could easily change this. The step size is 1 because you would like to predict every day, and the horizon is 2 because you predict the value of tomorrow and not today.
- Forecasting Performance - Horizon.
Answers
-
What you can do now is use an Optimize Parameter operator to tune your NN parameters like Momentum, Learning Rate, and Training Cycles. It will go through all those parameters and deliver the optimized model that you can use downstream in the process. See the sample process attached below.
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.002">
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.002" expanded="true" name="Process">
  <process expanded="true">
    <!-- Training branch: load the train set and build windowed examples
         (window_size=5, horizon=2, label created from attribute A). -->
    <operator activated="true" class="retrieve" compatibility="7.6.002" expanded="true" height="68" name="Retrieve test data only flow oktober days train set" width="90" x="45" y="34">
      <parameter key="repository_entry" value="//Local Repository/data/test data only flow oktober days train set"/>
    </operator>
    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="179" y="34">
      <parameter key="window_size" value="5"/>
      <parameter key="create_label" value="true"/>
      <parameter key="label_attribute" value="A"/>
      <parameter key="horizon" value="2"/>
    </operator>
    <!-- Grid optimization over three parameters of the nested "Neural Net" operator.
         The bracketed values presumably read [min;max;steps;scale]; confirm against
         the operator documentation. -->
    <operator activated="true" class="optimize_parameters_grid" compatibility="7.6.002" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="313" y="34">
      <list key="parameters">
        <parameter key="Neural Net.training_cycles" value="[100;1000;9;linear]"/>
        <parameter key="Neural Net.learning_rate" value="[0.1;0.9;8;linear]"/>
        <parameter key="Neural Net.momentum" value="[0.1;0.9;8;linear]"/>
      </list>
      <process expanded="true">
        <!-- Sliding window validation: training window 20, step size 5, test window 10. -->
        <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="112" y="34">
          <parameter key="training_window_width" value="20"/>
          <parameter key="training_window_step_size" value="5"/>
          <parameter key="test_window_width" value="10"/>
          <!-- Training subprocess: the training examples feed the Neural Net and its
               model is handed to the testing subprocess. -->
          <process expanded="true">
            <operator activated="true" class="neural_net" compatibility="7.6.002" expanded="true" height="82" name="Neural Net" width="90" x="179" y="34">
              <list key="hidden_layers"/>
            </operator>
            <connect from_port="training" to_op="Neural Net" to_port="training set"/>
            <connect from_op="Neural Net" from_port="model" to_port="model"/>
            <portSpacing port="source_training" spacing="0"/>
            <portSpacing port="sink_model" spacing="0"/>
            <portSpacing port="sink_through 1" spacing="0"/>
          </process>
          <!-- Testing subprocess: apply the model to the test window and measure
               forecasting performance with horizon=2. -->
          <process expanded="true">
            <operator activated="true" class="apply_model" compatibility="7.6.002" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
              <list key="application_parameters"/>
            </operator>
            <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
              <parameter key="horizon" value="2"/>
            </operator>
            <connect from_port="model" to_op="Apply Model" to_port="model"/>
            <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
            <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
            <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
            <portSpacing port="source_model" spacing="0"/>
            <portSpacing port="source_test set" spacing="0"/>
            <portSpacing port="source_through 1" spacing="0"/>
            <portSpacing port="sink_averagable 1" spacing="0"/>
            <portSpacing port="sink_averagable 2" spacing="0"/>
          </process>
        </operator>
        <!-- The validation's model leaves the optimizer on "result 1"; its
             "averagable 1" output is wired to the optimizer's performance port. -->
        <connect from_port="input 1" to_op="Validation" to_port="training"/>
        <connect from_op="Validation" from_port="model" to_port="result 1"/>
        <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
        <portSpacing port="source_input 1" spacing="0"/>
        <portSpacing port="source_input 2" spacing="0"/>
        <portSpacing port="sink_performance" spacing="0"/>
        <portSpacing port="sink_result 1" spacing="0"/>
        <portSpacing port="sink_result 2" spacing="0"/>
      </process>
    </operator>
    <!-- Test branch: load the separate test set and window it with the same
         settings as the training branch. -->
    <operator activated="true" class="retrieve" compatibility="7.6.002" expanded="true" height="68" name="Retrieve test data only flow oktober days test set" width="90" x="45" y="340">
      <parameter key="repository_entry" value="//Local Repository/data/test data only flow oktober days test set"/>
    </operator>
    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="179" y="340">
      <parameter key="window_size" value="5"/>
      <parameter key="create_label" value="true"/>
      <parameter key="label_attribute" value="A"/>
      <parameter key="horizon" value="2"/>
    </operator>
    <!-- Applies the model coming out of the optimizer to the windowed test data. -->
    <operator activated="true" class="apply_model" compatibility="7.6.002" expanded="true" height="82" name="Apply Model (2)" width="90" x="648" y="238">
      <list key="application_parameters"/>
    </operator>
    <!-- Process outputs: result 1 = labelled test data, result 2 = model,
         result 3 = optimizer performance, result 4 = optimizer parameter output. -->
    <connect from_op="Retrieve test data only flow oktober days train set" from_port="output" to_op="Windowing" to_port="example set input"/>
    <connect from_op="Windowing" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
    <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 3"/>
    <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 4"/>
    <connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Apply Model (2)" to_port="model"/>
    <connect from_op="Retrieve test data only flow oktober days test set" from_port="output" to_op="Windowing (2)" to_port="example set input"/>
    <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 1"/>
    <connect from_op="Apply Model (2)" from_port="model" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    <portSpacing port="sink_result 5" spacing="0"/>
  </process>
</operator>
</process>
2 -
In your current process I don't see much sense in doing CV, because you already have an independent test dataset.
However, the CV makes sense inside the Optimize Parameters operator. It is nice to test several models using only the training dataset and then measure the performance of the final model in an independent dataset (sometimes called validation dataset).
2 -
Sorry, I tried to figure out what "CV" means, but I couldn't find it.
0 -
0
-
Alright clear enough, I will use the optimize parameter. Does this operator also change my sliding window validation parameter?
When I run your example test @Thomas_Ott, I got the error message: "Cannot reset network to a smaller learning rate".
I deleted the 0-values in my training and test sets, since people on the forums said this might be a solution, but I still got the error message. Do you know what I'm doing wrong?
0 -
@maurits_freriks you might need to change the range of the Learning Rate to, say, 0.2 to 0.9 in the optimization; it can't converge on the global minimum with that small an LR.
0 -
I've changed this to 0.9 but still the same error ..
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
  <process expanded="true">
    <!-- Training branch: load "train set.2" and build windowed examples
         (window_size=5, horizon=2, label created from attribute A). -->
    <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve test data only flow oktober days train set.2" width="90" x="45" y="34">
      <parameter key="repository_entry" value="//Local Repository/data/test data only flow oktober days train set.2"/>
    </operator>
    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="179" y="34">
      <parameter key="window_size" value="5"/>
      <parameter key="create_label" value="true"/>
      <parameter key="label_attribute" value="A"/>
      <parameter key="horizon" value="2"/>
    </operator>
    <!-- Grid optimization over three parameters of the nested "Neural Net" operator.
         The learning_rate entry still starts at 0.1. The bracketed values presumably
         read [min;max;steps;scale]; confirm against the operator documentation. -->
    <operator activated="true" class="optimize_parameters_grid" compatibility="7.6.001" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="313" y="34">
      <list key="parameters">
        <parameter key="Neural Net.training_cycles" value="[100;1000;9;linear]"/>
        <parameter key="Neural Net.learning_rate" value="[0.1;0.9;8;linear]"/>
        <parameter key="Neural Net.momentum" value="[0.1;0.9;8;linear]"/>
      </list>
      <process expanded="true">
        <!-- Sliding window validation: training window 20, step size 5, test window 10. -->
        <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="112" y="34">
          <parameter key="training_window_width" value="20"/>
          <parameter key="training_window_step_size" value="5"/>
          <parameter key="test_window_width" value="10"/>
          <!-- Training subprocess: the training examples feed the Neural Net and its
               model is handed to the testing subprocess. -->
          <process expanded="true">
            <!-- NOTE(review): these three values are set on the operator itself, yet the
                 same parameters are listed in the optimizer grid above. Presumably the
                 grid values replace them during optimization; TODO confirm. -->
            <operator activated="true" class="neural_net" compatibility="7.6.001" expanded="true" height="82" name="Neural Net" width="90" x="179" y="34">
              <list key="hidden_layers"/>
              <parameter key="training_cycles" value="800"/>
              <parameter key="learning_rate" value="0.9"/>
              <parameter key="momentum" value="0.5"/>
            </operator>
            <connect from_port="training" to_op="Neural Net" to_port="training set"/>
            <connect from_op="Neural Net" from_port="model" to_port="model"/>
            <portSpacing port="source_training" spacing="0"/>
            <portSpacing port="sink_model" spacing="0"/>
            <portSpacing port="sink_through 1" spacing="0"/>
          </process>
          <!-- Testing subprocess: apply the model to the test window and measure
               forecasting performance with horizon=2. -->
          <process expanded="true">
            <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
              <list key="application_parameters"/>
            </operator>
            <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
              <parameter key="horizon" value="2"/>
            </operator>
            <connect from_port="model" to_op="Apply Model" to_port="model"/>
            <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
            <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
            <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
            <portSpacing port="source_model" spacing="0"/>
            <portSpacing port="source_test set" spacing="0"/>
            <portSpacing port="source_through 1" spacing="0"/>
            <portSpacing port="sink_averagable 1" spacing="0"/>
            <portSpacing port="sink_averagable 2" spacing="0"/>
          </process>
        </operator>
        <!-- The validation's model leaves the optimizer on "result 1"; its
             "averagable 1" output is wired to the optimizer's performance port. -->
        <connect from_port="input 1" to_op="Validation" to_port="training"/>
        <connect from_op="Validation" from_port="model" to_port="result 1"/>
        <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
        <portSpacing port="source_input 1" spacing="0"/>
        <portSpacing port="source_input 2" spacing="0"/>
        <portSpacing port="sink_performance" spacing="0"/>
        <portSpacing port="sink_result 1" spacing="0"/>
        <portSpacing port="sink_result 2" spacing="0"/>
      </process>
    </operator>
    <!-- NOTE(review): this second Retrieve reads the same repository entry
         ("train set.2") as the first one, so the final Apply Model scores the
         data the model was trained on. Confirm whether a separate test set
         was intended here. -->
    <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve test data only flow oktober days train set.2 (2)" width="90" x="45" y="340">
      <parameter key="repository_entry" value="//Local Repository/data/test data only flow oktober days train set.2"/>
    </operator>
    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="179" y="340">
      <parameter key="window_size" value="5"/>
      <parameter key="create_label" value="true"/>
      <parameter key="label_attribute" value="A"/>
      <parameter key="horizon" value="2"/>
    </operator>
    <!-- Applies the model coming out of the optimizer to the second windowed data set. -->
    <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="648" y="238">
      <list key="application_parameters"/>
    </operator>
    <!-- Process outputs: result 1 = labelled data, result 2 = model,
         result 3 = optimizer performance, result 4 = optimizer parameter output. -->
    <connect from_op="Retrieve test data only flow oktober days train set.2" from_port="output" to_op="Windowing" to_port="example set input"/>
    <connect from_op="Windowing" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
    <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 3"/>
    <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 4"/>
    <connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Apply Model (2)" to_port="model"/>
    <connect from_op="Retrieve test data only flow oktober days train set.2 (2)" from_port="output" to_op="Windowing (2)" to_port="example set input"/>
    <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 1"/>
    <connect from_op="Apply Model (2)" from_port="model" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    <portSpacing port="sink_result 5" spacing="0"/>
  </process>
</operator>
</process>
0 -
@maurits_freriks do you have a sample data set I can use? I was saying you should change the lower value of LR to 0.2 or 0.25
0 -
Hello
I'm having trouble calculating accuracy
The precision of my decision tree was 72 before the new Windows installation
But now that I've changed Windows and installed the software, the accuracy is 60
Steps to achieve accuracy are the same
I do not know where the problem is
Now weka software has a precision of 72, but the RapidMiner software is 60
And I want to work with RapidMiner software
Where do you think the problem is?0 -
Hi,
did you upgrade from 7.6 to 8.0 in the meantime?
If yes: we changed a few things in the decision trees. That should not worsen but rather improve your performance. You could try setting the compatibility level of your Decision Tree operator back to 7.6.
Cheers,
Martin
0