Hi all,
I have been struggling for months with a prediction problem. Now, after a few optimization runs (which take days and days), I have (probably) ended up with an overfitting problem?!
As you can see in the picture below, the 6th column represents the performance of the model. The 3rd, 4th and 5th columns are the parameters of the sliding window validation (training width, step width, testing width). The ratio of training to testing is probably too high, but if I decrease the ratio the performance decreases as well. So I don't know what the right ratio would be such that the performance is no longer suspect.
So could anyone advise me what the ratio should be with respect to my datasets:
https://drive.google.com/open?id=12XjPKw2diSLnc9-MtAv_--SVfntA3nR-

Below is the XML code of the process. I used the score object to combine these values against my test set in a score process.
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<!-- Process context: the input, output and macro bindings are all empty. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<process expanded="true">
<!-- Loads the training data from the repository entry "../data/VRIES train set". -->
<operator name="Retrieve VRIES train set" class="retrieve"
          activated="true" compatibility="7.6.001" expanded="true"
          x="45" y="34" width="90" height="68">
  <parameter key="repository_entry" value="../data/VRIES train set"/>
</operator>
<!-- Keeps only the single attribute "A" (filter type "single"). -->
<operator name="Select Attributes" class="select_attributes"
          activated="true" compatibility="7.6.001" expanded="true"
          x="179" y="34" width="90" height="82">
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="A"/>
  <description align="center" color="transparent" colored="false" width="126">Select the 'A' column</description>
</operator>
<!-- Series-extension operator: lags attribute "A" by 1. -->
<operator name="Lag Series" class="series:lag_series"
          activated="true" compatibility="7.4.000" expanded="true"
          x="313" y="34" width="90" height="82">
  <list key="attributes">
    <parameter key="A" value="1"/>
  </list>
  <description align="center" color="transparent" colored="false" width="126">Lag 'A' column for striping out spikes</description>
</operator>
<!-- Aggregates attribute "A" with the standard_deviation function. -->
<operator name="Aggregate" class="aggregate"
          activated="true" compatibility="7.6.001" expanded="true"
          x="447" y="34" width="90" height="82">
  <list key="aggregation_attributes">
    <parameter key="A" value="standard_deviation"/>
  </list>
  <description align="center" color="transparent" colored="false" width="126">Calculate std dev of 'A', push to macro</description>
</operator>
<!-- Stores the data value of attribute "standard_deviation(A)" at example
     index 1 in the macro "stdev".
     NOTE(review): no %{stdev} reference is visible in this process; confirm
     whether the macro is still needed. -->
<operator name="Extract Macro" class="extract_macro"
          activated="true" compatibility="7.6.001" expanded="true"
          x="648" y="34" width="90" height="68">
  <parameter key="macro" value="stdev"/>
  <parameter key="macro_type" value="data_value"/>
  <parameter key="attribute_name" value="standard_deviation(A)"/>
  <parameter key="example_index" value="1"/>
  <list key="additional_macros"/>
  <description align="center" color="transparent" colored="false" width="126">extract std dev value to use in Generate Attributes</description>
</operator>
<!-- Creates the attribute "Maintainence": 1 when A is less than ([A-1] - A),
     otherwise 0. [A-1] is presumably the lagged column produced by Lag Series
     (TODO confirm the generated attribute name).
     The less-than sign in the expression is written as the entity &lt; because
     a literal less-than character is not allowed inside an XML attribute value;
     the parser hands the unescaped expression to the operator. -->
<operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="648" y="238">
  <list key="function_descriptions">
    <parameter key="Maintainence" value="if(A &lt; ([A-1]-A), 1, 0)"/>
  </list>
  <description align="center" color="transparent" colored="false" width="126">Create a Maintenance attribute to help filter out the days it's in maintenance mode</description>
</operator>
<!-- Keeps only the examples whose "Maintainence" attribute equals 0. -->
<operator name="Filter Examples" class="filter_examples"
          activated="true" compatibility="7.6.001" expanded="true"
          x="782" y="238" width="90" height="103">
  <list key="filters_list">
    <parameter key="filters_entry_key" value="Maintainence.eq.0"/>
  </list>
  <description align="center" color="transparent" colored="false" width="126">Select only non maintenance mode days</description>
</operator>
<!-- Reduces the example set to the single attribute "A" again. -->
<operator name="Select Attributes (2)" class="select_attributes"
          activated="true" compatibility="7.6.001" expanded="true"
          x="916" y="238" width="90" height="82">
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="A"/>
  <description align="center" color="transparent" colored="false" width="126">Select 'A' again</description>
</operator>
<!-- Grid search over the inner process (Set Macro, Windowing Train, Validation
     with an SVM learner, Log, Windowing Test).
     Searched parameters: Validation.cumulative_training, SVM.kernel_gamma,
     SVM.C, Validation.training_window_width, Set Macro.value,
     Validation.training_window_step_size and Validation.test_window_width.
     The window size travels through the macro named "1": Set Macro defines it,
     and Windowing Train, Windowing Test and Performance (horizon) read it as
     %{1}. The grid therefore varies Set Macro.value (1,2,3) while the macro
     name stays "1". Varying the macro name instead would leave %{1} unchanged
     or undefined for most grid points and only multiply the run time. -->
<operator activated="true" class="optimize_parameters_grid" compatibility="7.6.001" expanded="true" height="145" name="Optimize Parameters (Grid)" width="90" x="1050" y="238">
  <list key="parameters">
    <parameter key="Validation.cumulative_training" value="true,false"/>
    <parameter key="SVM.kernel_gamma" value="[0.1;0.8;5;logarithmic]"/>
    <parameter key="SVM.C" value="[6000;10000;4;linear]"/>
    <parameter key="Validation.training_window_width" value="[40;60;10;linear]"/>
    <parameter key="Set Macro.value" value="1,2,3"/>
    <parameter key="Validation.training_window_step_size" value="[2;10;5;linear]"/>
    <parameter key="Validation.test_window_width" value="[1.0;10;5;linear]"/>
  </list>
  <process expanded="true">
    <!-- Defines the macro "1" that the windowing operators and the
         performance horizon read as %{1}. The value "2" is only the default;
         the surrounding grid search overrides it. -->
    <operator activated="true" class="set_macro" compatibility="7.6.001" expanded="true" height="82" name="Set Macro" width="90" x="45" y="34">
      <parameter key="macro" value="1"/>
      <parameter key="value" value="2"/>
    </operator>
    <!-- Windows the series with window size %{1} and creates a label from "A". -->
    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Train" width="90" x="179" y="34">
      <parameter key="window_size" value="%{1}"/>
      <parameter key="create_label" value="true"/>
      <parameter key="label_attribute" value="A"/>
    </operator>
    <!-- Sliding-window validation. The training width, test width and
         cumulative_training values below are defaults that the grid search
         overrides. -->
    <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
      <parameter key="training_window_width" value="190"/>
      <parameter key="test_window_width" value="1"/>
      <parameter key="cumulative_training" value="true"/>
      <!-- Training sub-process: fits an SVM with a radial kernel. -->
      <process expanded="true">
        <operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
          <parameter key="kernel_type" value="radial"/>
          <parameter key="kernel_gamma" value="0.5278031643091577"/>
          <parameter key="C" value="9000.0"/>
        </operator>
        <connect from_port="training" to_op="SVM" to_port="training set"/>
        <connect from_op="SVM" from_port="model" to_port="model"/>
        <portSpacing port="source_training" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>
      <!-- Testing sub-process: applies the model to the test window and
           measures forecasting performance with horizon %{1}. -->
      <process expanded="true">
        <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
          <parameter key="horizon" value="%{1}"/>
        </operator>
        <connect from_port="model" to_op="Apply Model" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
        <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
        <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_averagable 1" spacing="0"/>
        <portSpacing port="sink_averagable 2" spacing="0"/>
      </process>
    </operator>
    <!-- Logs the searched parameter values, the validation performance and the
         current macro value to the file "tmp" for every grid point. -->
    <operator activated="true" class="log" compatibility="7.6.001" expanded="true" height="82" name="Log" width="90" x="581" y="85">
      <parameter key="filename" value="tmp"/>
      <list key="log">
        <parameter key="C" value="operator.SVM.parameter.C"/>
        <parameter key="Gamma" value="operator.SVM.parameter.kernel_gamma"/>
        <parameter key="Training Width" value="operator.Validation.parameter.training_window_width"/>
        <parameter key="Step Width" value="operator.Validation.parameter.training_window_step_size"/>
        <parameter key="Testing Width" value="operator.Validation.parameter.test_window_width"/>
        <parameter key="Perf" value="operator.Validation.value.performance"/>
        <parameter key="Set Macro Value" value="operator.Set Macro.value.macro_value"/>
      </list>
    </operator>
    <!-- Windows the "original" output of Windowing Train with the same window
         size %{1}, without creating a label. -->
    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Test" width="90" x="380" y="187">
      <parameter key="window_size" value="%{1}"/>
    </operator>
    <connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
    <connect from_op="Set Macro" from_port="through 1" to_op="Windowing Train" to_port="example set input"/>
    <connect from_op="Windowing Train" from_port="example set output" to_op="Validation" to_port="training"/>
    <connect from_op="Windowing Train" from_port="original" to_op="Windowing Test" to_port="example set input"/>
    <connect from_op="Validation" from_port="model" to_port="result 1"/>
    <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
    <connect from_op="Log" from_port="through 1" to_port="performance"/>
    <connect from_op="Windowing Test" from_port="example set output" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_performance" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">Optimize and store optimized model</description>
</operator>
<!-- Writes its input object to the repository entry "../data/Thomas ott test 2". -->
<operator name="Store" class="store"
          activated="true" compatibility="7.6.001" expanded="true"
          x="1251" y="187" width="90" height="68">
  <parameter key="repository_entry" value="../data/Thomas ott test 2"/>
  <description align="center" color="transparent" colored="false" width="126">Store optimized model</description>
</operator>
<!-- Applies the model it receives to the data on its "unlabelled data" port;
     no application parameters are set. -->
<operator name="Apply Model (2)" class="apply_model"
          activated="true" compatibility="7.6.001" expanded="true"
          x="1385" y="289" width="90" height="82">
  <list key="application_parameters"/>
  <description align="center" color="transparent" colored="false" width="126">Sanity Check. Review 'A' time series against predicted 'A' time series from training data set.</description>
</operator>
<!-- Wiring of the top-level process. -->
<!-- Pre-processing chain: Retrieve, Select Attributes, Lag Series, Aggregate. -->
<connect from_op="Retrieve VRIES train set" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Lag Series" to_port="example set input"/>
<connect from_op="Lag Series" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
<!-- The aggregated output feeds Extract Macro; the "original" (unaggregated)
     port continues through Generate Attributes, Filter Examples and
     Select Attributes (2) into the optimizer. -->
<connect from_op="Aggregate" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Aggregate" from_port="original" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<!-- Optimizer outputs: performance and parameter set go to the process
     results; "result 1" goes to Store and on to Apply Model (2) as the model;
     "result 2" is the data that Apply Model (2) scores. -->
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 2"/>
<connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Store" to_port="input"/>
<connect from_op="Optimize Parameters (Grid)" from_port="result 2" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Store" from_port="through" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
<!-- Canvas layout hints for the process ports. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>
