Hi all!,
For 3 months now I have been working on a forecasting process. Using historical data, I would like to forecast the flow for the coming days. With help from @Thomas_Ott and @lionelderkrikor I arrived at the following process (see the code below). I would like to thank both of them for helping me out! But I don't want to keep disturbing them all the time, so I'm posting it in this forum now.
My idea behind building the process is as follows:
------------------------------------------------------------------------------------------------
Example:
Imagine the following days.
Monday - day before yesterday
Tuesday - yesterday
Wednesday - today
Thursday - tomorrow
It is now Wednesday and we obviously don't know today's flow yet, because the day is still in progress. But we do know the flows of Monday and Tuesday. Now the client asks for a forecast of tomorrow's flow (Thursday). Therefore we use the flows of Monday and Tuesday to determine and predict the flow on Thursday.
And so on for each individual day in the future.
------------------------------------------------------------------------------------------------
Am I doing this right in the process I built?
I'm asking because I obtained the following results (see the images below). The graph I get looks too accurate to be a realistic forecast. It looks as if the model also uses the flow of the day that should be predicted. Also, the table I get as output does not look the way I expected.
1 Scoring process:
2 Training process:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Scoring process.
  Retrieves a stored model and a test data set, windows the test data, applies
  the model to the windowed data, writes the scored examples to an Excel file
  and delivers them on result port 1.
-->
<process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Repository entry that is fed into the "model" port of Apply Model. -->
      <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve Thomas ott test VRS" width="90" x="112" y="34">
        <parameter key="repository_entry" value="../data/Thomas ott test VRS"/>
      </operator>
      <!-- Data set to be scored. -->
      <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve Test VRS with dates" width="90" x="112" y="238">
        <parameter key="repository_entry" value="../data/Test VRS with dates"/>
      </operator>
      <!--
        Windows the test data with window_size 1. Only window_size is set here;
        every other windowing parameter is left at the operator default.
        NOTE(review): the window size presumably has to match the one the model
        was trained with - confirm against the training process.
      -->
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Test" width="90" x="246" y="238">
        <parameter key="window_size" value="1"/>
        <description align="center" color="transparent" colored="false" width="126">Set the Window size parameter based on the what the optimization said was the best in Process 01.</description>
      </operator>
      <!-- Scores the windowed test data with the retrieved model. -->
      <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model" width="90" x="447" y="34">
        <list key="application_parameters"/>
        <description align="center" color="transparent" colored="false" width="126">Apply model and Windowed data, output predictions.</description>
      </operator>
      <!-- Writes the scored examples to a fixed absolute path on the author's machine. -->
      <operator activated="true" class="write_excel" compatibility="8.0.001" expanded="true" height="82" name="Write Excel" width="90" x="581" y="34">
        <parameter key="excel_file" value="/Users/Maurits/Documents/BA 3/Minor/stage/Tests/SVM/Bedum/Output RapidMiner Thomas ott Bedum Test.xlsx"/>
      </operator>
      <!-- Wiring: model + windowed test data into Apply Model, then Write Excel, then result 1. -->
      <connect from_op="Retrieve Thomas ott test VRS" from_port="output" to_op="Apply Model" to_port="model"/>
      <connect from_op="Retrieve Test VRS with dates" from_port="output" to_op="Windowing Test" to_port="example set input"/>
      <connect from_op="Windowing Test" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Write Excel" to_port="input"/>
      <connect from_op="Write Excel" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Training process.
  Reads the training series, flags rows with a Maintainence indicator and keeps
  only the rows where it is 0, grid-searches SVM and sliding-window validation
  settings, stores the model delivered by the optimization, and applies that
  model back to the windowed training data as a sanity check.
-->
<process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Deactivated (activated="false") and not referenced by any connection in this process. -->
      <operator activated="false" class="write_excel" compatibility="8.0.001" expanded="true" height="82" name="Write Excel" width="90" x="1519" y="187">
        <parameter key="excel_file" value="/Users/Maurits/Documents/BA 3/Minor/stage/Tests/SVM/Output RapidMiner Thomas ott.xlsx"/>
      </operator>
      <!-- Training data set. -->
      <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve Train VRS with dates" width="90" x="45" y="187">
        <parameter key="repository_entry" value="../data/Train VRS with dates"/>
      </operator>
      <!-- Orders the examples by the "time" attribute; the sort direction is left at the operator default. -->
      <operator activated="true" class="sort" compatibility="8.0.001" expanded="true" height="82" name="Sort" width="90" x="45" y="34">
        <parameter key="attribute_name" value="time"/>
      </operator>
      <!-- Keeps the single attribute "data" (the description text still calls it 'A'). -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="data"/>
        <description align="center" color="transparent" colored="false" width="126">Select the 'A' column</description>
      </operator>
      <!--
        Lags "data" by 1. Presumably this yields the attribute that Generate
        Attributes refers to as [data-1] - TODO confirm the generated name.
      -->
      <operator activated="true" class="series:lag_series" compatibility="7.4.000" expanded="true" height="82" name="Lag Series" width="90" x="313" y="34">
        <list key="attributes">
          <parameter key="data" value="1"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">Lag 'A' column for striping out spikes</description>
      </operator>
      <!--
        Computes the standard deviation of "data". The aggregated output goes to
        Extract Macro; the "original" port passes the data on to Generate Attributes.
      -->
      <operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate" width="90" x="447" y="34">
        <list key="aggregation_attributes">
          <parameter key="data" value="standard_deviation"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">Calculate std dev of 'A', push to macro</description>
      </operator>
      <!--
        Stores the value of standard_deviation(data) from example 1 in the macro "stdev".
        NOTE(review): no operator in this process references %{stdev}, so the macro
        is currently unused.
      -->
      <operator activated="true" class="extract_macro" compatibility="8.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="648" y="34">
        <parameter key="macro" value="stdev"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="attribute_name" value="standard_deviation(data)"/>
        <parameter key="example_index" value="1"/>
        <list key="additional_macros"/>
        <description align="center" color="transparent" colored="false" width="126">extract std dev value to use in Generate Attributes</description>
      </operator>
      <!--
        Creates the indicator attribute "Maintainence": 1 when data is smaller than
        ([data-1] minus data), otherwise 0.
        The less-than sign in the expression is written as &lt; because a literal
        '<' is not allowed inside an XML attribute value; the parsed expression is
        unchanged.
        NOTE(review): the Extract Macro description says the std dev is meant to be
        used here, but the expression does not use %{stdev}. Confirm whether the
        threshold was intended to be [data-1] minus %{stdev}.
      -->
      <operator activated="true" class="generate_attributes" compatibility="8.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="648" y="238">
        <list key="function_descriptions">
          <parameter key="Maintainence" value="if(data &lt; ([data-1]-data), 1, 0)"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">Create a Maintenance attribute to help filter out the days it's in maintenance mode</description>
      </operator>
      <!-- Keeps only the examples whose Maintainence indicator equals 0. -->
      <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="782" y="238">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Maintainence.eq.0"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">Select only non maintenance mode days</description>
      </operator>
      <!-- Reduces the example set to the single attribute "data" again before optimization. -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="916" y="238">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="data"/>
        <description align="center" color="transparent" colored="false" width="126">Select 'A' again</description>
      </operator>
      <!--
        Grid search over the SVM parameters (kernel_gamma, C) and the sliding-window
        validation parameters (cumulative_training, training window width and step
        size, test window width). The values set on the inner operators below are
        the ones stored in the export; the grid overrides them during the search.
      -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="8.0.001" expanded="true" height="145" name="Optimize Parameters (Grid)" width="90" x="1050" y="238">
        <list key="parameters">
          <parameter key="Validation.cumulative_training" value="true,false"/>
          <parameter key="SVM.kernel_gamma" value="[0.01;1;5;logarithmic]"/>
          <parameter key="SVM.C" value="[0;10000;4;linear]"/>
          <parameter key="Validation.training_window_width" value="[40;60;5;linear]"/>
          <parameter key="Validation.training_window_step_size" value="[4;6;2;linear]"/>
          <parameter key="Validation.test_window_width" value="[3;5;2;linear]"/>
        </list>
        <process expanded="true">
          <!-- Defines the macro day_ahead = 1, which is used as the training window size. -->
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Macro" width="90" x="45" y="34">
            <parameter key="macro" value="day_ahead"/>
            <parameter key="value" value="1"/>
          </operator>
          <!--
            Windows the series with window_size %{day_ahead} and creates a label from
            "data". The horizon is not set, so the operator default applies.
            NOTE(review): the post describes predicting a day from the two days
            before the previous day (two past values, one day skipped). With a window
            size of 1 only one past value is used - confirm window_size and horizon
            against that intent.
          -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Train" width="90" x="179" y="34">
            <parameter key="window_size" value="%{day_ahead}"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="data"/>
          </operator>
          <!--
            Windows the "original" output of Windowing Train (window_size 1, no label
            settings) and delivers it on result 2 of the optimization.
          -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Test" width="90" x="380" y="187">
            <parameter key="window_size" value="1"/>
          </operator>
          <!-- Sliding-window validation: SVM in the training subprocess, apply + regression performance in the testing subprocess. -->
          <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
            <parameter key="training_window_width" value="60"/>
            <parameter key="training_window_step_size" value="6"/>
            <parameter key="test_window_width" value="5"/>
            <process expanded="true">
              <!-- Learner: SVM with a radial kernel. -->
              <operator activated="true" class="support_vector_machine" compatibility="8.0.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
                <parameter key="kernel_type" value="radial"/>
                <parameter key="C" value="10000.0"/>
              </operator>
              <connect from_port="training" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
                <list key="application_parameters"/>
              </operator>
              <!-- Main criterion for the optimization: root mean squared error. -->
              <operator activated="true" class="performance_regression" compatibility="8.0.001" expanded="true" height="82" name="Performance (2)" width="90" x="246" y="34">
                <parameter key="main_criterion" value="root_mean_squared_error"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
              <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- Logs the SVM and validation parameter values, the performance and the macro value. -->
          <operator activated="true" class="log" compatibility="8.0.001" expanded="true" height="82" name="Log" width="90" x="581" y="85">
            <parameter key="filename" value="tmp"/>
            <list key="log">
              <parameter key="C" value="operator.SVM.parameter.C"/>
              <parameter key="Gamma" value="operator.SVM.parameter.kernel_gamma"/>
              <parameter key="Training Width" value="operator.Validation.parameter.training_window_width"/>
              <parameter key="Step Width" value="operator.Validation.parameter.training_window_step_size"/>
              <parameter key="Testing Width" value="operator.Validation.parameter.test_window_width"/>
              <parameter key="Perf" value="operator.Validation.value.performance"/>
              <parameter key="Set Macro Value" value="operator.Set Macro.value.macro_value"/>
            </list>
          </operator>
          <!-- Inner wiring: result 1 carries the validation model, result 2 the windowed training data. -->
          <connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
          <connect from_op="Set Macro" from_port="through 1" to_op="Windowing Train" to_port="example set input"/>
          <connect from_op="Windowing Train" from_port="example set output" to_op="Validation" to_port="training"/>
          <connect from_op="Windowing Train" from_port="original" to_op="Windowing Test" to_port="example set input"/>
          <connect from_op="Windowing Test" from_port="example set output" to_port="result 2"/>
          <connect from_op="Validation" from_port="model" to_port="result 1"/>
          <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Optimize and store optimized model</description>
      </operator>
      <!-- Saves the model from result 1 of the optimization to the repository. -->
      <operator activated="true" class="store" compatibility="8.0.001" expanded="true" height="68" name="Store" width="90" x="1251" y="187">
        <parameter key="repository_entry" value="../data/Thomas ott test VRS"/>
        <description align="center" color="transparent" colored="false" width="126">Store optimized model</description>
      </operator>
      <!--
        Applies the stored model to result 2 of the optimization, i.e. to the
        windowed training data itself.
        NOTE(review): these are in-sample predictions, so a very close fit in this
        output is not evidence of forecast accuracy on unseen days.
      -->
      <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="1385" y="289">
        <list key="application_parameters"/>
        <description align="center" color="transparent" colored="false" width="126">Sanity Check. Review 'A' time series against predicted 'A' time series from training data set.</description>
      </operator>
      <!-- Top-level wiring: result 1 = best parameter set, result 2 = performance, result 3 = sanity-check predictions. -->
      <connect from_op="Retrieve Train VRS with dates" from_port="output" to_op="Sort" to_port="example set input"/>
      <connect from_op="Sort" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Lag Series" to_port="example set input"/>
      <connect from_op="Lag Series" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Aggregate" from_port="original" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 2"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Store" to_port="input"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="result 2" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Store" from_port="through" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>