Predicting values based on historical data

Hi,
I'm doing an assignment about predicting flows based on historical data: using the data from the last few days, I would like to predict tomorrow's flow. Below you can find my XML code.
I built process #1 to optimize the parameters of the validation operator and the SVM algorithm. The log file produced this table.
I used these parameters for process #2 because, in my opinion, they are the best ones to use, but I obtained really strange results and graphs. Below is a picture of the predicted graph (blue) and the original flow for these days (red).
I'm probably doing something wrong, but I don't know what exactly. Of course the flows do have some strange spikes and outliers, but the direction of the flow is complete nonsense. Is there someone who could help me out? I can share the datasets if anyone is interested.
If there are any questions, feel free to send me a message. I'm really stuck at the moment and I have to hand in this assignment at the end of the month.
With kind regards,
Maurits Freriks
P.S. I'm still a student, so this is for educational purposes.
#1
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.6.001">
  <!--
    Process #1: grid search over the SVM parameters (kernel_gamma, C) and the
    sliding-window validation parameters (cumulative_training, training_window_width).

    Change versus the original post: the windowing used for tuning now matches the
    windowing of process #2, where the tuned values are reused (window_size = 5,
    horizon = 2). Originally "Windowing Train" took its window_size from the
    "horizon" macro and never set a windowing horizon, while "Performance" was
    evaluated with horizon = %{horizon}; the labels and the performance measure
    therefore did not refer to the same forecasting step.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the training series and keep only attribute "A". -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve test data only flow oktober days train set" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../data/test data only flow oktober days train set"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.6.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="A"/>
      </operator>
      <!-- Outlier scoring on "A"; the next operator derives a 0/1 flag from "score"
           (presumably the score attribute added by HBOS; confirm against the data). -->
      <operator activated="true" class="anomalydetection:Histogram-based Outlier Score (HBOS)" compatibility="2.4.001" expanded="true" height="82" name="Histogram-based Outlier Score (HBOS)" width="90" x="313" y="34">
        <list key="histogram properties">
          <parameter key="A" value="fixed binwidth.-1"/>
        </list>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="Maintainence" value="if(score>0.7,1,0)"/>
        </list>
      </operator>
      <!-- Grid search: every combination listed below is run through the inner process. -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="7.6.001" expanded="true" height="145" name="Optimize Parameters (Grid)" width="90" x="581" y="34">
        <list key="parameters">
          <parameter key="Validation.cumulative_training" value="true,false"/>
          <parameter key="SVM.kernel_gamma" value="[0.1;0.8;5;logarithmic]"/>
          <parameter key="SVM.C" value="[6000;10000;4;linear]"/>
          <parameter key="Validation.training_window_width" value="[190;220;10;linear]"/>
        </list>
        <process expanded="true">
          <!-- Forecast horizon shared by windowing, validation and performance. -->
          <operator activated="true" class="set_macro" compatibility="7.6.001" expanded="true" height="82" name="Set Macro" width="90" x="45" y="34">
            <parameter key="macro" value="horizon"/>
            <parameter key="value" value="2"/>
          </operator>
          <!-- Window of 5 past values, label taken %{horizon} steps ahead
               (same settings as "Windowing" in process #2). -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Train" width="90" x="179" y="34">
            <parameter key="window_size" value="5"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="A"/>
            <parameter key="horizon" value="%{horizon}"/>
          </operator>
          <!-- training_window_width is overwritten by the grid search above;
               horizon is set as on "Validation" in process #2. -->
          <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
            <parameter key="training_window_width" value="220"/>
            <parameter key="training_window_step_size" value="5"/>
            <parameter key="test_window_width" value="4"/>
            <parameter key="horizon" value="%{horizon}"/>
            <process expanded="true">
              <!-- kernel_gamma and C are overwritten by the grid search above. -->
              <operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
                <parameter key="kernel_type" value="radial"/>
                <parameter key="kernel_gamma" value="0.8"/>
                <parameter key="C" value="10000.0"/>
              </operator>
              <connect from_port="training" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
                <parameter key="horizon" value="%{horizon}"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- Records the parameter values and the validation performance of each run. -->
          <operator activated="true" class="log" compatibility="7.6.001" expanded="true" height="82" name="Log" width="90" x="581" y="85">
            <parameter key="filename" value="tmp"/>
            <list key="log">
              <parameter key="C" value="operator.SVM.parameter.C"/>
              <parameter key="Gamma" value="operator.SVM.parameter.kernel_gamma"/>
              <parameter key="Training Width" value="operator.Validation.parameter.training_window_width"/>
              <parameter key="Step Width" value="operator.Validation.parameter.training_window_step_size"/>
              <parameter key="Testing Width" value="operator.Validation.parameter.test_window_width"/>
              <parameter key="Perf" value="operator.Validation.value.performance"/>
              <parameter key="Set Macro Value" value="operator.Set Macro.value.macro_value"/>
            </list>
          </operator>
          <!-- Unlabelled windows of the original series, same window size as training. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Test" width="90" x="380" y="187">
            <parameter key="window_size" value="5"/>
          </operator>
          <connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
          <connect from_op="Set Macro" from_port="through 1" to_op="Windowing Train" to_port="example set input"/>
          <connect from_op="Windowing Train" from_port="example set output" to_op="Validation" to_port="training"/>
          <connect from_op="Windowing Train" from_port="original" to_op="Windowing Test" to_port="example set input"/>
          <connect from_op="Validation" from_port="model" to_port="result 1"/>
          <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <connect from_op="Windowing Test" from_port="example set output" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve test data only flow oktober days train set" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Histogram-based Outlier Score (HBOS)" to_port="example set"/>
      <connect from_op="Histogram-based Outlier Score (HBOS)" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 2"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
#2
<?xml version="1.0" encoding="UTF-8"?>
<process version="7.6.001">
  <!--
    Process #2: windows the train and test series identically (window_size = 5,
    horizon = 2), validates an RBF-kernel SVM on the training windows with a
    sliding-window validation, and applies the model delivered by the validation
    to the windowed test set.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Test branch: load and window the test series. -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve VRIES test set" width="90" x="45" y="238">
        <parameter key="repository_entry" value="../data/VRIES test set"/>
      </operator>
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="179" y="238">
        <parameter key="window_size" value="5"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="A"/>
        <parameter key="horizon" value="2"/>
      </operator>
      <!-- Training branch: load and window the training series with the same settings. -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve VRIES train set" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../data/VRIES train set"/>
      </operator>
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="179" y="34">
        <parameter key="window_size" value="5"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="A"/>
        <parameter key="horizon" value="2"/>
      </operator>
      <!-- Sliding-window validation; the window width, kernel_gamma and C below are
           the values picked from the grid-search log of process #1. -->
      <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="313" y="34">
        <parameter key="training_window_width" value="214"/>
        <parameter key="training_window_step_size" value="5"/>
        <parameter key="test_window_width" value="4"/>
        <parameter key="horizon" value="2"/>
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="0.152"/>
            <parameter key="C" value="7000.0"/>
          </operator>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="horizon" value="2"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Scores the windowed test set with the model coming out of "Validation". -->
      <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="313" y="238">
        <list key="application_parameters"/>
      </operator>
      <connect from_op="Retrieve VRIES test set" from_port="output" to_op="Windowing (2)" to_port="example set input"/>
      <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Retrieve VRIES train set" from_port="output" to_op="Windowing" to_port="example set input"/>
      <connect from_op="Windowing" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
Answers
-
0
-
I was helping @maurits_freriks offline for a bit, but I'm crunched with work. My suggestion is to filter out the downward spikes, because my conversations with him indicated that these were times when the system was in maintenance mode.
0 -
Hereby the datasets:
https://drive.google.com/open?id=12XjPKw2diSLnc9-MtAv_--SVfntA3nR-
Regards,
Maurits Freriks
0 -
A reaction to @Thomas_Ott: he helped me very well, and I really appreciate his effort! But I don't yet have the results I would like to have.
That's correct, those downward spikes are sometimes related to maintenance, but they could also be something like a glitch. So sometimes you know beforehand that the flow will be lower, but sometimes it is a surprise.
0 -
Hi @maurits_freriks,
I obtain this with a Deep Learning model without optimization (window size = 20 / horizon = 1):
The spikes are difficult to predict in my opinion.
To test and maybe improve the model, you can try to increase the window size in the following process:
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.0.001">
  <!--
    Trains a Deep Learning model on windowed training data, then forecasts
    %{futureDays} steps recursively: each prediction is shifted into the window
    and fed back as input for the next step. The forecasts are finally joined
    with the test data on "time".

    Changes versus the original post:
    * The context macros now use the names the process actually references
      (WindowSize, futureDays). Before, "windowSize" and "futureMonths" were
      declared but never read. The values equal those set by the Set Macro
      operators below, so behaviour is unchanged.
    * Removed the leftover breakpoints="after" on "Windowing for Training",
      which paused every interactive run.
  -->
  <context>
    <input/>
    <output/>
    <macros>
      <macro>
        <key>futureDays</key>
        <value>40</value>
      </macro>
      <macro>
        <key>horizon</key>
        <value>1</value>
      </macro>
      <macro>
        <key>WindowSize</key>
        <value>20</value>
      </macro>
    </macros>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Training data; the absolute path is specific to the author's machine. -->
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="85">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Train.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F274"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="time.true.date_time.attribute"/>
          <parameter key="1" value="data.true.real.attribute"/>
          <parameter key="2" value="C.true.attribute_value.attribute"/>
          <parameter key="3" value="D.true.attribute_value.attribute"/>
          <parameter key="4" value="E.true.attribute_value.attribute"/>
          <parameter key="5" value="F.true.attribute_value.attribute"/>
        </list>
      </operator>
      <!-- Sets the macros WindowSize, horizon and futureDays used further down. -->
      <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Set Predictions_Params" width="90" x="179" y="85">
        <process expanded="true">
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
            <parameter key="macro" value="WindowSize"/>
            <parameter key="value" value="20"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
            <parameter key="macro" value="horizon"/>
            <parameter key="value" value="1"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Future_Days" width="90" x="313" y="34">
            <parameter key="macro" value="futureDays"/>
            <parameter key="value" value="40"/>
          </operator>
          <connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
          <connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
          <connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Days" to_port="through 1"/>
          <connect from_op="Set Future_Days" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- "time" becomes the id; only "data" is selected and rows with missing values are dropped. -->
      <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
        <parameter key="attribute_name" value="time"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="85">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="data"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="85">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Labelled windows for training. -->
      <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="782" y="85">
        <parameter key="window_size" value="%{WindowSize}"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="data"/>
        <parameter key="horizon" value="%{horizon}"/>
      </operator>
      <!-- Two hidden layers of 50 units each; everything else is left at operator defaults. -->
      <operator activated="true" class="h2o:deep_learning" compatibility="7.6.001" expanded="true" height="82" name="Deep Learning" width="90" x="916" y="34">
        <enumeration key="hidden_layer_sizes">
          <parameter key="hidden_layer_sizes" value="50"/>
          <parameter key="hidden_layer_sizes" value="50"/>
        </enumeration>
        <enumeration key="hidden_dropout_ratios"/>
        <list key="expert_parameters"/>
        <list key="expert_parameters_"/>
      </operator>
      <!-- Windows of the original series without create_label. NOTE(review): label_attribute
           "inputYt" looks like a leftover, since no attribute of that name appears elsewhere. -->
      <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Application" width="90" x="916" y="136">
        <parameter key="window_size" value="%{WindowSize}"/>
        <parameter key="label_attribute" value="inputYt"/>
      </operator>
      <!-- Keeps only the example at position %{exampleCount} (presumably the last window;
           this relies on Extract Macro's default of counting examples) and stores it as
           "data" to seed the forecast loop. -->
      <operator activated="true" class="extract_macro" compatibility="8.0.001" expanded="true" height="68" name="Extract Example Count" width="90" x="1117" y="136">
        <parameter key="macro" value="exampleCount"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="filter_example_range" compatibility="8.0.001" expanded="true" height="82" name="Filter Example Range" width="90" x="1251" y="136">
        <parameter key="first_example" value="%{exampleCount}"/>
        <parameter key="last_example" value="%{exampleCount}"/>
      </operator>
      <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember" width="90" x="1385" y="136">
        <parameter key="name" value="data"/>
      </operator>
      <!-- Recursive forecast: one iteration per future day. -->
      <operator activated="true" class="loop" compatibility="8.0.001" expanded="true" height="82" name="Loop" width="90" x="1117" y="34">
        <parameter key="iterations" value="%{futureDays}"/>
        <process expanded="true">
          <!-- Score the stored window; one copy goes to the loop output, the other is
               turned into the input window for the next iteration. -->
          <operator activated="true" class="recall" compatibility="8.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
            <parameter key="name" value="data"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="30"/>
          <operator activated="true" class="materialize_data" compatibility="8.0.001" expanded="true" height="82" name="Materialize Data (2)" width="90" x="179" y="165"/>
          <!-- NOTE(review): the date is advanced only on the copy fed back into the loop, so
               the copy sent to the loop output keeps the window's own "time". Confirm that this
               matches the date the prediction refers to before joining with the test data. -->
          <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Increase Date (2)" width="90" x="313" y="187">
            <list key="function_descriptions">
              <parameter key="time" value="date_add(time, 1, DATE_UNIT_DAY)"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="187">
            <parameter key="attribute_name" value="prediction(label)"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- WARNING: the names data-19 ... data-0 below are hard-coded and are presumably the
               windowed attribute names for WindowSize = 20. If WindowSize is changed, the
               attribute dropped here and the whole rename chain must be updated to match. -->
          <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="289">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="data-19"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Shifts every value one slot towards the oldest position and puts the new
               prediction into data-0. -->
          <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="313" y="289">
            <parameter key="old_name" value="data-18"/>
            <parameter key="new_name" value="data-19"/>
            <list key="rename_additional_attributes">
              <parameter key="data-17" value="data-18"/>
              <parameter key="data-16" value="data-17"/>
              <parameter key="data-15" value="data-16"/>
              <parameter key="data-14" value="data-15"/>
              <parameter key="data-13" value="data-14"/>
              <parameter key="data-12" value="data-13"/>
              <parameter key="data-11" value="data-12"/>
              <parameter key="data-10" value="data-11"/>
              <parameter key="data-9" value="data-10"/>
              <parameter key="data-8" value="data-9"/>
              <parameter key="data-7" value="data-8"/>
              <parameter key="data-6" value="data-7"/>
              <parameter key="data-5" value="data-6"/>
              <parameter key="data-4" value="data-5"/>
              <parameter key="data-3" value="data-4"/>
              <parameter key="data-2" value="data-3"/>
              <parameter key="data-1" value="data-2"/>
              <parameter key="data-0" value="data-1"/>
              <parameter key="prediction(label)" value="data-0"/>
            </list>
          </operator>
          <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="447" y="289">
            <parameter key="name" value="data"/>
          </operator>
          <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
          <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
          <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
          <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Collect the per-iteration predictions and join them with the test data on "time". -->
      <operator activated="true" class="append" compatibility="8.0.001" expanded="true" height="82" name="Append" width="90" x="1251" y="34"/>
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel (2)" width="90" x="916" y="238">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Test.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B32"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="time.true.date_time.attribute"/>
          <parameter key="1" value="data.true.numeric.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="1385" y="34">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="time" value="time"/>
        </list>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Set Predictions_Params" to_port="in 1"/>
      <connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
      <connect from_op="Windowing for Training" from_port="example set output" to_op="Deep Learning" to_port="training set"/>
      <connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
      <connect from_op="Deep Learning" from_port="model" to_op="Loop" to_port="input 1"/>
      <connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
      <connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
      <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Join" to_port="left"/>
      <connect from_op="Read Excel (2)" from_port="output" to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
I hope it will help you in your project.
Regards,
Lionel
0 -
Thanks for helping me out!
Correct me if I'm wrong: the graph you showed is not really accurate, right? Or do you think this is the best possible way to predict? Should a NN give a better result? If I try to run this with an optimization operator it takes me days because my device is too slow; how about yours?
Regards,
Maurits Freriks
0 -
Hi @maurits_freriks,
You're right, the graph I showed is of course not the best possible way to predict, only a lead; however,
"who on Earth can boast of finding the best possible way to predict ......?"....
More seriously, I have the same problem as you: the optimization process takes too long. I optimize only one parameter at a time.
With Neural Networks, I haven't had good results:
- the predicted curve is constant (a horizontal line), or
- the predicted curve increases in the second part of the test points (so the predicted curve moves away from the test curve).
For the moment, the best fit I have found is still with Deep Learning (n_epochs = 3.3) and window size = 82 / horizon = 1.
Here the curve(s) :
and here the associated process :
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<context>
<input/>
<output/>
<macros>
<macro>
<key>futureMonths</key>
<value>15</value>
</macro>
<macro>
<key>horizon</key>
<value>1</value>
</macro>
<macro>
<key>windowSize</key>
<value>6</value>
</macro>
</macros>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="85">
<parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Train.xlsx"/>
<parameter key="imported_cell_range" value="A1:F274"/>
<parameter key="first_row_as_names" value="false"/>
<list key="annotations">
<parameter key="0" value="Name"/>
</list>
<list key="data_set_meta_data_information">
<parameter key="0" value="time.true.date_time.attribute"/>
<parameter key="1" value="data.true.real.attribute"/>
<parameter key="2" value="C.true.attribute_value.attribute"/>
<parameter key="3" value="D.true.attribute_value.attribute"/>
<parameter key="4" value="E.true.attribute_value.attribute"/>
<parameter key="5" value="F.true.attribute_value.attribute"/>
</list>
</operator>
<operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Set Predictions_Params" width="90" x="179" y="85">
<process expanded="true">
<operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
<parameter key="macro" value="WindowSize"/>
<parameter key="value" value="82"/>
</operator>
<operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
<parameter key="macro" value="horizon"/>
<parameter key="value" value="1"/>
</operator>
<operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Future_Days" width="90" x="313" y="34">
<parameter key="macro" value="futureDays"/>
<parameter key="value" value="40"/>
</operator>
<connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
<connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
<connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Days" to_port="through 1"/>
<connect from_op="Set Future_Days" from_port="through 1" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
<parameter key="attribute_name" value="time"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="85">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="data"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="85">
<parameter key="condition_class" value="no_missing_attributes"/>
<list key="filters_list"/>
</operator>
<operator activated="true" breakpoints="after" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="782" y="85">
<parameter key="window_size" value="%{WindowSize}"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="data"/>
<parameter key="horizon" value="%{horizon}"/>
</operator>
<!-- Optimize Parameters (Grid): grid search over two parameters of the nested "Deep Learning" operator
     (epochs as a linear range, adaptive_rate as true/false). Each combination is evaluated by the
     nested sliding window validation; the model and the performance are passed to the outside. -->
<operator name="Optimize Parameters (Grid)" class="concurrency:optimize_parameters_grid" compatibility="8.0.001" activated="true"
          expanded="true" height="124" width="90" x="983" y="34">
  <list key="parameters">
    <parameter key="Deep Learning.epochs" value="[0.1;10.1;100;linear]"/>
    <parameter key="Deep Learning.adaptive_rate" value="true,false"/>
  </list>
  <process expanded="true">
    <!-- Validation: sliding window validation with a training window of 75 and a test window of 50. -->
    <operator name="Validation" class="series:sliding_window_validation" compatibility="7.4.000" activated="true"
              expanded="true" height="124" width="90" x="380" y="34">
      <parameter key="training_window_width" value="75"/>
      <parameter key="test_window_width" value="50"/>
      <!-- Training subprocess: fits the H2O deep learning model on the training window. -->
      <process expanded="true">
        <!-- Deep Learning: two hidden layers of size 50 each; no dropout ratios or expert parameters set. -->
        <operator name="Deep Learning" class="h2o:deep_learning" compatibility="7.6.001" activated="true"
                  expanded="true" height="82" width="90" x="179" y="34">
          <enumeration key="hidden_layer_sizes">
            <parameter key="hidden_layer_sizes" value="50"/>
            <parameter key="hidden_layer_sizes" value="50"/>
          </enumeration>
          <enumeration key="hidden_dropout_ratios"/>
          <list key="expert_parameters"/>
          <list key="expert_parameters_"/>
        </operator>
        <connect from_port="training" to_op="Deep Learning" to_port="training set"/>
        <connect from_op="Deep Learning" from_port="model" to_port="model"/>
        <portSpacing port="source_training" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_through 1" spacing="0"/>
      </process>
      <!-- Testing subprocess: applies the model to the test window and measures regression performance. -->
      <process expanded="true">
        <operator name="Apply Model (2)" class="apply_model" compatibility="8.0.001" activated="true"
                  expanded="true" height="82" width="90" x="112" y="34">
          <list key="application_parameters"/>
        </operator>
        <operator name="Performance" class="performance_regression" compatibility="8.0.001" activated="true"
                  expanded="true" height="82" width="90" x="246" y="34"/>
        <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
        <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
        <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
        <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
        <portSpacing port="source_model" spacing="0"/>
        <portSpacing port="source_test set" spacing="0"/>
        <portSpacing port="source_through 1" spacing="0"/>
        <portSpacing port="sink_averagable 1" spacing="0"/>
        <portSpacing port="sink_averagable 2" spacing="0"/>
      </process>
    </operator>
    <!-- Wiring: optimizer input goes to the validation; its model and first averagable go back out. -->
    <connect from_port="input 1" to_op="Validation" to_port="training"/>
    <connect from_op="Validation" from_port="model" to_port="model"/>
    <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_performance" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
  </process>
</operator>
<!-- Windowing for Application: windows the series with the same WindowSize macro as the training windowing.
     create_label is not set here.
     NOTE(review): label_attribute "inputYt" looks like a leftover value; presumably unused
     while no label is created. Confirm before removing. -->
<operator name="Windowing for Application" class="series:windowing" compatibility="5.2.000" activated="true"
          expanded="true" height="82" width="90" x="916" y="136">
  <parameter key="window_size" value="%{WindowSize}"/>
  <parameter key="label_attribute" value="inputYt"/>
</operator>
<!-- Extract Example Count: writes a value into the macro "exampleCount".
     The macro type is left at the operator default (presumably the number of examples; confirm). -->
<operator name="Extract Example Count" class="extract_macro" compatibility="8.0.001" activated="true"
          expanded="true" height="68" width="90" x="1117" y="136">
  <parameter key="macro" value="exampleCount"/>
  <list key="additional_macros"/>
</operator>
<!-- Filter Example Range: first and last index are both the macro exampleCount,
     so exactly one example is kept (the last window, assuming the macro holds the example count). -->
<operator name="Filter Example Range" class="filter_example_range" compatibility="8.0.001" activated="true"
          expanded="true" height="82" width="90" x="1251" y="136">
  <parameter key="first_example" value="%{exampleCount}"/>
  <parameter key="last_example" value="%{exampleCount}"/>
</operator>
<!-- Remember: stores the incoming object under the name "data"; the Loop subprocess recalls that name. -->
<operator name="Remember" class="remember" compatibility="8.0.001" activated="true"
          expanded="true" height="68" width="90" x="1385" y="136">
  <parameter key="name" value="data"/>
</operator>
<!-- Read Excel (2): reads cell range A1:B32 of Test.xlsx; row 0 is annotated as the name row.
     Column 0 is declared as date_time attribute "time", column 1 as numeric attribute "data".
     NOTE(review): the file path is absolute and user-specific; it must be adjusted on any other machine. -->
<operator name="Read Excel (2)" class="read_excel" compatibility="8.0.001" activated="true"
          expanded="true" height="68" width="90" x="916" y="238">
  <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Test.xlsx"/>
  <parameter key="imported_cell_range" value="A1:B32"/>
  <parameter key="first_row_as_names" value="false"/>
  <list key="annotations">
    <parameter key="0" value="Name"/>
  </list>
  <list key="data_set_meta_data_information">
    <parameter key="0" value="time.true.date_time.attribute"/>
    <parameter key="1" value="data.true.numeric.attribute"/>
  </list>
</operator>
<!-- Loop: runs the forecasting subprocess %{futureDays} times.
     Each iteration recalls the remembered window "data", applies the model received on input 1,
     emits the scored example on output 1, then builds the next window (date advanced by one day,
     lag attributes shifted, prediction inserted as the newest value) and remembers it as "data" again. -->
<operator name="Loop" class="loop" compatibility="8.0.001" activated="true"
          expanded="true" height="82" width="90" x="1117" y="34">
  <parameter key="iterations" value="%{futureDays}"/>
  <process expanded="true">
    <!-- Fetch the current window stored under the name "data". -->
    <operator name="Recall" class="recall" compatibility="8.0.001" activated="true"
              expanded="true" height="68" width="90" x="45" y="136">
      <parameter key="name" value="data"/>
    </operator>
    <!-- Score the window with the model coming from the loop input. -->
    <operator name="Apply Model" class="apply_model" compatibility="7.1.001" activated="true"
              expanded="true" height="82" width="90" x="179" y="30">
      <list key="application_parameters"/>
    </operator>
    <!-- One copy goes to the loop output, the other feeds the next window. -->
    <operator name="Multiply" class="multiply" compatibility="8.0.001" activated="true"
              expanded="true" height="103" width="90" x="447" y="30"/>
    <operator name="Materialize Data (2)" class="materialize_data" compatibility="8.0.001" activated="true"
              expanded="true" height="82" width="90" x="179" y="165"/>
    <!-- Advance the "time" attribute by one day. -->
    <operator name="Increase Date (2)" class="generate_attributes" compatibility="6.4.000" activated="true"
              expanded="true" height="82" width="90" x="313" y="187">
      <list key="function_descriptions">
        <parameter key="time" value="date_add(time, 1, DATE_UNIT_DAY)"/>
      </list>
    </operator>
    <!-- No target_role is given for prediction(label), so the operator default applies
         (presumably "regular", which lets the attribute be renamed below; confirm). -->
    <operator name="Set Role (2)" class="set_role" compatibility="5.3.013" activated="true"
              expanded="true" height="82" width="90" x="447" y="187">
      <parameter key="attribute_name" value="prediction(label)"/>
      <list key="set_additional_roles"/>
    </operator>
    <!-- Inverted single selection: drops the attribute "data-81" and keeps everything else. -->
    <operator name="Select Attributes (3)" class="select_attributes" compatibility="8.0.001" activated="true"
              expanded="true" height="82" width="90" x="179" y="289">
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="data-81"/>
      <parameter key="invert_selection" value="true"/>
    </operator>
    <!-- Shift every lag attribute by one position: data-80 becomes data-81, and so on down to
         data-0 becoming data-1; finally prediction(label) becomes the new data-0.
         NOTE(review): the attribute names are hard-coded; they presumably have to match the
         window size given by the WindowSize macro. Confirm when changing the window size. -->
    <operator name="Rename" class="rename" compatibility="8.0.001" activated="true"
              expanded="true" height="82" width="90" x="313" y="289">
      <parameter key="old_name" value="data-80"/>
      <parameter key="new_name" value="data-81"/>
      <list key="rename_additional_attributes">
        <parameter key="data-79" value="data-80"/>
        <parameter key="data-78" value="data-79"/>
        <parameter key="data-77" value="data-78"/>
        <parameter key="data-76" value="data-77"/>
        <parameter key="data-75" value="data-76"/>
        <parameter key="data-74" value="data-75"/>
        <parameter key="data-73" value="data-74"/>
        <parameter key="data-72" value="data-73"/>
        <parameter key="data-71" value="data-72"/>
        <parameter key="data-70" value="data-71"/>
        <parameter key="data-69" value="data-70"/>
        <parameter key="data-68" value="data-69"/>
        <parameter key="data-67" value="data-68"/>
        <parameter key="data-66" value="data-67"/>
        <parameter key="data-65" value="data-66"/>
        <parameter key="data-64" value="data-65"/>
        <parameter key="data-63" value="data-64"/>
        <parameter key="data-62" value="data-63"/>
        <parameter key="data-61" value="data-62"/>
        <parameter key="data-60" value="data-61"/>
        <parameter key="data-59" value="data-60"/>
        <parameter key="data-58" value="data-59"/>
        <parameter key="data-57" value="data-58"/>
        <parameter key="data-56" value="data-57"/>
        <parameter key="data-55" value="data-56"/>
        <parameter key="data-54" value="data-55"/>
        <parameter key="data-53" value="data-54"/>
        <parameter key="data-52" value="data-53"/>
        <parameter key="data-51" value="data-52"/>
        <parameter key="data-50" value="data-51"/>
        <parameter key="data-49" value="data-50"/>
        <parameter key="data-48" value="data-49"/>
        <parameter key="data-47" value="data-48"/>
        <parameter key="data-46" value="data-47"/>
        <parameter key="data-45" value="data-46"/>
        <parameter key="data-44" value="data-45"/>
        <parameter key="data-43" value="data-44"/>
        <parameter key="data-42" value="data-43"/>
        <parameter key="data-41" value="data-42"/>
        <parameter key="data-40" value="data-41"/>
        <parameter key="data-39" value="data-40"/>
        <parameter key="data-38" value="data-39"/>
        <parameter key="data-37" value="data-38"/>
        <parameter key="data-36" value="data-37"/>
        <parameter key="data-35" value="data-36"/>
        <parameter key="data-34" value="data-35"/>
        <parameter key="data-33" value="data-34"/>
        <parameter key="data-32" value="data-33"/>
        <parameter key="data-31" value="data-32"/>
        <parameter key="data-30" value="data-31"/>
        <parameter key="data-29" value="data-30"/>
        <parameter key="data-28" value="data-29"/>
        <parameter key="data-27" value="data-28"/>
        <parameter key="data-26" value="data-27"/>
        <parameter key="data-25" value="data-26"/>
        <parameter key="data-24" value="data-25"/>
        <parameter key="data-23" value="data-24"/>
        <parameter key="data-22" value="data-23"/>
        <parameter key="data-21" value="data-22"/>
        <parameter key="data-20" value="data-21"/>
        <parameter key="data-19" value="data-20"/>
        <parameter key="data-18" value="data-19"/>
        <parameter key="data-17" value="data-18"/>
        <parameter key="data-16" value="data-17"/>
        <parameter key="data-15" value="data-16"/>
        <parameter key="data-14" value="data-15"/>
        <parameter key="data-13" value="data-14"/>
        <parameter key="data-12" value="data-13"/>
        <parameter key="data-11" value="data-12"/>
        <parameter key="data-10" value="data-11"/>
        <parameter key="data-9" value="data-10"/>
        <parameter key="data-8" value="data-9"/>
        <parameter key="data-7" value="data-8"/>
        <parameter key="data-6" value="data-7"/>
        <parameter key="data-5" value="data-6"/>
        <parameter key="data-4" value="data-5"/>
        <parameter key="data-3" value="data-4"/>
        <parameter key="data-2" value="data-3"/>
        <parameter key="data-1" value="data-2"/>
        <parameter key="data-0" value="data-1"/>
        <parameter key="prediction(label)" value="data-0"/>
      </list>
    </operator>
    <!-- Store the shifted window under "data" for the next iteration. -->
    <operator name="Remember (2)" class="remember" compatibility="8.0.001" activated="true"
              expanded="true" height="68" width="90" x="447" y="289">
      <parameter key="name" value="data"/>
    </operator>
    <!-- Wiring of the subprocess. -->
    <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
    <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
    <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
    <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
    <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
    <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
</operator>
<!-- Append: merges the example sets delivered by the Loop output into a single example set. -->
<operator name="Append" class="append" compatibility="8.0.001" activated="true"
          expanded="true" height="82" width="90" x="1251" y="34"/>
<!-- Join: joins its left and right inputs on the attribute "time" (explicit key pair time = time)
     instead of using the id attribute as the key. -->
<operator name="Join" class="join" compatibility="8.0.001" activated="true"
          expanded="true" height="82" width="90" x="1385" y="34">
  <parameter key="use_id_attribute_as_key" value="false"/>
  <list key="key_attributes">
    <parameter key="time" value="time"/>
  </list>
</operator>
<!-- Wiring of the enclosing process. -->
<!-- Preprocessing chain: Excel source, macro setup, role, attribute selection, missing value filter, training windowing. -->
<connect from_op="Read Excel" from_port="output" to_op="Set Predictions_Params" to_port="in 1"/>
<connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
<!-- The windowed data goes to the optimizer; the unwindowed original goes to the application windowing. -->
<connect from_op="Windowing for Training" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
<!-- Optimizer outputs: performance and parameter set are process results; the model feeds the Loop. -->
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 3"/>
<connect from_op="Optimize Parameters (Grid)" from_port="model" to_op="Loop" to_port="input 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="parameter set" to_port="result 2"/>
<!-- Application branch: count the windows, keep one example by index, and remember it for the Loop. -->
<connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
<connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
<connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
<!-- Result assembly: appended loop output (left) is joined with the second Excel read (right) and returned as result 1. -->
<connect from_op="Read Excel (2)" from_port="output" to_op="Join" to_port="right"/>
<connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Append" from_port="merged set" to_op="Join" to_port="left"/>
<connect from_op="Join" from_port="join" to_port="result 1"/>
<!-- GUI port spacing for the process source and result ports. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>
I hope it will be helpful,
Regards,
Lionel
0 -
Try filtering out the downward spikes and running the model again. I think they're really distorting the analysis.
0