<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner sample process: forecast a gas price from windowed time series data.

  Operator chain of the top-level process:
    1. Retrieve Prices of Gas Station : loads the sample data set from the repository.
    2. Filter Example Range           : drops examples 1 to 16 (inverted range filter).
    3. Windowing                      : builds windows of 48 values with a 1-value horizon.
    4. Cross Validation               : 10 folds; trains Gradient Boosted Trees and scores them.

  The description elements hold HTML-formatted canvas notes. Their line-break
  markup is entity-escaped so that this file stays well-formed XML. Do not
  re-indent the text inside a description element: whitespace there is data.
-->
<process version="9.1.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.1.000" expanded="true"
            name="Process" origin="GENERATED_SAMPLE">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">

      <!-- Step 1: load the sample data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="9.1.000" expanded="true"
                height="68" name="Retrieve Prices of Gas Station" origin="GENERATED_SAMPLE"
                width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/Time Series/data sets/Prices of Gas Station"/>
      </operator>

      <!-- Step 2: the range 1..16 is selected and then inverted, i.e. the first 16 examples are removed. -->
      <operator activated="true" class="filter_example_range" compatibility="9.1.000" expanded="true"
                height="82" name="Filter Example Range" origin="GENERATED_SAMPLE"
                width="90" x="179" y="34">
        <parameter key="first_example" value="1"/>
        <parameter key="last_example" value="16"/>
        <parameter key="invert_filter" value="true"/>
      </operator>

      <!--
        Step 3: windowing of the single price attribute, indexed by the "date" attribute.
        A breakpoint is set after this operator (breakpoints="after").
        NOTE(review): with window_size 48 and step_size 24, consecutive windows share
        24 values; confirm that the sampling used by the cross validation below is
        appropriate for such overlapping, time-ordered examples.
      -->
      <operator activated="true" breakpoints="after" class="time_series:windowing"
                compatibility="9.1.000" expanded="true" height="82" name="Windowing"
                origin="GENERATED_SAMPLE" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="gas price / euro (times 1000)"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="has_indices" value="true"/>
        <parameter key="indices_attribute" value="date"/>
        <parameter key="window_size" value="48"/>
        <parameter key="no_overlapping_windows" value="false"/>
        <parameter key="step_size" value="24"/>
        <parameter key="create_horizon_(labels)" value="true"/>
        <parameter key="horizon_attribute" value="gas price / euro (times 1000)"/>
        <parameter key="horizon_size" value="1"/>
        <parameter key="horizon_offset" value="23"/>
      </operator>

      <!-- Step 4: 10-fold cross validation with a training and a testing subprocess. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="9.1.000"
                expanded="true" height="145" name="Cross Validation" origin="GENERATED_SAMPLE"
                width="90" x="782" y="34">
        <parameter key="split_on_batch_attribute" value="false"/>
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_folds" value="10"/>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <parameter key="enable_parallel_execution" value="true"/>

        <!-- Training subprocess: fit the Gradient Boosted Trees model on the training set. -->
        <process expanded="true">
          <operator activated="true" class="h2o:gradient_boosted_trees" compatibility="9.0.000"
                    expanded="true" height="103" name="Gradient Boosted Trees"
                    origin="GENERATED_SAMPLE" width="90" x="179" y="34">
            <parameter key="number_of_trees" value="100"/>
            <parameter key="reproducible" value="false"/>
            <parameter key="maximum_number_of_threads" value="4"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <parameter key="maximal_depth" value="5"/>
            <parameter key="min_rows" value="10.0"/>
            <parameter key="min_split_improvement" value="0.0"/>
            <parameter key="number_of_bins" value="20"/>
            <parameter key="learning_rate" value="0.1"/>
            <parameter key="sample_rate" value="1.0"/>
            <parameter key="distribution" value="AUTO"/>
            <parameter key="early_stopping" value="false"/>
            <parameter key="stopping_rounds" value="1"/>
            <parameter key="stopping_metric" value="AUTO"/>
            <parameter key="stopping_tolerance" value="0.001"/>
            <parameter key="max_runtime_seconds" value="0"/>
            <list key="expert_parameters"/>
          </operator>
          <connect from_port="training set" to_op="Gradient Boosted Trees" to_port="training set"/>
          <connect from_op="Gradient Boosted Trees" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing subprocess: apply the model to the test set and compute regression performance. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.1.000" expanded="true"
                    height="82" name="Apply Model" origin="GENERATED_SAMPLE"
                    width="90" x="45" y="34">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <!-- Only root_mean_squared_error and relative_error are enabled as criteria. -->
          <operator activated="true" class="performance_regression" compatibility="9.1.000"
                    expanded="true" height="82" name="Performance" origin="GENERATED_SAMPLE"
                    width="90" x="246" y="34">
            <parameter key="main_criterion" value="first"/>
            <parameter key="root_mean_squared_error" value="true"/>
            <parameter key="absolute_error" value="false"/>
            <parameter key="relative_error" value="true"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="false"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="prediction_average" value="false"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="center" color="yellow" colored="false" height="105" resized="false" width="180" x="246" y="124">Apply the model to the test set and measure the root mean squared error and the relative error.</description>
        </process>
      </operator>

      <!-- Wiring of the top-level process and delivery of the four result ports. -->
      <connect from_op="Retrieve Prices of Gas Station" from_port="output" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Windowing" to_port="example set"/>
      <connect from_op="Windowing" from_port="windowed example set" to_op="Cross Validation" to_port="example set"/>
      <connect from_op="Cross Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Cross Validation" from_port="example set" to_port="result 2"/>
      <connect from_op="Cross Validation" from_port="test result set" to_port="result 3"/>
      <connect from_op="Cross Validation" from_port="performance 1" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>

      <!-- Canvas notes. The HTML line breaks are escaped; keep each note on a single line. -->
      <description align="center" color="blue" colored="true" height="166" resized="true" width="259" x="27" y="130">Retrieve the German gas prices data set from the Samples/Time Series folder.&lt;br&gt;&lt;br&gt;Remove the first 16 Examples, so that the remaining Examples start at 9:00 AM</description>
      <description align="center" color="green" colored="true" height="427" resized="true" width="366" x="313" y="130">Perform a Windowing on the data set.&lt;br&gt;&lt;br&gt;The window size is set to 48, to include the prices of the previous 48 hours for each window.&lt;br&gt;&lt;br&gt;The step size is set to 24, so that we only look at windows which end at 8:00 AM.&lt;br&gt;&lt;br&gt;The horizon size is set to 1, because we want to forecast 1 price.&lt;br&gt;&lt;br&gt;The horizon offset is set to 23, so that the horizon is 23+1 hours after the window, hence the gas price of the next day at the same time.&lt;br&gt;&lt;br&gt;The resulting ExampleSet contains all we need to train any machine learning model on it: a label (the price of the next day, "gas price / euro (times 1000) + 24 (horizon)"), 48 Attributes containing the prices of the last 48 hours ("gas price / euro (times 1000) - i"), and a special attribute holding the last date in the window, which is not used in the training.</description>
      <description align="center" color="yellow" colored="false" height="91" resized="true" width="230" x="703" y="198">Train a Gradient Boosted Tree on the ExampleSet created by the Windowing operator.</description>
    </process>
  </operator>
</process>