Hi
We have a process which is a time series prediction of daily data.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input>
<location/>
</input>
<output>
<location/>
<location/>
<location/>
</output>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<process expanded="true" height="325" width="1173">
<operator activated="true" class="read_excel" expanded="true" height="60" name="Reference Data" width="90" x="45" y="30">
<parameter key="excel_file" value="C:\Documents and Settings\Nev\My Documents\My Dropbox\Project Files SVN\C1borgs Space\Files\GBPUSD.xls"/>
<parameter key="sheet_number" value="3"/>
</operator>
<operator activated="true" class="set_role" expanded="true" height="76" name="ID" width="90" x="179" y="30">
<parameter key="name" value="Date"/>
<parameter key="target_role" value="id"/>
</operator>
<operator activated="true" class="split_data" expanded="true" height="94" name="Split Data" width="90" x="313" y="30">
<enumeration key="partitions">
<parameter key="ratio" value="0.9"/>
<parameter key="ratio" value="0.1"/>
</enumeration>
<parameter key="sampling_type" value="linear sampling"/>
</operator>
<operator activated="true" class="set_role" expanded="true" height="76" name="Prediction" width="90" x="447" y="210">
<parameter key="name" value="Label"/>
<parameter key="target_role" value="prediction"/>
</operator>
<operator activated="true" class="set_role" expanded="true" height="76" name="Label" width="90" x="447" y="30">
<parameter key="name" value="Label"/>
<parameter key="target_role" value="label"/>
</operator>
<operator activated="true" class="optimize_weights_evolutionary" expanded="true" height="94" name="Optimize Weights (Evolutionary)" width="90" x="581" y="30">
<parameter key="population_size" value="10"/>
<parameter key="use_early_stopping" value="true"/>
<parameter key="selection_scheme" value="roulette wheel"/>
<parameter key="p_crossover" value="0.2"/>
<parameter key="crossover_type" value="shuffle"/>
<process expanded="true" height="287" width="527">
<operator activated="true" class="split_validation" expanded="true" height="112" name="Validation" width="90" x="45" y="30">
<process expanded="true" height="305" width="238">
<operator activated="true" class="neural_net" expanded="true" height="76" name="Neural Net" width="90" x="112" y="30">
<list key="hidden_layers"/>
<parameter key="learning_rate" value="0.5"/>
<parameter key="decay" value="true"/>
</operator>
<connect from_port="training" to_op="Neural Net" to_port="training set"/>
<connect from_op="Neural Net" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true" height="305" width="419">
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="write_model" expanded="true" height="60" name="Write Model" width="90" x="246" y="165">
<parameter key="model_file" value="C:\Projects\RM5\Forex\data\models\model.mod"/>
</operator>
<operator activated="true" class="performance" expanded="true" height="76" name="Performance" width="90" x="246" y="30"/>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Apply Model" from_port="model" to_op="Write Model" to_port="input"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<connect from_port="example set" to_op="Validation" to_port="training"/>
<connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
</process>
</operator>
<operator activated="true" class="select_by_weights" expanded="true" height="94" name="Select by Weights" width="90" x="715" y="210">
<parameter key="weight_relation" value="greater"/>
<parameter key="weight" value="0.5"/>
<parameter key="use_absolute_weights" value="false"/>
</operator>
<operator activated="true" class="read_model" expanded="true" height="60" name="Read Model" width="90" x="715" y="120">
<parameter key="model_file" value="C:\Projects\RM5\Forex\data\models\model.mod"/>
</operator>
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model (2)" width="90" x="849" y="165">
<list key="application_parameters"/>
</operator>
<connect from_op="Reference Data" from_port="output" to_op="ID" to_port="example set input"/>
<connect from_op="ID" from_port="example set output" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Label" to_port="example set input"/>
<connect from_op="Split Data" from_port="partition 2" to_op="Prediction" to_port="example set input"/>
<connect from_op="Prediction" from_port="example set output" to_op="Select by Weights" to_port="example set input"/>
<connect from_op="Label" from_port="example set output" to_op="Optimize Weights (Evolutionary)" to_port="example set in"/>
<connect from_op="Optimize Weights (Evolutionary)" from_port="weights" to_op="Select by Weights" to_port="weights"/>
<connect from_op="Optimize Weights (Evolutionary)" from_port="performance" to_port="result 1"/>
<connect from_op="Select by Weights" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Read Model" from_port="output" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="36"/>
<portSpacing port="sink_result 2" spacing="72"/>
<portSpacing port="sink_result 3" spacing="18"/>
</process>
</operator>
</process>
The datas are split in two subsets through the operator "split data".
90% (about 225 bars) will be the part we use to learn, 10% (about 25) are unseen bars.
Split validation operator will further split the 90% into 70% (about 157 bars) and 30% (about 68 bars).
The results are acceptable but there seems to be a major error the 25 unseen bars are either all BUY or all SELL predictions? However the Testing data has mixed signals of BUY and SELL.
Any guidance you can give as to why this is happening would be appreciated