Hi,
When I deselect "create view" in the parameters of the Nominal to Numerical operator, the accuracy of the process below changes, which I find rather unintuitive. Any clarifying comments would be welcome.
Note that local random seeds have been used wherever possible, to minimise the influence of randomness on the results. The change in accuracy occurred whether or not the data was shuffled just before being fed to the neural net for training (see the shuffle parameter of the Neural Net operator). Obviously, feeding the same data to the neural net in a different order may lead to a different model, and thus possibly to a different accuracy, which is why the process was tested both with and without data shuffling at this stage.
Regards,
Dan
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.0.10" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="parallelize_main_process" value="false"/>
<process expanded="true" height="494" width="882">
<!-- Data source of the process: requests 500 generated examples. The local
     random seed (1992) is switched on, which is intended to make the
     generated data reproducible independently of the process-level seed. -->
<operator activated="true" class="generate_direct_mailing_data" compatibility="5.0.10" expanded="true"
          height="60" name="Generate Direct Mailing Data" width="90" x="45" y="165">
  <parameter key="number_examples" value="500"/>
  <parameter key="use_local_random_seed" value="true"/>
  <parameter key="local_random_seed" value="1992"/>
</operator>
<!-- Nominal to Binominal, restricted to the single attribute "label".
     No preprocessing model is returned and no view is created, so the
     operator is configured to work on the data itself. -->
<operator activated="true" class="nominal_to_binominal" compatibility="5.0.10" expanded="true"
          height="94" name="Nominal to Binominal" width="90" x="45" y="30">
  <parameter key="return_preprocessing_model" value="false"/>
  <parameter key="create_view" value="false"/>

  <!-- Attribute selection: filter type "single" picks exactly the attribute
       named below. The remaining filter parameters belong to other filter
       types and all exception switches are off. -->
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="label"/>
  <parameter key="use_except_expression" value="false"/>
  <parameter key="value_type" value="nominal"/>
  <parameter key="use_value_type_exception" value="false"/>
  <parameter key="except_value_type" value="file_path"/>
  <parameter key="block_type" value="single_value"/>
  <parameter key="use_block_type_exception" value="false"/>
  <parameter key="except_block_type" value="single_value"/>
  <parameter key="invert_selection" value="false"/>
  <!-- NOTE(review): special attributes are included in the selection,
       presumably because "label" carries a special role. Confirm. -->
  <parameter key="include_special_attributes" value="true"/>

  <!-- Conversion options. -->
  <parameter key="transform_binominal" value="false"/>
  <parameter key="use_underscore_in_name" value="false"/>
</operator>
<!-- Remap Binominals on the single attribute "label": declares
     "no response" as the negative value and "response" as the positive
     value of that attribute. -->
<operator activated="true" class="remap_binominals" compatibility="5.0.10" expanded="true"
          height="76" name="Remap Binominals" width="90" x="179" y="30">
  <!-- Attribute selection: filter type "single" picks exactly the attribute
       named below; all exception switches are off. -->
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="label"/>
  <parameter key="use_except_expression" value="false"/>
  <parameter key="value_type" value="binominal"/>
  <parameter key="use_value_type_exception" value="false"/>
  <parameter key="except_value_type" value="binominal"/>
  <parameter key="block_type" value="value_matrix_start"/>
  <parameter key="use_block_type_exception" value="false"/>
  <parameter key="except_block_type" value="value_matrix_start"/>
  <parameter key="invert_selection" value="false"/>
  <parameter key="include_special_attributes" value="true"/>

  <!-- The mapping itself. These strings must match the values that occur
       in the data exactly. -->
  <parameter key="negative_value" value="no response"/>
  <parameter key="positive_value" value="response"/>
</operator>
<!-- Splits the example set into two partitions with ratios 0.66 and 0.34.
     In this process partition 1 is wired to the training branch and
     partition 2 to Apply Model. Shuffled sampling is used with the local
     random seed (2000) switched on. -->
<operator activated="true" class="split_data" compatibility="5.0.10" expanded="true"
          height="94" name="Split Data" width="90" x="313" y="165">
  <!-- One ratio entry per output partition, in port order. -->
  <enumeration key="partitions">
    <parameter key="ratio" value="0.66"/>
    <parameter key="ratio" value="0.34"/>
  </enumeration>
  <parameter key="sampling_type" value="shuffled sampling"/>
  <parameter key="use_local_random_seed" value="true"/>
  <parameter key="local_random_seed" value="2000"/>
</operator>
<!-- Nominal to Numerical over all regular attributes (filter type "all",
     special attributes excluded). In this process it receives only
     partition 1 of Split Data, i.e. the training branch. -->
<operator activated="true" class="nominal_to_numerical" compatibility="5.0.10" expanded="true"
          height="94" name="Nominal to Numerical" width="90" x="313" y="30">
  <parameter key="return_preprocessing_model" value="false"/>
  <!-- This is the "create view" switch discussed in the accompanying post:
       toggling it is reported to change the resulting accuracy. -->
  <parameter key="create_view" value="true"/>

  <!-- Attribute selection: filter type "all", so the attribute name is left
       empty; all exception switches are off. -->
  <parameter key="attribute_filter_type" value="all"/>
  <parameter key="attribute" value=""/>
  <parameter key="use_except_expression" value="false"/>
  <parameter key="value_type" value="nominal"/>
  <parameter key="use_value_type_exception" value="false"/>
  <parameter key="except_value_type" value="file_path"/>
  <parameter key="block_type" value="single_value"/>
  <parameter key="use_block_type_exception" value="false"/>
  <parameter key="except_block_type" value="single_value"/>
  <parameter key="invert_selection" value="false"/>
  <!-- Special attributes are excluded from the selection.
       NOTE(review): presumably this keeps the label untouched. Confirm. -->
  <parameter key="include_special_attributes" value="false"/>
</operator>
<!-- Neural Net learner trained on the output of Nominal to Numerical.
     No hidden layers are listed explicitly (the hidden_layers list is
     empty). Training runs for 500 cycles with learning rate 0.3 and
     momentum 0.2, with shuffling and normalisation switched on. -->
<operator activated="true" class="neural_net" compatibility="5.0.10" expanded="true"
          height="76" name="Neural Net" width="90" x="447" y="30">
  <list key="hidden_layers"/>
  <parameter key="training_cycles" value="500"/>
  <parameter key="learning_rate" value="0.3"/>
  <parameter key="momentum" value="0.2"/>
  <parameter key="decay" value="false"/>
  <parameter key="shuffle" value="true"/>
  <parameter key="normalize" value="true"/>
  <parameter key="error_epsilon" value="1.0E-5"/>
  <!-- Changed from "false" to "true". A local seed (1992) was already
       configured below but not enabled, unlike the other seeded operators
       of this process. Enabling it is meant to decouple the learner's
       randomness from the state of the process-level random generator,
       so that changes in upstream operators cannot shift it.
       NOTE(review): re-run both create_view settings after this change
       and compare the accuracies again. -->
  <parameter key="use_local_random_seed" value="true"/>
  <parameter key="local_random_seed" value="1992"/>
</operator>
<!-- Applies the trained model to the data arriving at its "unlabelled data"
     port. No application parameters are set and no view is created. -->
<operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true"
          height="76" name="Apply Model" width="90" x="581" y="165">
  <list key="application_parameters"/>
  <parameter key="create_view" value="false"/>
</operator>
<!-- Performance evaluation of the labelled data produced by Apply Model.
     Example weights are taken into account when they are present. -->
<operator activated="true" class="performance" compatibility="5.0.10" expanded="true"
          height="76" name="Performance (2)" width="90" x="715" y="165">
  <parameter key="use_example_weights" value="true"/>
</operator>
<!-- Wiring of the process. Operator and port names must match the operator
     definitions exactly. -->

<!-- Preparation chain: generated data, then label conversion and remapping,
     then the split. -->
<connect from_op="Generate Direct Mailing Data" from_port="output"
         to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output"
         to_op="Remap Binominals" to_port="example set input"/>
<connect from_op="Remap Binominals" from_port="example set output"
         to_op="Split Data" to_port="example set"/>

<!-- Partition 1 goes through Nominal to Numerical into training.
     Partition 2 goes straight to Apply Model.
     NOTE(review): partition 2 does not pass through Nominal to Numerical,
     so the model is applied to data that was prepared differently from its
     training data. Confirm this is intended; it may be related to the
     accuracy change described in the accompanying post. -->
<connect from_op="Split Data" from_port="partition 1"
         to_op="Nominal to Numerical" to_port="example set input"/>
<connect from_op="Split Data" from_port="partition 2"
         to_op="Apply Model" to_port="unlabelled data"/>

<!-- Training, application and evaluation. -->
<connect from_op="Nominal to Numerical" from_port="example set output"
         to_op="Neural Net" to_port="training set"/>
<connect from_op="Neural Net" from_port="model"
         to_op="Apply Model" to_port="model"/>
<connect from_op="Apply Model" from_port="labelled data"
         to_op="Performance (2)" to_port="labelled data"/>

<!-- Process results: the performance vector and the scored example set. -->
<connect from_op="Performance (2)" from_port="performance" to_port="result 1"/>
<connect from_op="Performance (2)" from_port="example set" to_port="result 2"/>

<!-- Port spacing values; these look like layout hints for the process
     editor rather than data flow settings. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="126"/>
<portSpacing port="sink_result 2" spacing="18"/>
<portSpacing port="sink_result 3" spacing="18"/>
</process>
</operator>
</process>