Hi,
Could someone comment on the following simple process, built purely for testing (not for practical use)?
It retrieves a dataset, learns a decision tree, stores and retrieves the model via
Remember and Recall operators, and applies it for scoring the dataset.
In particular, if one displays the execution order of the process operators,
it shows an attempt to retrieve the model (via Recall) before the model has been stored (via Remember). A corresponding error is displayed when trying to run the process.
Since the two operators refer to the same object (identified by the value 'dt' of their 'name' parameter), one would expect Remember to be executed first, and then Recall.
Any comments on this surprising execution order are welcome. Moreover, how is the execution order determined, in general, in an RM process?
Thanks,
Dan
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Test process: loads a data set, trains a decision tree on it, stores the resulting
  model under the name "dt" (Remember), reads the stored object back (Recall) and
  applies it to the data set.

  The operators below are listed in their original declaration order, i.e. Recall is
  declared before Remember, and no connection links the two.
  NOTE(review): declaration order may be what decides execution order for operators
  that are not linked by a connection; confirm against the tool documentation.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.0.0"
            expanded="true"
            name="Root">
    <!-- Process-wide settings. -->
    <parameter key="logverbosity" value="3"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="1"/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <parameter key="parallelize_main_process" value="false"/>
    <process expanded="true" height="566" width="900">

      <!-- Data source: reads the repository entry given below. -->
      <operator activated="true"
                class="retrieve"
                compatibility="5.0.0"
                expanded="true"
                height="60"
                name="Retrieve"
                width="90"
                x="45"
                y="30">
        <parameter key="repository_entry" value="//Samples/data/Golf"/>
      </operator>

      <!-- Learner: receives the retrieved data on its "training set" port. -->
      <operator activated="true"
                class="decision_tree"
                compatibility="5.0.10"
                expanded="true"
                height="76"
                name="Decision Tree"
                width="90"
                x="179"
                y="30">
        <parameter key="criterion" value="gain_ratio"/>
        <parameter key="minimal_size_for_split" value="4"/>
        <parameter key="minimal_leaf_size" value="2"/>
        <parameter key="minimal_gain" value="0.05"/>
        <parameter key="maximal_depth" value="20"/>
        <parameter key="confidence" value="0.25"/>
        <parameter key="number_of_prepruning_alternatives" value="3"/>
        <parameter key="no_pre_pruning" value="false"/>
        <parameter key="no_pruning" value="false"/>
      </operator>

      <!-- Reads the object named "dt"; declared ahead of the Remember operator that writes it. -->
      <operator activated="true"
                class="recall"
                compatibility="5.0.10"
                expanded="true"
                height="60"
                name="Recall"
                width="90"
                x="179"
                y="165">
        <parameter key="name" value="dt"/>
        <parameter key="io_object" value="Model"/>
        <parameter key="remove_from_store" value="true"/>
      </operator>

      <!-- Writes the decision tree model under the name "dt". -->
      <operator activated="true"
                class="remember"
                compatibility="5.0.10"
                expanded="true"
                height="60"
                name="Remember"
                width="90"
                x="313"
                y="30">
        <parameter key="name" value="dt"/>
        <parameter key="io_object" value="Model"/>
        <parameter key="store_which" value="1"/>
        <parameter key="remove_from_process" value="true"/>
      </operator>

      <!-- Scoring step: takes the recalled model and the learner's example set output. -->
      <operator activated="true"
                class="apply_model"
                compatibility="5.0.10"
                expanded="true"
                height="76"
                name="Apply Model"
                width="90"
                x="391"
                y="170">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>

      <!-- Data flow: Retrieve to Decision Tree; the tree's model goes to Remember. -->
      <connect from_op="Retrieve" from_port="output"
               to_op="Decision Tree" to_port="training set"/>
      <connect from_op="Decision Tree" from_port="model"
               to_op="Remember" to_port="store"/>
      <!-- Apply Model is fed by the learner's example set and by Recall's result. -->
      <connect from_op="Decision Tree" from_port="exampleSet"
               to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Recall" from_port="result"
               to_op="Apply Model" to_port="model"/>
      <!-- Process results: the labelled data and the model passed through Apply Model. -->
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 2"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>