Hi guys,
I am trying to optimize my workflows in terms of calculation time. Here is one example where I don't know where to put "cleanup" operators to speed up the calculation. I would appreciate comments on where you would put
Free Memory, Materialize Data, or other operators that should speed up the process.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: Y-scrambling (label permutation) experiment.
  Generates a data set, discretizes the label into two classes, then loops over
  values of Permutation.local_random_seed. Each iteration shuffles the label
  column, re-joins it with the untouched attributes, cross-validates a random
  forest on the result and logs the performance. After the loop the log is
  converted to an example set and stored in the repository.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.11" expanded="true" name="Root">
    <parameter key="logverbosity" value="notes"/>
    <process expanded="true" height="505" width="1217">
      <!-- Data preparation: runs once, outside the parameter loop. -->
      <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="number_examples" value="300"/>
        <parameter key="number_of_attributes" value="10"/>
      </operator>
      <operator activated="true" class="discretize_by_user_specification" compatibility="5.0.11" expanded="true" height="94" name="Discretize" width="90" x="179" y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="label"/>
        <parameter key="include_special_attributes" value="true"/>
        <list key="classes">
          <!-- NOTE(review): "ffirst" looks like a typo for "first"; left unchanged because it is a runtime class name. Confirm. -->
          <parameter key="ffirst" value="0.5"/>
          <parameter key="last" value="Infinity"/>
        </list>
      </operator>
      <!-- att1 is renamed to "id" and given the id role. -->
      <operator activated="true" class="rename" compatibility="5.0.11" expanded="true" height="76" name="Rename" width="90" x="313" y="30">
        <parameter key="old_name" value="att1"/>
        <parameter key="new_name" value="id"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.0.11" expanded="true" height="76" name="Set Role" width="90" x="447" y="30">
        <parameter key="name" value="id"/>
        <parameter key="target_role" value="id"/>
      </operator>
      <!-- Parameter loop: varies the random seed of the "Permutation" operator over the grid given below. -->
      <operator activated="true" class="loop_parameters" compatibility="5.0.0" expanded="true" height="94" name="ParameterIteration" width="90" x="581" y="30">
        <list key="parameters">
          <parameter key="Permutation.local_random_seed" value="[0.0;100000.0;999;linear]"/>
        </list>
        <process expanded="true" height="523" width="547">
          <!--
            Step 1 of each iteration: build the scrambled data set.
            Copy 1 of the input keeps only "label", is shuffled and gets fresh ids.
            Copy 2 drops "label" and "id" and gets fresh ids.
            Both copies are then joined.
          -->
          <operator activated="true" class="subprocess" compatibility="5.0.0" expanded="true" height="76" name="Label Random Permutation (Y-Scrambling)" width="90" x="45" y="30">
            <process expanded="true" height="521" width="1083">
              <operator activated="true" class="multiply" compatibility="5.0.11" expanded="true" height="94" name="IOMultiplier (2)_1" width="90" x="45" y="30"/>
              <operator activated="true" class="select_attributes" compatibility="5.0.0" expanded="true" height="76" name="AttributeFilter (3)" width="90" x="179" y="30">
                <parameter key="attribute_filter_type" value="regular_expression"/>
                <parameter key="regular_expression" value="label"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <!-- The seed below is the parameter overridden by ParameterIteration. -->
              <operator activated="true" class="shuffle" compatibility="5.0.0" expanded="true" height="76" name="Permutation" width="90" x="313" y="30">
                <parameter key="use_local_random_seed" value="true"/>
                <parameter key="local_random_seed" value="22322"/>
              </operator>
              <operator activated="true" class="generate_id" compatibility="5.0.0" expanded="true" height="76" name="IdTagging (3)" width="90" x="447" y="30"/>
              <!-- No target_role is given here, so the operator's default applies (presumably "regular"; verify). -->
              <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="ChangeAttributeRole (2)" width="90" x="179" y="165">
                <parameter key="name" value="label"/>
              </operator>
              <operator activated="true" class="select_attributes" compatibility="5.0.0" expanded="true" height="76" name="AttributeFilter (4)" width="90" x="313" y="165">
                <parameter key="attribute_filter_type" value="regular_expression"/>
                <parameter key="regular_expression" value="label|id"/>
                <parameter key="invert_selection" value="true"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <operator activated="true" class="generate_id" compatibility="5.0.0" expanded="true" height="76" name="IdTagging (4)" width="90" x="447" y="165"/>
              <operator activated="true" class="join" compatibility="5.0.0" expanded="true" height="76" name="ExampleSetJoin (2)" width="90" x="581" y="30"/>
              <operator activated="true" class="materialize_data" compatibility="5.0.11" expanded="true" height="76" name="Materialize Data" width="90" x="715" y="30"/>
              <!-- NOTE(review): this Free Memory runs on every loop iteration. If it forces a garbage collection it may cost more time than it saves; measure with and without it. -->
              <operator activated="true" class="free_memory" compatibility="5.0.11" expanded="true" height="76" name="Free Memory (2)" width="90" x="849" y="30"/>
              <connect from_port="in 1" to_op="IOMultiplier (2)_1" to_port="input"/>
              <connect from_op="IOMultiplier (2)_1" from_port="output 1" to_op="AttributeFilter (3)" to_port="example set input"/>
              <connect from_op="IOMultiplier (2)_1" from_port="output 2" to_op="ChangeAttributeRole (2)" to_port="example set input"/>
              <connect from_op="AttributeFilter (3)" from_port="example set output" to_op="Permutation" to_port="example set input"/>
              <connect from_op="Permutation" from_port="example set output" to_op="IdTagging (3)" to_port="example set input"/>
              <connect from_op="IdTagging (3)" from_port="example set output" to_op="ExampleSetJoin (2)" to_port="left"/>
              <connect from_op="ChangeAttributeRole (2)" from_port="example set output" to_op="AttributeFilter (4)" to_port="example set input"/>
              <connect from_op="AttributeFilter (4)" from_port="example set output" to_op="IdTagging (4)" to_port="example set input"/>
              <connect from_op="IdTagging (4)" from_port="example set output" to_op="ExampleSetJoin (2)" to_port="right"/>
              <connect from_op="ExampleSetJoin (2)" from_port="join" to_op="Materialize Data" to_port="example set input"/>
              <connect from_op="Materialize Data" from_port="example set output" to_op="Free Memory (2)" to_port="through 1"/>
              <connect from_op="Free Memory (2)" from_port="through 1" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="54"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="54"/>
            </process>
          </operator>
          <!-- Step 2 of each iteration: cross-validate a random forest on the scrambled data and log the results. -->
          <operator activated="true" class="subprocess" compatibility="5.0.0" expanded="true" height="112" name="TrainingSet: Modell mit XValidierung" width="90" x="313" y="30">
            <process expanded="true" height="523" width="897">
              <operator activated="true" class="x_validation" compatibility="5.0.0" expanded="true" height="112" name="XValidation" width="90" x="45" y="30">
                <parameter key="create_complete_model" value="true"/>
                <parameter key="sampling_type" value="shuffled sampling"/>
                <parameter key="local_random_seed" value="10"/>
                <!-- Training side of the cross-validation. -->
                <process expanded="true" height="523" width="165">
                  <operator activated="true" class="random_forest" compatibility="5.0.0" expanded="true" height="76" name="RandomForest" width="90" x="45" y="30">
                    <parameter key="number_of_trees" value="5"/>
                    <parameter key="criterion" value="information_gain"/>
                    <parameter key="minimal_leaf_size" value="1"/>
                    <parameter key="maximal_depth" value="10"/>
                    <parameter key="subset_ratio" value="-1.0"/>
                    <parameter key="local_random_seed" value="-1"/>
                  </operator>
                  <connect from_port="training" to_op="RandomForest" to_port="training set"/>
                  <connect from_op="RandomForest" from_port="model" to_port="model"/>
                  <connect from_op="RandomForest" from_port="exampleSet" to_port="through 1"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                  <portSpacing port="sink_through 2" spacing="0"/>
                </process>
                <!-- Testing side of the cross-validation. -->
                <process expanded="true" height="523" width="300">
                  <operator activated="true" class="apply_model" compatibility="5.0.0" expanded="true" height="76" name="ModelApplier" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                    <parameter key="create_view" value="true"/>
                  </operator>
                  <operator activated="true" class="performance_binominal_classification" compatibility="5.0.11" expanded="true" height="76" name="BinominalClassificationPerformance" width="90" x="180" y="30">
                    <parameter key="main_criterion" value="youden"/>
                    <parameter key="psep" value="true"/>
                  </operator>
                  <connect from_port="model" to_op="ModelApplier" to_port="model"/>
                  <connect from_port="test set" to_op="ModelApplier" to_port="unlabelled data"/>
                  <connect from_op="ModelApplier" from_port="labelled data" to_op="BinominalClassificationPerformance" to_port="labelled data"/>
                  <connect from_op="BinominalClassificationPerformance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="source_through 2" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <!-- Logs four values read from XValidation; the log is sorted top-k on "youden" with k = 1000. -->
              <operator activated="true" class="log" compatibility="5.0.0" expanded="true" height="112" name="ProcessLog" width="90" x="179" y="30">
                <list key="log">
                  <parameter key="youden" value="operator.XValidation.value.performance"/>
                  <parameter key="accuracy" value="operator.XValidation.value.performance1"/>
                  <parameter key="psep" value="operator.XValidation.value.performance3"/>
                  <parameter key="deviation" value="operator.XValidation.value.deviation"/>
                </list>
                <parameter key="sorting_type" value="top-k"/>
                <parameter key="sorting_dimension" value="youden"/>
                <parameter key="sorting_k" value="1000"/>
              </operator>
              <connect from_port="in 1" to_op="XValidation" to_port="training"/>
              <connect from_op="XValidation" from_port="model" to_op="ProcessLog" to_port="through 1"/>
              <connect from_op="XValidation" from_port="training" to_op="ProcessLog" to_port="through 2"/>
              <connect from_op="XValidation" from_port="averagable 1" to_op="ProcessLog" to_port="through 3"/>
              <connect from_op="ProcessLog" from_port="through 1" to_port="out 1"/>
              <connect from_op="ProcessLog" from_port="through 2" to_port="out 2"/>
              <connect from_op="ProcessLog" from_port="through 3" to_port="out 3"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
              <portSpacing port="sink_out 4" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Label Random Permutation (Y-Scrambling)" to_port="in 1"/>
          <!-- from_op is kept on one physical line so the value matches the operator name exactly. -->
          <connect from_op="Label Random Permutation (Y-Scrambling)" from_port="out 1" to_op="TrainingSet: Modell mit XValidierung" to_port="in 1"/>
          <connect from_op="TrainingSet: Modell mit XValidierung" from_port="out 1" to_port="result 1"/>
          <connect from_op="TrainingSet: Modell mit XValidierung" from_port="out 2" to_port="result 2"/>
          <connect from_op="TrainingSet: Modell mit XValidierung" from_port="out 3" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="36"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
      <!-- Post-processing: runs once, after the loop. Note that no connection leads from ParameterIteration to ProcessLog2ExampleSet. -->
      <operator activated="true" class="log_to_data" compatibility="5.0.0" expanded="true" height="76" name="ProcessLog2ExampleSet" width="90" x="715" y="30"/>
      <operator activated="true" class="store" compatibility="5.0.11" expanded="true" height="60" name="Store" width="90" x="849" y="30">
        <parameter key="repository_entry" value="../Data/Y_Scrambling"/>
      </operator>
      <operator activated="true" class="free_memory" compatibility="5.0.0" expanded="true" height="76" name="MemoryCleanUp (2)" width="90" x="983" y="30"/>
      <connect from_op="Generate Data" from_port="output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="ParameterIteration" to_port="input 1"/>
      <connect from_op="ProcessLog2ExampleSet" from_port="exampleSet" to_op="Store" to_port="input"/>
      <connect from_op="Store" from_port="through" to_op="MemoryCleanUp (2)" to_port="through 1"/>
      <connect from_op="MemoryCleanUp (2)" from_port="through 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="144"/>
    </process>
  </operator>
</process>
|
Best,
Markus