Hi,
I am trying to construct a workflow that writes a log entry for each new feature. Currently it runs for every iteration step. I already thought about using a Branch operator for the log, but I don't know whether this slows the system down.
What I want would be a log like this:
chosen_attribute, performance
atts45 | 0.500
atts45, atts90 | 0.750
atts45, atts90, atts2 | 0.800
Here is an example workflow:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: generate a synthetic data set, then run the feature
  selection operator "FS" (optimize_selection) on it. Inside FS, each
  candidate attribute set is scored with a 5-fold cross-validated linear
  regression, and the "ProcessLog" operator records FS's "generation" and
  "performance" values.

  Every tag is kept on a single line on purpose: a line break inside an
  attribute value (e.g. from_op="Generate Data") is turned into extra
  whitespace by the XML parser and the operator name no longer matches.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Global">
    <parameter key="logfile" value="advanced1.log"/>
    <process expanded="true" height="668" width="1421">
      <!-- Synthetic input: 1000 examples with 300 attributes. -->
      <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="number_examples" value="1000"/>
        <parameter key="number_of_attributes" value="300"/>
      </operator>
      <!-- Feature selection; the inner process below evaluates one attribute set. -->
      <operator activated="true" class="optimize_selection" compatibility="5.0.11" expanded="true" height="94" name="FS" width="90" x="179" y="30">
        <process expanded="true" height="642" width="1070">
          <operator activated="true" class="materialize_data" compatibility="5.0.11" expanded="true" height="76" name="Materialize Data" width="90" x="180" y="30"/>
          <!-- Copy 1 of the example set goes to XValidation, copy 2 is passed through ProcessLog. -->
          <operator activated="true" class="multiply" compatibility="5.0.11" expanded="true" height="94" name="Multiply (2)" width="90" x="315" y="30"/>
          <operator activated="true" class="x_validation" compatibility="5.0.11" expanded="true" height="112" name="XValidation" width="90" x="450" y="30">
            <parameter key="number_of_validations" value="5"/>
            <!-- Training subprocess: linear regression with its built-in feature selection set to "none". -->
            <process expanded="true" height="660" width="519">
              <operator activated="true" class="linear_regression" compatibility="5.0.11" expanded="true" height="94" name="Linear Regression (2)" width="90" x="282" y="30">
                <parameter key="feature_selection" value="none"/>
              </operator>
              <connect from_port="training" to_op="Linear Regression (2)" to_port="training set"/>
              <connect from_op="Linear Regression (2)" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing subprocess: apply the model to the test set and compute regression performance. -->
            <process expanded="true" height="660" width="519">
              <operator activated="true" class="apply_model" compatibility="5.0.11" expanded="true" height="76" name="Applier" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_regression" compatibility="5.0.11" expanded="true" height="76" name="Performance" width="90" x="282" y="30"/>
              <connect from_port="model" to_op="Applier" to_port="model"/>
              <connect from_port="test set" to_op="Applier" to_port="unlabelled data"/>
              <connect from_op="Applier" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!--
            Logs the "generation" and "performance" values of operator FS.
            NOTE(review): the filename references the macros data, ham and mode,
            but the <macros/> element in this process is empty. Confirm that
            they are defined elsewhere before relying on the file name.
          -->
          <operator activated="true" class="log" compatibility="5.0.11" expanded="true" height="94" name="ProcessLog" width="90" x="581" y="30">
            <parameter key="filename" value="%{data}_%{ham}_%{mode}_fs.log"/>
            <list key="log">
              <parameter key="generation" value="operator.FS.value.generation"/>
              <parameter key="performance" value="operator.FS.value.performance"/>
            </list>
          </operator>
          <connect from_port="example set" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
          <connect from_op="Multiply (2)" from_port="output 1" to_op="XValidation" to_port="training"/>
          <connect from_op="Multiply (2)" from_port="output 2" to_op="ProcessLog" to_port="through 2"/>
          <connect from_op="XValidation" from_port="averagable 1" to_op="ProcessLog" to_port="through 1"/>
          <connect from_op="ProcessLog" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Generate Data" from_port="output" to_op="FS" to_port="example set in"/>
      <connect from_op="FS" from_port="example set out" to_port="result 1"/>
      <connect from_op="FS" from_port="weights" to_port="result 2"/>
      <connect from_op="FS" from_port="performance" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="126"/>
    </process>
  </operator>
</process>
|
Best,
Markus