Hello again,
I am once again asking the community for help, as my wisdom has come to an end here. Here is a short description of what the process should do:
read input --> preprocessing + data preparation --> loop over all combinations of 2 attributes --> build and evaluate a decision tree for each combination --> write performance values and feature names to a log and then to CSV
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: read a CSV, assign id/label roles, keep numeric
  attributes, drop examples with missing values or labels, then run a
  cross-validated decision tree for every attribute subset of size 2 and
  log the results. The log is finally converted to data and written to CSV.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <!-- No process-level macros are defined here. -->
    <macros/>
  </context>
  <operator name="Process" class="process" activated="true" compatibility="5.0.8" expanded="true">
    <process expanded="true" height="566" width="1619">

      <!-- Step 1: load the input data. -->
      <operator name="Read CSV" class="read_csv" activated="true" compatibility="5.0.8"
                expanded="true" height="60" width="90" x="45" y="120">
        <parameter key="file_name" value="E:\binary_preprocessed.csv"/>
        <parameter key="comment_characters" value="*"/>
        <parameter key="column_separators" value=","/>
      </operator>

      <!-- Step 2: attribute "ID" gets the id role, "activity" becomes the label. -->
      <operator name="Set Role" class="set_role" activated="true" compatibility="5.0.8"
                expanded="true" height="76" width="90" x="179" y="120">
        <parameter key="name" value="ID"/>
        <parameter key="target_role" value="id"/>
      </operator>
      <operator name="Set Role (2)" class="set_role" activated="true" compatibility="5.0.8"
                expanded="true" height="76" width="90" x="313" y="120">
        <parameter key="name" value="activity"/>
        <parameter key="target_role" value="label"/>
      </operator>

      <!--
        Step 3: select attributes by value type (numeric).
        NOTE(review): regular_expression is also set although the filter type
        is value_type; presumably it has no effect here. Verify.
      -->
      <operator name="only numeric" class="select_attributes" activated="true" compatibility="5.0.8"
                expanded="true" height="76" width="90" x="447" y="120">
        <parameter key="attribute_filter_type" value="value_type"/>
        <parameter key="regular_expression" value="pKa1ACD10|pKa2ACD10"/>
        <parameter key="value_type" value="numeric"/>
      </operator>

      <!-- Step 4: remove examples with missing attribute values, then those with missing labels. -->
      <operator name="Filter Examples" class="filter_examples" activated="true" compatibility="5.0.8"
                expanded="true" height="76" width="90" x="581" y="120">
        <parameter key="condition_class" value="no_missing_attributes"/>
      </operator>
      <operator name="Filter Examples (2)" class="filter_examples" activated="true" compatibility="5.0.8"
                expanded="true" height="76" width="90" x="715" y="120">
        <parameter key="condition_class" value="no_missing_labels"/>
      </operator>

      <!-- Step 5: loop over attribute subsets; use_exact_number fixes the subset size at 2. -->
      <operator name="Loop Subsets" class="loop_attribute_subsets" activated="true" compatibility="5.0.8"
                expanded="true" height="60" width="90" x="849" y="120">
        <parameter key="use_exact_number" value="true"/>
        <parameter key="exact_number_of_attributes" value="2"/>
        <parameter key="max_number_of_attributes" value="5"/>
        <process expanded="true" height="665" width="1094">

          <!-- Store the number of attributes of the current subset in macro "atts". -->
          <operator name="Extract Macro" class="extract_macro" activated="true" compatibility="5.0.8"
                    expanded="true" height="60" width="90" x="45" y="30">
            <parameter key="macro" value="atts"/>
            <parameter key="macro_type" value="number_of_attributes"/>
          </operator>

          <!--
            Derive macro "treedepth" from "atts".
            NOTE(review): no operator visible in this process references
            %{treedepth}; the Decision Tree below does not set a depth
            parameter. Confirm whether it was meant to be used there.
          -->
          <operator name="treedepth" class="generate_macro" activated="true" compatibility="5.0.8"
                    expanded="true" height="76" width="90" x="179" y="30">
            <list key="function_descriptions">
              <parameter key="treedepth" value="2 * %{atts} + 1"/>
            </list>
          </operator>

          <!-- Cross-validation with a fixed local random seed. -->
          <operator name="Validation" class="x_validation" activated="true" compatibility="5.0.8"
                    expanded="true" height="112" width="90" x="313" y="30">
            <parameter key="use_local_random_seed" value="true"/>
            <parameter key="local_random_seed" value="10"/>

            <!-- First subprocess: learn a decision tree on the training port. -->
            <process expanded="true" height="647" width="424">
              <operator name="Decision Tree" class="decision_tree" activated="true" compatibility="5.0.8"
                        expanded="true" height="76" width="90" x="112" y="30">
                <parameter key="criterion" value="information_gain"/>
              </operator>
              <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>

            <!-- Second subprocess: apply the model to the test set and measure performance. -->
            <process expanded="true" height="647" width="424">
              <operator name="Apply Model" class="apply_model" activated="true" compatibility="5.0.8"
                        expanded="true" height="76" width="90" x="45" y="30">
                <list key="application_parameters"/>
                <parameter key="create_view" value="true"/>
              </operator>
              <!-- Only youden and psep are switched on explicitly; youden is the main criterion. -->
              <operator name="Performance" class="performance_binominal_classification" activated="true"
                        compatibility="5.0.8" expanded="true" height="76" width="90" x="179" y="30">
                <parameter key="main_criterion" value="youden"/>
                <parameter key="youden" value="true"/>
                <parameter key="psep" value="true"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>

          <!--
            Log one row per iteration to fs_2_atts.log, sorted top-k by youden.
            NOTE(review): "accuracy" reads performance3, but the Performance
            operator above explicitly enables only youden and psep. Verify
            which criterion performance3 actually holds.
          -->
          <operator name="Log" class="log" activated="true" compatibility="5.0.8"
                    expanded="true" height="76" width="90" x="447" y="30">
            <parameter key="filename" value="fs_2_atts.log"/>
            <list key="log">
              <parameter key="youden" value="operator.Validation.value.performance"/>
              <parameter key="psep" value="operator.Validation.value.performance2"/>
              <parameter key="accuracy" value="operator.Validation.value.performance3"/>
              <parameter key="feature_names" value="operator.Loop Subsets.value.feature_names"/>
              <parameter key="feature_number" value="operator.Loop Subsets.value.feature_number"/>
              <parameter key="deviation" value="operator.Validation.value.deviation"/>
            </list>
            <parameter key="sorting_type" value="top-k"/>
            <parameter key="sorting_dimension" value="youden"/>
          </operator>

          <!-- Wiring inside the loop: Extract Macro, treedepth, Validation, Log. -->
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="treedepth" to_port="through 1"/>
          <connect from_op="treedepth" from_port="through 1" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>

      <!-- Step 6: turn the collected log into an example set. -->
      <operator name="Log to Data (2)" class="log_to_data" activated="true" compatibility="5.0.8"
                expanded="true" height="94" width="90" x="983" y="120"/>

      <!--
        Step 7: write the log data to CSV.
        NOTE(review): the file name uses the macros path, set and subset,
        none of which is defined in this process (the macros context above is
        empty). Confirm they are supplied from outside before running.
      -->
      <operator name="Write CSV" class="write_csv" activated="true" compatibility="5.0.8"
                expanded="true" height="60" width="90" x="1117" y="120">
        <parameter key="csv_file" value="%{path}\%{set}_%{subset}_fs_2_atts.csv"/>
      </operator>

      <!-- Top-level wiring, in execution order. -->
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="only numeric" to_port="example set input"/>
      <connect from_op="only numeric" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Log to Data (2)" to_port="through 1"/>
      <connect from_op="Log to Data (2)" from_port="exampleSet" to_op="Write CSV" to_port="input"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
|
But unfortunately it stops after some time and crashes the whole program. I don't know whether I made an error in the workflow or whether it is simply using too much memory. Maybe some of you can give me a few tips.