Aggregation
wessel
New Altair Community Member
Dear All,
I'm trying to compare 2 learning curves.
The aggregation part seems rather cumbersome.
Is there an easier way to do all this?
Wessel
<!-- Process that logs J48 accuracy along a learning curve, once per value of enablePCA.enable,
     then aggregates and pivots the log so both settings can be compared per training-set size. -->
<operator name="Root" class="Process" expanded="yes">
  <parameter key="logverbosity" value="warning"/>
  <parameter key="random_seed" value="2004"/>

  <!-- Alternative data set and its store step; both are switched off (activated="no"). -->
  <operator name="400 CYT_rest, accuracy j48 = 85%" class="ArffExampleSource" activated="no">
    <parameter key="data_file" value="D:\wessel\Desktop\400_CYT_rest.arff"/>
    <parameter key="label_attribute" value="class"/>
  </operator>
  <operator name="store name = source" class="IOStorer" activated="no">
    <parameter key="name" value="source"/>
    <parameter key="io_object" value="ExampleSet"/>
  </operator>

  <!-- Outer loop: iterates the parameter enablePCA.enable over the values true and false. -->
  <operator name="PCA on / off" class="ParameterIteration" expanded="yes">
    <list key="parameters">
      <parameter key="enablePCA.enable" value="true,false"/>
    </list>

    <!-- Data set actually used: 400_MIT_NUC.arff with label attribute "class". -->
    <operator name="400 MIT_NUC, accuracy j48 = 75%" class="ArffExampleSource">
      <parameter key="data_file" value="D:\wessel\Desktop\400_MIT_NUC.arff"/>
      <parameter key="label_attribute" value="class"/>
    </operator>

    <!-- OperatorEnabler pointed at the inner PrincipalComponentsGenerator, which is declared
         with activated="no"; presumably the iterated "enable" parameter toggles it (confirm
         against the operator reference). -->
    <operator name="enablePCA" class="OperatorEnabler" expanded="yes">
      <parameter key="operator_name" value="PrincipalComponentsGenerator"/>
      <operator name="PrincipalComponentsGenerator" class="PrincipalComponentsGenerator" activated="no"/>
    </operator>

    <!-- Keep the prepared ExampleSet under the name "target" for the repetitions below. -->
    <operator name="store name = target" class="IOStorer">
      <parameter key="name" value="target"/>
      <parameter key="io_object" value="ExampleSet"/>
    </operator>

    <!-- Inner loop: 20 iterations, each starting from the stored "target" set. -->
    <operator name="ITER" class="IteratingOperatorChain" expanded="yes">
      <parameter key="iterations" value="20"/>

      <!-- remove_from_store is false, so the stored set stays available for later iterations. -->
      <operator name="retrieve target" class="IORetriever">
        <parameter key="name" value="target"/>
        <parameter key="io_object" value="ExampleSet"/>
        <parameter key="remove_from_store" value="false"/>
      </operator>

      <!-- Learning curve: training_ratio 0.5, step_fraction 0.01, start_fraction 0.0,
           shuffled sampling. -->
      <operator name="split at 50%, step 1%" class="LearningCurve" expanded="yes">
        <parameter key="training_ratio" value="0.5"/>
        <parameter key="step_fraction" value="0.01"/>
        <parameter key="start_fraction" value="0.0"/>
        <parameter key="sampling_type" value="shuffled sampling"/>

        <!-- First child chain: define the macro, then run the W-J48 learner. -->
        <operator name="OperatorChain" class="OperatorChain" expanded="yes">
          <!-- Defines the macro "number_of_training_examples"; which data statistic it holds
               depends on operator defaults not visible here. -->
          <operator name="number_of_training_examples" class="DataMacroDefinition">
            <parameter key="macro" value="number_of_training_examples"/>
          </operator>
          <operator name="W-J48" class="W-J48">
            <parameter key="keep_example_set" value="true"/>
          </operator>
        </operator>

        <!-- Second child chain: apply the model and compute accuracy (operator "ACC"). -->
        <operator name="ApplierChain" class="OperatorChain" expanded="yes">
          <operator name="ModelApplier" class="ModelApplier">
            <list key="application_parameters"/>
          </operator>
          <operator name="ACC" class="ClassificationPerformance">
            <parameter key="accuracy" value="true"/>
            <parameter key="use_example_weights" value="false"/>
            <list key="class_weights"/>
          </operator>
        </operator>

        <!-- Log columns: accuracy, iteration of ITER, the macro value, and the enablePCA setting. -->
        <operator name="LOG" class="ProcessLog">
          <list key="log">
            <parameter key="ACC" value="operator.ACC.value.accuracy"/>
            <parameter key="ITER" value="operator.ITER.value.iteration"/>
            <parameter key="NUMOTE" value="operator.number_of_training_examples.value.macro_value"/>
            <parameter key="enablePCA" value="operator.enablePCA.parameter.enable"/>
          </list>
        </operator>
      </operator>
    </operator>
  </operator>

  <!-- Post-processing of the log named "LOG": convert it to an ExampleSet, then clear it
       (delete_table is true). -->
  <operator name="ProcessLog2ExampleSet" class="ProcessLog2ExampleSet">
    <parameter key="log_name" value="LOG"/>
  </operator>
  <operator name="ClearProcessLog" class="ClearProcessLog">
    <parameter key="log_name" value="LOG"/>
    <parameter key="delete_table" value="true"/>
  </operator>

  <!-- Optional export of the raw results; switched off (activated="no"). -->
  <operator name="ArffExampleSetWriter" class="ArffExampleSetWriter" activated="no">
    <parameter key="example_set_file" value="D:\wessel\Desktop\results.arff"/>
  </operator>

  <!-- Average and standard deviation of ACC, grouped by NUMOTE and enablePCA. -->
  <operator name="Aggregation" class="Aggregation">
    <list key="aggregation_attributes">
      <parameter key="ACC" value="average"/>
      <parameter key="ACC" value="standard_deviation"/>
    </list>
    <parameter key="group_by_attributes" value="NUMOTE|enablePCA"/>
    <parameter key="only_distinct" value="true"/>
  </operator>

  <!-- Pivot with NUMOTE as group attribute and enablePCA as index attribute. -->
  <operator name="Example2AttributePivoting" class="Example2AttributePivoting">
    <parameter key="keep_example_set" value="true"/>
    <parameter key="group_attribute" value="NUMOTE"/>
    <parameter key="index_attribute" value="enablePCA"/>
    <parameter key="consider_weights" value="false"/>
  </operator>

  <operator name="NominalNumbers2Numerical" class="NominalNumbers2Numerical"/>

  <!-- Replace "(" and ")" in attribute names by "_"; the expression below refers to
       attributes named average_ACC__true and average_ACC__false. -->
  <operator name="ChangeAttributeNamesReplace" class="ChangeAttributeNamesReplace">
    <parameter key="replace_what" value="\(|\)"/>
    <parameter key="replace_by" value="_"/>
  </operator>

  <!-- New attribute "tmf": average accuracy with enablePCA=true minus that with enablePCA=false. -->
  <operator name="AttributeConstruction" class="AttributeConstruction">
    <list key="function_descriptions">
      <parameter key="tmf" value="average_ACC__true - average_ACC__false"/>
    </list>
  </operator>
</operator>
I'm trying to compare 2 learning curves.
The aggregation part seems rather cumbersome.
Is there an easier way to do all this?
Wessel
<!-- Process that logs J48 accuracy along a learning curve, once per value of enablePCA.enable,
     then aggregates and pivots the log so both settings can be compared per training-set size. -->
<operator name="Root" class="Process" expanded="yes">
  <parameter key="logverbosity" value="warning"/>
  <parameter key="random_seed" value="2004"/>

  <!-- Alternative data set and its store step; both are switched off (activated="no"). -->
  <operator name="400 CYT_rest, accuracy j48 = 85%" class="ArffExampleSource" activated="no">
    <parameter key="data_file" value="D:\wessel\Desktop\400_CYT_rest.arff"/>
    <parameter key="label_attribute" value="class"/>
  </operator>
  <operator name="store name = source" class="IOStorer" activated="no">
    <parameter key="name" value="source"/>
    <parameter key="io_object" value="ExampleSet"/>
  </operator>

  <!-- Outer loop: iterates the parameter enablePCA.enable over the values true and false. -->
  <operator name="PCA on / off" class="ParameterIteration" expanded="yes">
    <list key="parameters">
      <parameter key="enablePCA.enable" value="true,false"/>
    </list>

    <!-- Data set actually used: 400_MIT_NUC.arff with label attribute "class". -->
    <operator name="400 MIT_NUC, accuracy j48 = 75%" class="ArffExampleSource">
      <parameter key="data_file" value="D:\wessel\Desktop\400_MIT_NUC.arff"/>
      <parameter key="label_attribute" value="class"/>
    </operator>

    <!-- OperatorEnabler pointed at the inner PrincipalComponentsGenerator, which is declared
         with activated="no"; presumably the iterated "enable" parameter toggles it (confirm
         against the operator reference). -->
    <operator name="enablePCA" class="OperatorEnabler" expanded="yes">
      <parameter key="operator_name" value="PrincipalComponentsGenerator"/>
      <operator name="PrincipalComponentsGenerator" class="PrincipalComponentsGenerator" activated="no"/>
    </operator>

    <!-- Keep the prepared ExampleSet under the name "target" for the repetitions below. -->
    <operator name="store name = target" class="IOStorer">
      <parameter key="name" value="target"/>
      <parameter key="io_object" value="ExampleSet"/>
    </operator>

    <!-- Inner loop: 20 iterations, each starting from the stored "target" set. -->
    <operator name="ITER" class="IteratingOperatorChain" expanded="yes">
      <parameter key="iterations" value="20"/>

      <!-- remove_from_store is false, so the stored set stays available for later iterations. -->
      <operator name="retrieve target" class="IORetriever">
        <parameter key="name" value="target"/>
        <parameter key="io_object" value="ExampleSet"/>
        <parameter key="remove_from_store" value="false"/>
      </operator>

      <!-- Learning curve: training_ratio 0.5, step_fraction 0.01, start_fraction 0.0,
           shuffled sampling. -->
      <operator name="split at 50%, step 1%" class="LearningCurve" expanded="yes">
        <parameter key="training_ratio" value="0.5"/>
        <parameter key="step_fraction" value="0.01"/>
        <parameter key="start_fraction" value="0.0"/>
        <parameter key="sampling_type" value="shuffled sampling"/>

        <!-- First child chain: define the macro, then run the W-J48 learner. -->
        <operator name="OperatorChain" class="OperatorChain" expanded="yes">
          <!-- Defines the macro "number_of_training_examples"; which data statistic it holds
               depends on operator defaults not visible here. -->
          <operator name="number_of_training_examples" class="DataMacroDefinition">
            <parameter key="macro" value="number_of_training_examples"/>
          </operator>
          <operator name="W-J48" class="W-J48">
            <parameter key="keep_example_set" value="true"/>
          </operator>
        </operator>

        <!-- Second child chain: apply the model and compute accuracy (operator "ACC"). -->
        <operator name="ApplierChain" class="OperatorChain" expanded="yes">
          <operator name="ModelApplier" class="ModelApplier">
            <list key="application_parameters"/>
          </operator>
          <operator name="ACC" class="ClassificationPerformance">
            <parameter key="accuracy" value="true"/>
            <parameter key="use_example_weights" value="false"/>
            <list key="class_weights"/>
          </operator>
        </operator>

        <!-- Log columns: accuracy, iteration of ITER, the macro value, and the enablePCA setting. -->
        <operator name="LOG" class="ProcessLog">
          <list key="log">
            <parameter key="ACC" value="operator.ACC.value.accuracy"/>
            <parameter key="ITER" value="operator.ITER.value.iteration"/>
            <parameter key="NUMOTE" value="operator.number_of_training_examples.value.macro_value"/>
            <parameter key="enablePCA" value="operator.enablePCA.parameter.enable"/>
          </list>
        </operator>
      </operator>
    </operator>
  </operator>

  <!-- Post-processing of the log named "LOG": convert it to an ExampleSet, then clear it
       (delete_table is true). -->
  <operator name="ProcessLog2ExampleSet" class="ProcessLog2ExampleSet">
    <parameter key="log_name" value="LOG"/>
  </operator>
  <operator name="ClearProcessLog" class="ClearProcessLog">
    <parameter key="log_name" value="LOG"/>
    <parameter key="delete_table" value="true"/>
  </operator>

  <!-- Optional export of the raw results; switched off (activated="no"). -->
  <operator name="ArffExampleSetWriter" class="ArffExampleSetWriter" activated="no">
    <parameter key="example_set_file" value="D:\wessel\Desktop\results.arff"/>
  </operator>

  <!-- Average and standard deviation of ACC, grouped by NUMOTE and enablePCA. -->
  <operator name="Aggregation" class="Aggregation">
    <list key="aggregation_attributes">
      <parameter key="ACC" value="average"/>
      <parameter key="ACC" value="standard_deviation"/>
    </list>
    <parameter key="group_by_attributes" value="NUMOTE|enablePCA"/>
    <parameter key="only_distinct" value="true"/>
  </operator>

  <!-- Pivot with NUMOTE as group attribute and enablePCA as index attribute. -->
  <operator name="Example2AttributePivoting" class="Example2AttributePivoting">
    <parameter key="keep_example_set" value="true"/>
    <parameter key="group_attribute" value="NUMOTE"/>
    <parameter key="index_attribute" value="enablePCA"/>
    <parameter key="consider_weights" value="false"/>
  </operator>

  <operator name="NominalNumbers2Numerical" class="NominalNumbers2Numerical"/>

  <!-- Replace "(" and ")" in attribute names by "_"; the expression below refers to
       attributes named average_ACC__true and average_ACC__false. -->
  <operator name="ChangeAttributeNamesReplace" class="ChangeAttributeNamesReplace">
    <parameter key="replace_what" value="\(|\)"/>
    <parameter key="replace_by" value="_"/>
  </operator>

  <!-- New attribute "tmf": average accuracy with enablePCA=true minus that with enablePCA=false. -->
  <operator name="AttributeConstruction" class="AttributeConstruction">
    <list key="function_descriptions">
      <parameter key="tmf" value="average_ACC__true - average_ACC__false"/>
    </list>
  </operator>
</operator>
Tagged:
0
Answers
-
Hi,
congratulations on your process setup. You seem to make use of all the complex operations available.
Perhaps one of the other members could help you; unfortunately, it would cost me too much time just to understand your process. Optimizing it would be consulting rather than helping beginners...
Greetings,
Sebastian0