Altair RISE
A program to recognize and reward our most engaged community members
Nominate Yourself Now!
Home
Discussions
Community Q&A
Best way to combine multiple Featureset Objects into one?
pblack476
I would like to combine 5 FeatureSets into a single one. Ideally I would like all "5 factorial (!)" combinations of sets possible. But for now a single bundle will do.
How does one would go about this on RM?
Find more posts tagged with
AI Studio
Accepted answers
varunm1
Here is the working sample of how to apply the model using loop attribute subset. "Log" has performance.
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Titanic Training" width="90" x="313" y="136">
<parameter key="repository_entry" value="//Samples/data/Titanic Training"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="9.5.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Age"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="loop_attribute_subsets" compatibility="9.5.001" expanded="true" height="68" name="Loop Subsets" origin="GENERATED_TUTORIAL" width="90" x="581" y="136">
<parameter key="use_exact_number" value="false"/>
<parameter key="exact_number_of_attributes" value="-1"/>
<parameter key="min_number_of_attributes" value="1"/>
<parameter key="limit_max_number" value="false"/>
<parameter key="max_number_of_attributes" value="-1"/>
<process expanded="true">
<operator activated="true" class="concurrency:cross_validation" compatibility="9.5.001" expanded="true" height="145" name="Cross Validation" width="90" x="112" y="85">
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="5"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.5.001" expanded="true" height="103" name="Decision Tree" width="90" x="112" y="34">
<parameter key="criterion" value="gain_ratio"/>
<parameter key="maximal_depth" value="10"/>
<parameter key="apply_pruning" value="true"/>
<parameter key="confidence" value="0.1"/>
<parameter key="apply_prepruning" value="true"/>
<parameter key="minimal_gain" value="0.01"/>
<parameter key="minimal_leaf_size" value="2"/>
<parameter key="minimal_size_for_split" value="4"/>
<parameter key="number_of_prepruning_alternatives" value="3"/>
</operator>
<connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Decision Tree" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="9.5.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance_classification" compatibility="9.5.001" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="true"/>
<parameter key="weighted_mean_recall" value="false"/>
<parameter key="weighted_mean_precision" value="false"/>
<parameter key="spearman_rho" value="false"/>
<parameter key="kendall_tau" value="false"/>
<parameter key="absolute_error" value="false"/>
<parameter key="relative_error" value="false"/>
<parameter key="relative_error_lenient" value="false"/>
<parameter key="relative_error_strict" value="false"/>
<parameter key="normalized_absolute_error" value="false"/>
<parameter key="root_mean_squared_error" value="false"/>
<parameter key="root_relative_squared_error" value="false"/>
<parameter key="squared_error" value="false"/>
<parameter key="correlation" value="false"/>
<parameter key="squared_correlation" value="false"/>
<parameter key="cross-entropy" value="false"/>
<parameter key="margin" value="false"/>
<parameter key="soft_margin_loss" value="false"/>
<parameter key="logistic_loss" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
<list key="class_weights"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="performance 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="log" compatibility="9.5.001" expanded="true" height="82" name="Log" origin="GENERATED_TUTORIAL" width="90" x="447" y="85">
<list key="log">
<parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
<parameter key="Performance_Accuracy" value="operator.Cross Validation.value.performance 1"/>
<parameter key="Performance_Kappa" value="operator.Cross Validation.value.performance 2"/>
</list>
<parameter key="sorting_type" value="none"/>
<parameter key="sorting_k" value="100"/>
<parameter key="persistent" value="false"/>
</operator>
<connect from_port="example set" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Cross Validation" from_port="performance 1" to_op="Log" to_port="through 1"/>
<portSpacing port="source_example set" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Titanic Training" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
<connect from_op="Loop Subsets" from_port="example set" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="90"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Hope this helps
All comments
lionelderkrikor
Hi
@pblack476
,
I don't know how to create the "5 factorial" combinaisons of sets possible automatically. But to combine 5 featureSets , have you tried to use the Apply Feature Set operator .. ??
Regards,
Lionel
varunm1
I would like all "5 factorial (!)" combinations of sets possible.
Can you explain this requirement? I am a bit confused about "5 factorial" are you looking for 120 combinations?
pblack476
@varunm1
Yes. that is what I would like. All 120 possible combinations of 5 featuresets without repetition. (I dont need all possible combinations of attributes, Just featuresets)
@lionelderkrikor
The problem with that approach is that it cannot add multiple featuresets at once. I have to loop through them somehow (probably a collection since there is no Loop FeatureSet operator) and use 'keep originals' option. But I am running into trouble there as well because 'keep original' brings back every attribute. Even the ones I had previously filtered with select attributes before, and I don't want that. I want to have an exampleset with the combination of 5 featuresets and just those 5. This really seems like a bug to me but for now that is where I am at.
The best I can do so far is use 'keep originals' with 'originals special role' to make RM ignore them on further processing. And that bugs if I use inside a loop collection operator.
varunm1
Yes. that is what I would like. All 120 possible combinations of 5 feature sets
without repetition.
So you are looking for 31 combinations as you said they shouldn't repeat. This is a tricky and nice challenge.
120 combinations will have repeats like column A, Column B in one set and Column B, Column A in another (order changes).
varunm1
@pblack476
Did you try "Loop Attribute Subsets". This is doing what you are asking I guess
pblack476
I got a part of it!
I Merge Attributes after the loop. No need to keep orignals. It merges all ExampleSets in the collection into one. And My collection is nothing more than 5 ExampleSets of the applied featureSets one by one.
Now I will try to figure out how to get all 120 combinations....
pblack476
@varunm1
True. I had my math wrong there. I will try to use loop attribute subsets and check back.
varunm1
Here is the working sample of how to apply the model using loop attribute subset. "Log" has performance.
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Titanic Training" width="90" x="313" y="136">
<parameter key="repository_entry" value="//Samples/data/Titanic Training"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="9.5.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Age"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="loop_attribute_subsets" compatibility="9.5.001" expanded="true" height="68" name="Loop Subsets" origin="GENERATED_TUTORIAL" width="90" x="581" y="136">
<parameter key="use_exact_number" value="false"/>
<parameter key="exact_number_of_attributes" value="-1"/>
<parameter key="min_number_of_attributes" value="1"/>
<parameter key="limit_max_number" value="false"/>
<parameter key="max_number_of_attributes" value="-1"/>
<process expanded="true">
<operator activated="true" class="concurrency:cross_validation" compatibility="9.5.001" expanded="true" height="145" name="Cross Validation" width="90" x="112" y="85">
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="5"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.5.001" expanded="true" height="103" name="Decision Tree" width="90" x="112" y="34">
<parameter key="criterion" value="gain_ratio"/>
<parameter key="maximal_depth" value="10"/>
<parameter key="apply_pruning" value="true"/>
<parameter key="confidence" value="0.1"/>
<parameter key="apply_prepruning" value="true"/>
<parameter key="minimal_gain" value="0.01"/>
<parameter key="minimal_leaf_size" value="2"/>
<parameter key="minimal_size_for_split" value="4"/>
<parameter key="number_of_prepruning_alternatives" value="3"/>
</operator>
<connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Decision Tree" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="9.5.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance_classification" compatibility="9.5.001" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="true"/>
<parameter key="weighted_mean_recall" value="false"/>
<parameter key="weighted_mean_precision" value="false"/>
<parameter key="spearman_rho" value="false"/>
<parameter key="kendall_tau" value="false"/>
<parameter key="absolute_error" value="false"/>
<parameter key="relative_error" value="false"/>
<parameter key="relative_error_lenient" value="false"/>
<parameter key="relative_error_strict" value="false"/>
<parameter key="normalized_absolute_error" value="false"/>
<parameter key="root_mean_squared_error" value="false"/>
<parameter key="root_relative_squared_error" value="false"/>
<parameter key="squared_error" value="false"/>
<parameter key="correlation" value="false"/>
<parameter key="squared_correlation" value="false"/>
<parameter key="cross-entropy" value="false"/>
<parameter key="margin" value="false"/>
<parameter key="soft_margin_loss" value="false"/>
<parameter key="logistic_loss" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
<list key="class_weights"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="performance 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="log" compatibility="9.5.001" expanded="true" height="82" name="Log" origin="GENERATED_TUTORIAL" width="90" x="447" y="85">
<list key="log">
<parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
<parameter key="Performance_Accuracy" value="operator.Cross Validation.value.performance 1"/>
<parameter key="Performance_Kappa" value="operator.Cross Validation.value.performance 2"/>
</list>
<parameter key="sorting_type" value="none"/>
<parameter key="sorting_k" value="100"/>
<parameter key="persistent" value="false"/>
</operator>
<connect from_port="example set" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Cross Validation" from_port="performance 1" to_op="Log" to_port="through 1"/>
<portSpacing port="source_example set" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Titanic Training" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
<connect from_op="Loop Subsets" from_port="example set" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="90"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Hope this helps
Quick Links
All Categories
Recent Discussions
Activity
Unanswered
日本語 (Japanese)
한국어(Korean)
Groups