Combining Example Set Attributes

dman99
New Altair Community Member
Looking for a way to combine attributes from a variable number of ExampleSets into a single ExampleSet. Given that I'm running a loop to create each ExampleSet, I end up with an IOObjectCollection on the output. I've tried adding a join within the loop, but it is limited to knowing the first ExampleSet before combining the others. Is there a method using loops or set operators to get to the following:
ExampleSet 1:
Row# Test1
ExampleSet 2:
Row# Test2
ExampleSet 3:
Row# Test3
Resulting ExampleSet:
Row# Test1 Test2 Test3
ExampleSet 1:
Row# Test1
ExampleSet 2:
Row# Test2
ExampleSet 3:
Row# Test3
Resulting ExampleSet:
Row# Test1 Test2 Test3
Thanks for the help. Dan.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example process from the question: builds a list of attribute names,
  loops over them, and emits one generated ExampleSet per name. The loop
  output is delivered on "result 1" as a collection of ExampleSets.

  Fixes relative to the posted version:
    * "Documents to Data" now names its text attribute "DataSet", matching
      the attribute that "Split" is configured to work on (was "DateSet").
    * The three names in "Create Document" are separated by &#10; character
      references. A literal line break inside an attribute value is
      normalised to a space by XML parsers, so the "\n" split pattern could
      never match the space-separated text.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.11" expanded="true" name="Process">
    <process expanded="true" height="690" width="882">
      <!-- One document holding the target attribute names, one per line. -->
      <operator activated="true" class="text:create_document" compatibility="5.0.2" expanded="true" height="60" name="Create Document" width="90" x="45" y="75">
        <parameter key="text" value="Test1&#10;Test2&#10;Test3"/>
      </operator>
      <!-- Turns the document into an ExampleSet; the text lands in "DataSet". -->
      <operator activated="true" class="text:documents_to_data" compatibility="5.0.2" expanded="true" height="76" name="Documents to Data" width="90" x="179" y="75">
        <parameter key="text_attribute" value="DataSet"/>
      </operator>
      <!-- Splits "DataSet" at line breaks. -->
      <operator activated="true" class="split" compatibility="5.0.11" expanded="true" height="76" name="Split" width="90" x="313" y="75">
        <parameter key="attribute" value="DataSet"/>
        <parameter key="split_pattern" value="\n"/>
      </operator>
      <!-- NOTE(review): presumably the transpose turns the split columns into rows of "att_1"; confirm. -->
      <operator activated="true" class="transpose" compatibility="5.0.11" expanded="true" height="76" name="Transpose" width="90" x="447" y="75"/>
      <!-- Runs the inner process once per value of "att_1". -->
      <operator activated="true" class="loop_values" compatibility="5.0.11" expanded="true" height="76" name="Loop Values" width="90" x="581" y="75">
        <parameter key="attribute" value="att_1"/>
        <process expanded="true" height="708" width="922">
          <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="number_of_attributes" value="1"/>
          </operator>
          <!-- Names the single generated attribute after the current loop value. -->
          <operator activated="true" class="rename" compatibility="5.0.11" expanded="true" height="76" name="Rename" width="90" x="179" y="30">
            <parameter key="old_name" value="att1"/>
            <parameter key="new_name" value="%{loop_value}"/>
          </operator>
          <!--
            Selects everything except "label" (inverted single-attribute
            filter) and keeps only that subset; the inner process is a plain
            pass-through. Presumably used here just to drop the label column.
          -->
          <operator activated="true" class="work_on_subset" compatibility="5.0.11" expanded="true" height="76" name="Work on Subset" width="90" x="313" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="keep_subset_only" value="true"/>
            <process expanded="true" height="708" width="922">
              <connect from_port="exampleSet" to_port="example set"/>
              <portSpacing port="source_exampleSet" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Work on Subset" to_port="example set"/>
          <connect from_op="Work on Subset" from_port="example set" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create Document" from_port="output" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_op="Split" to_port="example set input"/>
      <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
      <connect from_op="Transpose" from_port="example set output" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
-
Hi there dman,
I think that you'll need some unifying ID to match the rows up, and a pivot to split out the test values by result set, like the process below, which I hope is the sort of thing you were after. A point to note is the regex replacement, which appears to repeat; I am most reluctant to even mutter the dreaded '*ug' word, but...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Answer process: generates N ExampleSets in a loop, tags every row with
  the set it came from, appends all sets into one long table, and pivots
  that table so each source set becomes its own column keyed by row id.

  Fix relative to the posted version: "Set RSet" used the pattern ".*",
  which matches the whole value and then the empty string at its end, so
  the replacement was inserted twice. The pattern is now ".+", which
  matches each (non-empty) label value exactly once.
-->
<process version="5.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.003" expanded="true" name="Process">
    <process expanded="true" height="400" width="701">
      <!-- Runs the inner process three times; raise "iterations" for more sets. -->
      <operator activated="true" class="loop" compatibility="5.1.003" expanded="true" height="76" name="Generate N Sets" width="90" x="112" y="120">
        <parameter key="iterations" value="3"/>
        <process expanded="true" height="418" width="710">
          <operator activated="true" class="generate_data" compatibility="5.1.003" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="target_function" value="multi classification"/>
            <parameter key="number_of_attributes" value="4"/>
          </operator>
          <!-- Keeps the single regular attribute "att1". -->
          <operator activated="true" class="select_attributes" compatibility="5.1.003" expanded="true" height="76" name="Just 1 Attribute" width="90" x="45" y="120">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="att1"/>
          </operator>
          <!-- Gives the value column the same name in every set so the sets can be appended. -->
          <operator activated="true" class="rename" compatibility="5.1.003" expanded="true" height="76" name="Rename" width="90" x="179" y="120">
            <parameter key="old_name" value="att1"/>
            <parameter key="new_name" value="Result_set"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- The unifying row id used later to line rows up across sets. -->
          <operator activated="true" class="generate_id" compatibility="5.1.003" expanded="true" height="76" name="Generate ID" width="90" x="246" y="30">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <!-- Sets "label" to the regular role (and "id" as well, since no target_role is given here; presumably it defaults to regular). -->
          <operator activated="true" class="set_role" compatibility="5.1.003" expanded="true" height="76" name="Change label Role" width="90" x="380" y="30">
            <parameter key="name" value="id"/>
            <list key="set_additional_roles">
              <parameter key="label" value="regular"/>
            </list>
          </operator>
          <!--
            Overwrites every label value with %{a}.
            NOTE(review): %{a} presumably expands to the number of times this
            operator has been applied, i.e. the iteration number; confirm.
          -->
          <operator activated="true" class="replace" compatibility="5.1.003" expanded="true" height="76" name="Set RSet" width="90" x="514" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="replace_what" value=".+"/>
            <parameter key="replace_by" value="%{a}"/>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Just 1 Attribute" to_port="example set input"/>
          <connect from_op="Just 1 Attribute" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Change label Role" to_port="example set input"/>
          <connect from_op="Change label Role" from_port="example set output" to_op="Set RSet" to_port="example set input"/>
          <connect from_op="Set RSet" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stacks the loop's output sets into one long ExampleSet. -->
      <operator activated="true" class="append" compatibility="5.1.003" expanded="true" height="76" name="Join 'em up" width="90" x="246" y="120"/>
      <!-- Groups by "id" and indexes by "label" to spread the sets out as columns. -->
      <operator activated="true" class="pivot" compatibility="5.1.003" expanded="true" height="76" name="As Columns" width="90" x="380" y="120">
        <parameter key="group_attribute" value="id"/>
        <parameter key="index_attribute" value="label"/>
        <parameter key="consider_weights" value="false"/>
      </operator>
      <!-- Restores the id role on the "id" column of the pivoted table. -->
      <operator activated="true" class="set_role" compatibility="5.1.003" expanded="true" height="76" name="with Row IDs" width="90" x="514" y="120">
        <parameter key="name" value="id"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <connect from_op="Generate N Sets" from_port="output 1" to_op="Join 'em up" to_port="example set 1"/>
      <connect from_op="Join 'em up" from_port="merged set" to_op="As Columns" to_port="example set input"/>
      <connect from_op="As Columns" from_port="example set output" to_op="with Row IDs" to_port="example set input"/>
      <connect from_op="with Row IDs" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0 -
I have a problem like this and I don't know how to solve it yet.
0