matching datasets according to best ranked item

vabm
New Altair Community Member
Hi, I have two data sets that I need to match according to some specific value.
The first one (table 1) contains the IDs of a set of products, and the performance of these product according to the age of a user, something like:
The first one (table 1) contains the IDs of a set of products, and the performance of these product according to the age of a user, something like:
Then, the other one (table 2) has the age of a set of users, like:
item_id, user_age, item_performance
01, 45, 2
02, 23, 4
03, 23, 3
04, 45, 5
etc
What I need to do is to find the item that has the best performance for each age group, and then match them as a single dataset. Samething like:
user_id, age
001, 45
002, 23
003, 29
(etc)
I was trying to use aggregate to find the best score per age, but I am getting the best scores for a single age group... any ideas on how to find the best scores per age group, and then how to aggregate them to table 2? Thanks !!
user_id, item_id
001, 04
002, 02
003, 06
(etc)
Tagged:
0
Answers
-
I think this is hard to do at all, because there is no way to join your first and your second table if ages are not unique.
~Martin0 -
Here's something that seems to do the trick...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
regards
<process version="7.0.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.0.000" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.0.000" expanded="true" height="82" name="user data" width="90" x="112" y="34">
<process expanded="true">
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="179" y="187">
<list key="attribute_values">
<parameter key="userid" value=""001""/>
<parameter key="age" value=""45""/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (6)" width="90" x="179" y="289">
<list key="attribute_values">
<parameter key="userid" value=""002""/>
<parameter key="age" value=""23""/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (7)" width="90" x="179" y="391">
<list key="attribute_values">
<parameter key="userid" value=""003""/>
<parameter key="age" value=""29""/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="124" name="Append (2)" width="90" x="313" y="187"/>
<connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append (2)" to_port="example set 1"/>
<connect from_op="Generate Data by User Specification (6)" from_port="output" to_op="Append (2)" to_port="example set 2"/>
<connect from_op="Generate Data by User Specification (7)" from_port="output" to_op="Append (2)" to_port="example set 3"/>
<connect from_op="Append (2)" from_port="merged set" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="subprocess" compatibility="7.0.000" expanded="true" height="82" name="item data" width="90" x="112" y="289">
<process expanded="true">
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="187">
<list key="attribute_values">
<parameter key="itemid" value=""01""/>
<parameter key="age" value=""45""/>
<parameter key="perf" value="2"/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="289">
<list key="attribute_values">
<parameter key="itemid" value=""02""/>
<parameter key="age" value=""23""/>
<parameter key="perf" value="4"/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="179" y="391">
<list key="attribute_values">
<parameter key="itemid" value=""03""/>
<parameter key="age" value=""45""/>
<parameter key="perf" value="3"/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (8)" width="90" x="179" y="493">
<list key="attribute_values">
<parameter key="itemid" value=""04""/>
<parameter key="age" value=""45""/>
<parameter key="perf" value="4"/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (9)" width="90" x="179" y="595">
<list key="attribute_values">
<parameter key="itemid" value=""05""/>
<parameter key="age" value=""45""/>
<parameter key="perf" value="1"/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (10)" width="90" x="179" y="697">
<list key="attribute_values">
<parameter key="itemid" value=""06""/>
<parameter key="age" value=""29""/>
<parameter key="perf" value="1"/>
</list>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="187" name="Append" width="90" x="447" y="187"/>
<connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
<connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 2"/>
<connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 3"/>
<connect from_op="Generate Data by User Specification (8)" from_port="output" to_op="Append" to_port="example set 4"/>
<connect from_op="Generate Data by User Specification (9)" from_port="output" to_op="Append" to_port="example set 5"/>
<connect from_op="Generate Data by User Specification (10)" from_port="output" to_op="Append" to_port="example set 6"/>
<connect from_op="Append" from_port="merged set" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="multiply" compatibility="7.0.000" expanded="true" height="103" name="Multiply" width="90" x="246" y="289"/>
<operator activated="true" class="join" compatibility="7.0.000" expanded="true" height="82" name="Join" width="90" x="447" y="34">
<parameter key="join_type" value="left"/>
<parameter key="use_id_attribute_as_key" value="false"/>
<list key="key_attributes">
<parameter key="age" value="age"/>
</list>
</operator>
<operator activated="true" class="aggregate" compatibility="7.0.000" expanded="true" height="82" name="Aggregate" width="90" x="581" y="34">
<list key="aggregation_attributes">
<parameter key="perf" value="maximum"/>
</list>
<parameter key="group_by_attributes" value="userid|age"/>
</operator>
<operator activated="true" class="rename" compatibility="7.0.000" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
<parameter key="old_name" value="maximum(perf)"/>
<parameter key="new_name" value="perf"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="join" compatibility="7.0.000" expanded="true" height="82" name="Join (2)" width="90" x="916" y="289">
<parameter key="use_id_attribute_as_key" value="false"/>
<list key="key_attributes">
<parameter key="age" value="age"/>
<parameter key="perf" value="perf"/>
</list>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="1050" y="289">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="itemid|userid"/>
</operator>
<connect from_op="user data" from_port="out 1" to_op="Join" to_port="left"/>
<connect from_op="item data" from_port="out 1" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Join" to_port="right"/>
<connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
<connect from_op="Join" from_port="join" to_op="Aggregate" to_port="example set input"/>
<connect from_op="Aggregate" from_port="example set output" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Join (2)" to_port="left"/>
<connect from_op="Join (2)" from_port="join" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Andrew0