Multiply a record based on a field
alejandro_tobon
New Altair Community Member
I have table1 with 2 columns (FruitName, Qty); see the sample data below.
FruitName Qty
Apple 4
Orange 2
I need a way to convert the above data (FruitName, Qty) so that each row is repeated as many times as its Qty value.
In the case of Apple, I need to get 4 records as a result, and 2 for Orange.
FruitName Qty
Apple 1
Apple 1
Apple 1
Apple 1
Orange 1
Orange 1
Thanks.
FruitName Qty
Apple 4
Orange 2
I need a way to convert the above data (FruitName, Qty) so that each row is repeated as many times as its Qty value.
In the case of Apple, I need to get 4 records as a result, and 2 for Orange.
FruitName Qty
Apple 1
Apple 1
Apple 1
Apple 1
Orange 1
Orange 1
Thanks.
Tagged:
0
Answers
-
Hi,
I can't think of any operator that does this "de-aggregation" out of the box. So, as always when RapidMiner lacks a certain functionality, you have two possibilities:
Build a rather complex process, or extend RapidMiner. The extension has the advantage that native code will of course run much faster, but you need programming skills or the money to hire us to add this operator.
The process can be built entirely inside RapidMiner, and by using macros and similar features you can program right within your process. I have built a small example process that solves this kind of problem. I hope you can adapt it to your own problem:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
Greetings,
<process version="5.0">
<!--
  Example RapidMiner process that "de-aggregates" an example set: every example
  is appended to a stored example set (named "Result") as many times as its Qty
  value says, so a row with Qty = 4 ends up as 4 rows in "Result".
  NOTE(review): the copied rows keep their original Qty value; no operator here
  resets Qty to 1. Add such a step if the output must show Qty = 1 per row.
-->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<process expanded="true" height="444" width="828">
<!-- Demo input only: generated data stands in for the real table. The lower
     bound of 0.0 presumably keeps the later quantity non-negative (confirm). -->
<operator activated="true" class="generate_data" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
<parameter key="attributes_lower_bound" value="0.0"/>
</operator>
<!-- Turn the real-valued attribute att1 into an integer so it can serve as a quantity. -->
<operator activated="true" class="real_to_integer" expanded="true" height="76" name="generate quantity" width="90" x="179" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="att1"/>
</operator>
<!-- Give the quantity attribute the name used by the rest of the process. -->
<operator activated="true" class="rename" expanded="true" height="76" name="Rename" width="90" x="313" y="30">
<parameter key="old_name" value="att1"/>
<parameter key="new_name" value="Qty"/>
</operator>
<!-- Seeds the "Result" store with an empty example set that has the same
     attributes as the input, and passes the unchanged input on through out 1. -->
<operator activated="true" class="subprocess" expanded="true" height="76" name="remember empty copy" width="90" x="447" y="30">
<process expanded="true" height="444" width="828">
<!-- Two copies: output 1 becomes the empty seed, output 2 leaves the subprocess. -->
<operator activated="true" class="multiply" expanded="true" height="94" name="Multiply" width="90" x="45" y="165"/>
<!-- The condition Qty=-1 is meant to match no example (presumably guaranteed by
     the 0.0 lower bound above), so only the empty structure remains. -->
<operator activated="true" class="filter_examples" expanded="true" height="76" name="Mustn't match any" width="90" x="179" y="300">
<parameter key="condition_class" value="attribute_value_filter"/>
<parameter key="parameter_string" value="Qty=-1"/>
</operator>
<!-- Store the empty set under the name "Result"; the loop below grows it. -->
<operator activated="true" class="remember" expanded="true" height="60" name="Remember" width="90" x="313" y="300">
<parameter key="name" value="Result"/>
<parameter key="io_object" value="ExampleSet"/>
</operator>
<connect from_port="in 1" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Mustn't match any" to_port="example set input"/>
<connect from_op="Multiply" from_port="output 2" to_port="out 1"/>
<connect from_op="Mustn't match any" from_port="example set output" to_op="Remember" to_port="store"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Visits the examples one by one. The macro %{example} is used below as the
     index of the current example; it is not set in this file, so it is
     presumably the default iteration macro of Loop Examples (confirm). -->
<operator activated="true" class="loop_examples" expanded="true" height="76" name="Loop Examples" width="90" x="581" y="30">
<process expanded="true" height="444" width="828">
<!-- Read the Qty value of the current example into the macro QuantityDouble. -->
<operator activated="true" class="extract_macro" expanded="true" height="60" name="Extract Macro" width="90" x="112" y="165">
<parameter key="macro" value="QuantityDouble"/>
<parameter key="macro_type" value="data_value"/>
<parameter key="attribute_name" value="Qty"/>
<parameter key="example_index" value="%{example}"/>
</operator>
<!-- Derive the macro Quantity by rounding QuantityDouble; it is used as the
     iteration count of the inner Loop. -->
<operator activated="true" class="generate_macro" expanded="true" height="76" name="Generate Macro" width="90" x="246" y="165">
<list key="function_descriptions">
<parameter key="Quantity" value="round(%{QuantityDouble})"/>
</list>
</operator>
<!-- Cut the example set down to just the current example (first = last = %{example}). -->
<operator activated="true" class="filter_example_range" expanded="true" height="76" name="Filter Example Range" width="90" x="246" y="30">
<parameter key="first_example" value="%{example}"/>
<parameter key="last_example" value="%{example}"/>
</operator>
<!-- Runs %{Quantity} times; each pass appends the current single-example set to
     the stored "Result" set and stores the merged set again. -->
<operator activated="true" class="loop" expanded="true" height="76" name="Loop" width="90" x="380" y="30">
<parameter key="iterations" value="%{Quantity}"/>
<process expanded="true" height="444" width="828">
<!-- Fetch the rows accumulated so far. -->
<operator activated="true" class="recall" expanded="true" height="60" name="Recall" width="90" x="112" y="120">
<parameter key="name" value="Result"/>
<parameter key="io_object" value="ExampleSet"/>
</operator>
<!-- Merge the current example (example set 1) with the accumulated rows (example set 2). -->
<operator activated="true" class="append" expanded="true" height="94" name="Append" width="90" x="313" y="30"/>
<!-- Write the grown set back under the same name for the next pass. -->
<operator activated="true" class="remember" expanded="true" height="60" name="Remember (2)" width="90" x="525" y="131">
<parameter key="name" value="Result"/>
<parameter key="io_object" value="ExampleSet"/>
</operator>
<connect from_port="input 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Recall" from_port="result" to_op="Append" to_port="example set 2"/>
<connect from_op="Append" from_port="merged set" to_op="Remember (2)" to_port="store"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_op="Generate Macro" to_port="through 1"/>
<connect from_op="Generate Macro" from_port="through 1" to_op="Filter Example Range" to_port="example set input"/>
<connect from_op="Filter Example Range" from_port="example set output" to_op="Loop" to_port="input 1"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_example set" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<!-- Fetch the final multiplied set and deliver it on result 2.
     NOTE(review): this operator has no connection from Loop Examples, so it
     presumably relies on the operators running in the order listed here;
     confirm before reordering anything. -->
<operator activated="true" class="recall" expanded="true" height="60" name="Recall (2)" width="90" x="581" y="120">
<parameter key="name" value="Result"/>
<parameter key="io_object" value="ExampleSet"/>
</operator>
<connect from_op="Generate Data" from_port="output" to_op="generate quantity" to_port="example set input"/>
<connect from_op="generate quantity" from_port="example set output" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="remember empty copy" to_port="in 1"/>
<connect from_op="remember empty copy" from_port="out 1" to_op="Loop Examples" to_port="example set"/>
<connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
<connect from_op="Recall (2)" from_port="result" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
Sebastian
0
-
Thank you very much, you helped me a lot.
0