How to convert a database dataset to item set dataset

soheil008
soheil008 New Altair Community Member
edited November 5 in Community Q&A

Hi

 

I want to convert my database to item set form. For example assume table:

Tid      Item

 1           1

 1           2

 2           1

 2           3

 

I want it changes to:

 

Tid  Item1    Item2    Item3

 1     true       true      false

 2     true       false     true

 

How can I do that in rapidminer studio?

Best Answer

  • MartinLiebig
    MartinLiebig
    Altair Employee
    Answer ✓

    Hey,

     

    attached is a process solving it.

     

    ~Martin

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.4.000">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
    <process expanded="true">
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
    <list key="attribute_values">
    <parameter key="Tid" value="1"/>
    <parameter key="id" value="1"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
    <list key="attribute_values">
    <parameter key="Tid" value="1"/>
    <parameter key="id" value="2"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
    <list key="attribute_values">
    <parameter key="Tid" value="2"/>
    <parameter key="id" value="1"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="340">
    <list key="attribute_values">
    <parameter key="Tid" value="2"/>
    <parameter key="id" value="3"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="append" compatibility="7.4.000" expanded="true" height="145" name="Append" width="90" x="246" y="136"/>
    <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 4"/>
    <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 3"/>
    <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 2"/>
    <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 1"/>
    <connect from_op="Append" from_port="merged set" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="7.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="179" y="34">
    <list key="function_descriptions">
    <parameter key="value" value="1"/>
    </list>
    <description align="center" color="transparent" colored="false" width="126">Create a constant for pivot</description>
    </operator>
    <operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="313" y="34">
    <parameter key="group_attribute" value="Tid"/>
    <parameter key="index_attribute" value="id"/>
    <parameter key="skip_constant_attributes" value="false"/>
    </operator>
    <operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="447" y="34">
    <parameter key="default" value="zero"/>
    <list key="columns"/>
    </operator>
    <operator activated="true" class="numerical_to_binominal" compatibility="7.4.000" expanded="true" height="82" name="Numerical to Binominal" width="90" x="581" y="34"/>
    <connect from_op="Subprocess" from_port="out 1" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="example set input"/>
    <connect from_op="Pivot" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
    <connect from_op="Replace Missing Values" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
    <connect from_op="Numerical to Binominal" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>

Answers

  • MartinLiebig
    MartinLiebig
    Altair Employee

    Hey,

     

    please have a look at the tutorial process of FP-Growth. It's done there. Should be Pivot + Numerical to Binominal.

     

    I recommend Beta mode for this.

     

    ~Martin

  • soheil008
    soheil008 New Altair Community Member
    @mschmitz wrote:

    Hey,

     

    please have a look at the tutorial process of FP-Growth. It's done there. Should be Pivot + Numerical to Binominal.

     

    I recommend Beta mode for this.

     

    ~Martin


    I tried that but no ideal answer! can you give me more detail, pls

  • MartinLiebig
    MartinLiebig
    Altair Employee
    Answer ✓

    Hey,

     

    attached is a process solving it.

     

    ~Martin

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.4.000">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
    <process expanded="true">
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
    <list key="attribute_values">
    <parameter key="Tid" value="1"/>
    <parameter key="id" value="1"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
    <list key="attribute_values">
    <parameter key="Tid" value="1"/>
    <parameter key="id" value="2"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
    <list key="attribute_values">
    <parameter key="Tid" value="2"/>
    <parameter key="id" value="1"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="340">
    <list key="attribute_values">
    <parameter key="Tid" value="2"/>
    <parameter key="id" value="3"/>
    </list>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="append" compatibility="7.4.000" expanded="true" height="145" name="Append" width="90" x="246" y="136"/>
    <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 4"/>
    <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 3"/>
    <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 2"/>
    <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 1"/>
    <connect from_op="Append" from_port="merged set" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="7.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="179" y="34">
    <list key="function_descriptions">
    <parameter key="value" value="1"/>
    </list>
    <description align="center" color="transparent" colored="false" width="126">Create a constant for pivot</description>
    </operator>
    <operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="313" y="34">
    <parameter key="group_attribute" value="Tid"/>
    <parameter key="index_attribute" value="id"/>
    <parameter key="skip_constant_attributes" value="false"/>
    </operator>
    <operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="447" y="34">
    <parameter key="default" value="zero"/>
    <list key="columns"/>
    </operator>
    <operator activated="true" class="numerical_to_binominal" compatibility="7.4.000" expanded="true" height="82" name="Numerical to Binominal" width="90" x="581" y="34"/>
    <connect from_op="Subprocess" from_port="out 1" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="example set input"/>
    <connect from_op="Pivot" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
    <connect from_op="Replace Missing Values" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
    <connect from_op="Numerical to Binominal" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>