Create chart for binominal attributes
Greetings
I have a grocery dataset that shows which items each transaction contains, in a binary format (I have attached the image to clarify). I want to make a chart to see, for example, the top 10 most-bought items; a histogram or anything else. But I can't: for each chart, it shows how many falses and trues there are for each item, and I don't know how to make it.
Thanks in advance
Best Answer
-
Hi @AliMajed,
I think that it is not possible to build your chart directly.
You have to pre-calculate the sum for each product in your process.
Here is a possible solution:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: for every attribute except TID, compute its sum,
  then combine the per-attribute results into a single example set that
  can be plotted as a bar chart.
-->
<process version="8.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Step 1: load the transaction table from the Excel workbook. -->
      <operator activated="true" class="read_excel" compatibility="8.1.003" expanded="true" height="68" name="Read Excel" width="90" x="112" y="34">
        <!-- NOTE: absolute path from the original author's machine; point it at your own copy of the workbook. -->
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Binominal_Charts\Binominal_Charts.xlsx"/>
        <parameter key="imported_cell_range" value="A1:I15"/>
        <!-- Attribute names come from the row annotated "Name" below, not from first_row_as_names. -->
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <!-- Every column, including the product flags, is imported as an integer attribute. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="TID.true.integer.attribute"/>
          <parameter key="1" value="Milk.true.integer.attribute"/>
          <parameter key="2" value="yogurt.true.integer.attribute"/>
          <parameter key="3" value="Cream.true.integer.attribute"/>
          <parameter key="4" value="Ice cream.true.integer.attribute"/>
          <parameter key="5" value="Curd.true.integer.attribute"/>
          <parameter key="6" value="Cheese.true.integer.attribute"/>
          <parameter key="7" value="Butter.true.integer.attribute"/>
          <parameter key="8" value="Eggs.true.integer.attribute"/>
        </list>
      </operator>
      <!-- Step 2: loop over all attributes except TID (single filter on TID, selection inverted). -->
      <operator activated="true" class="concurrency:loop_attributes" compatibility="8.1.003" expanded="true" height="82" name="Loop Attributes" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="TID"/>
        <parameter key="invert_selection" value="true"/>
        <process expanded="true">
          <!-- Sum the attribute of the current iteration, referenced through the loop_attribute macro. -->
          <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="313" y="34">
            <list key="aggregation_attributes">
              <parameter key="%{loop_attribute}" value="sum"/>
            </list>
          </operator>
          <!-- Transpose the aggregate before handing it back to the loop output. -->
          <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose" width="90" x="447" y="34"/>
          <connect from_port="input 1" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 3: merge the loop's per-attribute outputs into one example set. -->
      <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="447" y="34"/>
      <!-- Step 4: rename att_1 to a descriptive label for the chart. -->
      <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="581" y="34">
        <parameter key="old_name" value="att_1"/>
        <parameter key="new_name" value="Sum (grocery products)"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Wiring: Read Excel -> Loop Attributes -> Append -> Rename -> result 1. -->
      <connect from_op="Read Excel" from_port="output" to_op="Loop Attributes" to_port="input 1"/>
      <connect from_op="Loop Attributes" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
After running this process, you obtain an example set with the sum for each product:
Then you can represent this data on a bar chart:
Does this process meet your needs?
Regards,
Lionel
NB: The link to the Excel file used in this process (created from the screenshot you shared):
https://drive.google.com/open?id=1iIwBaWIiPt24McNXov_13dc8DmG5nzTC
2
Answers
-
Hi @AliMajed,
I think that it is not possible to build your chart directly.
You have to pre-calculate the sum for each product in your process.
Here is a possible solution:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: for every attribute except TID, compute its sum,
  then combine the per-attribute results into a single example set that
  can be plotted as a bar chart.
-->
<process version="8.1.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Step 1: load the transaction table from the Excel workbook. -->
      <operator activated="true" class="read_excel" compatibility="8.1.003" expanded="true" height="68" name="Read Excel" width="90" x="112" y="34">
        <!-- NOTE: absolute path from the original author's machine; point it at your own copy of the workbook. -->
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Binominal_Charts\Binominal_Charts.xlsx"/>
        <parameter key="imported_cell_range" value="A1:I15"/>
        <!-- Attribute names come from the row annotated "Name" below, not from first_row_as_names. -->
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <!-- Every column, including the product flags, is imported as an integer attribute. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="TID.true.integer.attribute"/>
          <parameter key="1" value="Milk.true.integer.attribute"/>
          <parameter key="2" value="yogurt.true.integer.attribute"/>
          <parameter key="3" value="Cream.true.integer.attribute"/>
          <parameter key="4" value="Ice cream.true.integer.attribute"/>
          <parameter key="5" value="Curd.true.integer.attribute"/>
          <parameter key="6" value="Cheese.true.integer.attribute"/>
          <parameter key="7" value="Butter.true.integer.attribute"/>
          <parameter key="8" value="Eggs.true.integer.attribute"/>
        </list>
      </operator>
      <!-- Step 2: loop over all attributes except TID (single filter on TID, selection inverted). -->
      <operator activated="true" class="concurrency:loop_attributes" compatibility="8.1.003" expanded="true" height="82" name="Loop Attributes" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="TID"/>
        <parameter key="invert_selection" value="true"/>
        <process expanded="true">
          <!-- Sum the attribute of the current iteration, referenced through the loop_attribute macro. -->
          <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="313" y="34">
            <list key="aggregation_attributes">
              <parameter key="%{loop_attribute}" value="sum"/>
            </list>
          </operator>
          <!-- Transpose the aggregate before handing it back to the loop output. -->
          <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose" width="90" x="447" y="34"/>
          <connect from_port="input 1" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 3: merge the loop's per-attribute outputs into one example set. -->
      <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="447" y="34"/>
      <!-- Step 4: rename att_1 to a descriptive label for the chart. -->
      <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="581" y="34">
        <parameter key="old_name" value="att_1"/>
        <parameter key="new_name" value="Sum (grocery products)"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Wiring: Read Excel -> Loop Attributes -> Append -> Rename -> result 1. -->
      <connect from_op="Read Excel" from_port="output" to_op="Loop Attributes" to_port="input 1"/>
      <connect from_op="Loop Attributes" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
After running this process, you obtain an example set with the sum for each product:
Then you can represent this data on a bar chart:
Does this process meet your needs?
Regards,
Lionel
NB: The link to the Excel file used in this process (created from the screenshot you shared):
https://drive.google.com/open?id=1iIwBaWIiPt24McNXov_13dc8DmG5nzTC
2