Hi guys,
I am quite new to RapidMiner, and here is my "problem":
I want to build a process for a CSV file with 2 columns (Desc - the description, and Bebidas - 0 or 1). I want to predict from the description whether a product is a beverage ("bebida" in Portuguese). This is how far I have gotten:
My process
After this transformation I feed the result into a Random Forest algorithm, but somehow I am not able to tell it which column is the prediction column; I also tried Naive Bayes. The choice of algorithm itself isn't the issue: after processing the documents I would like a way to transform the result back into data so that I can use it for the prediction. Can someone help me do this the right way? I'm kind of stuck. Thanks in advance.
The XML of my process follows below:
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<!-- Process context: no input, output or macro entries are defined here. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<process expanded="true">
<!-- Loads the data set stored at the relative repository entry "../../Workbooks/Bebidas_100". -->
<operator
    activated="true"
    class="retrieve"
    compatibility="8.0.001"
    expanded="true"
    height="68"
    name="Retrieve Bebidas_100"
    width="90"
    x="45"
    y="34">
  <parameter key="repository_entry" value="../../Workbooks/Bebidas_100"/>
</operator>
<!-- Derives two attributes from the retrieved data:
       "Description" = lower(Desc)
       "É Bebida"    = "Não" when Bebida equals 0, otherwise "Sim"
     The string literals inside the second expression must be written with the
     quote entity: a raw double quote would end the attribute value early and
     make the document not well-formed. -->
<operator
    activated="true"
    class="generate_attributes"
    compatibility="8.0.001"
    expanded="true"
    height="82"
    name="Generate Attributes"
    width="90"
    x="179"
    y="85">
  <list key="function_descriptions">
    <parameter key="Description" value="lower(Desc)"/>
    <parameter key="É Bebida" value="if(Bebida==0,&quot;Não&quot;,&quot;Sim&quot;)"/>
  </list>
</operator>
<!-- Keeps only the examples whose "Bebida" value is not missing
     (single filter entry: Bebida.is_not_missing.). -->
<operator
    activated="true"
    class="filter_examples"
    compatibility="8.0.001"
    expanded="true"
    height="103"
    name="Filter Examples"
    width="90"
    x="313"
    y="136">
  <list key="filters_list">
    <parameter key="filters_entry_key" value="Bebida.is_not_missing."/>
  </list>
</operator>
<!-- Applies the "replace_what" pattern (a character class of digits and
     punctuation) to the single attribute "Description".
     The pattern contains a double quote, an ampersand and angle brackets; in an
     attribute value these must be written as entities, otherwise the document is
     not well-formed. After parsing, the pattern is the same character class.
     NOTE(review): no "replace_by" parameter is set, so matches are presumably
     replaced by the operator's default (likely an empty string); confirm.
     NOTE(review): "attributes" and "regular_expression" look like settings for
     other attribute filter types and are presumably ignored while
     attribute_filter_type is "single"; confirm. -->
<operator
    activated="true"
    class="replace"
    compatibility="8.0.001"
    expanded="true"
    height="82"
    name="Replace"
    width="90"
    x="447"
    y="34">
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="Description"/>
  <parameter key="attributes" value="Description|É Bebida"/>
  <parameter key="regular_expression" value="[a-z]"/>
  <parameter key="replace_what" value="[-!0-9&quot;#$%&amp;'()*+,./:;&lt;=&gt;?@\[\\\]_`{|}~]"/>
</operator>
<!-- Converts the single attribute "Description" to the text type;
     include_special_attributes is switched on.
     NOTE(review): the "attributes" list is presumably ignored while
     attribute_filter_type is "single"; confirm. -->
<operator
    activated="true"
    class="nominal_to_text"
    compatibility="8.0.001"
    expanded="true"
    height="82"
    name="Nominal to Text"
    width="90"
    x="581"
    y="34">
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="Description"/>
  <parameter key="attributes" value="Description|É Bebida"/>
  <parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Converts the example set into documents (Text Processing extension);
     no per-attribute weights are specified.
     NOTE(review): no operator in this process gives "É Bebida" the label role,
     so a learner placed after the text processing has no target column.
     Consider a Set Role step before this conversion, and confirm how roles
     are carried through the document conversion. -->
<operator
    activated="true"
    class="text:data_to_documents"
    compatibility="7.5.000"
    expanded="true"
    height="68"
    name="Data to Documents"
    width="90"
    x="715"
    y="136">
  <list key="specify_weights"/>
</operator>
<!-- Processes the incoming documents with keep_text switched on.
     Inner pipeline, applied to each document: Tokenize, then
     Filter Stopwords (Dictionary), whose output goes to "document 1". -->
<operator
    activated="true"
    class="text:process_documents"
    compatibility="7.5.000"
    expanded="true"
    height="103"
    name="Process Documents"
    width="90"
    x="916"
    y="34">
  <parameter key="keep_text" value="true"/>
  <process expanded="true">
    <!-- Step 1: split each document into tokens (no parameters set here). -->
    <operator
        activated="true"
        class="text:tokenize"
        compatibility="7.5.000"
        expanded="true"
        height="68"
        name="Tokenize"
        width="90"
        x="112"
        y="34"/>
    <!-- Step 2: filter tokens using the stop-word file below.
         NOTE(review): this is an absolute path on one user's machine, so the
         process will presumably fail elsewhere unless the file exists there. -->
    <operator
        activated="true"
        class="text:filter_stopwords_dictionary"
        compatibility="7.5.000"
        expanded="true"
        height="82"
        name="Filter Stopwords (Dictionary)"
        width="90"
        x="514"
        y="34">
      <parameter key="file" value="C:\Users\luiz.vidal\Desktop\Cloudera\SEFA-PA\stopwords.txt"/>
    </operator>
    <!-- Inner wiring: source document, Tokenize, Filter Stopwords, sink "document 1". -->
    <connect
        from_port="document"
        to_op="Tokenize"
        to_port="document"/>
    <connect
        from_op="Tokenize"
        from_port="document"
        to_op="Filter Stopwords (Dictionary)"
        to_port="document"/>
    <connect
        from_op="Filter Stopwords (Dictionary)"
        from_port="document"
        to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Top-level wiring, a single linear chain:
     Retrieve Bebidas_100, Generate Attributes, Filter Examples, Replace,
     Nominal to Text, Data to Documents, Process Documents, then "result 1". -->
<connect
    from_op="Retrieve Bebidas_100"
    from_port="output"
    to_op="Generate Attributes"
    to_port="example set input"/>
<connect
    from_op="Generate Attributes"
    from_port="example set output"
    to_op="Filter Examples"
    to_port="example set input"/>
<connect
    from_op="Filter Examples"
    from_port="example set output"
    to_op="Replace"
    to_port="example set input"/>
<connect
    from_op="Replace"
    from_port="example set output"
    to_op="Nominal to Text"
    to_port="example set input"/>
<connect
    from_op="Nominal to Text"
    from_port="example set output"
    to_op="Data to Documents"
    to_port="example set"/>
<connect
    from_op="Data to Documents"
    from_port="documents"
    to_op="Process Documents"
    to_port="documents 1"/>
<!-- Only the example set output of Process Documents is delivered as a result. -->
<connect
    from_op="Process Documents"
    from_port="example set"
    to_port="result 1"/>
<!-- Port layout hints for the process canvas. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>