[SOLVED] Normalization operator in RapidMiner
I am trying to normalize the vectors generated by processing the documents. The Normalize operator performs the normalization across each feature (column). How can I normalize each instance (row) instead? The following is my process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process export.
  Pipeline: "Process Documents from Files (2)" builds an example set from two
  labelled directories, and "Normalize" is then applied to that example set.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <!-- Root operator: wraps the whole top-level process. -->
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="362" width="656">

      <!-- Step 1: read the documents and create the word vectors. -->
      <operator activated="true"
                class="text:process_document_from_file"
                compatibility="5.3.000"
                expanded="true"
                height="76"
                name="Process Documents from Files (2)"
                width="90"
                x="112"
                y="75">
        <!-- One entry per class: key is the class name, value is the directory to read. -->
        <list key="text_directories">
          <parameter key="Responsive" value="C:\Desktop\dataset\R"/>
          <parameter key="Not Responsive" value="C:\Desktop\dataset\NR"/>
        </list>
        <parameter key="extract_text_only" value="false"/>
        <parameter key="vector_creation" value="Binary Term Occurrences"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="10"/>
        <parameter key="prune_above_absolute" value="5000000"/>
        <!-- NOTE(review): a rank threshold is set although prune_method is "absolute";
             presumably it is not used in this mode. Confirm against the operator docs. -->
        <parameter key="prune_above_rank" value="0.05"/>

        <!-- Per-document subprocess: Tokenize, then Transform Cases, then Filter Stopwords (English). -->
        <process expanded="true" height="380" width="674">
          <operator activated="true"
                    class="text:tokenize"
                    compatibility="5.3.000"
                    expanded="true"
                    height="60"
                    name="Tokenize (2)"
                    width="90"
                    x="45"
                    y="30"/>
          <operator activated="true"
                    class="text:transform_cases"
                    compatibility="5.3.000"
                    expanded="true"
                    height="60"
                    name="Transform Cases (2)"
                    width="90"
                    x="180"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_stopwords_english"
                    compatibility="5.3.000"
                    expanded="true"
                    height="60"
                    name="Filter Stopwords (English)"
                    width="90"
                    x="427"
                    y="30"/>
          <!-- Wiring of the document through the three operators above. -->
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 2: normalize the example set using the "proportion transformation" method. -->
      <operator activated="true"
                class="normalize"
                compatibility="5.2.008"
                expanded="true"
                height="94"
                name="Normalize"
                width="90"
                x="293"
                y="81">
        <parameter key="method" value="proportion transformation"/>
      </operator>

      <!-- Top-level wiring: the normalized set goes to result 1, the untouched original to result 2. -->
      <connect from_op="Process Documents from Files (2)" from_port="example set" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_port="result 1"/>
      <connect from_op="Normalize" from_port="original" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
-
Hi,
Transpose the data before normalizing so that rows and columns are interchanged, then normalize, and transpose back. The transposition is performed by the Transpose operator.
Please note that transposing only works well for purely numerical data, and that you have to set all special roles again after the second Transpose operator.
Best regards,
Marius0