Hello,
I want to read a text containing positive and negative comments and classify them, that is, identify which comments are positive and which are negative.
I created two processes.
The first reads the positive and negative comments, which are in different files. Here I apply Naive Bayes for classification and build the model.
The second process reads the comments I want to classify (all in different files) and applies the model generated above.
My problem is that the classification is not consistent: comments that are really positive are being classified as negative.
Does anyone have any idea why this happens?
The XML of the two processes is:
Process 1:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Training process: turns the documents of two directories into term-frequency
  vectors, cross-validates a Naive Bayes learner on them and writes the
  resulting model to disk.
-->
<process version="5.2.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.000" expanded="true" name="Process">
    <process expanded="true" height="235" width="480">

      <!-- Step 1: read the documents registered under the names "positivos" and
           "negativos" (presumably used as the class labels; confirm) and
           vectorise them with Term Frequency weighting. -->
      <operator activated="true" class="text:process_document_from_file" compatibility="5.2.001" expanded="true" height="76" name="Process Documents from Files" width="90" x="45" y="30">
        <list key="text_directories">
          <parameter key="positivos" value="C:\Users\yop\Desktop\archivos de tesis\comentarios positivos"/>
          <parameter key="negativos" value="C:\Users\yop\Desktop\archivos de tesis\comentarios negativos"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <parameter key="vector_creation" value="Term Frequency"/>

        <!-- Per-document chain: Transform Cases, Tokenize, Filter Tokens (by Length),
             Stem (Snowball, Spanish). -->
        <process expanded="true" height="352" width="586">
          <operator activated="true" class="text:transform_cases" compatibility="5.2.001" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="75"/>
          <operator activated="true" class="text:tokenize" compatibility="5.2.001" expanded="true" height="60" name="Tokenize" width="90" x="179" y="75"/>
          <operator activated="true" class="text:filter_by_length" compatibility="5.2.001" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="313" y="75"/>
          <operator activated="true" class="text:stem_snowball" compatibility="5.2.001" expanded="true" height="60" name="Stem (Snowball)" width="90" x="447" y="75">
            <parameter key="language" value="Spanish"/>
          </operator>
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Stem (Snowball)" to_port="document"/>
          <connect from_op="Stem (Snowball)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 2: cross-validation with number_of_validations set to 2. -->
      <operator activated="true" class="x_validation" compatibility="5.2.000" expanded="true" height="130" name="Validation" width="90" x="179" y="30">
        <parameter key="number_of_validations" value="2"/>

        <!-- Training side: Naive Bayes with Laplace correction switched off. -->
        <process expanded="true" height="352" width="268">
          <operator activated="true" class="naive_bayes" compatibility="5.2.000" expanded="true" height="76" name="Naive Bayes" width="90" x="99" y="59">
            <parameter key="laplace_correction" value="false"/>
          </operator>
          <connect from_port="training" to_op="Naive Bayes" to_port="training set"/>
          <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing side: apply the model to the test set and hand the labelled
             data to the Performance operator. -->
        <process expanded="true" height="352" width="279">
          <operator activated="true" class="apply_model" compatibility="5.2.000" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.2.000" expanded="true" height="76" name="Performance" width="90" x="112" y="165"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
          <portSpacing port="sink_averagable 3" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: write the model delivered by Validation to the model file. -->
      <operator activated="true" class="write_model" compatibility="5.2.000" expanded="true" height="60" name="Write Model" width="90" x="380" y="165">
        <parameter key="model_file" value="C:\Users\yop\Desktop\archivos de tesis\model\model"/>
      </operator>

      <!-- Top-level wiring: example set into Validation, model into Write Model,
           training data and the averagable outputs to the result ports. -->
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Write Model" to_port="input"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 2" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
And Process 2:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Scoring process: reads the stored model, vectorises the comments to be
  classified, applies the model and writes the labelled result to Excel.

  The document preprocessing below mirrors the training process exactly
  (UTF-8 encoding, Term Frequency vectors, Transform Cases, Tokenize,
  Filter Tokens (by Length), Spanish Snowball stemming). A model can only
  score consistently when the attributes it receives are produced the same
  way as the ones it was trained on.
-->
<process version="5.2.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.000" expanded="true" name="Process">
    <process expanded="true" height="325" width="547">

      <!-- Step 1: read the model file written by the training process. -->
      <operator activated="true" class="read_model" compatibility="5.2.000" expanded="true" height="60" name="Read Model" width="90" x="41" y="44">
        <parameter key="model_file" value="C:\Users\yop\Desktop\archivos de tesis\model\model"/>
      </operator>

      <!-- Step 2: vectorise the comments to classify.
           encoding and vector_creation are set explicitly to the values used
           for training instead of relying on the operator defaults.
           NOTE(review): the word list produced during training is not connected
           to this operator, so the attribute set built here may still differ
           from the one the model was trained on. TODO: confirm, and if so reuse
           the training word list. -->
      <operator activated="true" class="text:process_document_from_file" compatibility="5.2.001" expanded="true" height="76" name="Process Documents from Files" width="90" x="45" y="165">
        <list key="text_directories">
          <parameter key="comentarios" value="C:\Users\yop\Desktop\archivos de tesis\comentarios"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <parameter key="vector_creation" value="Term Frequency"/>

        <!-- Same per-document chain as in training; Filter Tokens (by Length)
             was missing here and is now placed between Tokenize and Stem. -->
        <process expanded="true" height="352" width="586">
          <operator activated="true" class="text:transform_cases" compatibility="5.2.001" expanded="true" height="60" name="Transform Cases" width="90" x="37" y="40"/>
          <operator activated="true" class="text:tokenize" compatibility="5.2.001" expanded="true" height="60" name="Tokenize" width="90" x="179" y="30"/>
          <operator activated="true" class="text:filter_by_length" compatibility="5.2.001" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="313" y="30"/>
          <operator activated="true" class="text:stem_snowball" compatibility="5.2.001" expanded="true" height="60" name="Stem (Snowball)" width="90" x="447" y="30">
            <parameter key="language" value="Spanish"/>
          </operator>
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Stem (Snowball)" to_port="document"/>
          <connect from_op="Stem (Snowball)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: apply the stored model to the vectorised comments. -->
      <operator activated="true" class="apply_model" compatibility="5.2.000" expanded="true" height="76" name="Apply Model" width="90" x="179" y="75">
        <list key="application_parameters"/>
        <parameter key="create_view" value="true"/>
      </operator>

      <!-- Step 4: attribute selection with special attributes included. -->
      <operator activated="true" class="select_attributes" compatibility="5.2.000" expanded="true" height="76" name="Select Attributes" width="90" x="313" y="75">
        <parameter key="include_special_attributes" value="true"/>
      </operator>

      <!-- Step 5: write the result to the Excel file. -->
      <operator activated="true" class="write_excel" compatibility="5.2.000" expanded="true" height="60" name="Write Excel" width="90" x="447" y="120">
        <parameter key="excel_file" value="C:\Users\yop\Desktop\archivos de tesis\aml\exit.xls"/>
      </operator>

      <connect from_op="Read Model" from_port="output" to_op="Apply Model" to_port="model"/>
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Write Excel" to_port="input"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
Thanks