"Problem with filtering the text"
Hi everyone,
I want to filter a text document and remove the stopwords. I built a process that reads the document (Read Document), then tokenizes it (Tokenize), then filters the stopwords (Filter Stopwords), and finally writes the document (Write Document), but the result is the same: the stopwords were not removed. The process XML is below. No problems or warnings are reported; the output in the result folder is simply identical to the text I passed to Read Document.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Text process with four chained operators:
  Read Document, then Tokenize, then Filter Stopwords (Dictionary), then Write Document.
  The output of Write Document is wired to the process port "result 1".
-->
<process version="5.1.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.1.006"
            expanded="true"
            name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true" height="341" width="681">

      <!-- Step 1: load the input document from the file given below. -->
      <operator activated="true"
                class="text:read_document"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Read Document"
                width="90"
                x="36"
                y="86">
        <parameter key="file" value="C:\Users\Αlkis_!!\Desktop\negative.txt"/>
      </operator>

      <!-- Step 2: tokenize; no parameters are set, so operator defaults apply. -->
      <operator activated="true"
                class="text:tokenize"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Tokenize"
                width="90"
                x="176"
                y="88"/>

      <!--
        Step 3: dictionary-based stopword filter using the stopword file below.
        NOTE(review): only the "file" parameter is set here, so the character
        encoding used to read the Greek stopword list is whatever the operator
        defaults to. Confirm it matches the file's actual encoding (TODO verify).
      -->
      <operator activated="true"
                class="text:filter_stopwords_dictionary"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Filter Stopwords (Dictionary)"
                width="90"
                x="317"
                y="79">
        <parameter key="file" value="C:\Users\Αlkis_!!\Desktop\stopwords_greek.txt"/>
      </operator>

      <!-- Step 4: write the document to the target path below. -->
      <operator activated="true"
                class="text:write_document"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Write Document"
                width="90"
                x="447"
                y="75">
        <parameter key="file" value="C:\Users\Αlkis_!!\Desktop\result\ρεσσσσσσσσσσσσσσσσσ"/>
      </operator>

      <!-- Wiring: each operator's document output feeds the next operator's document input. -->
      <connect from_op="Read Document"
               from_port="output"
               to_op="Tokenize"
               to_port="document"/>
      <connect from_op="Tokenize"
               from_port="document"
               to_op="Filter Stopwords (Dictionary)"
               to_port="document"/>
      <connect from_op="Filter Stopwords (Dictionary)"
               from_port="document"
               to_op="Write Document"
               to_port="document"/>
      <connect from_op="Write Document"
               from_port="document"
               to_port="result 1"/>

      <!-- Layout-only port spacing entries. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Could anyone help me? I am using the dictionary-based stopword filter because my text uses Greek characters.
I want to filter a text document and remove the stopwords. I built a process that reads the document (Read Document), then tokenizes it (Tokenize), then filters the stopwords (Filter Stopwords), and finally writes the document (Write Document), but the result is the same: the stopwords were not removed. The process XML is below. No problems or warnings are reported; the output in the result folder is simply identical to the text I passed to Read Document.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Text process with four chained operators:
  Read Document, then Tokenize, then Filter Stopwords (Dictionary), then Write Document.
  The output of Write Document is wired to the process port "result 1".
-->
<process version="5.1.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.1.006"
            expanded="true"
            name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true" height="341" width="681">

      <!-- Step 1: load the input document from the file given below. -->
      <operator activated="true"
                class="text:read_document"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Read Document"
                width="90"
                x="36"
                y="86">
        <parameter key="file" value="C:\Users\Αlkis_!!\Desktop\negative.txt"/>
      </operator>

      <!-- Step 2: tokenize; no parameters are set, so operator defaults apply. -->
      <operator activated="true"
                class="text:tokenize"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Tokenize"
                width="90"
                x="176"
                y="88"/>

      <!--
        Step 3: dictionary-based stopword filter using the stopword file below.
        NOTE(review): only the "file" parameter is set here, so the character
        encoding used to read the Greek stopword list is whatever the operator
        defaults to. Confirm it matches the file's actual encoding (TODO verify).
      -->
      <operator activated="true"
                class="text:filter_stopwords_dictionary"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Filter Stopwords (Dictionary)"
                width="90"
                x="317"
                y="79">
        <parameter key="file" value="C:\Users\Αlkis_!!\Desktop\stopwords_greek.txt"/>
      </operator>

      <!-- Step 4: write the document to the target path below. -->
      <operator activated="true"
                class="text:write_document"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Write Document"
                width="90"
                x="447"
                y="75">
        <parameter key="file" value="C:\Users\Αlkis_!!\Desktop\result\ρεσσσσσσσσσσσσσσσσσ"/>
      </operator>

      <!-- Wiring: each operator's document output feeds the next operator's document input. -->
      <connect from_op="Read Document"
               from_port="output"
               to_op="Tokenize"
               to_port="document"/>
      <connect from_op="Tokenize"
               from_port="document"
               to_op="Filter Stopwords (Dictionary)"
               to_port="document"/>
      <connect from_op="Filter Stopwords (Dictionary)"
               from_port="document"
               to_op="Write Document"
               to_port="document"/>
      <connect from_op="Write Document"
               from_port="document"
               to_port="result 1"/>

      <!-- Layout-only port spacing entries. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Could anyone help me? I am using the dictionary-based stopword filter because my text uses Greek characters.