Getting wordlist for each separate file in the Process Documents operator
Hey folks,
I am trying to get the top words from text files using the process in the XML below to get the top words in text files. However I wish to get the top words for each text file in a folder seperately the operation below gives me the results for the whole collection of text files. Is there anyway to get the operation to process them individually rather than as a group?
Thanks in advance,
Neil.
I am trying to get the top words from text files using the process in the XML below to get the top words in text files. However I wish to get the top words for each text file in a folder seperately the operation below gives me the results for the whole collection of text files. Is there anyway to get the operation to process them individually rather than as a group?
Thanks in advance,
Neil.
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="social_media:search_twitter" compatibility="8.1.000" expanded="true" height="68" name="Search Twitter" width="90" x="112" y="34">
<parameter key="connection" value="dkk"/>
<parameter key="query" value="tesla"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Text" width="90" x="246" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Text"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="Text"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="514" y="34">
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="380" y="34"/>
<connect from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="WordList to Data" width="90" x="648" y="85"/>
<operator activated="true" class="sort" compatibility="8.1.000" expanded="true" height="82" name="Sort" width="90" x="782" y="85">
<parameter key="attribute_name" value="total"/>
<parameter key="sorting_direction" value="decreasing"/>
</operator>
<operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="916" y="85">
<parameter key="first_example" value="1"/>
<parameter key="last_example" value="5"/>
</operator>
<connect from_op="Search Twitter" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_port="result 1"/>
<connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
<connect from_op="WordList to Data" from_port="example set" to_op="Sort" to_port="example set input"/>
<connect from_op="Sort" from_port="example set output" to_op="Filter Example Range" to_port="example set input"/>
<connect from_op="Sort" from_port="original" to_port="result 3"/>
<connect from_op="Filter Example Range" from_port="example set output" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>