🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Extracting the Tokenized Result Data

User: "maxfax"
New Altair Community Member
Updated by Jocelyn
Hi,

First I read a txt file, and afterwards I would like to do some simple text processing steps and then export the newly stemmed and processed data to a txt file.

I got all the steps working, but I just can't export the results. I don't know if it's clear, but I would like to have a txt file which contains the processed text as it is shown in the result table.
      <!-- Read Document: loads the file named by the "file" parameter (C:\mystring.txt).
           extract_text_only is enabled, content_type is "txt" and encoding is "SYSTEM".
           NOTE(review): with use_file_extension_as_type="true", content_type presumably
           only serves as a fallback; confirm against the operator documentation. -->
      <operator activated="true" class="text:read_document" compatibility="5.2.004" expanded="true" height="60" name="Read Document" width="90" x="45" y="120">
        <parameter key="file" value="C:\mystring.txt"/>
        <parameter key="extract_text_only" value="true"/>
        <parameter key="use_file_extension_as_type" value="true"/>
        <parameter key="content_type" value="txt"/>
        <parameter key="encoding" value="SYSTEM"/>
      </operator>
      <!-- Tokenize: tokenizes the incoming document using mode "non letters".
           NOTE(review): "characters", "language" and "max_token_length" are presumably
           only consulted by other tokenization modes; verify before relying on them. -->
      <operator activated="true" class="text:tokenize" compatibility="5.2.004" expanded="true" height="60" name="Tokenize" width="90" x="196" y="138">
        <parameter key="mode" value="non letters"/>
        <parameter key="characters" value=".:"/>
        <parameter key="language" value="English"/>
        <parameter key="max_token_length" value="3"/>
      </operator>
      <!-- Transform Cases: configured with transform_to="lower case". -->
      <operator activated="true" class="text:transform_cases" compatibility="5.2.004" expanded="true" height="60" name="Transform Cases" width="90" x="311" y="69">
        <parameter key="transform_to" value="lower case"/>
      </operator>
      <!-- Filter Stopwords (German): German stopword filter using the "Standard" stop word list. -->
      <operator activated="true" class="text:filter_stopwords_german" compatibility="5.2.004" expanded="true" height="60" name="Filter Stopwords (German)" width="90" x="313" y="210">
        <parameter key="stop_word_list" value="Standard"/>
      </operator>
      <!-- Stem (German): German stemming step; it declares no parameters here. -->
      <operator activated="true" class="text:stem_german" compatibility="5.2.004" expanded="true" height="60" name="Stem (German)" width="90" x="447" y="30"/>
      <!-- Filter Tokens (by Length): length filter configured with min_chars=2 and max_chars=25. -->
      <operator activated="true" class="text:filter_by_length" compatibility="5.2.004" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="447" y="210">
        <parameter key="min_chars" value="2"/>
        <parameter key="max_chars" value="25"/>
      </operator>
      <!-- Wiring: Read Document, Tokenize, Transform Cases, Filter Stopwords (German),
           Stem (German) and Filter Tokens (by Length) are chained in that order through
           their "document" ports. The last operator feeds the process port "result 1".
           No operator in this visible wiring writes the processed document to a file;
           the output is only delivered to the result port. -->
      <connect from_op="Read Document" from_port="output" to_op="Tokenize" to_port="document"/>
      <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
      <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (German)" to_port="document"/>
      <connect from_op="Filter Stopwords (German)" from_port="document" to_op="Stem (German)" to_port="document"/>
      <connect from_op="Stem (German)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
      <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="result 1"/>
      <!-- Layout-only spacing hints for the process ports. -->
      <portSpacing port="source_input 1" spacing="108"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Find more posts tagged with