id;text
1;cat dog fish
1;fish dog
2;cat cheese mouse
2;dog
2;cat dog
3;dog
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: reads a two-column CSV (id, text), turns the text column into a
  term-occurrence word vector, sums the vectors of all rows that share an id, and strips
  the "sum(...)" wrapper that the aggregation puts around the attribute names.
-->
<process version="5.1.017">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.017" expanded="true" name="Process">
    <process expanded="true" height="531" width="915">

      <!-- Deactivated (activated="false") and not referenced by any top-level connect below. -->
      <operator activated="false" class="text:process_documents" compatibility="5.2.000" expanded="true"
                height="76" name="Process Documents" width="90" x="447" y="210">
        <process expanded="true" height="633" width="705">
          <operator activated="false" class="text:transform_cases" compatibility="5.2.000" expanded="true"
                    height="60" name="Transform Cases" width="90" x="45" y="30"/>
          <operator activated="false" class="text:tokenize" compatibility="5.2.000" expanded="true"
                    height="60" name="Tokenize" width="90" x="180" y="30"/>
          <operator activated="false" class="text:filter_stopwords_english" compatibility="5.2.000" expanded="true"
                    height="60" name="Filter Stopwords (English)" width="90" x="315" y="30"/>
          <operator activated="false" class="text:filter_by_length" compatibility="5.2.000" expanded="true"
                    height="60" name="Filter Tokens (by Length)" width="90" x="450" y="30">
            <parameter key="min_chars" value="3"/>
            <parameter key="max_chars" value="999"/>
          </operator>
          <operator activated="false" class="text:generate_n_grams_terms" compatibility="5.2.000" expanded="true"
                    height="60" name="Generate n-Grams (Terms)" width="90" x="585" y="30">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Input: the path is absolute and machine-specific; adjust it before running elsewhere. -->
      <operator activated="true" class="read_csv" compatibility="5.1.017" expanded="true"
                height="60" name="Read CSV" width="90" x="45" y="30">
        <parameter key="csv_file" value="C:\Users\marius\Documents\development\test.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="id.true.integer.attribute"/>
          <parameter key="1" value="text.true.polynominal.attribute"/>
        </list>
      </operator>

      <!-- Give the "id" attribute the id role. -->
      <operator activated="true" class="set_role" compatibility="5.1.017" expanded="true"
                height="76" name="Set Role" width="90" x="179" y="30">
        <parameter key="name" value="id"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>

      <operator activated="true" class="nominal_to_text" compatibility="5.1.017" expanded="true"
                height="76" name="Nominal to Text" width="90" x="313" y="30"/>

      <!-- Word-vector creation using term occurrences; the inner process is the per-document pipeline. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="5.2.000" expanded="true"
                height="76" name="Process Documents from Data" width="90" x="447" y="30">
        <parameter key="vector_creation" value="Term Occurrences"/>
        <list key="specify_weights"/>
        <process expanded="true" height="633" width="705">
          <operator activated="true" class="text:transform_cases" compatibility="5.2.000" expanded="true"
                    height="60" name="Transform Cases (2)" width="90" x="45" y="30"/>
          <operator activated="true" class="text:tokenize" compatibility="5.2.000" expanded="true"
                    height="60" name="Tokenize (2)" width="90" x="180" y="30"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.2.000" expanded="true"
                    height="60" name="Filter Stopwords (2)" width="90" x="315" y="30"/>
          <operator activated="true" class="text:filter_by_length" compatibility="5.2.000" expanded="true"
                    height="60" name="Filter Tokens (2)" width="90" x="450" y="30">
            <parameter key="min_chars" value="3"/>
            <parameter key="max_chars" value="999"/>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.2.000" expanded="true"
                    height="60" name="Generate n-Grams (2)" width="90" x="585" y="30">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
          <connect from_op="Filter Tokens (2)" from_port="document" to_op="Generate n-Grams (2)" to_port="document"/>
          <connect from_op="Generate n-Grams (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Default aggregation function "sum" over all attributes, grouped by id. -->
      <operator activated="true" class="aggregate" compatibility="5.1.017" expanded="true"
                height="76" name="Aggregate" width="90" x="581" y="30">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="default_aggregation_function" value="sum"/>
        <list key="aggregation_attributes"/>
        <parameter key="group_by_attributes" value="|id"/>
      </operator>

      <!-- Rename "sum(term)" back to "term" using the captured group. -->
      <operator activated="true" class="rename_by_replacing" compatibility="5.1.017" expanded="true"
                height="76" name="Rename by Replacing" width="90" x="715" y="30">
        <parameter key="replace_what" value="sum\((.*)\)"/>
        <parameter key="replace_by" value="$1"/>
      </operator>

      <!-- Port names are kept on a single line: a line break inside an attribute value
           is normalized to a space and would change the port name. -->
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
hennessy wrote: PS: What do you think, what's the best solution to display the results in a chart?