Query_ID;Query;frequency
1;hautarzt;103921
2;zahnarzt;101684
3;augenarzt;89233
4;frauenarzt;75116
5;arzt;70755
6;ärzte;65176
7;zahnärzte;57836
8;allgemeinarzt;54111
9;tierarzt;52387
10;augenärzte;49855
11;hautärzte;33141
12;kinderarzt;32989
13;kinderärzte;26377
14;hno arzt;22984
15;tierärzte;22090
16;frauenärzte;20694
17;lungenfacharzt;16468
18;praktische ärzte;14175
19;hno-ärzte;13290
20;hausarzt;12595
21;hautarztpraxen;12262
22;allgemeinärzte;11906
23;ärzte allgemeinmedizin und praktische ärzte;11781
24;ärzte orthopädie;10833
25;hals nasen ohrenärzte;5457
26;hno ärzte;4607
27;hals nasen ohren arzt;4319
28;ärzte innere medizin;4053
29;ärzte urologie;3886
30;ärzte frauenheilkunde und geburtshilfe;3837
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition.
  Operator chain: Read CSV, Set Role, Nominal to Text, Process Documents from Data.
  The example set is delivered on "result 1" and the word list on "result 2".
-->
<process version="5.3.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.008" expanded="true" name="Process">
    <process expanded="true">

      <!-- Load the query sample; row 0 is annotated as the name row and the
           three columns are declared as Query_ID, Query and frequency. -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.3.008"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="45"
                y="75">
        <parameter key="csv_file" value="C:\Users\retegniw\Documents\Office\Excel\Sample queries.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Query_ID.true.integer.id"/>
          <parameter key="1" value="Query.true.text.attribute"/>
          <parameter key="2" value="frequency.true.integer.attribute"/>
        </list>
      </operator>

      <!-- Assign the "id" role to the Query_ID attribute. -->
      <operator activated="true"
                class="set_role"
                compatibility="5.3.008"
                expanded="true"
                height="76"
                name="Set Role"
                width="90"
                x="179"
                y="75">
        <parameter key="attribute_name" value="Query_ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>

      <operator activated="true"
                class="nominal_to_text"
                compatibility="5.3.008"
                expanded="true"
                height="76"
                name="Nominal to Text"
                width="90"
                x="313"
                y="75"/>

      <!-- Text processing step; its inner process tokenizes each document and
           then applies the German stopword filter. -->
      <operator activated="true"
                class="text:process_document_from_data"
                compatibility="5.3.000"
                expanded="true"
                height="76"
                name="Process Documents from Data"
                width="90"
                x="447"
                y="75">
        <parameter key="keep_text" value="true"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true"
                    class="text:tokenize"
                    compatibility="5.3.000"
                    expanded="true"
                    height="60"
                    name="Tokenize"
                    width="90"
                    x="45"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_stopwords_german"
                    compatibility="5.3.000"
                    expanded="true"
                    height="60"
                    name="Filter Stopwords (German)"
                    width="90"
                    x="179"
                    y="30"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (German)" to_port="document"/>
          <connect from_op="Filter Stopwords (German)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Wiring of the top-level operators and the process result ports. -->
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_port="result 1"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition.
  Operator chain: Read CSV, Set Role, Nominal to Text, Process Documents from Data,
  Select Attributes, De-Pivot. The De-Pivot output is delivered on "result 1".
-->
<process version="5.3.009">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.009" expanded="true" name="Process">
    <process expanded="true">

      <!-- Load the query sample; row 0 is annotated as the name row and the
           three columns are declared as Query_ID, Query and frequency. -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.3.009"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="45"
                y="75">
        <parameter key="csv_file" value="/home/marcin/temp/forum-6582.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Query_ID.true.integer.id"/>
          <parameter key="1" value="Query.true.text.attribute"/>
          <parameter key="2" value="frequency.true.integer.attribute"/>
        </list>
      </operator>

      <!-- Assign the "id" role to the Query_ID attribute. -->
      <operator activated="true"
                class="set_role"
                compatibility="5.3.009"
                expanded="true"
                height="76"
                name="Set Role"
                width="90"
                x="179"
                y="75">
        <parameter key="attribute_name" value="Query_ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>

      <operator activated="true"
                class="nominal_to_text"
                compatibility="5.3.009"
                expanded="true"
                height="76"
                name="Nominal to Text"
                width="90"
                x="313"
                y="75"/>

      <!-- Text processing step; its inner process tokenizes each document and
           then applies the German stopword filter. -->
      <operator activated="true"
                class="text:process_document_from_data"
                compatibility="5.3.001"
                expanded="true"
                height="76"
                name="Process Documents from Data"
                width="90"
                x="447"
                y="75">
        <parameter key="keep_text" value="true"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true"
                    class="text:tokenize"
                    compatibility="5.3.001"
                    expanded="true"
                    height="60"
                    name="Tokenize"
                    width="90"
                    x="45"
                    y="30"/>
          <operator activated="true"
                    class="text:filter_stopwords_german"
                    compatibility="5.3.001"
                    expanded="true"
                    height="60"
                    name="Filter Stopwords (German)"
                    width="90"
                    x="179"
                    y="30"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (German)" to_port="document"/>
          <connect from_op="Filter Stopwords (German)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Inverted subset selection naming "text" and "frequency", with
           special attributes included in the filter. -->
      <operator activated="true"
                class="select_attributes"
                compatibility="5.3.009"
                expanded="true"
                height="76"
                name="Select Attributes"
                width="90"
                x="246"
                y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="text|frequency"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>

      <!-- De-Pivot: one series named "TF-IDF" built from every attribute whose
           name matches the regular expression below; the index attribute is
           named "Term" and is created as a nominal index. -->
      <operator activated="true"
                class="de_pivot"
                compatibility="5.3.009"
                expanded="true"
                height="76"
                name="De-Pivot"
                width="90"
                x="380"
                y="210">
        <list key="attribute_name">
          <parameter key="TF-IDF" value="^(?!Query_ID).*"/>
        </list>
        <parameter key="index_attribute" value="Term"/>
        <parameter key="create_nominal_index" value="true"/>
      </operator>

      <!-- Wiring of the top-level operators and the process result port. -->
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="De-Pivot" to_port="example set input"/>
      <connect from_op="De-Pivot" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Marcin wrote: The operator you are looking for is "De-Pivot", which is indeed not easy to use (in my opinion).