A program to recognize and reward our most engaged community members
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (format version 5.0): multi-label example.

  Operator chain, in connection order:
    Generate Data        : synthetic data, target_function "sum classification"
    Generate Attributes  : derives label1, label2, label3 from att1..att4
    Discretize           : bins label1|label2|label3 (short range names)
    Set Role / (2) / (3) : gives each derived attribute the role label1/label2/label3
    Select Attributes    : keeps att1..att5 plus the three labels, special attributes included
    Loop Labels          : contains an x_validation whose training side is Naive Bayes and
                           whose testing side is Apply Model followed by Performance

  NOTE(review): the role names label1..label3 are presumably what Loop Labels iterates
  over; confirm against the operator documentation before renaming them.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.11" expanded="true" name="Process">
    <process expanded="true" height="656" width="815">
      <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="target_function" value="sum classification"/>
      </operator>
      <!-- Three extra attributes that serve as the labels further down the chain. -->
      <operator activated="true" class="generate_attributes" compatibility="5.0.11" expanded="true" height="76" name="Generate Attributes" width="90" x="45" y="120">
        <list key="function_descriptions">
          <parameter key="label1" value="att1+att2"/>
          <parameter key="label2" value="att1+att3+att4"/>
          <parameter key="label3" value="abs(att1*att2)"/>
        </list>
      </operator>
      <operator activated="true" class="discretize_by_bins" compatibility="5.0.11" expanded="true" height="94" name="Discretize" width="90" x="45" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="label1|label2|label3"/>
        <parameter key="range_name_type" value="short"/>
      </operator>
      <!-- One Set Role operator per derived attribute; each role name equals the attribute name. -->
      <operator activated="true" class="set_role" compatibility="5.0.11" expanded="true" height="76" name="Set Role" width="90" x="45" y="345">
        <parameter key="name" value="label1"/>
        <parameter key="target_role" value="label1"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.0.11" expanded="true" height="76" name="Set Role (2)" width="90" x="45" y="435">
        <parameter key="name" value="label2"/>
        <parameter key="target_role" value="label2"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.0.11" expanded="true" height="76" name="Set Role (3)" width="90" x="45" y="525">
        <parameter key="name" value="label3"/>
        <parameter key="target_role" value="label3"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.0.11" expanded="true" height="76" name="Select Attributes" width="90" x="246" y="255">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att1|att2|att3|att4|att5|label1|label2|label3"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="loop_labels" compatibility="5.0.11" expanded="true" height="76" name="Loop Labels" width="90" x="447" y="255">
        <process expanded="true" height="616" width="945">
          <operator activated="true" class="x_validation" compatibility="5.0.0" expanded="true" height="112" name="Validation" width="90" x="313" y="120">
            <description>A cross-validation evaluating a Naive Bayes model.</description>
            <!-- Training subprocess: fits the Naive Bayes learner on the training port. -->
            <process expanded="true" height="654" width="466">
              <operator activated="true" class="naive_bayes" compatibility="5.0.11" expanded="true" height="76" name="Naive Bayes" width="90" x="188" y="30"/>
              <connect from_port="training" to_op="Naive Bayes" to_port="training set"/>
              <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing subprocess: applies the model to the test set and measures performance. -->
            <process expanded="true" height="654" width="466">
              <operator activated="true" class="apply_model" compatibility="5.0.0" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance" compatibility="5.0.0" expanded="true" height="76" name="Performance" width="90" x="179" y="30"/>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="averagable 1" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Generate Data" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Loop Labels" to_port="example set"/>
      <connect from_op="Loop Labels" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (format version 5.0): template for extracting values from files.

  "Process Documents from Files" has an empty text_directories list and
  extract_text_only set to false. Its inner process passes each document through
  "Extract Information" with query_type XPath. The two xpath_queries entries
  (att1, att2) hold placeholder query strings, not real XPath expressions.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
    <process expanded="true" height="370" width="547">
      <operator activated="true" class="text:process_document_from_file" compatibility="5.0.6" expanded="true" height="76" name="Process Documents from Files" width="90" x="179" y="120">
        <list key="text_directories"/>
        <parameter key="extract_text_only" value="false"/>
        <process expanded="true" height="590" width="912">
          <operator activated="true" class="text:extract_information" compatibility="5.0.6" expanded="true" height="60" name="Extract Information" width="90" x="246" y="75">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!-- Each entry: key = name of the resulting attribute, value = the query. -->
            <list key="xpath_queries">
              <parameter key="att1" value="some xpath query"/>
              <parameter key="att2" value="another xpath query"/>
            </list>
            <list key="namespaces"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Extract Information" to_port="document"/>
          <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="18"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Process Documents from Files" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Sebastian Loh wrote: Regarding the multi-label problem: You can learn a model for each label. Therefore you need to set the current label attribute to the role "label" and the other labels to the role "other" (just not regular, because then the current label would also be learned from the other labels). Ciao, Sebastian. P.S. Thanks, Matthias! I modified the example above.
tron42 wrote: 2) I'm building a small CSV example for testing, something like: title;abstract;keyword;keyword;keyword;... As you can see, I have multiple columns, each with one keyword. Is it possible to mark more than one column as a label? I tried, but when I change the next column, the previous one changes back.
Sebastian Loh wrote: Hi tron42, can you explain your intention again, please? What I understood is that each keyword is an indicator/label. For example, keyword1 indicates the sentiment (good/bad review) for quality, keyword2 indicates good/bad review for service, keyword3 for...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (format version 5.1.002): XPath extraction from a local directory.

  "Process Documents from Files (3)" reads the directory registered under the class
  name SGML, with extract_text_only=false, encoding UTF-8 and create_word_vector=false.
  Its inner process runs "Extract Information (2)" with query_type XPath; the single
  XPath query "Move 1" selects //title.

  The string-matching pattern below is written with escaped angle brackets
  (&lt; and &gt;) because a literal less-than sign is not allowed inside an XML
  attribute value; a parser hands the unescaped pattern to the application.
-->
<process version="5.1.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.002" expanded="true" name="Process">
    <process expanded="true" height="431" width="547">
      <operator activated="true" class="text:process_document_from_file" compatibility="5.1.001" expanded="true" height="76" name="Process Documents from Files (3)" width="90" x="179" y="255">
        <list key="text_directories">
          <parameter key="SGML" value="C:\Users\Kirk\Desktop\tests"/>
        </list>
        <parameter key="extract_text_only" value="false"/>
        <parameter key="encoding" value="UTF-8"/>
        <parameter key="create_word_vector" value="false"/>
        <parameter key="prune_below_absolute" value="5"/>
        <parameter key="prune_above_absolute" value="1000000"/>
        <process expanded="true" height="650" width="710">
          <operator activated="true" class="text:extract_information" compatibility="5.1.001" expanded="true" height="60" name="Extract Information (2)" width="90" x="45" y="210">
            <parameter key="query_type" value="XPath"/>
            <!-- NOTE(review): query_type is XPath, so this string-matching entry is presumably unused; confirm. -->
            <list key="string_machting_queries">
              <parameter key="intro_m/d" value="&lt;intro_m\.*&gt;.&lt;/intro\.*&gt;"/>
            </list>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <list key="xpath_queries">
              <parameter key="Move 1" value="//title"/>
            </list>
            <list key="namespaces"/>
            <parameter key="assume_html" value="false"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Extract Information (2)" to_port="document"/>
          <connect from_op="Extract Information (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Process Documents from Files (3)" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
  Sample bookstore catalog: four book records, each with a category attribute,
  an English-language title, one or more authors, a publication year and a price.
-->
<bookstore>
  <book category="COOKING">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <price>30.00</price>
  </book>
  <book category="CHILDREN">
    <title lang="en">Harry Potter</title>
    <author>J K. Rowling</author>
    <year>2005</year>
    <price>29.99</price>
  </book>
  <!-- This record has five authors, expressed as repeated author elements. -->
  <book category="WEB">
    <title lang="en">XQuery Kick Start</title>
    <author>James McGovern</author>
    <author>Per Bothner</author>
    <author>Kurt Cagle</author>
    <author>James Linn</author>
    <author>Vaidyanathan Nagarajan</author>
    <year>2003</year>
    <price>49.99</price>
  </book>
  <book category="WEB">
    <title lang="en">Learning XML</title>
    <author>Erik T. Ray</author>
    <year>2003</year>
    <price>39.95</price>
  </book>
</bookstore>