A program to recognize and reward our most engaged community members
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.1.011).
  Operator chain, as wired by the connect elements below:
    Get Page, then Cut Document, then Documents to Data, then result 1.
  Get Page is configured with url http://microsoft.com.
  Cut Document uses query_type XPath with a single query named "paragraph" (//h:p).
  NOTE(review): the "namespaces" list is empty, so the h: prefix is presumably
  bound by the operator itself; confirm against the operator documentation.
-->
<process version="5.1.011">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.1.011"
            expanded="true"
            name="Process">
    <process expanded="true" height="145" width="413">

      <!-- Step 1: page retrieval operator. -->
      <operator activated="true"
                class="web:get_webpage"
                compatibility="5.1.002"
                expanded="true"
                height="60"
                name="Get Page"
                width="90"
                x="45"
                y="30">
        <parameter key="url" value="http://microsoft.com"/>
        <parameter key="random_user_agent" value="true"/>
        <list key="query_parameters"/>
      </operator>

      <!-- Step 2: segment the document with the XPath query list. -->
      <operator activated="true"
                class="text:cut_document"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Cut Document"
                width="90"
                x="179"
                y="30">
        <parameter key="query_type" value="XPath"/>
        <!-- The key spelling "string_machting_queries" is kept exactly as in the original. -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries">
          <parameter key="paragraph" value="//h:p"/>
        </list>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <!-- Inner subprocess: the segment port is wired straight to the first document sink. -->
        <process expanded="true" height="607" width="763">
          <connect from_port="segment" to_port="document 1"/>
          <portSpacing port="source_segment" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: documents to example set; text_attribute is set to "segment". -->
      <operator activated="true"
                class="text:documents_to_data"
                compatibility="5.1.001"
                expanded="true"
                height="76"
                name="Documents to Data"
                width="90"
                x="313"
                y="30">
        <parameter key="text_attribute" value="segment"/>
      </operator>

      <!-- Wiring between the operators above and the process result port. -->
      <connect from_op="Get Page" from_port="output" to_op="Cut Document" to_port="document"/>
      <connect from_op="Cut Document" from_port="documents" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.1.011).
  Operator chain, as wired by the connect elements below:
    Get Page, Cut Document, Process Documents, Remove Duplicates,
    Generate ID, Write Excel, then result 1.
  Cut Document uses two XPath queries: "paragraph" (//h:p) and "list" (//h:li).
  Write Excel targets the fixed path D:\Desktop\documents.xls.
  NOTE(review): the "namespaces" list is empty, so the h: prefix is presumably
  bound by the operator itself; confirm against the operator documentation.
-->
<process version="5.1.011">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.1.011"
            expanded="true"
            name="Process">
    <process expanded="true" height="554" width="1217">

      <!-- Step 1: page retrieval operator. -->
      <operator activated="true"
                class="web:get_webpage"
                compatibility="5.1.003"
                expanded="true"
                height="60"
                name="Get Page"
                width="90"
                x="45"
                y="120">
        <parameter key="url" value="http://www.microsoft.com/about/legal/en/us/IntellectualProperty/Copyright/Default.aspx"/>
        <parameter key="random_user_agent" value="true"/>
        <list key="query_parameters"/>
      </operator>

      <!-- Step 2: segment the document with the XPath query list. -->
      <operator activated="true"
                class="text:cut_document"
                compatibility="5.1.002"
                expanded="true"
                height="60"
                name="Cut Document"
                width="90"
                x="246"
                y="120">
        <parameter key="query_type" value="XPath"/>
        <!-- The key spelling "string_machting_queries" is kept exactly as in the original. -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries">
          <parameter key="paragraph" value="//h:p"/>
          <parameter key="list" value="//h:li"/>
        </list>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <!-- Inner subprocess: the segment port is wired straight to the first document sink. -->
        <process expanded="true" height="673" width="1293">
          <connect from_port="segment" to_port="document 1"/>
          <portSpacing port="source_segment" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: per-document subprocess; word vectors and meta information are off, text is kept. -->
      <operator activated="true"
                class="text:process_documents"
                compatibility="5.1.002"
                expanded="true"
                height="94"
                name="Process Documents"
                width="90"
                x="514"
                y="120">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <process expanded="true" height="673" width="1293">
          <operator activated="true"
                    class="web:extract_html_text_content"
                    compatibility="5.1.003"
                    expanded="true"
                    height="60"
                    name="Extract Content"
                    width="90"
                    x="447"
                    y="30"/>
          <connect from_port="document" to_op="Extract Content" to_port="document"/>
          <connect from_op="Extract Content" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 4: duplicate removal configured on the "text" attribute. -->
      <operator activated="true"
                class="remove_duplicates"
                compatibility="5.1.011"
                expanded="true"
                height="76"
                name="Remove Duplicates"
                width="90"
                x="648"
                y="120">
        <parameter key="attribute" value="text"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>

      <!-- Step 5: ID generation with no explicit parameters set. -->
      <operator activated="true"
                class="generate_id"
                compatibility="5.1.011"
                expanded="true"
                height="76"
                name="Generate ID"
                width="90"
                x="782"
                y="120"/>

      <!-- Step 6: Excel export to a fixed, machine-specific path. -->
      <operator activated="true"
                class="write_excel"
                compatibility="5.1.011"
                expanded="true"
                height="60"
                name="Write Excel"
                width="90"
                x="983"
                y="210">
        <parameter key="excel_file" value="D:\Desktop\documents.xls"/>
      </operator>

      <!-- Wiring between the operators above and the process result port. -->
      <connect from_op="Get Page" from_port="output" to_op="Cut Document" to_port="document"/>
      <connect from_op="Cut Document" from_port="documents" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_op="Remove Duplicates" to_port="example set input"/>
      <connect from_op="Remove Duplicates" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Write Excel" to_port="input"/>
      <connect from_op="Write Excel" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.1.011).
  Operator chain, as wired by the connect elements below:
    Get Page, Cut Document, Process Documents, Filter Examples,
    Generate ID, Write Excel, then result 1.
  The Remove Duplicates operator is present but has activated="false" and
  no connect element references it.
  Write Excel targets the fixed path D:\Desktop\documents.xls.
  NOTE(review): the "namespaces" list is empty, so the h: prefix is presumably
  bound by the operator itself; confirm against the operator documentation.
-->
<process version="5.1.011">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.1.011"
            expanded="true"
            name="Process">
    <process expanded="true" height="589" width="835">

      <!-- Step 1: page retrieval operator. -->
      <operator activated="true"
                class="web:get_webpage"
                compatibility="5.1.002"
                expanded="true"
                height="60"
                name="Get Page"
                width="90"
                x="45"
                y="30">
        <parameter key="url" value="http://www.microsoft.com/about/legal/en/us/IntellectualProperty/Copyright/Default.aspx"/>
        <parameter key="random_user_agent" value="true"/>
        <list key="query_parameters"/>
      </operator>

      <!-- Step 2: segment the document with the XPath query list. -->
      <operator activated="true"
                class="text:cut_document"
                compatibility="5.1.001"
                expanded="true"
                height="60"
                name="Cut Document"
                width="90"
                x="179"
                y="30">
        <parameter key="query_type" value="XPath"/>
        <!-- The key spelling "string_machting_queries" is kept exactly as in the original. -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries">
          <parameter key="paragraph" value="//h:p"/>
          <parameter key="list" value="//h:li"/>
        </list>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <!-- Inner subprocess: the segment port is wired straight to the first document sink. -->
        <process expanded="true" height="589" width="30">
          <connect from_port="segment" to_port="document 1"/>
          <portSpacing port="source_segment" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: per-document subprocess; word vectors and meta information are off, text is kept. -->
      <operator activated="true"
                class="text:process_documents"
                compatibility="5.1.001"
                expanded="true"
                height="94"
                name="Process Documents"
                width="90"
                x="313"
                y="30">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <process expanded="true" height="589" width="567">
          <operator activated="true"
                    class="web:extract_html_text_content"
                    compatibility="5.1.002"
                    expanded="true"
                    height="60"
                    name="Extract Content"
                    width="90"
                    x="45"
                    y="30"/>
          <connect from_port="document" to_op="Extract Content" to_port="document"/>
          <connect from_op="Extract Content" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!--
        Step 4: attribute value filter on the "text" attribute.
        NOTE(review): how "!=" treats the right-hand side \w* (regular expression
        versus literal comparison) is not visible in this file; confirm that the
        condition keeps the rows that are actually wanted.
      -->
      <operator activated="true"
                class="filter_examples"
                compatibility="5.1.011"
                expanded="true"
                height="76"
                name="Filter Examples"
                width="90"
                x="447"
                y="120">
        <parameter key="condition_class" value="attribute_value_filter"/>
        <parameter key="parameter_string" value="text != \w*"/>
      </operator>

      <!-- Disabled (activated="false") and not wired into the chain below. -->
      <operator activated="false"
                class="remove_duplicates"
                compatibility="5.1.011"
                expanded="true"
                height="76"
                name="Remove Duplicates"
                width="90"
                x="447"
                y="30">
        <parameter key="attribute" value="text"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>

      <!-- Step 5: ID generation with no explicit parameters set. -->
      <operator activated="true"
                class="generate_id"
                compatibility="5.1.011"
                expanded="true"
                height="76"
                name="Generate ID"
                width="90"
                x="581"
                y="30"/>

      <!-- Step 6: Excel export to a fixed, machine-specific path. -->
      <operator activated="true"
                class="write_excel"
                compatibility="5.1.011"
                expanded="true"
                height="60"
                name="Write Excel"
                width="90"
                x="715"
                y="30">
        <parameter key="excel_file" value="D:\Desktop\documents.xls"/>
      </operator>

      <!-- Wiring between the active operators above and the process result port. -->
      <connect from_op="Get Page" from_port="output" to_op="Cut Document" to_port="document"/>
      <connect from_op="Cut Document" from_port="documents" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Write Excel" to_port="input"/>
      <connect from_op="Write Excel" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>