A program to recognize and reward our most engaged community members
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition (process version 5.0).

  Wiring, as given by the connect elements below:
    * "Crawl Web" (Example Set) feeds "Generate Extract" (Example Set),
      whose output goes to process port "result 1".
    * "Process Web" (example set) goes to process port "result 2".
      Its nested sub-process runs each incoming document through
      "Transform Cases" and then "Extract Information".

  Escaping: inside attribute values a literal ampersand must be written
  as &amp; and a literal less-than sign as &lt;. The two "url" parameters
  and the "parrafismo" query rely on this; keep the escapes when editing
  this file by hand.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Process">
    <parameter key="encoding" value="ISO-8859-1"/>
    <process expanded="true" height="325" width="685">

      <!-- Crawl Web: configured with a start URL, two crawling rules, and limits
           on pages, delay, threads and page size.
           NOTE(review): the rule keys "2" and "0" look like serialized rule-type
           indexes; confirm their meaning against the Web Mining extension. -->
      <operator activated="true" class="web:crawl_web" expanded="true" height="60" name="Crawl Web" width="90" x="45" y="30">
        <parameter key="url" value="http://www.madrimasd.org/informacionidi/noticias/default.asp?Page=1&amp;Tipo=2"/>
        <list key="crawling_rules">
          <parameter key="2" value="http://www.madrimasd.org/noticias/.*"/>
          <parameter key="0" value="http://www.madrimasd.org/noticias/.*"/>
        </list>
        <parameter key="add_pages_as_attribute" value="true"/>
        <!-- NOTE(review): hard-coded Windows drive root; presumably needs changing
             on other machines. -->
        <parameter key="output_dir" value="C:\"/>
        <parameter key="extension" value="htm"/>
        <parameter key="max_pages" value="3"/>
        <parameter key="delay" value="100"/>
        <parameter key="max_threads" value="3"/>
        <parameter key="max_page_size" value="1000"/>
      </operator>

      <!-- Process Web: same start URL and crawling rules as "Crawl Web", but no
           output_dir, extension or max_page_size parameters are set here. It hosts
           a nested sub-process that is applied to the "document" port. -->
      <operator activated="true" class="web:process_web" expanded="true" height="60" name="Process Web" width="90" x="45" y="120">
        <parameter key="url" value="http://www.madrimasd.org/informacionidi/noticias/default.asp?Page=1&amp;Tipo=2"/>
        <list key="crawling_rules">
          <parameter key="2" value="http://www.madrimasd.org/noticias/.*"/>
          <parameter key="0" value="http://www.madrimasd.org/noticias/.*"/>
        </list>
        <parameter key="add_pages_as_attribute" value="true"/>
        <parameter key="max_pages" value="3"/>
        <parameter key="delay" value="100"/>
        <parameter key="max_threads" value="3"/>
        <process expanded="true" height="422" width="752">
          <operator activated="true" class="text:transform_cases" expanded="true" height="60" name="Transform Cases" width="90" x="112" y="30"/>
          <!-- Only an XPath query ("Introduccion") is defined; the other query lists
               are empty.
               NOTE(review): the XPath uses the "h:" prefix while the "namespaces"
               list is empty; presumably the operator binds that prefix itself.
               Confirm. -->
          <operator activated="true" class="text:extract_information" expanded="true" height="60" name="Extract Information" width="90" x="313" y="30">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <list key="xpath_queries">
              <parameter key="Introduccion" value="//h:p/text()"/>
            </list>
            <list key="namespaces"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Extract Information" to_port="document"/>
          <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Generate Extract: reads the "Page" attribute with query_type "XPath".
           A string-matching query ("parrafismo") and a regular-expression query
           ("Jurjur") are also stored.
           NOTE(review): presumably only the list matching query_type is evaluated,
           which would leave those two queries unused. Confirm.
           NOTE(review): breakpoints="after" is set, so an interactive run will
           presumably pause after this operator. Confirm this is intended. -->
      <operator activated="true" breakpoints="after" class="text:generate_extract" expanded="true" height="60" name="Generate Extract" width="90" x="246" y="30">
        <parameter key="source_attribute" value="Page"/>
        <parameter key="query_type" value="XPath"/>
        <list key="string_machting_queries">
          <parameter key="parrafismo" value="&lt;p&gt;.&lt;/p&gt;"/>
        </list>
        <list key="regular_expression_queries">
          <parameter key="Jurjur" value="Sin(.*)Blasco"/>
        </list>
        <list key="regular_region_queries"/>
        <list key="xpath_queries">
          <parameter key="Introduccion" value="//h:p/text()"/>
        </list>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <parameter key="value_seperator" value="***"/>
      </operator>

      <connect from_op="Crawl Web" from_port="Example Set" to_op="Generate Extract" to_port="Example Set"/>
      <connect from_op="Process Web" from_port="example set" to_port="result 2"/>
      <connect from_op="Generate Extract" from_port="Example Set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>