A program to recognize and reward our most engaged community members
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (format version 5.0).
  Runs a single "Crawl Web" operator starting at the rapid-i.com index page
  and hands the operator's "Example Set" output to the first process result.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            expanded="true"
            name="Process">
    <process expanded="true" height="145" width="212">
      <operator activated="true"
                class="web:crawl_web"
                expanded="true"
                height="60"
                name="Crawl Web"
                width="90"
                x="45"
                y="30">
        <!-- Start page of the crawl. -->
        <parameter key="url" value="http://rapid-i.com/index.php?lang=en"/>
        <!--
          Two crawling rules. The values are regular expressions (note the
          escaped dot in the host name).
          NOTE(review): the keys "2" and "1" look like numeric rule-type
          indices; confirm against the Crawl Web operator documentation which
          rule type each index selects.
        -->
        <list key="crawling_rules">
          <parameter key="2" value="http://rapid-i\.com/.*"/>
          <parameter key="1" value=".*Rapid.*"/>
        </list>
        <!-- Pages are not written to disk; the crawl is capped at 2 pages. -->
        <parameter key="write_pages_into_files" value="false"/>
        <parameter key="max_pages" value="2"/>
      </operator>
      <!-- Wiring: crawler output goes straight to the first result port. -->
      <connect from_op="Crawl Web" from_port="Example Set" to_port="result 1"/>
      <!-- Port layout hints. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (format version 5.0).
  Step 1: "Crawl Web" starts at the forum's "recent" page and keeps the page
          content as an attribute of the resulting example set.
  Step 2: "Process Documents from Data" runs each document through
          Unescape Content, Tokenize and Filter Stopwords (English).
  The resulting example set is delivered on the first process result port.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            expanded="true"
            name="Process">
    <!--
      NOTE(review): height and width are negative here. They appear to be
      canvas layout values only; left unchanged, confirm they are harmless.
    -->
    <process expanded="true" height="-20" width="-50">
      <operator activated="true"
                class="web:crawl_web"
                expanded="true"
                height="60"
                name="Crawl Web"
                width="90"
                x="53"
                y="53">
        <!-- Start page of the crawl. -->
        <parameter key="url" value="http://rapid-i.com/rapidforum/index.php?action=recent"/>
        <!--
          Crawling rules restricted to the forum. The values are regular
          expressions, so the dot in the host name is escaped; unescaped it
          would match any character instead of a literal dot.
          NOTE(review): the keys "0" and "2" look like numeric rule-type
          indices; confirm against the Crawl Web operator documentation which
          rule type each index selects.
        -->
        <list key="crawling_rules">
          <parameter key="0" value="http://rapid-i\.com/rapidforum.*"/>
          <parameter key="2" value="http://rapid-i\.com/rapidforum.*"/>
        </list>
        <!-- Pages are kept in the example set rather than written to disk. -->
        <parameter key="write_pages_into_files" value="false"/>
        <parameter key="add_pages_as_attribute" value="true"/>
        <!--
          NOTE(review): machine-specific Windows path. Presumably unused while
          write_pages_into_files is false; verify before enabling file output.
        -->
        <parameter key="output_dir" value="C:\Documents and Settings\Administrator\My Documents\WebCrawler"/>
        <!-- Crawl limits. -->
        <parameter key="max_pages" value="10"/>
        <parameter key="max_depth" value="3"/>
        <parameter key="max_threads" value="12"/>
        <parameter key="user_agent" value="haddock checking rapid-miner-crawler"/>
        <!--
          NOTE(review): robot exclusion is switched off and the override flag
          is set. Left as configured; confirm this is intended and permitted
          for the target site.
        -->
        <parameter key="obey_robot_exclusion" value="false"/>
        <parameter key="really_ignore_exclusion" value="true"/>
      </operator>
      <operator activated="true"
                class="text:process_document_from_data"
                expanded="true"
                height="76"
                name="Process Documents from Data"
                width="90"
                x="360"
                y="46">
        <list key="specify_weights"/>
        <!-- Per-document chain: unescape HTML, tokenize, drop English stopwords. -->
        <process expanded="true" height="353" width="808">
          <operator activated="true"
                    class="web:unescape_html"
                    expanded="true"
                    height="60"
                    name="Unescape Content"
                    width="90"
                    x="187"
                    y="28"/>
          <operator activated="true"
                    class="text:tokenize"
                    expanded="true"
                    height="60"
                    name="Tokenize"
                    width="90"
                    x="400"
                    y="26"/>
          <operator activated="true"
                    class="text:filter_stopwords_english"
                    expanded="true"
                    height="60"
                    name="Filter Stopwords (English)"
                    width="90"
                    x="543"
                    y="26"/>
          <!-- Wiring of the inner chain, from document source to document sink. -->
          <connect from_port="document" to_op="Unescape Content" to_port="document"/>
          <connect from_op="Unescape Content" from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Outer wiring: crawler output feeds document processing, then the result port. -->
      <connect from_op="Crawl Web" from_port="Example Set" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>