Webcrawler Doubt
Hi All,
I am using RM 5.1 and I am currently experimenting with web mining.My objective is to crawl a web page and display according to the crawling rules. After applying the crawling rules I am not able to see any output.
Appreciate help and thanks in advance.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.1.008">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.1.008" expanded="true" name="Process">
<process expanded="true" height="503" width="604">
<operator activated="true" class="web:crawl_web" compatibility="5.1.002" expanded="true" height="60" name="Crawl Web" width="90" x="122" y="119">
<parameter key="url" value="http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm&r=0&p=1&f=S&l=50&Query=apple&d=PTXT"/>
<list key="crawling_rules">
<parameter key="store_with_matching_url" value=".*(Apple)"/>
<parameter key="store_with_matching_content" value=".*(Apple"/>
<parameter key="follow_link_with_matching_text" value=".*(Apple"/>
</list>
<parameter key="write_pages_into_files" value="false"/>
<parameter key="max_pages" value="5"/>
</operator>
<connect from_op="Crawl Web" from_port="Example Set" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Thanks
I am using RM 5.1 and I am currently experimenting with web mining.My objective is to crawl a web page and display according to the crawling rules. After applying the crawling rules I am not able to see any output.
Appreciate help and thanks in advance.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.1.008">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.1.008" expanded="true" name="Process">
<process expanded="true" height="503" width="604">
<operator activated="true" class="web:crawl_web" compatibility="5.1.002" expanded="true" height="60" name="Crawl Web" width="90" x="122" y="119">
<parameter key="url" value="http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm&r=0&p=1&f=S&l=50&Query=apple&d=PTXT"/>
<list key="crawling_rules">
<parameter key="store_with_matching_url" value=".*(Apple)"/>
<parameter key="store_with_matching_content" value=".*(Apple"/>
<parameter key="follow_link_with_matching_text" value=".*(Apple"/>
</list>
<parameter key="write_pages_into_files" value="false"/>
<parameter key="max_pages" value="5"/>
</operator>
<connect from_op="Crawl Web" from_port="Example Set" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Thanks