"need help with the web mining"
ani25
New Altair Community Member
Hello, I tried to put some links in the document to data and connect it to the Get Pages operator, but after running the process there are no results; the output is empty. I tried to look on the web for help, but I didn't find anything.
so maybe someone can help me please.
thank you
so maybe someone can help me please.
thank you
Tagged:
0
Answers
-
Hi ani25,
Here is a small example that should work with a CSV file containing the following three lines:
link
http://www.rapid-i.com
http://www.yahoo.com
We also offer a Web and Text Mining webinar where you can learn more about the text and web plug-ins.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
<process expanded="true" height="492" width="829">
<operator activated="true" class="read_csv" compatibility="5.0.8" expanded="true" height="60" name="Read CSV" width="90" x="45" y="75">
<parameter key="file_name" value="file:/C:/Users/SebastianLoh/tmp/links.csv"/>
<list key="data_set_meta_data_information"/>
</operator>
<operator activated="true" class="web:retrieve_webpages" compatibility="5.0.3" expanded="true" height="60" name="Get Pages" width="90" x="179" y="75">
<parameter key="link_attribute" value="link"/>
<parameter key="page_attribute" value="page"/>
<parameter key="random_user_agent" value="true"/>
</operator>
<operator activated="true" class="text:data_to_documents" compatibility="5.0.7" expanded="true" height="60" name="Data to Documents" width="90" x="313" y="75">
<list key="specify_weights"/>
</operator>
<connect from_op="Read CSV" from_port="output" to_op="Get Pages" to_port="Example Set"/>
<connect from_op="Get Pages" from_port="Example Set" to_op="Data to Documents" to_port="example set"/>
<connect from_op="Data to Documents" from_port="documents" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
http://rapid-i.com/component/page,shop.product_details/flypage,garden_flypage.tpl/product_id,63/category_id,16/option,com_virtuemart/Itemid,180/
Ciao Sebastian0