link
http://www.rapid-i.com
http://www.yahoo.com
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: Read CSV, then Get Pages, then Data to Documents; the documents go to result port 1. -->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.0.8"
            expanded="true"
            name="Process">
    <process expanded="true" height="492" width="829">

      <!-- Step 1: load the CSV named by file_name (an absolute local path; adjust it for your machine). -->
      <operator activated="true"
                class="read_csv"
                compatibility="5.0.8"
                expanded="true"
                height="60"
                name="Read CSV"
                width="90"
                x="45"
                y="75">
        <parameter key="file_name" value="file:/C:/Users/SebastianLoh/tmp/links.csv"/>
        <list key="data_set_meta_data_information"/>
      </operator>

      <!-- Step 2: web retrieval, configured with link_attribute "link" and page_attribute "page". -->
      <operator activated="true"
                class="web:retrieve_webpages"
                compatibility="5.0.3"
                expanded="true"
                height="60"
                name="Get Pages"
                width="90"
                x="179"
                y="75">
        <parameter key="link_attribute" value="link"/>
        <parameter key="page_attribute" value="page"/>
        <parameter key="random_user_agent" value="true"/>
      </operator>

      <!-- Step 3: convert the example set coming from Get Pages into documents. -->
      <operator activated="true"
                class="text:data_to_documents"
                compatibility="5.0.7"
                expanded="true"
                height="60"
                name="Data to Documents"
                width="90"
                x="313"
                y="75">
        <list key="specify_weights"/>
      </operator>

      <!-- Wiring between the three operators and the process result port. -->
      <connect from_op="Read CSV" from_port="output" to_op="Get Pages" to_port="Example Set"/>
      <connect from_op="Get Pages" from_port="Example Set" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_port="result 1"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Here is a little example which should work with a CSV file containing three lines: the header `link` followed by the two URLs http://www.rapid-i.com and http://www.yahoo.com. We also offer a Web and Text Mining webinar where you can learn more about the text and web plug-ins:
http://rapid-i.com/component/page,shop.product_details/flypage,garden_flypage.tpl/product_id,63/category_id,16/option,com_virtuemart/Itemid,180/
Ciao Sebastian