Hi,
My program reads a list of urls from excel crawls these and should extract something. But whatever X-Query I try nothing gets displayed in the results. The log says that the results are saved, but there is anything.
This is my code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
<process expanded="true" height="415" width="815">
<operator activated="true" class="read_excel" compatibility="5.2.006" expanded="true" height="60" name="Read Excel" width="90" x="112" y="75">
<parameter key="excel_file" value="C:\Dokumente und Einstellungen\Home\Eigene Dateien\Rapidminer\test.xls"/>
<parameter key="imported_cell_range" value="A1:A3"/>
<parameter key="first_row_as_names" value="false"/>
<list key="annotations">
<parameter key="0" value="Name"/>
</list>
<list key="data_set_meta_data_information">
<parameter key="0" value="url.true.nominal.attribute"/>
</list>
<parameter key="read_not_matching_values_as_missings" value="false"/>
</operator>
<operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="112" name="Loop Examples" width="90" x="581" y="75">
<process expanded="true" height="460" width="709">
<operator activated="true" class="extract_macro" compatibility="5.2.006" expanded="true" height="60" name="Extract Macro" width="90" x="179" y="30">
<parameter key="macro" value="weburl"/>
<parameter key="macro_type" value="data_value"/>
<parameter key="attribute_name" value="url"/>
<parameter key="example_index" value="%{example}"/>
</operator>
<operator activated="true" class="web:process_web" compatibility="5.2.000" expanded="true" height="60" name="Process Documents from Web" width="90" x="179" y="120">
<parameter key="url" value="%{weburl}"/>
<list key="crawling_rules">
<parameter key="follow_link_with_matching_url" value=".+onta.+|.+about.+|.+info.+|.+suppo.+|.+impre.+"/>
</list>
<parameter key="add_pages_as_attribute" value="true"/>
<parameter key="max_depth" value="3"/>
<parameter key="delay" value="500"/>
<parameter key="max_threads" value="5"/>
<parameter key="user_agent" value="Mozilla/5.0 (Windows NT 5.1; rv:12.0) Gecko/20100101 Firefox/12.0 "/>
<process expanded="true" height="605" width="974">
<operator activated="true" class="text:extract_information" compatibility="5.2.002" expanded="true" height="60" name="Extract Information" width="90" x="380" y="255">
<parameter key="query_type" value="XPath"/>
<list key="string_machting_queries"/>
<list key="regular_expression_queries"/>
<list key="regular_region_queries"/>
<list key="xpath_queries">
<parameter key="mail" value="//h:@href"/>
</list>
<list key="namespaces"/>
<list key="index_queries"/>
</operator>
<connect from_port="document" to_op="Extract Information" to_port="document"/>
<connect from_op="Extract Information" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_port="example set"/>
<connect from_op="Process Documents from Web" from_port="example set" to_port="output 2"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_example set" spacing="0"/>
<portSpacing port="sink_output 1" spacing="72"/>
<portSpacing port="sink_output 2" spacing="0"/>
<portSpacing port="sink_output 3" spacing="0"/>
</process>
</operator>
<connect from_op="Read Excel" from_port="output" to_op="Loop Examples" to_port="example set"/>
<connect from_op="Loop Examples" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Everything work, but the extraction
Can you help me please?
Regards
Ben