Hello all,
We want to extract some comments (text only) from a website using XPath. We tried a lot of different commands, but we can't find what is going wrong. Can anyone help?
platanas20
Our XML code is:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: crawls pages starting from an opengov.gr comments page
  ("Process Documents from Web") and, for every retrieved document, runs an
  XPath query ("Extract Information (2)") whose result is stored under the
  key "comment". The resulting example set is delivered to result port 1.
-->
<process version="5.1.004">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.004" expanded="true" name="Process">
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true" height="603" width="880">
      <operator activated="true"
                class="web:process_web"
                compatibility="5.1.000"
                expanded="true"
                height="60"
                name="Process Documents from Web"
                width="90"
                x="246"
                y="165">
        <!-- The URL is kept on a single line: a line break inside an attribute
             value is normalized to a space and would end up in the URL. -->
        <parameter key="url" value="http://www.opengov.gr/ypes/?p=877#comments"/>
        <!-- NOTE(review): the start URL does not contain "page"; confirm that the
             store/follow patterns below match the pages you actually want. -->
        <list key="crawling_rules">
          <parameter key="store_with_matching_url" value=".*page.*"/>
          <parameter key="follow_link_with_matching_url" value=".*page.*|.*.gr.*"/>
        </list>
        <parameter key="max_pages" value="10"/>
        <parameter key="user_agent" value="Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24"/>
        <process expanded="true" height="485" width="979">
          <operator activated="true"
                    class="text:extract_information"
                    compatibility="5.1.001"
                    expanded="true"
                    height="60"
                    name="Extract Information (2)"
                    width="90"
                    x="210"
                    y="30">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!--
              XPath query for the comment text.
              * The string literal uses single quotes so that the surrounding
                double-quoted XML attribute stays well-formed.
              * Every element step carries the "h:" prefix; per the Text
                Processing extension documentation, HTML input is converted to
                XHTML and its elements are addressed through that prefix.
              * The path selects the text nodes of the paragraphs directly
                inside the matching comment div.
              NOTE(review): the predicate compares the class attribute for exact
              equality, so only divs whose class is exactly this string are
              selected; other comment divs presumably carry different class
              strings. Confirm against the page source.
            -->
            <list key="xpath_queries">
              <parameter key="comment" value="//h:div[@class='comment even thread-even depth-1']/h:p/text()"/>
            </list>
            <list key="namespaces"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Extract Information (2)" to_port="document"/>
          <connect from_op="Extract Information (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Process Documents from Web" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>