Hello,
I am having trouble getting a value from an HTML document using XPath. This is my process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Process with two operators:
    1. "Create Document" supplies a small HTML snippet through its "text" parameter.
    2. "Extract Information" runs the XPath query named "some_value" on that document.
  The document leaving "Extract Information" is wired to the process port "result 1".
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
    <process expanded="true" height="505" width="415">
      <operator activated="true" class="text:create_document" compatibility="5.0.6" expanded="true" height="60" name="Create Document" width="90" x="45" y="30">
        <!--
          The HTML markup is stored inside an attribute value, so "<", ">" and the
          inner double quotes must be written as entity references. A literal "<"
          or an unescaped quote here makes the file not well-formed.
        -->
        <parameter key="text" value="&lt;html&gt; &lt;head&gt; &lt;title&gt;hello&lt;/title&gt; &lt;/head&gt; &lt;body&gt; &lt;div class=&quot;class1&quot;&gt;goodbye&lt;/div&gt; &lt;/body&gt; &lt;/html&gt;"/>
      </operator>
      <operator activated="true" class="text:extract_information" compatibility="5.0.6" expanded="true" height="60" name="Extract Information" width="90" x="179" y="165">
        <parameter key="query_type" value="XPath"/>
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <!-- Single XPath query; its key "some_value" names the extracted value. -->
        <list key="xpath_queries">
          <parameter key="some_value" value="/html/head/title"/>
        </list>
        <!--
          NOTE(review): no namespace prefixes are declared here and the query above
          uses unprefixed element names. Presumably the operator parses the document
          into a namespace, which would explain why only a prefixed query matches;
          confirm against the operator documentation.
        -->
        <list key="namespaces"/>
        <list key="index_queries"/>
      </operator>
      <!-- Wiring: Create Document -> Extract Information -> process result 1. -->
      <connect from_op="Create Document" from_port="output" to_op="Extract Information" to_port="document"/>
      <connect from_op="Extract Information" from_port="document" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
The resulting value is "?"
What am I doing wrong?
It works when I change the XPath to this:
/h:html/h:head/h:title/text()
Is there a way to get rid of that "h:" prefix?
I suspect it has something to do with the namespace.
Thanks
Neil