🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Writing an XPATH query to retrieve text within quotes

User: "a_heavey2"
New Altair Community Member
Updated by Jocelyn

I'm having trouble retrieving text within double quotes from a webpage using information extraction. I already have a number of xpaths which are working as expected (all of my xpaths work apart from the last one in the xml process code). Does anyone know what the terminology is for retrieving text that is inside double quotes? 

 

The following XPath works fine in Google Docs but not in RapidMiner: Google Docs still retrieves the text even though it's within quotes, whereas RapidMiner returns blank values.

<parameter key="TEST" value="//*[@class=&quot;single-review&quot;]/text()"/>

Overall process:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process (version 7.6.001).
  Layout:
    * "Extract Information"               : deactivated (activated="false"), not connected.
    * "Process Documents from Files (2)"  : reads documents from the "all" text directory and
                                            runs the nested "Extract Information (2)" operator
                                            on each document; its example set is wired to
                                            "result 1".
  Quoting convention: XPath string literals inside attribute values are written with
  &quot; on BOTH sides. A double-escaped opening quote (an escaped ampersand followed by
  "quot;") would reach the XPath engine as literal text instead of a quote character and
  produce an invalid expression.
-->
<process version="7.6.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Deactivated operator with no queries configured; no connect element references it. -->
      <operator activated="false" class="text:extract_information" compatibility="7.5.000" expanded="true" height="68" name="Extract Information" width="90" x="514" y="34">
        <!-- Key spelling "string_machting_queries" is kept exactly as in the source file. -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries"/>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <list key="jsonpath_queries"/>
      </operator>
      <operator activated="true" class="text:process_document_from_file" compatibility="7.5.000" expanded="true" height="82" name="Process Documents from Files (2)" width="90" x="246" y="34">
        <list key="text_directories">
          <parameter key="all" value="C:\Users\heaveya\Desktop\Text-Mining\project_1"/>
        </list>
        <parameter key="use_file_extension_as_type" value="false"/>
        <parameter key="prune_below_absolute" value="2"/>
        <parameter key="prune_above_absolute" value="999"/>
        <process expanded="true">
          <operator activated="true" class="text:extract_information" compatibility="7.5.000" expanded="true" height="68" name="Extract Information (2)" width="90" x="246" y="34">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!-- One parameter per extracted attribute: key = attribute name, value = XPath query. -->
            <list key="xpath_queries">
              <parameter key="Game Title" value="//*[@class=&quot;id-app-title&quot;]/text()"/>
              <parameter key="Date of First Review" value="//*[@class=&quot;review-date&quot;]/text()"/>
              <parameter key="Description" value="//*[@jsname=&quot;C4s9Ed&quot;]/text()"/>
              <parameter key="No:OfReviews" value="//*[@class=&quot;reviews-num&quot;]/text()"/>
              <parameter key="Overall Average Rating" value="//*[@class=&quot;score&quot;]/text()"/>
              <!-- NOTE(review): the h: prefix is used here and below but the "namespaces" list is
                   empty; presumably the operator binds h: itself for HTML input. Confirm. -->
              <parameter key="Game Makers" value="//*[@class=&quot;document-subtitle primary&quot;]/h:span/text()"/>
              <parameter key="No. of Downloads" value="//*[@itemprop=&quot;numDownloads&quot;]/text()"/>
              <parameter key="Last Updated" value="//*[@itemprop=&quot;datePublished&quot;]/text()"/>
              <parameter key="What's new" value="//*[@class=&quot;recent-change&quot;]/text()"/>
              <parameter key="What's new 1" value="//h:div[2][contains(@class,'recent-change')]/text()"/>
              <parameter key="What's new 2" value="//h:div[3][contains(@class,'recent-change')]/text()"/>
              <parameter key="What's new 3" value="//h:div[4][contains(@class,'recent-change')]/text()"/>
              <!-- NOTE(review): /text() selects only text nodes that are direct children of the
                   matched element. If the review body sits inside nested child elements, this
                   query can come back blank; //text() on the same element may be needed.
                   Verify against the actual page markup. -->
              <parameter key="TEST" value="//*[@class=&quot;single-review&quot;]/text()"/>
            </list>
            <list key="namespaces"/>
            <list key="index_queries"/>
            <list key="jsonpath_queries"/>
          </operator>
          <!-- Inner wiring: incoming document, then Extract Information (2), then "document 1". -->
          <connect from_port="document" to_op="Extract Information (2)" to_port="document"/>
          <connect from_op="Extract Information (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Outer wiring: the example set from Process Documents from Files (2) goes to "result 1". -->
      <connect from_op="Process Documents from Files (2)" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Find more posts tagged with

Sort by:
1 - 1 of 11