[solved] Issue with Extract Information operator JsonPath query type

mrmikev
New Altair Community Member
I'm currently trying a basic example of the Extract Information operator using the JsonPath Query type. No matter how I structure the jsonpath query expression(s), I get either the entire document or an error:
Here's my environment:
RapidMiner Studio 6.3.0000 (rev: 251598) - Professional Plus
Windows 8.1
- $.store.book yields the entire document, not just the books.
- $.store.book[0] yields: Process Failed. net.minidev.json.JSONObject cannot be cast to net.minidev.json.JSONArray.
Here's the process:
Any direction on how the JsonPath query expressions should be written in RapidMiner is appreciated.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 6.3.000).
  Wiring: "Create Document" output goes to "Extract Information", whose
  document port is delivered to process result 1.
  "Extract Information" runs with query_type "JsonPath" and a single
  query named "booksOnly".
-->
<process version="6.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.3.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="text:create_document" compatibility="6.1.000" expanded="true" height="60" name="Create Document" width="90" x="45" y="30">
        <!--
          The JSON sample is carried inside a double-quoted attribute, so every
          double quote that belongs to the JSON must be written as &quot;.
          A raw quote would end the attribute value early and make the file
          unparseable. A parser turns &quot; back into a plain quote, so the
          operator still receives the same JSON text.
        -->
        <parameter key="text" value="{ &quot;store&quot;: { &quot;book&quot;: [ { &quot;category&quot;: &quot;reference&quot;, &quot;author&quot;: &quot;Nigel Rees&quot;, &quot;title&quot;: &quot;Sayings of the Century&quot;, &quot;price&quot;: 8.95 }, { &quot;category&quot;: &quot;fiction&quot;, &quot;author&quot;: &quot;Evelyn Waugh&quot;, &quot;title&quot;: &quot;Sword of Honour&quot;, &quot;price&quot;: 12.99 }, { &quot;category&quot;: &quot;fiction&quot;, &quot;author&quot;: &quot;Herman Melville&quot;, &quot;title&quot;: &quot;Moby ****&quot;, &quot;isbn&quot;: &quot;0-553-21311-3&quot;, &quot;price&quot;: 8.99 }, { &quot;category&quot;: &quot;fiction&quot;, &quot;author&quot;: &quot;J. R. R. Tolkien&quot;, &quot;title&quot;: &quot;The Lord of the Rings&quot;, &quot;isbn&quot;: &quot;0-395-19395-8&quot;, &quot;price&quot;: 22.99 } ], &quot;bicycle&quot;: { &quot;color&quot;: &quot;red&quot;, &quot;price&quot;: 19.95 } } }"/>
      </operator>
      <operator activated="true" class="text:extract_information" compatibility="6.1.000" expanded="true" height="60" name="Extract Information" width="90" x="447" y="30">
        <parameter key="query_type" value="JsonPath"/>
        <!--
          NOTE(review): the key "string_machting_queries" is kept exactly as it
          appears in the original file; presumably this is the tool's own
          parameter name. Verify before "correcting" the spelling.
        -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries"/>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <!-- Only the JsonPath list is populated; all other query lists are empty. -->
        <list key="jsonpath_queries">
          <parameter key="booksOnly" value="$.store.book"/>
        </list>
      </operator>
      <connect from_op="Create Document" from_port="output" to_op="Extract Information" to_port="document"/>
      <connect from_op="Extract Information" from_port="document" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
RapidMiner Studio 6.3.0000 (rev: 251598) - Professional Plus
Windows 8.1
Tagged:
0
Answers
-
Hi mrmikev,
the problem here is that you need a "Documents to Data" operator in order to make use of the metadata that "Extract Information" generates. Even then, only the first item of a list is shown. You can use "Cut Document" to get a collection of those items and "Combine Documents" to merge them into one line. Here is a process that shows how to do that:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
Cheers,
<process version="6.3.001">
  <!--
    RapidMiner process (version 6.3.001) with two parallel branches fed by
    one "Multiply" operator:
      branch 1: Extract Information, then Documents to Data, then result 1
      branch 2: Cut Document, then Combine Documents, then result 2
    Both branches use query_type "JsonPath" with the same query, named
    "booksOnly".
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.3.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="text:create_document" compatibility="6.1.000" expanded="true" height="60" name="Create Document" width="90" x="45" y="30">
        <!--
          The JSON sample is carried inside a double-quoted attribute, so every
          double quote that belongs to the JSON must be written as &quot;.
          A raw quote would end the attribute value early and make the file
          unparseable. A parser turns &quot; back into a plain quote, so the
          operator still receives the same JSON text.
        -->
        <parameter key="text" value="{ &quot;store&quot;: { &quot;book&quot;: [ { &quot;category&quot;: &quot;reference&quot;, &quot;author&quot;: &quot;Nigel Rees&quot;, &quot;title&quot;: &quot;Sayings of the Century&quot;, &quot;price&quot;: 8.95 }, { &quot;category&quot;: &quot;fiction&quot;, &quot;author&quot;: &quot;Evelyn Waugh&quot;, &quot;title&quot;: &quot;Sword of Honour&quot;, &quot;price&quot;: 12.99 }, { &quot;category&quot;: &quot;fiction&quot;, &quot;author&quot;: &quot;Herman Melville&quot;, &quot;title&quot;: &quot;Moby ****&quot;, &quot;isbn&quot;: &quot;0-553-21311-3&quot;, &quot;price&quot;: 8.99 }, { &quot;category&quot;: &quot;fiction&quot;, &quot;author&quot;: &quot;J. R. R. Tolkien&quot;, &quot;title&quot;: &quot;The Lord of the Rings&quot;, &quot;isbn&quot;: &quot;0-395-19395-8&quot;, &quot;price&quot;: 22.99 } ], &quot;bicycle&quot;: { &quot;color&quot;: &quot;red&quot;, &quot;price&quot;: 19.95 } } }"/>
      </operator>
      <!-- Duplicates the document so both branches receive the same input. -->
      <operator activated="true" class="multiply" compatibility="6.3.001" expanded="true" height="94" name="Multiply" width="90" x="179" y="30"/>
      <!-- Branch 1, step 1: JsonPath query applied via Extract Information. -->
      <operator activated="true" class="text:extract_information" compatibility="6.1.000" expanded="true" height="60" name="Extract Information" width="90" x="313" y="30">
        <parameter key="query_type" value="JsonPath"/>
        <!--
          NOTE(review): the key "string_machting_queries" is kept exactly as it
          appears in the original file; presumably this is the tool's own
          parameter name. Verify before "correcting" the spelling.
        -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries"/>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <list key="jsonpath_queries">
          <parameter key="booksOnly" value="$.store.book.title"/>
        </list>
      </operator>
      <!-- Branch 1, step 2: a breakpoint is set after this operator. -->
      <operator activated="true" breakpoints="after" class="text:documents_to_data" compatibility="6.1.000" expanded="true" height="76" name="Documents to Data" width="90" x="447" y="30">
        <parameter key="text_attribute" value="text"/>
      </operator>
      <!-- Branch 2, step 1: same JsonPath query; a breakpoint is set after this operator. -->
      <operator activated="true" breakpoints="after" class="text:cut_document" compatibility="6.1.000" expanded="true" height="60" name="Cut Document" width="90" x="313" y="120">
        <parameter key="query_type" value="JsonPath"/>
        <!-- NOTE(review): key spelling kept as in the original; see the note on Extract Information. -->
        <list key="string_machting_queries"/>
        <list key="regular_expression_queries"/>
        <list key="regular_region_queries"/>
        <list key="xpath_queries"/>
        <list key="namespaces"/>
        <list key="index_queries"/>
        <list key="jsonpath_queries">
          <parameter key="booksOnly" value="$.store.book.title"/>
        </list>
        <!-- Inner subprocess: each segment is passed straight through to the output. -->
        <process expanded="true">
          <connect from_port="segment" to_port="document 1"/>
          <portSpacing port="source_segment" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Branch 2, step 2: receives the documents produced by Cut Document. -->
      <operator activated="true" class="text:combine_documents" compatibility="6.1.000" expanded="true" height="76" name="Combine Documents" width="90" x="447" y="120"/>
      <connect from_op="Create Document" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Extract Information" to_port="document"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Cut Document" to_port="document"/>
      <connect from_op="Extract Information" from_port="document" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_port="result 1"/>
      <connect from_op="Cut Document" from_port="documents" to_op="Combine Documents" to_port="documents 1"/>
      <connect from_op="Combine Documents" from_port="document" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Helge0