JSON processing using the jq operator in RapidMiner
Somnath_Das
New Altair Community Member
Answers
-
Hi,
when I try your input on jqplay.org with the filter [.A, .B] | @csv, it returns the correct output.
The library used in the RapidMiner extension only returns the first element though. I'm not sure what to do there.
Your input is a bit atypical: it is a sequence of objects, but they are not wrapped in an explicit JSON array (`[ ... ]`).
When changing the input to this:
[{"A":"a1","B":"b1"},{"A":"a2","B":"b2"},{"A":"a3","B":"b3"}]
it works with the filter .[] | [.A, .B].
Example process:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example RapidMiner process: turn a JSON array of {A, B} objects into a table.

  Flow (as wired by the <connect> elements below):
    Create Document -> Multiply
    Multiply output 1 -> JSON To Data -> result 1
    Multiply output 2 -> Process Document with jq -> Write Document -> Read CSV -> result 2

  Double quotes that are part of a parameter value are written as &quot; so
  that the attribute values stay well-formed XML.
-->
<process version="9.8.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.8.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="-1"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Source document: the JSON input, wrapped in an explicit array. -->
      <operator activated="true" class="text:create_document" compatibility="9.3.001" expanded="true" height="68" name="Create Document" width="90" x="112" y="34">
        <parameter key="text" value="[ {&quot;A&quot;:&quot;a1&quot;,&quot;B&quot;:&quot;b1&quot;}, {&quot;A&quot;:&quot;a2&quot;,&quot;B&quot;:&quot;b2&quot;}, {&quot;A&quot;:&quot;a3&quot;,&quot;B&quot;:&quot;b3&quot;} ]"/>
        <parameter key="add label" value="false"/>
        <parameter key="label_type" value="nominal"/>
      </operator>
      <!-- Duplicates the document so both conversion paths receive the same input. -->
      <operator activated="true" class="multiply" compatibility="9.8.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="34"/>
      <operator activated="true" class="text:json_to_data" compatibility="9.3.001" expanded="true" height="82" name="JSON To Data" width="90" x="380" y="34">
        <parameter key="ignore_arrays" value="false"/>
        <parameter key="limit_attributes" value="false"/>
        <parameter key="skip_invalid_documents" value="false"/>
        <parameter key="guess_data_types" value="true"/>
        <parameter key="keep_missing_attributes" value="false"/>
        <parameter key="missing_values_aliases" value=", null, NaN, missing"/>
      </operator>
      <!-- jq path: iterate over the array and emit [A, B] per object, formatted as CSV. -->
      <operator activated="true" class="json_processing_with_jq:process_document_with_jq" compatibility="0.8.001" expanded="true" height="68" name="Process Document with jq" width="90" x="380" y="136">
        <parameter key="indent_json" value="true"/>
        <parameter key="jq_expression" value=".[] | [.A, .B]"/>
        <parameter key="output_format" value="csv"/>
      </operator>
      <operator activated="true" class="text:write_document" compatibility="9.3.001" expanded="true" height="82" name="Write Document" width="90" x="514" y="136">
        <parameter key="overwrite" value="true"/>
        <parameter key="encoding" value="SYSTEM"/>
      </operator>
      <!-- Reads the CSV written above; columns are declared as nominal attributes A and B. -->
      <operator activated="true" class="read_csv" compatibility="9.8.001" expanded="true" height="68" name="Read CSV" width="90" x="648" y="136">
        <parameter key="column_separators" value=","/>
        <parameter key="trim_lines" value="false"/>
        <parameter key="use_quotes" value="true"/>
        <parameter key="quotes_character" value="&quot;"/>
        <parameter key="escape_character" value="\"/>
        <parameter key="skip_comments" value="false"/>
        <parameter key="comment_characters" value="#"/>
        <parameter key="starting_row" value="1"/>
        <parameter key="parse_numbers" value="true"/>
        <parameter key="decimal_character" value="."/>
        <parameter key="grouped_digits" value="false"/>
        <parameter key="grouping_character" value=","/>
        <parameter key="infinity_representation" value=""/>
        <parameter key="date_format" value=""/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.nominal.attribute"/>
          <parameter key="1" value="B.true.nominal.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="true"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
      </operator>
      <connect from_op="Create Document" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="JSON To Data" to_port="documents 1"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Process Document with jq" to_port="input 1"/>
      <connect from_op="JSON To Data" from_port="example set" to_port="result 1"/>
      <connect from_op="Process Document with jq" from_port="result 1" to_op="Write Document" to_port="document"/>
      <connect from_op="Write Document" from_port="file" to_op="Read CSV" to_port="file"/>
      <connect from_op="Read CSV" from_port="output" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
With this input the output of "JSON to Data" is also meaningful.
Regards,
Balázs
0