JSON to data and de-pivot for exampleset
I'm having a 'mare trying to get some data to de-pivot from json...
The below process gets data from Poloniex in json format. The first row works fine and the exampleset is generated as I'd like, the other 2 (the orderbook and historical trades) I just can't get my head around. The Poloniex public api info is here https://poloniex.com/support/api/ but seems to be outdated. Any help would be appreciated.
I came across another post whilst trying to figure out how to use the json file which gave me the de-pivot, I also found one using the json -> xml & xpath way but it seems like json -> data and de-pivot should work.
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="45" y="34">
<parameter key="url" value="https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&end=9999999999&period=7200&start=1505057287"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get historical price data</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data" width="90" x="179" y="34">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (2)" width="90" x="45" y="187">
<parameter key="url" value="https://poloniex.com/public?command=returnTradeHistory&currencyPair=BTC_XMR"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get historical trade data (last month / 50,000 trades (API limit))</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (2)" width="90" x="179" y="187">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="187">
<list key="attribute_name">
<parameter key="amount" value="amount"/>
<parameter key="date" value="date"/>
<parameter key="globalTradeID" value="globalTradeID"/>
<parameter key="rate" value="rate"/>
<parameter key="total" value="total"/>
<parameter key="tradeID" value="tradeID"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="340">
<parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&currencyPair=BTC_XMR&depth=10&start=1505057287end=9999999999"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get orderbook data</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="340">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot" width="90" x="313" y="34">
<list key="attribute_name">
<parameter key="close" value="\[\d+\]\.close"/>
<parameter key="high" value="\[\d+\]\.high"/>
<parameter key="low" value="\[\d+\]\.low"/>
<parameter key="open" value="\[\d+\]\.open"/>
<parameter key="date" value="\[\d+\]\.date"/>
<parameter key="volume" value="\[\d+\]\.volume"/>
<parameter key="quoteVolume" value="\[\d+\]\.quoteVolume"/>
<parameter key="weightedAverage" value="\[\d+\]\.weightedAverage"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (3)" width="90" x="313" y="340">
<list key="attribute_name">
<parameter key="asks" value="asks"/>
<parameter key="bids" value="bids"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>
Thanks,
Alex
Best Answer
-
hello @alex_fawzi - ok that process came through fine. The good news is that you're not really missing anything; the bad news is that second JSON array is just not that easy to work with in RapidMiner. You can do what was suggested in that thread (JSON to XML, Write Document, Read XML) if you're just looking for one or two things. If you really want to "unwrap" the array, you need to do some work. Here is a quick-and-dirty for you that will give you the general idea (I probably don't have all the fields correct...):
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="34">
<parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&currencyPair=BTC_XMR&depth=10&start=1506279150&end=9999999999&period=7200"/>
<list key="query_parameters"/>
<list key="request_properties"/>
</operator>
<operator activated="true" class="text:json_to_data" compatibility="7.5.000" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="34"/>
<operator activated="true" class="transpose" compatibility="7.6.001" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
<operator activated="true" class="filter_examples" compatibility="7.6.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="34">
<list key="filters_list">
<parameter key="filters_entry_key" value="id.contains.["/>
</list>
</operator>
<operator activated="true" class="replace" compatibility="7.6.001" expanded="true" height="82" name="Replace" width="90" x="581" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="replace_what" value="\]"/>
</operator>
<operator activated="true" class="split" compatibility="7.6.001" expanded="true" height="82" name="Split" width="90" x="715" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="split_pattern" value="\["/>
</operator>
<operator activated="true" class="pivot" compatibility="7.6.001" expanded="true" height="82" name="Pivot" width="90" x="849" y="34">
<parameter key="group_attribute" value="id_2"/>
<parameter key="index_attribute" value="id_1"/>
<parameter key="consider_weights" value="false"/>
</operator>
<connect from_op="Get Page (3)" from_port="output" to_op="JSON To Data (3)" to_port="documents 1"/>
<connect from_op="JSON To Data (3)" from_port="example set" to_op="Transpose" to_port="example set input"/>
<connect from_op="Transpose" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Replace" to_port="example set input"/>
<connect from_op="Replace" from_port="example set output" to_op="Split" to_port="example set input"/>
<connect from_op="Split" from_port="example set output" to_op="Pivot" to_port="example set input"/>
<connect from_op="Pivot" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>Funny enough I have been working on a small project that will require a tool that will do exactly this much easier. Stay tuned.
Scott
0
Answers
-
hello @alex_fawzi - happy to help but can you please paste only one process XML in the thread? It looks like you have two or three.
Scott0 -
Hi Scott, sorry about that! Let me try again.
I'm just copying and pasting directly from studio so I'll try process by process.
This one works
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="45" y="34">
<parameter key="url" value="https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&end=9999999999&period=7200&start=1505057287"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get historical price data</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data" width="90" x="179" y="34">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot" width="90" x="313" y="34">
<list key="attribute_name">
<parameter key="close" value="\[\d+\]\.close"/>
<parameter key="high" value="\[\d+\]\.high"/>
<parameter key="low" value="\[\d+\]\.low"/>
<parameter key="open" value="\[\d+\]\.open"/>
<parameter key="date" value="\[\d+\]\.date"/>
<parameter key="volume" value="\[\d+\]\.volume"/>
<parameter key="quoteVolume" value="\[\d+\]\.quoteVolume"/>
<parameter key="weightedAverage" value="\[\d+\]\.weightedAverage"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>This one doesn't (I'll leave out the third as I think whatever it is that's stopping me from getting it right is the same for both)
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (2)" width="90" x="45" y="187">
<parameter key="url" value="https://poloniex.com/public?command=returnTradeHistory&currencyPair=BTC_XMR"/>
<parameter key="random_user_agent" value="false"/>
<parameter key="connection_timeout" value="10000"/>
<parameter key="read_timeout" value="10000"/>
<parameter key="follow_redirects" value="true"/>
<parameter key="accept_cookies" value="none"/>
<parameter key="cookie_scope" value="global"/>
<parameter key="request_method" value="GET"/>
<list key="query_parameters"/>
<list key="request_properties"/>
<parameter key="override_encoding" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
<description align="center" color="orange" colored="true" width="126">Get historical trade data (last month / 50,000 trades (API limit))</description>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (2)" width="90" x="179" y="187">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="187">
<list key="attribute_name">
<parameter key="amount" value=".amount"/>
<parameter key="date" value=".date"/>
<parameter key="globalTradeID" value="\[\d+\]\.globalTradeID"/>
<parameter key="rate" value="\[\d+\]\.rate"/>
<parameter key="total" value="\[\d+\]\.total"/>
<parameter key="tradeID" value="\[\d+\]\.tradeID"/>
</list>
<parameter key="index_attribute" value="id"/>
<parameter key="create_nominal_index" value="false"/>
<parameter key="keep_missings" value="false"/>
</operator>
</process>Thanks
0 -
hello @alex_fawzi - so I think you're still pasting in more than one process. Just take everything in the XML pane and paste here. One process per insert. Otherwise you get this mess:
Scott
0 -
Third time lucky... sorry about this. Removed one of the lines as it was getting stuck so in the proess the first line seems to work but the second doesn't transform as I would expect and I don't know why.
This is where I found the info that led to me to where I am now - http://community.rapidminer.com/t5/RapidMiner-Studio-Forum/Rapidminer-and-JSON/m-p/33574
?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="112" y="34">
<parameter key="url" value="https://poloniex.com/public?command=returnChartData&currencyPair=BTC_XMR&start=1506279150&end=9999999999&period=7200"/>
<list key="query_parameters"/>
<list key="request_properties"/>
</operator>
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data" width="90" x="246" y="34"/>
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot" width="90" x="380" y="34">
<list key="attribute_name">
<parameter key="close" value="\[\d+\]\.close"/>
<parameter key="high" value="\[\d+\]\.high"/>
<parameter key="low" value="\[\d+\]\.low"/>
<parameter key="open" value="\[\d+\]\.open"/>
<parameter key="date" value="\[\d+\]\.date"/>
<parameter key="volume" value="\[\d+\]\.volume"/>
<parameter key="quoteVolume" value="\[\d+\]\.quoteVolume"/>
<parameter key="weightedAverage" value="\[\d+\]\.weightedAverage"/>
</list>
<parameter key="index_attribute" value="id"/>
</operator>
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="340">
<parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&currencyPair=BTC_XMR&depth=10&start=1506279150&end=9999999999&period=7200"/>
<list key="query_parameters"/>
<list key="request_properties"/>
</operator>
<operator activated="true" class="text:json_to_data" compatibility="7.4.001" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="340"/>
<operator activated="true" class="de_pivot" compatibility="7.6.001" expanded="true" height="82" name="De-Pivot (3)" width="90" x="313" y="340">
<list key="attribute_name">
<parameter key="asks" value="\[\d+\]\.asks"/>
<parameter key="bids" value="\[\d+\]\.bids"/>
<parameter key="isFrozen" value="\[\d+\]\.isFrozen"/>
<parameter key="seq" value="\[\d+\]\.seq"/>
</list>
<parameter key="index_attribute" value="id"/>
</operator>
<connect from_op="Get Page" from_port="output" to_op="JSON To Data" to_port="documents 1"/>
<connect from_op="JSON To Data" from_port="example set" to_op="De-Pivot" to_port="example set input"/>
<connect from_op="De-Pivot" from_port="example set output" to_port="result 1"/>
<connect from_op="Get Page (3)" from_port="output" to_op="JSON To Data (3)" to_port="documents 1"/>
<connect from_op="JSON To Data (3)" from_port="example set" to_op="De-Pivot (3)" to_port="example set input"/>
<connect from_op="De-Pivot (3)" from_port="example set output" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>Thanks again
0 -
hello @alex_fawzi - ok that process came through fine. The good news is that you're not really missing anything; the bad news is that second JSON array is just not that easy to work with in RapidMiner. You can do what was suggested in that thread (JSON to XML, Write Document, Read XML) if you're just looking for one or two things. If you really want to "unwrap" the array, you need to do some work. Here is a quick-and-dirty for you that will give you the general idea (I probably don't have all the fields correct...):
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page (3)" width="90" x="45" y="34">
<parameter key="url" value="https://poloniex.com/public?command=returnOrderBook&currencyPair=BTC_XMR&depth=10&start=1506279150&end=9999999999&period=7200"/>
<list key="query_parameters"/>
<list key="request_properties"/>
</operator>
<operator activated="true" class="text:json_to_data" compatibility="7.5.000" expanded="true" height="82" name="JSON To Data (3)" width="90" x="179" y="34"/>
<operator activated="true" class="transpose" compatibility="7.6.001" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
<operator activated="true" class="filter_examples" compatibility="7.6.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="34">
<list key="filters_list">
<parameter key="filters_entry_key" value="id.contains.["/>
</list>
</operator>
<operator activated="true" class="replace" compatibility="7.6.001" expanded="true" height="82" name="Replace" width="90" x="581" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="replace_what" value="\]"/>
</operator>
<operator activated="true" class="split" compatibility="7.6.001" expanded="true" height="82" name="Split" width="90" x="715" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="split_pattern" value="\["/>
</operator>
<operator activated="true" class="pivot" compatibility="7.6.001" expanded="true" height="82" name="Pivot" width="90" x="849" y="34">
<parameter key="group_attribute" value="id_2"/>
<parameter key="index_attribute" value="id_1"/>
<parameter key="consider_weights" value="false"/>
</operator>
<connect from_op="Get Page (3)" from_port="output" to_op="JSON To Data (3)" to_port="documents 1"/>
<connect from_op="JSON To Data (3)" from_port="example set" to_op="Transpose" to_port="example set input"/>
<connect from_op="Transpose" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Replace" to_port="example set input"/>
<connect from_op="Replace" from_port="example set output" to_op="Split" to_port="example set input"/>
<connect from_op="Split" from_port="example set output" to_op="Pivot" to_port="example set input"/>
<connect from_op="Pivot" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>Funny enough I have been working on a small project that will require a tool that will do exactly this much easier. Stay tuned.
Scott
0 -
Amazing, thanks for that. I'll have a tinker about but it's looking good!
1 -
Hi @sgenzer
can you help me out with the timestampI cant work out how to convert from string of numbers to date. I just keep ending up with 1970 ?
to get it from a string to a date time format, I spent hours trying to suss it out?
thanks in advance, lee
0 -
hello @websiteguy (great screenname, BTW) - sure happy to help. Can you please post your XML and dataset so I can take a look?
Scott
0 -
@sgenzer I just posted the answer in the other thread: https://community.rapidminer.com/t5/RapidMiner-Studio-Forum/Rapidminer-and-JSON/m-p/33574#M23740
2