Date Time Formatting UTC

JEdward
New Altair Community Member
Hi,
I'm trying to import a column of times in the format
Currently RapidMiner uses SimpleDateFormat for the date formatting http://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html
but I'm struggling to work out how to implement it for the above values. It's the ISO 8601 datetime format so I had thought it would be pretty straightforward http://www.w3.org/TR/NOTE-datetime
I have tired the patterns
yyyy-MM-dd'T'HH:mm:ss.SSSZ (this works for 1,3 & 4 but fails on 2)
yyyy-MM-dd'T'HH:mm:ss.SZ (this fails on all)
yyyy-MM-dd'T'HH:mm:ss.Sz (this fails on all)
Anyone have any ideas?
I'm trying to import a column of times in the format
1 | 2014-09-01T14:30:12:123Z |
2 | 2014-09-01T14:32:13:33Z |
3 | 2014-09-01T14:12:12:523Z |
4 | 2014-09-01T14:23:12:323Z |
but I'm struggling to work out how to implement it for the above values. It's the ISO 8601 datetime format so I had thought it would be pretty straightforward http://www.w3.org/TR/NOTE-datetime
I have tired the patterns
yyyy-MM-dd'T'HH:mm:ss.SSSZ (this works for 1,3 & 4 but fails on 2)
yyyy-MM-dd'T'HH:mm:ss.SZ (this fails on all)
yyyy-MM-dd'T'HH:mm:ss.Sz (this fails on all)
Anyone have any ideas?
Tagged:
0
Answers
-
I've now realised that
yyyy-MM-dd'T'HH:mm:ss.S works however I have a new value that's throwing errors.
2014-07-08T14:20:52Z
In this example as the event happened exactly on .000 the server has removed all the numbers that it deems unnecessary.
I need to therefore work out how to be a bit more flexible with this. Arrgh!! )0 -
Okay, I've solved this for my needs and added an additional converter to first import the dates as nominal and then loop through them to concat the data as necessary for SimpleDateFormat formatting.
I'm leaving this open for now as this only works for UTC dates so if you have any other dates (or a better way of doing it than two loop attributes) let me know below.<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="5.3.015" expanded="true" height="76" name="ExampleData" width="90" x="45" y="30">
<process expanded="true">
<operator activated="true" class="text:create_document" compatibility="5.3.002" expanded="true" height="60" name="Create Document" width="90" x="45" y="75">
<parameter key="text" value="Date 2014-09-01T14:30:12.123Z 2014-09-01T14:32:13.33Z 2014-09-01T14:12:12.5Z 2014-09-01T14:23:12.323Z 2014-07-08T14:20:52Z 2014-07-08T14:20Z 2014-07-08T14Z 2014-07-08Z"/>
</operator>
<operator activated="true" class="text:write_document" compatibility="5.3.002" expanded="true" height="76" name="Write Document" width="90" x="315" y="30">
<parameter key="encoding" value="UTF-8"/>
</operator>
<operator activated="true" class="read_csv" compatibility="5.3.015" expanded="true" height="60" name="Read CSV" width="90" x="45" y="165">
<parameter key="date_format" value="yyyy-MM-dd'T'HH:mm:ss.SSSZ"/>
<list key="annotations"/>
<parameter key="time_zone" value="UTC"/>
<parameter key="locale" value="English (United Kingdom)"/>
<parameter key="encoding" value="UTF-8"/>
<list key="data_set_meta_data_information">
<parameter key="0" value="DateField.true.nominal.attribute"/>
</list>
</operator>
<operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="179" y="165">
<list key="function_descriptions">
<parameter key="DoubleDate" value="DateField"/>
</list>
</operator>
<connect from_op="Create Document" from_port="output" to_op="Write Document" to_port="document"/>
<connect from_op="Write Document" from_port="file" to_op="Read CSV" to_port="file"/>
<connect from_op="Read CSV" from_port="output" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="loop_attributes" compatibility="5.3.015" expanded="true" height="76" name="DateFixing Pt1" width="90" x="179" y="30">
<description>Select the 'Date fields stored as nominal to convert' using the Select Attributes operator here.
NOTE: This only works for dates stored as UTC as it removes the timezone code 'Z'. </description>
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="DateField|DoubleDate|"/>
<parameter key="include_special_attributes" value="true"/>
<process expanded="true">
<operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes" width="90" x="112" y="30">
<list key="function_descriptions">
<parameter key="%{loop_attribute}" value="replace(%{loop_attribute},"Z","")"/>
<parameter key="%{loop_attribute}" value="if(length(%{loop_attribute})==19,concat(%{loop_attribute},".000"),DateField)"/>
<parameter key="%{loop_attribute}" value="if(length(%{loop_attribute})==16,concat(%{loop_attribute},":00.000"),DateField)"/>
<parameter key="%{loop_attribute}" value="if(length(%{loop_attribute})==13,concat(%{loop_attribute},":00:00.000"),DateField)"/>
<parameter key="%{loop_attribute}" value="if(length(%{loop_attribute})==10,concat(%{loop_attribute},"T00:00:00.000"),DateField)"/>
</list>
</operator>
<connect from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_port="example set"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_example set" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
<operator activated="true" class="loop_attributes" compatibility="5.3.015" expanded="true" height="76" name="DateFixing Pt2" width="90" x="313" y="75">
<description>Select the 'Date fields stored as nominal to convert' using the Select Attributes operator here.
NOTE: This only works for dates stored as UTC as it removes the timezone code 'Z'.
Annoyingly this is a two part conversion as the loop will try to execute again if you convert from nominal to date within the process and so two loops is the best alternative thus far. </description>
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="DateField|DoubleDate|"/>
<parameter key="include_special_attributes" value="true"/>
<process expanded="true">
<operator activated="true" class="nominal_to_date" compatibility="5.3.015" expanded="true" height="76" name="Nominal to Date" width="90" x="179" y="30">
<parameter key="attribute_name" value="%{loop_attribute}"/>
<parameter key="date_type" value="date_time"/>
<parameter key="date_format" value="yyyy-MM-dd'T'HH:mm:ss.S"/>
<parameter key="time_zone" value="UTC"/>
<parameter key="locale" value="English (United Kingdom)"/>
</operator>
<connect from_port="example set" to_op="Nominal to Date" to_port="example set input"/>
<connect from_op="Nominal to Date" from_port="example set output" to_port="example set"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_example set" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
<connect from_op="ExampleData" from_port="out 1" to_op="DateFixing Pt1" to_port="example set"/>
<connect from_op="DateFixing Pt1" from_port="example set" to_op="DateFixing Pt2" to_port="example set"/>
<connect from_op="DateFixing Pt2" from_port="example set" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>0