examples split
wessel
New Altair Community Member
Dear All,
I would like to split a dataset.
Write the first 70% of the data to a training.csv
In the other 30%, remove the first 24 examples, and write the remainder to test.csv.
As a starting point, this XML selects the first 1200 examples.
But it's bad because a fixed number of examples has to be specified, instead of a percentage.
<operator name="Root" class="Process" expanded="yes">
  <!-- Read the raw data from final.csv. -->
  <operator name="load final" class="CSVExampleSource">
    <parameter key="filename" value="C:\Users\wluijben\Desktop\final.csv"/>
  </operator>

  <!-- Skip every feature (".*") except the one named "wind". -->
  <operator name="remove all but wind" class="FeatureNameFilter">
    <parameter key="skip_features_with_name" value=".*"/>
    <parameter key="except_features_with_name" value="wind"/>
  </operator>

  <!-- Turn the series into windowed examples using a window size of 96. -->
  <operator name="window size 96" class="MultivariateSeries2WindowExamples">
    <parameter key="window_size" value="96"/>
  </operator>

  <!-- Give the attribute "wind-0" the role "label". -->
  <operator name="ChangeAttributeRole" class="ChangeAttributeRole">
    <parameter key="name" value="wind-0"/>
    <parameter key="target_role" value="label"/>
  </operator>

  <!-- No parameters set; presumably adds an id to each example (confirm against the operator docs). -->
  <operator name="IdTagging" class="IdTagging"/>

  <!-- Keep examples 1 through 1200: a fixed count, not a percentage of the data. -->
  <operator name="1-1200" class="ExampleRangeFilter">
    <parameter key="first_example" value="1"/>
    <parameter key="last_example" value="1200"/>
  </operator>

  <!-- Write the selected examples to training.csv, comma-separated. -->
  <operator name="CSVExampleSetWriter" class="CSVExampleSetWriter">
    <parameter key="csv_file" value="C:\Users\wluijben\Desktop\training.csv"/>
    <parameter key="column_separator" value=","/>
  </operator>
</operator>
I would like to split a dataset.
Write the first 70% of the data to a training.csv
In the other 30%, remove the first 24 examples, and write the remainder to test.csv.
As a starting point, this XML selects the first 1200 examples.
But it's bad because a fixed number of examples has to be specified, instead of a percentage.
<operator name="Root" class="Process" expanded="yes">
  <!-- Read the raw data from final.csv. -->
  <operator name="load final" class="CSVExampleSource">
    <parameter key="filename" value="C:\Users\wluijben\Desktop\final.csv"/>
  </operator>

  <!-- Skip every feature (".*") except the one named "wind". -->
  <operator name="remove all but wind" class="FeatureNameFilter">
    <parameter key="skip_features_with_name" value=".*"/>
    <parameter key="except_features_with_name" value="wind"/>
  </operator>

  <!-- Turn the series into windowed examples using a window size of 96. -->
  <operator name="window size 96" class="MultivariateSeries2WindowExamples">
    <parameter key="window_size" value="96"/>
  </operator>

  <!-- Give the attribute "wind-0" the role "label". -->
  <operator name="ChangeAttributeRole" class="ChangeAttributeRole">
    <parameter key="name" value="wind-0"/>
    <parameter key="target_role" value="label"/>
  </operator>

  <!-- No parameters set; presumably adds an id to each example (confirm against the operator docs). -->
  <operator name="IdTagging" class="IdTagging"/>

  <!-- Keep examples 1 through 1200: a fixed count, not a percentage of the data. -->
  <operator name="1-1200" class="ExampleRangeFilter">
    <parameter key="first_example" value="1"/>
    <parameter key="last_example" value="1200"/>
  </operator>

  <!-- Write the selected examples to training.csv, comma-separated. -->
  <operator name="CSVExampleSetWriter" class="CSVExampleSetWriter">
    <parameter key="csv_file" value="C:\Users\wluijben\Desktop\training.csv"/>
    <parameter key="column_separator" value=","/>
  </operator>
</operator>
0
Answers
-
Hi there,
Use the datamacro operator to do the counting, like this...<operator name="Root" class="Process" expanded="yes">
<!-- Demo process: derive the last row index from the data instead of hard-coding it. -->
<!-- Stand-in data source for the demo (target_function = random). -->
<operator name="ExampleSetGenerator" class="ExampleSetGenerator">
<parameter key="target_function" value="random"/>
</operator>
<!-- Stores a value taken from the data in the macro "Exs"; per the answer text this is the
     example count (the macro type is left at its default here; confirm it counts examples). -->
<operator name="DataMacroDefinition" class="DataMacroDefinition">
<parameter key="macro" value="Exs"/>
</operator>
<!-- Defines the macro "3Q" as Exs * 0.75 rounded to 0 decimals.
     NOTE(review): the question asks for a 70% split, which would need 0.7 instead of 0.75. -->
<operator name="MacroConstruction" class="MacroConstruction">
<list key="function_descriptions">
<parameter key="3Q" value="round((%{Exs}*0.75),0)"/>
</list>
</operator>
<!-- Keeps examples 1 through the computed index held in %{3Q}. -->
<operator name="ExampleRangeFilter" class="ExampleRangeFilter">
<parameter key="first_example" value="1"/>
<parameter key="last_example" value="%{3Q}"/>
</operator>
</operator>0