Separating Row from CSV File with a condition
P2017000971
New Altair Community Member
Hello Guys,
I have imported a CSV File containing different attributes and Rows
I Want to separate this file into multiple files or multiple outputs using a condition
As seen The data are merged, and I want the data which starts with runningsensor at row 1 row to be as separate output alone until row no (8).
After that, the next running-sensor block should be separated in the same way: it starts at the next "runningsensor" row (row 9) and runs until the last row before the following "runningsensor" row.
I want it to be automatic to separate it
any feedbacks
I have imported a CSV File containing different attributes and Rows
I Want to separate this file into multiple files or multiple outputs using a condition
As seen The data are merged, and I want the data which starts with runningsensor at row 1 row to be as separate output alone until row no (8).
After that, the next running-sensor block should be separated in the same way: it starts at the next "runningsensor" row (row 9) and runs until the last row before the following "runningsensor" row.
I want it to be automatic to separate it
any feedbacks
0
Answers
-
Hi @P2017000971, Bit quick and dirty but with the right looping and some macros it should work out, below example uses your data in a simplified way but it does the trick.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: split one example set into several, starting a new
  group at every row whose att1 equals "runningsensor".

  Steps:
    1. Create ExampleSet  - tab-separated demo data (replace with your own source).
    2. Generate Macro     - initialise the group counter macro "set" to 0.
    3. Loop Examples      - per row: read att1 into macro "a1"; when it equals
                            "runningsensor" increment "set"; then write the
                            current counter into a new attribute "set".
    4. Append (Superset)  - merge the per-row outputs of the loop into one example set.
    5. Loop Values        - for every value of attribute "set", keep only the
                            rows carrying that value (one output per group).
-->
<process version="9.4.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <!-- Demo data. Tabs and row breaks are written as character references
           (&#9; / &#10;) because literal whitespace inside an attribute value
           is normalised to plain spaces by an XML parser. -->
      <operator activated="true" class="utility:create_exampleset" compatibility="9.4.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="112" y="34">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="input_csv_text" value="att1&#9;att2&#9;att3&#9;att4&#10;runningsensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;runningsensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;runningsensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;sensor&#9;1&#9;1&#9;1&#10;"/>
        <parameter key="column_separator" value="\t"/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <!-- Group counter: macro "set" starts at 0. -->
      <operator activated="true" class="generate_macro" compatibility="9.4.001" expanded="true" height="82" name="Generate Macro" width="90" x="246" y="34">
        <list key="function_descriptions">
          <parameter key="set" value="0"/>
        </list>
      </operator>
      <!-- Visit the rows one by one; the current row index is available as %{row}. -->
      <operator activated="true" class="loop_examples" compatibility="9.4.001" expanded="true" height="103" name="Loop Examples" width="90" x="380" y="34">
        <parameter key="iteration_macro" value="row"/>
        <process expanded="true">
          <!-- Reduce the example set to just the current row. -->
          <operator activated="true" class="filter_example_range" compatibility="9.4.001" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="34">
            <parameter key="first_example" value="%{row}"/>
            <parameter key="last_example" value="%{row}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <!-- Copy the att1 value of that row into macro "a1". -->
          <operator activated="true" class="extract_macro" compatibility="9.4.001" expanded="true" height="68" name="Extract Macro" width="90" x="246" y="34">
            <parameter key="macro" value="a1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="att1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Key row? The inner quotes of the expression must be escaped as
               &quot; because the attribute itself is delimited by double quotes. -->
          <operator activated="true" class="branch" compatibility="9.4.001" expanded="true" height="82" name="Branch" width="90" x="380" y="34">
            <parameter key="condition_type" value="expression"/>
            <parameter key="expression" value="equals(%{a1},&quot;runningsensor&quot;)"/>
            <parameter key="io_object" value="ANOVAMatrix"/>
            <parameter key="return_inner_output" value="true"/>
            <!-- First subprocess: increment the group counter, pass the row through. -->
            <process expanded="true">
              <operator activated="true" class="generate_macro" compatibility="9.4.001" expanded="true" height="82" name="Generate Macro (2)" width="90" x="179" y="34">
                <list key="function_descriptions">
                  <parameter key="set" value="parse(%{set})+1"/>
                </list>
              </operator>
              <connect from_port="condition" to_op="Generate Macro (2)" to_port="through 1"/>
              <connect from_op="Generate Macro (2)" from_port="through 1" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
            <!-- Second subprocess: counter unchanged, pass the row through. -->
            <process expanded="true">
              <connect from_port="condition" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
          </operator>
          <!-- Stamp the row with the current counter in a new attribute "set". -->
          <operator activated="true" class="generate_attributes" compatibility="9.4.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="514" y="34">
            <list key="function_descriptions">
              <parameter key="set" value="str(rint(parse(%{set})))"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Branch" to_port="condition"/>
          <connect from_op="Branch" from_port="input 1" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Merge the loop outputs into one example set (Operator Toolbox extension). -->
      <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.2.000" expanded="true" height="82" name="Append (Superset)" width="90" x="514" y="85"/>
      <!-- One iteration per value of "set" (exposed as %{cs}); each iteration
           keeps only the rows of that group. -->
      <operator activated="true" class="concurrency:loop_values" compatibility="9.4.001" expanded="true" height="82" name="Loop Values" width="90" x="648" y="85">
        <parameter key="attribute" value="set"/>
        <parameter key="iteration_macro" value="cs"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="filter_examples" compatibility="9.4.001" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="34">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="false"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="set.equals.%{cs}"/>
            </list>
            <parameter key="filters_logic_and" value="true"/>
            <parameter key="filters_check_metadata" value="false"/>
          </operator>
          <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Macro" to_port="through 1"/>
      <connect from_op="Generate Macro" from_port="through 1" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append (Superset)" to_port="example set 1"/>
      <connect from_op="Append (Superset)" from_port="merged set" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Loop Values" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
2 -
hi @P2017000971 try using Generate Session ID from the Operator Toolbox1
-
hi @P2017000971 , as you requested some further information on the process :
First copy - paste the XML inside rapidminer. In order to do so go to view -> Show Panel -> XML
This provides you the XML view of processes, paste the content in the content pane, press the green tick on the upper left corner to load it, and then move back to the process view.
Now you will be able to see the actual operators, the 'create exampleset' mimics your excel sheet and can therefore easily be replaced with your actual data.
What happens next is macro-magic, I've defined one starting value (just a zero) and then we start looping through all the examples (rows) one by one. If the value of attribute one is equal to runningsensor we increment our base macro, otherwise it remains the same. This value is used to generate a new attribute (column) so you get something like this :
start -> macro value = 0
loop rows:
-key-            -new attribute with macro value-
runningsensor    1    <- key, so macro value + 1 (was 0, now 1)
sensor           1    <- no key, so same
sensor           1    <- no key, so same
runningsensor    2    <- key, so macro value + 1 (was 1, now 2)
sensor           2    <- no key, so same
...
and so on...
This way you get 'sets' and with using the loop value operator on the new attribute you can split them accordingly.
Hope this makes it easier.2 -
Thanks for Both kayman and sgenzer for their valuable comments and efforts to assist . However I tried kayman's way but it did not work out well. Maybe there are some mistakes during me running the process. I attached the file which I explained on above and I wish if kayman can work on it and make the process successful. Sorry for bothering u guys but it really would mean a lot to me, thanks for both of u
attached is the txt file i worked on. Feel free to use it to make the process as requested0 -
hi @P2017000971, it actually works fine with your dataset, the only problem is that your source data is probably from a database or so as it contains white spaces after the label. Adding a trim fixes this and gives the required results.
Attached adjusted process, you only need to change the location to your 1.txt file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: read a semicolon-separated file, trim the nominal
  values, then split the rows into groups, starting a new group at every
  row whose att1 equals "runningsensor".

  Steps:
    1. Read CSV           - load the file (adjust csv_file to your own path).
    2. Trim               - strip surrounding white space from nominal values so
                            that att1 compares equal to "runningsensor".
    3. Generate Macro     - initialise the group counter macro "set" to 0.
    4. Loop Examples      - per row: read att1 into macro "a1"; when it equals
                            "runningsensor" increment "set"; then write the
                            current counter into a new attribute "set".
    5. Append (Superset)  - merge the per-row outputs of the loop into one example set.
    6. Loop Values        - for every value of attribute "set", keep only the
                            rows carrying that value (one output per group).
-->
<process version="9.4.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <operator activated="true" class="read_csv" compatibility="9.4.001" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34">
        <!-- Change this path to the location of your own file. -->
        <parameter key="csv_file" value="C:\Users\yourfolder\Downloads\1.txt"/>
        <parameter key="column_separators" value=";"/>
        <parameter key="trim_lines" value="false"/>
        <parameter key="use_quotes" value="true"/>
        <!-- The quote character is a double quote; it must be written as &quot;
             inside a double-quoted attribute value. -->
        <parameter key="quotes_character" value="&quot;"/>
        <parameter key="escape_character" value="\"/>
        <parameter key="skip_comments" value="true"/>
        <parameter key="comment_characters" value="#"/>
        <parameter key="starting_row" value="1"/>
        <parameter key="parse_numbers" value="true"/>
        <parameter key="decimal_character" value="."/>
        <parameter key="grouped_digits" value="false"/>
        <parameter key="grouping_character" value=","/>
        <parameter key="infinity_representation" value=""/>
        <parameter key="date_format" value=""/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="encoding" value="windows-1252"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="att1.true.polynominal.attribute"/>
          <parameter key="1" value="att2.true.integer.attribute"/>
          <parameter key="2" value="att3.true.polynominal.attribute"/>
          <parameter key="3" value="att4.true.real.attribute"/>
          <parameter key="4" value="att5.true.real.attribute"/>
          <parameter key="5" value="att6.true.real.attribute"/>
          <parameter key="6" value="att7.true.polynominal.attribute"/>
          <parameter key="7" value="att8.true.polynominal.attribute"/>
          <parameter key="8" value="att9.true.polynominal.attribute"/>
          <parameter key="9" value="att10.true.polynominal.attribute"/>
          <parameter key="10" value="att11.true.polynominal.attribute"/>
          <parameter key="11" value="att12.true.polynominal.attribute"/>
          <parameter key="12" value="att13.true.polynominal.attribute"/>
          <parameter key="13" value="att14.true.polynominal.attribute"/>
          <parameter key="14" value="att15.true.integer.attribute"/>
          <parameter key="15" value="att16.true.polynominal.attribute"/>
          <parameter key="16" value="att17.true.integer.attribute"/>
          <parameter key="17" value="att18.true.polynominal.attribute"/>
          <parameter key="18" value="att19.true.real.attribute"/>
          <parameter key="19" value="att20.true.real.attribute"/>
          <parameter key="20" value="att21.true.real.attribute"/>
          <parameter key="21" value="att22.true.polynominal.attribute"/>
          <parameter key="22" value="att23.true.polynominal.attribute"/>
          <parameter key="23" value="att24.true.polynominal.attribute"/>
          <parameter key="24" value="att25.true.polynominal.attribute"/>
          <parameter key="25" value="att26.true.polynominal.attribute"/>
          <parameter key="26" value="att27.true.polynominal.attribute"/>
          <parameter key="27" value="att28.true.polynominal.attribute"/>
          <parameter key="28" value="att29.true.polynominal.attribute"/>
          <parameter key="29" value="att30.true.integer.attribute"/>
          <parameter key="30" value="att31.true.polynominal.attribute"/>
          <parameter key="31" value="att32.true.integer.attribute"/>
          <parameter key="32" value="att33.true.polynominal.attribute"/>
          <parameter key="33" value="att34.true.real.attribute"/>
          <parameter key="34" value="att35.true.real.attribute"/>
          <parameter key="35" value="att36.true.real.attribute"/>
          <parameter key="36" value="att37.true.polynominal.attribute"/>
          <parameter key="37" value="att38.true.polynominal.attribute"/>
          <parameter key="38" value="att39.true.polynominal.attribute"/>
          <parameter key="39" value="att40.true.polynominal.attribute"/>
          <parameter key="40" value="att41.true.polynominal.attribute"/>
          <parameter key="41" value="att42.true.polynominal.attribute"/>
          <parameter key="42" value="att43.true.polynominal.attribute"/>
          <parameter key="43" value="att44.true.polynominal.attribute"/>
          <parameter key="44" value="att45.true.integer.attribute"/>
          <parameter key="45" value="att46.true.polynominal.attribute"/>
          <parameter key="46" value="att47.true.integer.attribute"/>
          <parameter key="47" value="att48.true.polynominal.attribute"/>
          <parameter key="48" value="att49.true.real.attribute"/>
          <parameter key="49" value="att50.true.real.attribute"/>
          <parameter key="50" value="att51.true.real.attribute"/>
          <parameter key="51" value="att52.true.polynominal.attribute"/>
          <parameter key="52" value="att53.true.polynominal.attribute"/>
          <parameter key="53" value="att54.true.polynominal.attribute"/>
          <parameter key="54" value="att55.true.polynominal.attribute"/>
          <parameter key="55" value="att56.true.polynominal.attribute"/>
          <parameter key="56" value="att57.true.polynominal.attribute"/>
          <parameter key="57" value="att58.true.polynominal.attribute"/>
          <parameter key="58" value="att59.true.polynominal.attribute"/>
          <parameter key="59" value="att60.true.integer.attribute"/>
          <parameter key="60" value="att61.true.polynominal.attribute"/>
          <parameter key="61" value="att62.true.integer.attribute"/>
          <parameter key="62" value="att63.true.polynominal.attribute"/>
          <parameter key="63" value="att64.true.real.attribute"/>
          <parameter key="64" value="att65.true.real.attribute"/>
          <parameter key="65" value="att66.true.real.attribute"/>
          <parameter key="66" value="att67.true.polynominal.attribute"/>
          <parameter key="67" value="att68.true.polynominal.attribute"/>
          <parameter key="68" value="att69.true.polynominal.attribute"/>
          <parameter key="69" value="att70.true.polynominal.attribute"/>
          <parameter key="70" value="att71.true.polynominal.attribute"/>
          <parameter key="71" value="att72.true.polynominal.attribute"/>
          <parameter key="72" value="att73.true.polynominal.attribute"/>
          <parameter key="73" value="att74.true.polynominal.attribute"/>
          <parameter key="74" value="att75.true.integer.attribute"/>
          <parameter key="75" value="att76.true.polynominal.attribute"/>
          <parameter key="76" value="att77.true.integer.attribute"/>
          <parameter key="77" value="att78.true.polynominal.attribute"/>
          <parameter key="78" value="att79.true.real.attribute"/>
          <parameter key="79" value="att80.true.real.attribute"/>
          <parameter key="80" value="att81.true.real.attribute"/>
          <parameter key="81" value="att82.true.polynominal.attribute"/>
          <parameter key="82" value="att83.true.polynominal.attribute"/>
          <parameter key="83" value="att84.true.polynominal.attribute"/>
          <parameter key="84" value="att85.true.polynominal.attribute"/>
          <parameter key="85" value="att86.true.polynominal.attribute"/>
          <parameter key="86" value="att87.true.polynominal.attribute"/>
          <parameter key="87" value="att88.true.polynominal.attribute"/>
          <parameter key="88" value="att89.true.polynominal.attribute"/>
          <parameter key="89" value="att90.true.integer.attribute"/>
          <parameter key="90" value="att91.true.polynominal.attribute"/>
          <parameter key="91" value="att92.true.integer.attribute"/>
          <parameter key="92" value="att93.true.integer.attribute"/>
          <parameter key="93" value="att94.true.integer.attribute"/>
          <parameter key="94" value="att95.true.integer.attribute"/>
          <parameter key="95" value="att96.true.integer.attribute"/>
          <parameter key="96" value="att97.true.integer.attribute"/>
          <parameter key="97" value="att98.true.integer.attribute"/>
          <parameter key="98" value="att99.true.integer.attribute"/>
          <parameter key="99" value="att100.true.integer.attribute"/>
          <parameter key="100" value="att101.true.integer.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
      </operator>
      <!-- Remove surrounding white space from the nominal values; without this
           the comparison against "runningsensor" below does not match. -->
      <operator activated="true" class="trim" compatibility="9.4.001" expanded="true" height="82" name="Trim" width="90" x="246" y="34">
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Group counter: macro "set" starts at 0. -->
      <operator activated="true" class="generate_macro" compatibility="9.4.001" expanded="true" height="82" name="Generate Macro" width="90" x="380" y="34">
        <list key="function_descriptions">
          <parameter key="set" value="0"/>
        </list>
      </operator>
      <!-- Visit the rows one by one; the current row index is available as %{row}. -->
      <operator activated="true" class="loop_examples" compatibility="9.4.001" expanded="true" height="103" name="Loop Examples" width="90" x="514" y="34">
        <parameter key="iteration_macro" value="row"/>
        <process expanded="true">
          <!-- Reduce the example set to just the current row. -->
          <operator activated="true" class="filter_example_range" compatibility="9.4.001" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="34">
            <parameter key="first_example" value="%{row}"/>
            <parameter key="last_example" value="%{row}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <!-- Copy the att1 value of that row into macro "a1". -->
          <operator activated="true" class="extract_macro" compatibility="9.4.001" expanded="true" height="68" name="Extract Macro" width="90" x="246" y="34">
            <parameter key="macro" value="a1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="att1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Key row? The inner quotes of the expression must be escaped as
               &quot; because the attribute itself is delimited by double quotes. -->
          <operator activated="true" class="branch" compatibility="9.4.001" expanded="true" height="82" name="Branch" width="90" x="380" y="34">
            <parameter key="condition_type" value="expression"/>
            <parameter key="expression" value="equals(%{a1},&quot;runningsensor&quot;)"/>
            <parameter key="io_object" value="ANOVAMatrix"/>
            <parameter key="return_inner_output" value="true"/>
            <!-- First subprocess: increment the group counter, pass the row through. -->
            <process expanded="true">
              <operator activated="true" class="generate_macro" compatibility="9.4.001" expanded="true" height="82" name="Generate Macro (2)" width="90" x="179" y="34">
                <list key="function_descriptions">
                  <parameter key="set" value="parse(%{set})+1"/>
                </list>
              </operator>
              <connect from_port="condition" to_op="Generate Macro (2)" to_port="through 1"/>
              <connect from_op="Generate Macro (2)" from_port="through 1" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
            <!-- Second subprocess: counter unchanged, pass the row through. -->
            <process expanded="true">
              <connect from_port="condition" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
          </operator>
          <!-- Stamp the row with the current counter in a new attribute "set". -->
          <operator activated="true" class="generate_attributes" compatibility="9.4.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="514" y="34">
            <list key="function_descriptions">
              <parameter key="set" value="str(rint(parse(%{set})))"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Branch" to_port="condition"/>
          <connect from_op="Branch" from_port="input 1" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Merge the loop outputs into one example set (Operator Toolbox extension). -->
      <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.2.000" expanded="true" height="82" name="Append (Superset)" width="90" x="648" y="34"/>
      <!-- One iteration per value of "set" (exposed as %{cs}); each iteration
           keeps only the rows of that group. -->
      <operator activated="true" class="concurrency:loop_values" compatibility="9.4.001" expanded="true" height="82" name="Loop Values" width="90" x="782" y="34">
        <parameter key="attribute" value="set"/>
        <parameter key="iteration_macro" value="cs"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="filter_examples" compatibility="9.4.001" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="34">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="false"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="set.equals.%{cs}"/>
            </list>
            <parameter key="filters_logic_and" value="true"/>
            <parameter key="filters_check_metadata" value="false"/>
          </operator>
          <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Read CSV" from_port="output" to_op="Trim" to_port="example set input"/>
      <connect from_op="Trim" from_port="example set output" to_op="Generate Macro" to_port="through 1"/>
      <connect from_op="Generate Macro" from_port="through 1" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append (Superset)" to_port="example set 1"/>
      <connect from_op="Append (Superset)" from_port="merged set" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Loop Values" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
2 -
Thanks Man it works perfectly and it separated them into 6 different files.
Now I am trying to use these files. How do I use these 6 files because when i am trying, i could not do anything but seeing them. thanks man0 -
1 -
As this is now a collection of example sets, you can use the 'loop collection' operator and apply your logic to every example set.
3
-
Thanks kayman for all your efforts, I was able to understand the process and construct it from the beginning and apply it elsewhere using the concept I learned from you.
If it is not too much to ask, would u please assist me in " 'loop collection' operator, and do your logic for every example set". I tried the operator but could not get a grasp of how to do it correctly. if u can apply it on 2 example sets and show me the XML and I will learn to do the rest. Thanks a lot0 -
Hi @P2017000971, the question is what you want to do with your split content.
The process starts with one big example set and then splits it into a collection of smaller example sets. The loop collection operator now allows you to iterate over these one by one, so you need to apply some further logic to them. This could range from just saving them one by one as separate datasets (as in the example), to applying some logic and appending them all together again, and so on.
The only limitation would be your imagination<?xml version="1.0" encoding="UTF-8"?><process version="9.4.001"> <context> <input/> <output/> <macros/> </context> <operator activated="true" class="process" compatibility="9.4.001" expanded="true" name="Process"> <parameter key="logverbosity" value="init"/> <parameter key="random_seed" value="2001"/> <parameter key="send_mail" value="never"/> <parameter key="notification_email" value=""/> <parameter key="process_duration_for_mail" value="30"/> <parameter key="encoding" value="UTF-8"/> <process expanded="true"> <operator activated="true" class="read_csv" compatibility="9.4.001" expanded="true" height="68" name="Read CSV" width="90" x="112" y="34"> <parameter key="csv_file" value="C:\Users\yourfolder\Downloads\1.txt"/> <parameter key="column_separators" value=";"/> <parameter key="trim_lines" value="false"/> <parameter key="use_quotes" value="true"/> <parameter key="quotes_character" value="""/> <parameter key="escape_character" value="\"/> <parameter key="skip_comments" value="true"/> <parameter key="comment_characters" value="#"/> <parameter key="starting_row" value="1"/> <parameter key="parse_numbers" value="true"/> <parameter key="decimal_character" value="."/> <parameter key="grouped_digits" value="false"/> <parameter key="grouping_character" value=","/> <parameter key="infinity_representation" value=""/> <parameter key="date_format" value=""/> <parameter key="first_row_as_names" value="false"/> <list key="annotations"/> <parameter key="time_zone" value="SYSTEM"/> <parameter key="locale" value="English (United States)"/> <parameter key="encoding" value="windows-1252"/> <parameter key="read_all_values_as_polynominal" value="false"/> <list key="data_set_meta_data_information"> <parameter key="0" value="att1.true.polynominal.attribute"/> <parameter key="1" value="att2.true.integer.attribute"/> <parameter key="2" value="att3.true.polynominal.attribute"/> <parameter key="3" value="att4.true.real.attribute"/> <parameter 
key="4" value="att5.true.real.attribute"/> <parameter key="5" value="att6.true.real.attribute"/> <parameter key="6" value="att7.true.polynominal.attribute"/> <parameter key="7" value="att8.true.polynominal.attribute"/> <parameter key="8" value="att9.true.polynominal.attribute"/> <parameter key="9" value="att10.true.polynominal.attribute"/> <parameter key="10" value="att11.true.polynominal.attribute"/> <parameter key="11" value="att12.true.polynominal.attribute"/> <parameter key="12" value="att13.true.polynominal.attribute"/> <parameter key="13" value="att14.true.polynominal.attribute"/> <parameter key="14" value="att15.true.integer.attribute"/> <parameter key="15" value="att16.true.polynominal.attribute"/> <parameter key="16" value="att17.true.integer.attribute"/> <parameter key="17" value="att18.true.polynominal.attribute"/> <parameter key="18" value="att19.true.real.attribute"/> <parameter key="19" value="att20.true.real.attribute"/> <parameter key="20" value="att21.true.real.attribute"/> <parameter key="21" value="att22.true.polynominal.attribute"/> <parameter key="22" value="att23.true.polynominal.attribute"/> <parameter key="23" value="att24.true.polynominal.attribute"/> <parameter key="24" value="att25.true.polynominal.attribute"/> <parameter key="25" value="att26.true.polynominal.attribute"/> <parameter key="26" value="att27.true.polynominal.attribute"/> <parameter key="27" value="att28.true.polynominal.attribute"/> <parameter key="28" value="att29.true.polynominal.attribute"/> <parameter key="29" value="att30.true.integer.attribute"/> <parameter key="30" value="att31.true.polynominal.attribute"/> <parameter key="31" value="att32.true.integer.attribute"/> <parameter key="32" value="att33.true.polynominal.attribute"/> <parameter key="33" value="att34.true.real.attribute"/> <parameter key="34" value="att35.true.real.attribute"/> <parameter key="35" value="att36.true.real.attribute"/> <parameter key="36" value="att37.true.polynominal.attribute"/> <parameter 
key="37" value="att38.true.polynominal.attribute"/> <parameter key="38" value="att39.true.polynominal.attribute"/> <parameter key="39" value="att40.true.polynominal.attribute"/> <parameter key="40" value="att41.true.polynominal.attribute"/> <parameter key="41" value="att42.true.polynominal.attribute"/> <parameter key="42" value="att43.true.polynominal.attribute"/> <parameter key="43" value="att44.true.polynominal.attribute"/> <parameter key="44" value="att45.true.integer.attribute"/> <parameter key="45" value="att46.true.polynominal.attribute"/> <parameter key="46" value="att47.true.integer.attribute"/> <parameter key="47" value="att48.true.polynominal.attribute"/> <parameter key="48" value="att49.true.real.attribute"/> <parameter key="49" value="att50.true.real.attribute"/> <parameter key="50" value="att51.true.real.attribute"/> <parameter key="51" value="att52.true.polynominal.attribute"/> <parameter key="52" value="att53.true.polynominal.attribute"/> <parameter key="53" value="att54.true.polynominal.attribute"/> <parameter key="54" value="att55.true.polynominal.attribute"/> <parameter key="55" value="att56.true.polynominal.attribute"/> <parameter key="56" value="att57.true.polynominal.attribute"/> <parameter key="57" value="att58.true.polynominal.attribute"/> <parameter key="58" value="att59.true.polynominal.attribute"/> <parameter key="59" value="att60.true.integer.attribute"/> <parameter key="60" value="att61.true.polynominal.attribute"/> <parameter key="61" value="att62.true.integer.attribute"/> <parameter key="62" value="att63.true.polynominal.attribute"/> <parameter key="63" value="att64.true.real.attribute"/> <parameter key="64" value="att65.true.real.attribute"/> <parameter key="65" value="att66.true.real.attribute"/> <parameter key="66" value="att67.true.polynominal.attribute"/> <parameter key="67" value="att68.true.polynominal.attribute"/> <parameter key="68" value="att69.true.polynominal.attribute"/> <parameter key="69" 
value="att70.true.polynominal.attribute"/> <parameter key="70" value="att71.true.polynominal.attribute"/> <parameter key="71" value="att72.true.polynominal.attribute"/> <parameter key="72" value="att73.true.polynominal.attribute"/> <parameter key="73" value="att74.true.polynominal.attribute"/> <parameter key="74" value="att75.true.integer.attribute"/> <parameter key="75" value="att76.true.polynominal.attribute"/> <parameter key="76" value="att77.true.integer.attribute"/> <parameter key="77" value="att78.true.polynominal.attribute"/> <parameter key="78" value="att79.true.real.attribute"/> <parameter key="79" value="att80.true.real.attribute"/> <parameter key="80" value="att81.true.real.attribute"/> <parameter key="81" value="att82.true.polynominal.attribute"/> <parameter key="82" value="att83.true.polynominal.attribute"/> <parameter key="83" value="att84.true.polynominal.attribute"/> <parameter key="84" value="att85.true.polynominal.attribute"/> <parameter key="85" value="att86.true.polynominal.attribute"/> <parameter key="86" value="att87.true.polynominal.attribute"/> <parameter key="87" value="att88.true.polynominal.attribute"/> <parameter key="88" value="att89.true.polynominal.attribute"/> <parameter key="89" value="att90.true.integer.attribute"/> <parameter key="90" value="att91.true.polynominal.attribute"/> <parameter key="91" value="att92.true.integer.attribute"/> <parameter key="92" value="att93.true.integer.attribute"/> <parameter key="93" value="att94.true.integer.attribute"/> <parameter key="94" value="att95.true.integer.attribute"/> <parameter key="95" value="att96.true.integer.attribute"/> <parameter key="96" value="att97.true.integer.attribute"/> <parameter key="97" value="att98.true.integer.attribute"/> <parameter key="98" value="att99.true.integer.attribute"/> <parameter key="99" value="att100.true.integer.attribute"/> <parameter key="100" value="att101.true.integer.attribute"/> </list> <parameter key="read_not_matching_values_as_missings" 
value="false"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
      </operator>
      <!-- Trim is applied to all nominal attributes. NOTE(review): presumably this strips
           surrounding whitespace so that the exact comparison on att1 further down
           matches; confirm against the Trim operator documentation. -->
      <operator activated="true" class="trim" compatibility="9.4.001" expanded="true" height="82" name="Trim" width="90" x="246" y="34">
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Initialise the macro "set" to 0. It is used below as a running group counter. -->
      <operator activated="true" class="generate_macro" compatibility="9.4.001" expanded="true" height="82" name="Generate Macro" width="90" x="380" y="34">
        <list key="function_descriptions">
          <parameter key="set" value="0"/>
        </list>
      </operator>
      <!-- Walk the example set one row at a time; the macro "row" holds the current index.
           Each iteration emits a single-row example set tagged with the current value
           of the "set" counter. -->
      <operator activated="true" class="loop_examples" compatibility="9.4.001" expanded="true" height="103" name="Loop Examples" width="90" x="514" y="34">
        <parameter key="iteration_macro" value="row"/>
        <process expanded="true">
          <!-- Keep only the current row (first_example and last_example are both %{row}). -->
          <operator activated="true" class="filter_example_range" compatibility="9.4.001" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="34">
            <parameter key="first_example" value="%{row}"/>
            <parameter key="last_example" value="%{row}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <!-- Copy the att1 value of that single row into the macro "a1". -->
          <operator activated="true" class="extract_macro" compatibility="9.4.001" expanded="true" height="68" name="Extract Macro" width="90" x="246" y="34">
            <parameter key="macro" value="a1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="att1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Test whether the current row is a "runningsensor" row. The string literal
               inside the expression must be written with &quot; entities: raw double
               quotes would terminate the attribute value and make the file not well-formed. -->
          <operator activated="true" class="branch" compatibility="9.4.001" expanded="true" height="82" name="Branch" width="90" x="380" y="34">
            <parameter key="condition_type" value="expression"/>
            <parameter key="expression" value="equals(%{a1},&quot;runningsensor&quot;)"/>
            <parameter key="io_object" value="ANOVAMatrix"/>
            <parameter key="return_inner_output" value="true"/>
            <!-- First subprocess: increments the "set" counter by one and passes the row on. -->
            <process expanded="true">
              <operator activated="true" class="generate_macro" compatibility="9.4.001" expanded="true" height="82" name="Generate Macro (2)" width="90" x="179" y="34">
                <list key="function_descriptions">
                  <parameter key="set" value="parse(%{set})+1"/>
                </list>
              </operator>
              <connect from_port="condition" to_op="Generate Macro (2)" to_port="through 1"/>
              <connect from_op="Generate Macro (2)" from_port="through 1" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
            <!-- Second subprocess: passes the row through and leaves the counter unchanged. -->
            <process expanded="true">
              <connect from_port="condition" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
          </operator>
          <!-- Write the current counter into a new attribute "set" on the row, as a string
               of the rounded number, so rows can be grouped by it afterwards. -->
          <operator activated="true" class="generate_attributes" compatibility="9.4.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="514" y="34">
            <list key="function_descriptions">
              <parameter key="set" value="str(rint(parse(%{set})))"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Branch" to_port="condition"/>
          <connect from_op="Branch" from_port="input 1" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Merge the per-row outputs of the loop back into one example set. -->
      <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.2.000" expanded="true" height="82" name="Append (Superset)" width="90" x="648" y="34"/>
      <!-- Iterate over the values of the "set" attribute (macro "cs") and keep, per
           iteration, only the rows whose "set" equals that value. -->
      <operator activated="true" class="concurrency:loop_values" compatibility="9.4.001" expanded="true" height="82" name="Loop Values" width="90" x="782" y="34">
        <parameter key="attribute" value="set"/>
        <parameter key="iteration_macro" value="cs"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="filter_examples" compatibility="9.4.001" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="34">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="false"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="set.equals.%{cs}"/>
            </list>
            <parameter key="filters_logic_and" value="true"/>
            <parameter key="filters_check_metadata" value="false"/>
          </operator>
          <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Store each group under the repository entry set_N, where N is the iteration
           macro "current_set" (starting at 1). The entry path is relative. -->
      <operator activated="true" class="loop_collection" compatibility="9.4.001" expanded="true" height="68" name="Loop Collection" width="90" x="916" y="34">
        <parameter key="set_iteration_macro" value="true"/>
        <parameter key="macro_name" value="current_set"/>
        <parameter key="macro_start_value" value="1"/>
        <parameter key="unfold" value="false"/>
        <process expanded="true">
          <operator activated="true" class="store" compatibility="9.4.001" expanded="true" height="68" name="Store" width="90" x="447" y="34">
            <parameter key="repository_entry" value="set_%{current_set}"/>
          </operator>
          <connect from_port="single" to_op="Store" to_port="input"/>
          <portSpacing port="source_single" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <description align="center" color="yellow" colored="false" height="105" resized="false" width="180" x="395" y="161">do something, like saving in a subset</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">loop through all the sets</description>
      </operator>
      <!-- Top-level wiring: Read CSV, Trim, Generate Macro, Loop Examples,
           Append (Superset), Loop Values, Loop Collection, in that order. -->
      <connect from_op="Read CSV" from_port="output" to_op="Trim" to_port="example set input"/>
      <connect from_op="Trim" from_port="example set output" to_op="Generate Macro" to_port="through 1"/>
      <connect from_op="Generate Macro" from_port="through 1" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Append (Superset)" to_port="example set 1"/>
      <connect from_op="Append (Superset)" from_port="merged set" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Loop Values" from_port="output 1" to_op="Loop Collection" to_port="collection"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
1