Extract individual files from each row of an exampleset
Pradeep_K
New Altair Community Member
Hello Rapidminers,
I want to write a separate text file for each row of an ExampleSet.
For example, the value in the ‘Requisition Title’ column should become the filename, and the value in the ‘Overview’ column should become the contents of the file.
I have around 300 rows of data, so there should be 300 files, each named after its ‘Requisition Title’ value and containing the text from ‘Overview’.
Could you please suggest how to do this pre-processing? I tried using a Loop operator to split the data, but it didn’t work.
Tagged:
1
Best Answers
-
Hi,
you could do this with Loop Batches and a batch size of 1.
i.e.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example process: write one text file per row of an ExampleSet.
  Flow: Create ExampleSet -> Loop Batches (batch size 1) -> Extract Macro -> Select Attributes -> Write CSV.
  Copy everything from the XML declaration above down to the closing process tag.
-->
<process version="9.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.5.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Sample data with two columns (title, content); replace this operator with your own data source. -->
      <operator activated="true"
                class="utility:create_exampleset"
                compatibility="9.5.000"
                expanded="true"
                height="68"
                name="Create ExampleSet"
                width="90"
                x="45"
                y="34">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!--
          Rows are separated with the character reference for a line feed (&#10;).
          A literal line break or space inside an attribute value is normalized to a
          single space by XML parsers, which would merge all rows into one header line.
        -->
        <parameter key="input_csv_text" value="title,content&#10;bla,blubb&#10;foo,bar"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="true"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <!-- A batch size of 1 means the inner process runs once per example (row). -->
      <operator activated="true"
                class="loop_batches"
                compatibility="9.5.000"
                expanded="true"
                height="68"
                name="Loop Batches"
                width="90"
                x="313"
                y="34">
        <parameter key="batch_size" value="1"/>
        <process expanded="true">
          <!-- Store the value of attribute "title" from example 1 of the batch in the macro "title". -->
          <operator activated="true"
                    class="extract_macro"
                    compatibility="9.5.000"
                    expanded="true"
                    height="68"
                    name="Extract Macro (2)"
                    width="90"
                    x="112"
                    y="34">
            <parameter key="macro" value="title"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="title"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Inverted single-attribute selection: keep every attribute except "title". -->
          <operator activated="true"
                    class="select_attributes"
                    compatibility="9.5.000"
                    expanded="true"
                    height="82"
                    name="Select Attributes"
                    width="90"
                    x="246"
                    y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="title"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- The file name comes from the "title" macro; header row and quoting are switched off. -->
          <operator activated="true"
                    class="write_csv"
                    compatibility="9.5.000"
                    expanded="true"
                    height="82"
                    name="Write CSV"
                    width="90"
                    x="447"
                    y="34">
            <parameter key="csv_file" value="%{title}.txt"/>
            <parameter key="column_separator" value=";"/>
            <parameter key="write_attribute_names" value="false"/>
            <parameter key="quote_nominal_values" value="false"/>
            <parameter key="format_date_attributes" value="true"/>
            <parameter key="append_to_file" value="false"/>
            <parameter key="encoding" value="UTF-8"/>
          </operator>
          <connect from_port="exampleSet" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Write CSV" to_port="input"/>
          <portSpacing port="source_exampleSet" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Batches" to_port="example set"/>
      <connect from_op="Loop Batches" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0
Answers
-
Hi,
you could do this with Loop Batches and a batch size of 1.
i.e.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example process: write one text file per row of an ExampleSet.
  Flow: Create ExampleSet -> Loop Batches (batch size 1) -> Extract Macro -> Select Attributes -> Write CSV.
  Copy everything from the XML declaration above down to the closing process tag.
-->
<process version="9.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.5.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Sample data with two columns (title, content); replace this operator with your own data source. -->
      <operator activated="true"
                class="utility:create_exampleset"
                compatibility="9.5.000"
                expanded="true"
                height="68"
                name="Create ExampleSet"
                width="90"
                x="45"
                y="34">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <!--
          Rows are separated with the character reference for a line feed (&#10;).
          A literal line break or space inside an attribute value is normalized to a
          single space by XML parsers, which would merge all rows into one header line.
        -->
        <parameter key="input_csv_text" value="title,content&#10;bla,blubb&#10;foo,bar"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="true"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <!-- A batch size of 1 means the inner process runs once per example (row). -->
      <operator activated="true"
                class="loop_batches"
                compatibility="9.5.000"
                expanded="true"
                height="68"
                name="Loop Batches"
                width="90"
                x="313"
                y="34">
        <parameter key="batch_size" value="1"/>
        <process expanded="true">
          <!-- Store the value of attribute "title" from example 1 of the batch in the macro "title". -->
          <operator activated="true"
                    class="extract_macro"
                    compatibility="9.5.000"
                    expanded="true"
                    height="68"
                    name="Extract Macro (2)"
                    width="90"
                    x="112"
                    y="34">
            <parameter key="macro" value="title"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="title"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Inverted single-attribute selection: keep every attribute except "title". -->
          <operator activated="true"
                    class="select_attributes"
                    compatibility="9.5.000"
                    expanded="true"
                    height="82"
                    name="Select Attributes"
                    width="90"
                    x="246"
                    y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="title"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- The file name comes from the "title" macro; header row and quoting are switched off. -->
          <operator activated="true"
                    class="write_csv"
                    compatibility="9.5.000"
                    expanded="true"
                    height="82"
                    name="Write CSV"
                    width="90"
                    x="447"
                    y="34">
            <parameter key="csv_file" value="%{title}.txt"/>
            <parameter key="column_separator" value=";"/>
            <parameter key="write_attribute_names" value="false"/>
            <parameter key="quote_nominal_values" value="false"/>
            <parameter key="format_date_attributes" value="true"/>
            <parameter key="append_to_file" value="false"/>
            <parameter key="encoding" value="UTF-8"/>
          </operator>
          <connect from_port="exampleSet" to_op="Extract Macro (2)" to_port="example set"/>
          <connect from_op="Extract Macro (2)" from_port="example set" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Write CSV" to_port="input"/>
          <portSpacing port="source_exampleSet" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Loop Batches" to_port="example set"/>
      <connect from_op="Loop Batches" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0