A program to recognize and reward our most engaged community members
Hello
Can someone tell me whether it is possible to stack the output of different individual models from a collection of IOObjects into one simple CSV table? I tried writing a CSV or Excel file after the collection operator, but it did not work.
Thanks
Hi @sebastian_gonza,
Can you post your XML process? Yes, it should be doable. Let's see what you've got.
All the best,
Rodrigo.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process as posted in the question (process version 9.0.002).
  Inside the "Modelos" subprocess it reads a CSV file, keeps five attributes,
  derives three log-transformed attributes, marks Logimporte as the label,
  removes duplicates, groups the data into a collection by IdCliente, and
  runs a Linear Regression plus Apply Model for every element of that collection.
  Note for readers: Collect is fed from Loop Collection "output 1" (labelled
  example sets) and "output 2" (models), so the resulting collection mixes
  two kinds of objects. "output 3" and "output 4" are also connected to Collect,
  although the inner loop process only delivers "output 1" and "output 2".
-->
<process version="9.0.002">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="9.0.002" expanded="true"
                height="82" name="Modelos" width="90" x="179" y="136">
        <process expanded="true">

          <!-- Data loading and preparation -->
          <operator activated="true" class="read_csv" compatibility="9.0.002" expanded="true"
                    height="68" name="Read CSV" width="90" x="246" y="34">
            <parameter key="csv_file" value="C:\Users\ANGLOBAL\Desktop\sample.csv"/>
            <list key="annotations"/>
            <list key="data_set_meta_data_information"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="9.0.002" expanded="true"
                    height="103" name="Multiply (2)" width="90" x="447" y="34"/>
          <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true"
                    height="82" name="Select Attributes (5)" width="90" x="581" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Cantidad|IdCliente|IdPedido|Importe|PrecioFinal"/>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="9.0.002" expanded="true"
                    height="82" name="Generate Attributes (3)" width="90" x="715" y="34">
            <list key="function_descriptions">
              <parameter key="Logimporte" value="log(Importe+1)"/>
              <parameter key="Logprecio" value="log([PrecioFinal]+1)"/>
              <parameter key="Logcantidades" value="log(Cantidad+1)"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="9.0.002" expanded="true"
                    height="82" name="Set Role" width="90" x="849" y="34">
            <parameter key="attribute_name" value="Logimporte"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="remove_duplicates" compatibility="9.0.002" expanded="true"
                    height="103" name="Remove Duplicates" width="90" x="983" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="IdCliente|IdPedido|Importe|Cantidad"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="9.0.002" expanded="true"
                    height="82" name="Multiply" width="90" x="1117" y="34"/>

          <!-- One collection element per IdCliente value -->
          <operator activated="true" class="operator_toolbox:group_into_collection" compatibility="1.5.000" expanded="true"
                    height="82" name="Group Into Collection" width="90" x="45" y="238">
            <parameter key="group_by_attribute" value="IdCliente"/>
          </operator>
          <operator activated="true" class="annotate" compatibility="9.0.002" expanded="true"
                    height="68" name="Annotate" width="90" x="179" y="238">
            <list key="annotations">
              <parameter key="Comment" value="IdCliente"/>
            </list>
          </operator>

          <!-- Per-element modelling: 70/30 split, train on partition 1, apply to partition 2 -->
          <operator activated="true" class="loop_collection" compatibility="9.0.002" expanded="true"
                    height="145" name="Loop Collection" width="90" x="313" y="238">
            <process expanded="true">
              <operator activated="true" class="extract_macro" compatibility="9.0.002" expanded="true"
                        height="68" name="Extract Macro (2)" width="90" x="45" y="34">
                <parameter key="macro" value="IdCliente"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="IdCliente"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true"
                        height="82" name="Select Attributes (2)" width="90" x="179" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Logcantidades|Logimporte|Logprecio"/>
              </operator>
              <operator activated="true" class="split_data" compatibility="9.0.002" expanded="true"
                        height="103" name="Split Data" width="90" x="313" y="34">
                <enumeration key="partitions">
                  <parameter key="ratio" value="0.7"/>
                  <parameter key="ratio" value="0.3"/>
                </enumeration>
              </operator>
              <operator activated="true" class="linear_regression" compatibility="9.0.002" expanded="true"
                        height="103" name="Linear Regression (2)" width="90" x="447" y="34">
                <parameter key="eliminate_colinear_features" value="false"/>
                <parameter key="use_bias" value="false"/>
              </operator>
              <!-- The model is tagged with the IdCliente macro value before it is applied -->
              <operator activated="true" class="annotate" compatibility="9.0.002" expanded="true"
                        height="68" name="Annotate (2)" width="90" x="581" y="34">
                <list key="annotations">
                  <parameter key="Comment" value="%{IdCliente}"/>
                </list>
              </operator>
              <operator activated="true" class="apply_model" compatibility="9.0.002" expanded="true"
                        height="82" name="Apply Model" width="90" x="715" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="annotate" compatibility="9.0.002" expanded="true"
                        height="68" name="Annotate (3)" width="90" x="849" y="34">
                <list key="annotations">
                  <parameter key="Comment" value="%{IdCliente}"/>
                </list>
              </operator>
              <connect from_port="single" to_op="Extract Macro (2)" to_port="example set"/>
              <connect from_op="Extract Macro (2)" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
              <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Split Data" to_port="example set"/>
              <connect from_op="Split Data" from_port="partition 1" to_op="Linear Regression (2)" to_port="training set"/>
              <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Linear Regression (2)" from_port="model" to_op="Annotate (2)" to_port="input"/>
              <connect from_op="Annotate (2)" from_port="output" to_op="Apply Model" to_port="model"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Annotate (3)" to_port="input"/>
              <!-- output 2 carries the model, output 1 the annotated labelled data -->
              <connect from_op="Apply Model" from_port="model" to_port="output 2"/>
              <connect from_op="Annotate (3)" from_port="output" to_port="output 1"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
              <portSpacing port="sink_output 3" spacing="0"/>
              <portSpacing port="sink_output 4" spacing="0"/>
              <portSpacing port="sink_output 5" spacing="0"/>
            </process>
          </operator>

          <operator activated="true" class="collect" compatibility="9.0.002" expanded="true"
                    height="145" name="Collect" width="90" x="514" y="223"/>

          <!-- Wiring of the "Modelos" subprocess -->
          <connect from_op="Read CSV" from_port="output" to_op="Multiply (2)" to_port="input"/>
          <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (5)" to_port="example set input"/>
          <connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate Attributes (3)" to_port="example set input"/>
          <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Group Into Collection" to_port="exa"/>
          <connect from_op="Group Into Collection" from_port="col" to_op="Annotate" to_port="input"/>
          <connect from_op="Annotate" from_port="output" to_op="Loop Collection" to_port="collection"/>
          <connect from_op="Loop Collection" from_port="output 1" to_op="Collect" to_port="input 1"/>
          <connect from_op="Loop Collection" from_port="output 2" to_op="Collect" to_port="input 2"/>
          <connect from_op="Loop Collection" from_port="output 3" to_op="Collect" to_port="input 3"/>
          <connect from_op="Loop Collection" from_port="output 4" to_op="Collect" to_port="input 4"/>
          <connect from_op="Collect" from_port="collection" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Modelos" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Sure, I attach a sample
Hi,
The key operator is Append. It can merge a collection of example sets into one, if the schema is the same.
BR,
Martin
Placing it at the end of the loop or after the collection does not work. Where should the Append be placed?
thanks
After the loop. You need to ensure that the data has the same schema, though.
Sorry, I don't understand what "the same schema" means. If you are referring to the structure of each IOObject, it is a linear regression by Id; sometimes it is calculated, other times not. I get the error "The operator needs a rapidminer input type which is not provided".
Sorry, is it possible for you to have a look at the last reply please? thanks
Hi @sebastian_gonza - ok, I see what's going on here. It's not a good idea to mix your models with your ExampleSets. It's like apples and oranges, so RM has no idea what to do with that (hence the weird error message).
I don't see more than one model in your XML so I'm not sure what you're trying to do there, but this may guide you in a good direction:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Suggested RapidMiner process (process version 9.0.003).
  The data preparation and the per-IdCliente loop match the structure of the
  process in the question. The difference is at the end of the "Modelos"
  subprocess: only Loop Collection "output 1" (the annotated labelled example
  sets) is connected to Collect, and the resulting collection is passed to
  Append, whose "merged set" port is delivered as "out 1".
  The model is still delivered to the loop's "output 2", but that port is
  not connected to anything outside the loop.
-->
<process version="9.0.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="9.0.003" expanded="true"
                height="82" name="Modelos" width="90" x="45" y="85">
        <process expanded="true">

          <!-- Data loading and preparation -->
          <operator activated="true" class="read_csv" compatibility="9.0.003" expanded="true"
                    height="68" name="Read CSV" width="90" x="246" y="34">
            <parameter key="csv_file" value="/Users/genzerconsulting/Desktop/sample.csv"/>
            <list key="annotations"/>
            <list key="data_set_meta_data_information"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="9.0.003" expanded="true"
                    height="82" name="Multiply (2)" width="90" x="447" y="34"/>
          <operator activated="true" class="select_attributes" compatibility="9.0.003" expanded="true"
                    height="82" name="Select Attributes (5)" width="90" x="581" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Cantidad|IdCliente|IdPedido|Importe|PrecioFinal"/>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="9.0.003" expanded="true"
                    height="82" name="Generate Attributes (3)" width="90" x="715" y="34">
            <list key="function_descriptions">
              <parameter key="Logimporte" value="log(Importe+1)"/>
              <parameter key="Logprecio" value="log([PrecioFinal]+1)"/>
              <parameter key="Logcantidades" value="log(Cantidad+1)"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="9.0.003" expanded="true"
                    height="82" name="Set Role" width="90" x="849" y="34">
            <parameter key="attribute_name" value="Logimporte"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="remove_duplicates" compatibility="9.0.003" expanded="true"
                    height="103" name="Remove Duplicates" width="90" x="983" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="IdCliente|IdPedido|Importe|Cantidad"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="9.0.003" expanded="true"
                    height="82" name="Multiply" width="90" x="1117" y="34"/>

          <!-- One collection element per IdCliente value -->
          <operator activated="true" class="operator_toolbox:group_into_collection" compatibility="1.5.000" expanded="true"
                    height="82" name="Group Into Collection" width="90" x="45" y="238">
            <parameter key="group_by_attribute" value="IdCliente"/>
          </operator>
          <operator activated="true" class="annotate" compatibility="9.0.003" expanded="true"
                    height="68" name="Annotate" width="90" x="179" y="238">
            <list key="annotations">
              <parameter key="Comment" value="IdCliente"/>
            </list>
          </operator>

          <!-- Per-element modelling: 70/30 split, train on partition 1, apply to partition 2 -->
          <operator activated="true" class="loop_collection" compatibility="9.0.003" expanded="true"
                    height="103" name="Loop Collection" width="90" x="313" y="238">
            <process expanded="true">
              <operator activated="true" class="extract_macro" compatibility="9.0.003" expanded="true"
                        height="68" name="Extract Macro (2)" width="90" x="45" y="34">
                <parameter key="macro" value="IdCliente"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="IdCliente"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <operator activated="true" class="select_attributes" compatibility="9.0.003" expanded="true"
                        height="82" name="Select Attributes (2)" width="90" x="179" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="Logcantidades|Logimporte|Logprecio"/>
              </operator>
              <operator activated="true" class="split_data" compatibility="9.0.003" expanded="true"
                        height="103" name="Split Data" width="90" x="313" y="238">
                <enumeration key="partitions">
                  <parameter key="ratio" value="0.7"/>
                  <parameter key="ratio" value="0.3"/>
                </enumeration>
              </operator>
              <operator activated="true" class="linear_regression" compatibility="9.0.003" expanded="true"
                        height="103" name="Linear Regression (2)" width="90" x="447" y="34">
                <parameter key="eliminate_colinear_features" value="false"/>
                <parameter key="use_bias" value="false"/>
              </operator>
              <!-- The model is tagged with the IdCliente macro value before it is applied -->
              <operator activated="true" class="annotate" compatibility="9.0.003" expanded="true"
                        height="68" name="Annotate (2)" width="90" x="581" y="34">
                <list key="annotations">
                  <parameter key="Comment" value="%{IdCliente}"/>
                </list>
              </operator>
              <operator activated="true" class="apply_model" compatibility="9.0.003" expanded="true"
                        height="82" name="Apply Model" width="90" x="715" y="136">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="annotate" compatibility="9.0.003" expanded="true"
                        height="68" name="Annotate (3)" width="90" x="849" y="34">
                <list key="annotations">
                  <parameter key="Comment" value="%{IdCliente}"/>
                </list>
              </operator>
              <connect from_port="single" to_op="Extract Macro (2)" to_port="example set"/>
              <connect from_op="Extract Macro (2)" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
              <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Split Data" to_port="example set"/>
              <connect from_op="Split Data" from_port="partition 1" to_op="Linear Regression (2)" to_port="training set"/>
              <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Linear Regression (2)" from_port="model" to_op="Annotate (2)" to_port="input"/>
              <connect from_op="Annotate (2)" from_port="output" to_op="Apply Model" to_port="model"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Annotate (3)" to_port="input"/>
              <!-- output 2 carries the model, output 1 the annotated labelled data -->
              <connect from_op="Apply Model" from_port="model" to_port="output 2"/>
              <connect from_op="Annotate (3)" from_port="output" to_port="output 1"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="189"/>
              <portSpacing port="sink_output 3" spacing="0"/>
            </process>
          </operator>

          <!-- Gather the per-element example sets and merge them into a single example set -->
          <operator activated="true" class="collect" compatibility="9.0.003" expanded="true"
                    height="82" name="Collect" width="90" x="447" y="238"/>
          <operator activated="true" class="append" compatibility="9.0.003" expanded="true"
                    height="82" name="Append" width="90" x="581" y="238"/>

          <!-- Wiring of the "Modelos" subprocess -->
          <connect from_op="Read CSV" from_port="output" to_op="Multiply (2)" to_port="input"/>
          <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (5)" to_port="example set input"/>
          <connect from_op="Select Attributes (5)" from_port="example set output" to_op="Generate Attributes (3)" to_port="example set input"/>
          <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Group Into Collection" to_port="exa"/>
          <connect from_op="Group Into Collection" from_port="col" to_op="Annotate" to_port="input"/>
          <connect from_op="Annotate" from_port="output" to_op="Loop Collection" to_port="collection"/>
          <connect from_op="Loop Collection" from_port="output 1" to_op="Collect" to_port="input 1"/>
          <connect from_op="Collect" from_port="collection" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="420"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Modelos" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Scott