Hello
I want to concatenate several attributes and then put all the values of the resulting attribute into a single value of another attribute, within one example set. Something like this:
att1 - att 2 - att 3
1 casa - perro - carro
2 avion- teja - amigo
3 mujer - bonita - leal
And then create:
att4
1. casa perro carro avion teja amigo mujer bonita leal
I built the process below:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.2 process made of two operator chains that are not connected
  to each other by ports; they only share the macro "valor".

  Chain A: Read Excel (2), Generate Attributes (2), Select Attributes (2),
           Loop Values. Concatenates attributes a..f into "nuevo" and, for
           each value Loop Values iterates over, appends that value plus the
           word "or" to the macro "valor".
  Chain B: Generate Data by User Specification, Extract Macro. Evaluates an
           expression on the macro "valor" and writes the result back into
           the same macro.

  NOTE(review): Chain B is listed before Chain A. If operators without a
  port dependency run in document order, "valor" is read here before Loop
  Values has filled it. Confirm the intended execution order.
-->
<process version="5.2.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.006"
            expanded="true" name="Process">
    <process expanded="true" height="370" width="614">

      <!-- Chain B, step 1: build the attribute "recortar" from the macro "valor". -->
      <operator activated="true" class="generate_data_user_specification"
                compatibility="5.2.006" expanded="true" height="60"
                name="Generate Data by User Specification"
                width="90" x="179" y="300">
        <list key="attribute_values">
          <!-- The inner double quotes are written as &quot; because a raw
               double quote would end the attribute value and break the XML.
               The length argument is length - 3; presumably this drops the
               trailing " or" that Set Macro (3) appends (TODO confirm). -->
          <parameter key="recortar"
                     value="cut(&quot;%{valor}&quot;,0,length(&quot;%{valor}&quot;)-3)"/>
        </list>
        <list key="set_additional_roles"/>
      </operator>

      <!-- Chain B, step 2: copy the value of "recortar" at example index 1
           into the macro "valor". -->
      <operator activated="true" class="extract_macro" compatibility="5.2.006"
                expanded="true" height="60" name="Extract Macro"
                width="90" x="380" y="300">
        <parameter key="macro" value="valor"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="attribute_name" value="recortar"/>
        <parameter key="example_index" value="1"/>
      </operator>

      <!-- Chain A, step 1: read six columns, all declared as nominal regular
           attributes named a..f. -->
      <operator activated="true" class="read_excel" compatibility="5.2.006"
                expanded="true" height="60" name="Read Excel (2)"
                width="90" x="45" y="120">
        <list key="annotations"/>
        <list key="data_set_meta_data_information">
          <parameter key="1" value="a.true.nominal.regular"/>
          <parameter key="2" value="b.true.nominal.regular"/>
          <parameter key="3" value="c.true.nominal.regular"/>
          <parameter key="4" value="d.true.nominal.regular"/>
          <parameter key="5" value="e.true.nominal.regular"/>
          <parameter key="6" value="f.true.nominal.regular"/>
        </list>
      </operator>

      <!-- Chain A, step 2: create "nuevo" as the concatenation of a..f. -->
      <operator activated="true" class="generate_attributes" compatibility="5.2.006"
                expanded="true" height="76" name="Generate Attributes (2)"
                width="90" x="179" y="120">
        <list key="function_descriptions">
          <parameter key="nuevo" value="concat(a,b,c,d,e,f)"/>
        </list>
      </operator>

      <!-- Chain A, step 3: keep only the attribute "nuevo". -->
      <operator activated="true" class="select_attributes" compatibility="5.2.006"
                expanded="true" height="76" name="Select Attributes (2)"
                width="90" x="313" y="120">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="nuevo"/>
      </operator>

      <!-- Chain A, step 4: iterate over the values of "nuevo"; each iteration
           rewrites the macro "valor" as its previous content followed by the
           current loop value and the word "or". -->
      <operator activated="true" class="loop_values" compatibility="5.2.006"
                expanded="true" height="76" name="Loop Values"
                width="90" x="447" y="120">
        <parameter key="attribute" value="nuevo"/>
        <process expanded="true" height="383" width="611">
          <operator activated="true" class="set_macro" compatibility="5.2.006"
                    expanded="true" height="76" name="Set Macro (3)"
                    width="90" x="246" y="30">
            <parameter key="macro" value="valor"/>
            <parameter key="value" value="%{valor} %{loop_value} or"/>
          </operator>
          <connect from_port="example set" to_op="Set Macro (3)" to_port="through 1"/>
          <connect from_op="Set Macro (3)" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>

      <!-- Wiring for Chain B; its output is the only process result. -->
      <connect from_op="Generate Data by User Specification" from_port="output"
               to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_port="result 1"/>

      <!-- Wiring for Chain A; the output of Loop Values is not connected. -->
      <connect from_op="Read Excel (2)" from_port="output"
               to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output"
               to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output"
               to_op="Loop Values" to_port="example set"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="234"/>
      <portSpacing port="sink_result 2" spacing="18"/>
    </process>
  </operator>
</process>
I have, on average, about 9800 examples in the example set, and I concatenate five or six attributes. My problem is memory: this preprocessing step alone uses more than 1.5 GB of RAM. My objective is to build a SQL query with this attribute, but the query is fairly large, so the process crashes because it runs out of memory.
Could you please give me some advice on this topic? How can I reduce the memory used by the above process?