Force Attribute to Nominal (NOT Binomial)
dragoljub
New Altair Community Member
Hi Guys,
I'm having trouble getting RM to do what I want. It's trying to be too smart but can't see the whole picture. I have a bunch of CSV files that I am appending together. All files have the same attribute columns. Some polynomial attributes occur with only one or two different values per CSV file. When I import a CSV and set the attribute to polynomial, RM assumes it's binomial; however, when I try to append the two example sets, it gives me an error saying it can't add another value to a binomial attribute. I know my attributes are polynomial, and I have tried String to Nominal and Numerical to Nominal with no luck.
Sep 20, 2010 11:28:50 AM SEVERE: Process failed: operator cannot be executed (Cannot map another string for binary attribute: already mapped two strings! First string = <tt>A</tt>, Second string = <tt>B</tt> The third string that was tried to add: <tt>C</tt>).
How can I force the attribute to be polynomial? This should be an option in the type conversion operators.
Thanks,
-Gagi
I'm having trouble getting RM to do what I want. It's trying to be too smart but can't see the whole picture. I have a bunch of CSV files that I am appending together. All files have the same attribute columns. Some polynomial attributes occur with only one or two different values per CSV file. When I import a CSV and set the attribute to polynomial, RM assumes it's binomial; however, when I try to append the two example sets, it gives me an error saying it can't add another value to a binomial attribute. I know my attributes are polynomial, and I have tried String to Nominal and Numerical to Nominal with no luck.
Sep 20, 2010 11:28:50 AM SEVERE: Process failed: operator cannot be executed (Cannot map another string for binary attribute: already mapped two strings! First string = <tt>A</tt>, Second string = <tt>B</tt> The third string that was tried to add: <tt>C</tt>).
How can I force the attribute to be polynomial? This should be an option in the type conversion operators.
Thanks,
-Gagi
Tagged:
0
Answers
-
Hi, is the idea in the process below of any help for your problem?
Regards,
Dan
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root operator of the demo process: two example sets are built, the "label" attribute of each is converted to text, the sets are appended, and "label" of the merged set is converted back from text to nominal. -->
<operator activated="true" class="process" compatibility="5.0.10" expanded="true" name="Process">
<process expanded="true" height="431" width="614">
<!-- "first dataset": generates nominal data and runs Nominal to Binominal on the single attribute "label". -->
<operator activated="true" class="subprocess" compatibility="5.0.10" expanded="true" height="76" name="first dataset" width="90" x="45" y="30">
<process expanded="true" height="404" width="532">
<operator activated="true" class="generate_nominal_data" compatibility="5.0.10" expanded="true" height="60" name="Generate Nominal Data" width="90" x="112" y="30"/>
<operator activated="true" class="nominal_to_binominal" compatibility="5.0.10" expanded="true" height="94" name="Nominal to Binominal" width="90" x="246" y="30">
<!-- Only the attribute named "label" is selected; include_special_attributes is true, presumably because "label" is a special attribute (TODO confirm). -->
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<connect from_op="Generate Nominal Data" from_port="output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Converts "label" of the first data set to text before it reaches Append. -->
<operator activated="true" class="nominal_to_text" compatibility="5.0.10" expanded="true" height="76" name="Nominal to Text" width="90" x="179" y="30">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<!-- "second dataset": same generator, but two Replace steps rewrite values of "label" before Nominal to Binominal is applied. -->
<operator activated="true" class="subprocess" compatibility="5.0.10" expanded="true" height="76" name="second dataset" width="90" x="45" y="165">
<process expanded="true" height="404" width="550">
<operator activated="true" class="generate_nominal_data" compatibility="5.0.10" expanded="true" height="60" name="Generate Nominal Data (2)" width="90" x="45" y="75"/>
<!-- Replaces "negative" by "no" in "label"; presumably so this set's label values differ from the first set's (the generator's actual values are not visible here). -->
<operator activated="true" class="replace" compatibility="5.0.10" expanded="true" height="76" name="Replace" width="90" x="112" y="210">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="replace_what" value="negative"/>
<parameter key="replace_by" value="no"/>
</operator>
<!-- Replaces "positive" by "yes" in "label". -->
<operator activated="true" class="replace" compatibility="5.0.10" expanded="true" height="76" name="Replace (2)" width="90" x="246" y="210">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="replace_what" value="positive"/>
<parameter key="replace_by" value="yes"/>
</operator>
<operator activated="true" class="nominal_to_binominal" compatibility="5.0.10" expanded="true" height="94" name="Nominal to Binominal (2)" width="90" x="313" y="75">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Chain inside the subprocess: Generate Nominal Data (2), Replace, Replace (2), Nominal to Binominal (2), then port "out 1". -->
<connect from_op="Generate Nominal Data (2)" from_port="output" to_op="Replace" to_port="example set input"/>
<connect from_op="Replace" from_port="example set output" to_op="Replace (2)" to_port="example set input"/>
<connect from_op="Replace (2)" from_port="example set output" to_op="Nominal to Binominal (2)" to_port="example set input"/>
<connect from_op="Nominal to Binominal (2)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="18"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Converts "label" of the second data set to text before it reaches Append. -->
<operator activated="true" class="nominal_to_text" compatibility="5.0.10" expanded="true" height="76" name="Nominal to Text (2)" width="90" x="179" y="165">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Append receives both text-converted sets on "example set 1" and "example set 2". -->
<operator activated="true" class="append" compatibility="5.0.10" expanded="true" height="94" name="Append" width="90" x="313" y="120"/>
<!-- After merging, "label" is converted from text back to nominal. -->
<operator activated="true" class="text_to_nominal" compatibility="5.0.10" expanded="true" height="76" name="Text to Nominal" width="90" x="447" y="120">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Top-level wiring: each data set goes through its own Nominal to Text, both feed Append, and the merged set goes through Text to Nominal to "result 1". -->
<connect from_op="first dataset" from_port="out 1" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Append" to_port="example set 1"/>
<connect from_op="second dataset" from_port="out 1" to_op="Nominal to Text (2)" to_port="example set input"/>
<connect from_op="Nominal to Text (2)" from_port="example set output" to_op="Append" to_port="example set 2"/>
<connect from_op="Append" from_port="merged set" to_op="Text to Nominal" to_port="example set input"/>
<connect from_op="Text to Nominal" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="90"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0 -
Hi,
did you try using the "Read CSV" operator's import wizard? I also had some troubles with those wizards not doing what they should do...
In this case I would also suggest using type Text as a possibility to add more values and then convert it back to Nominal.
But in my opinion this is only a workaround, not a real solution! There should either be a possibility for conversion (Binominal to Polynominal), or the CSV reader should not automatically assign the type Binominal when only two string values are present (if Binominal is wanted in that case, the existing operator can be used). If the wizard is used, the types should of course be assigned as they were selected.
Regards,
Matthias
0 -
Greets Gagi!
If I run the following code I get your error as well....
>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root operator of the reproduction process: one loop writes generated data to CSV files, a second loop reads them back with Read CSV, and the result is appended, written to "merged", re-read and filtered. The poster reports that this version raises the binominal mapping error. -->
<operator activated="true" class="process" compatibility="5.0.10" expanded="true" name="Process">
<process expanded="true" height="386" width="832">
<!-- "Write CSVs": loop with iterations=3 that generates data and writes it to a file named gagi_%{a}. Presumably %{a} expands to a per-iteration counter so each pass gets its own file (TODO confirm). -->
<operator activated="true" class="loop" compatibility="5.0.10" expanded="true" height="60" name="Write CSVs" width="90" x="45" y="30">
<parameter key="iterations" value="3"/>
<process expanded="true" height="376" width="850">
<operator activated="true" class="generate_data" compatibility="5.0.10" expanded="true" height="60" name="Generate Data" width="90" x="57" y="26"/>
<operator activated="true" class="discretize_by_bins" compatibility="5.0.10" expanded="true" height="94" name="Discretize" width="90" x="217" y="28">
<parameter key="create_view" value="true"/>
<parameter key="range_name_type" value="short"/>
</operator>
<!-- Receives the "original" port of Discretize and writes to the file "atts". -->
<operator activated="true" class="write_constructions" compatibility="5.0.10" expanded="true" height="60" name="Write Constructions" width="90" x="376" y="138">
<parameter key="attribute_constructions_file" value="atts"/>
</operator>
<!-- Replaces "range" by "run_%{a}"; presumably this makes the nominal values differ from file to file (TODO confirm). -->
<operator activated="true" class="replace" compatibility="5.0.10" expanded="true" height="76" name="Replace" width="90" x="380" y="30">
<parameter key="replace_what" value="range"/>
<parameter key="replace_by" value="run_%{a}"/>
</operator>
<operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write CSV" width="90" x="543" y="26">
<parameter key="csv_file" value="gagi_%{a}"/>
</operator>
<!-- Chain inside the loop: Generate Data, Discretize, Replace, Write CSV; the "original" port of Discretize branches to Write Constructions. -->
<connect from_op="Generate Data" from_port="output" to_op="Discretize" to_port="example set input"/>
<connect from_op="Discretize" from_port="example set output" to_op="Replace" to_port="example set input"/>
<connect from_op="Discretize" from_port="original" to_op="Write Constructions" to_port="input"/>
<connect from_op="Replace" from_port="example set output" to_op="Write CSV" to_port="input"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<!-- "Read CSVs": loop with iterations=3 that reads gagi_%{a} and passes it to "output 1". No use_first_row_as_attribute_names parameter is set here, so the operator's default applies. -->
<operator activated="true" class="loop" compatibility="5.0.10" expanded="true" height="76" name="Read CSVs" width="90" x="45" y="165">
<parameter key="iterations" value="3"/>
<process expanded="true" height="376" width="850">
<operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read CSV" width="90" x="112" y="75">
<parameter key="file_name" value="gagi_%{a}"/>
<list key="data_set_meta_data_information"/>
</operator>
<connect from_op="Read CSV" from_port="output" to_port="output 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="append" compatibility="5.0.10" expanded="true" height="76" name="Append" width="90" x="179" y="165"/>
<!-- Writes the appended set to "merged" with write_attribute_names set to false. -->
<operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write Merged" width="90" x="313" y="165">
<parameter key="csv_file" value="merged"/>
<parameter key="write_attribute_names" value="false"/>
</operator>
<!-- Reads the file "merged" back in. NOTE(review): no connection links Write Merged to Read Merged, so their execution order is not established by the wiring shown here. -->
<operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read Merged" width="90" x="45" y="300">
<parameter key="file_name" value="merged"/>
<list key="data_set_meta_data_information"/>
</operator>
<!-- "Tidy up": filter_examples with condition_class no_missing_attributes. -->
<operator activated="true" class="filter_examples" compatibility="5.0.10" expanded="true" height="76" name="Tidy up" width="90" x="246" y="300">
<parameter key="condition_class" value="no_missing_attributes"/>
</operator>
<!-- Top-level wiring: Read CSVs "output 1" feeds Append "example set 1", the merged set goes to Write Merged, and Read Merged feeds Tidy up, which delivers "result 1". The Write CSVs loop has no connections at this level. -->
<connect from_op="Read CSVs" from_port="output 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Append" from_port="merged set" to_op="Write Merged" to_port="input"/>
<connect from_op="Read Merged" from_port="output" to_op="Tidy up" to_port="example set input"/>
<connect from_op="Tidy up" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
But with one tiny change it all works OK. That change is not to read the first line in as attribute names. You can see the problem: read file one and att1 is binominal; read file two and we find att1 again, so it is binominal again...
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Root operator of the working variant: same write, read, append, re-read and filter workflow, except that Read CSV inside the "Read CSVs" loop sets use_first_row_as_attribute_names to false. The poster reports that this version runs without the binominal mapping error. -->
<operator activated="true" class="process" compatibility="5.0.10" expanded="true" name="Process">
<process expanded="true" height="386" width="832">
<!-- "Write CSVs": loop with iterations=3 that generates data and writes it to a file named gagi_%{a}. Presumably %{a} expands to a per-iteration counter so each pass gets its own file (TODO confirm). -->
<operator activated="true" class="loop" compatibility="5.0.10" expanded="true" height="60" name="Write CSVs" width="90" x="45" y="30">
<parameter key="iterations" value="3"/>
<process expanded="true" height="376" width="850">
<operator activated="true" class="generate_data" compatibility="5.0.10" expanded="true" height="60" name="Generate Data" width="90" x="57" y="26"/>
<operator activated="true" class="discretize_by_bins" compatibility="5.0.10" expanded="true" height="94" name="Discretize" width="90" x="217" y="28">
<parameter key="create_view" value="true"/>
<parameter key="range_name_type" value="short"/>
</operator>
<!-- Receives the "original" port of Discretize and writes to the file "atts". -->
<operator activated="true" class="write_constructions" compatibility="5.0.10" expanded="true" height="60" name="Write Constructions" width="90" x="376" y="138">
<parameter key="attribute_constructions_file" value="atts"/>
</operator>
<!-- Replaces "range" by "run_%{a}"; presumably this makes the nominal values differ from file to file (TODO confirm). -->
<operator activated="true" class="replace" compatibility="5.0.10" expanded="true" height="76" name="Replace" width="90" x="380" y="30">
<parameter key="replace_what" value="range"/>
<parameter key="replace_by" value="run_%{a}"/>
</operator>
<operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write CSV" width="90" x="543" y="26">
<parameter key="csv_file" value="gagi_%{a}"/>
</operator>
<!-- Chain inside the loop: Generate Data, Discretize, Replace, Write CSV; the "original" port of Discretize branches to Write Constructions. -->
<connect from_op="Generate Data" from_port="output" to_op="Discretize" to_port="example set input"/>
<connect from_op="Discretize" from_port="example set output" to_op="Replace" to_port="example set input"/>
<connect from_op="Discretize" from_port="original" to_op="Write Constructions" to_port="input"/>
<connect from_op="Replace" from_port="example set output" to_op="Write CSV" to_port="input"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<!-- "Read CSVs": loop with iterations=3 that reads gagi_%{a} and passes it to "output 1". -->
<operator activated="true" class="loop" compatibility="5.0.10" expanded="true" height="76" name="Read CSVs" width="90" x="45" y="165">
<parameter key="iterations" value="3"/>
<process expanded="true" height="376" width="850">
<operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read CSV" width="90" x="112" y="75">
<parameter key="file_name" value="gagi_%{a}"/>
<!-- The one change the poster describes: the first row is not read as attribute names. -->
<parameter key="use_first_row_as_attribute_names" value="false"/>
<list key="data_set_meta_data_information"/>
</operator>
<connect from_op="Read CSV" from_port="output" to_port="output 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="append" compatibility="5.0.10" expanded="true" height="76" name="Append" width="90" x="179" y="165"/>
<!-- Writes the appended set to "merged" with write_attribute_names set to false. -->
<operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write Merged" width="90" x="313" y="165">
<parameter key="csv_file" value="merged"/>
<parameter key="write_attribute_names" value="false"/>
</operator>
<!-- Reads the file "merged" back in. NOTE(review): no connection links Write Merged to Read Merged, so their execution order is not established by the wiring shown here. -->
<operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read Merged" width="90" x="45" y="300">
<parameter key="file_name" value="merged"/>
<list key="data_set_meta_data_information"/>
</operator>
<!-- "Tidy up": filter_examples with condition_class no_missing_attributes. -->
<operator activated="true" class="filter_examples" compatibility="5.0.10" expanded="true" height="76" name="Tidy up" width="90" x="246" y="300">
<parameter key="condition_class" value="no_missing_attributes"/>
</operator>
<!-- Top-level wiring: Read CSVs "output 1" feeds Append "example set 1", the merged set goes to Write Merged, and Read Merged feeds Tidy up, which delivers "result 1". The Write CSVs loop has no connections at this level. -->
<connect from_op="Read CSVs" from_port="output 1" to_op="Append" to_port="example set 1"/>
<connect from_op="Append" from_port="merged set" to_op="Write Merged" to_port="input"/>
<connect from_op="Read Merged" from_port="output" to_op="Tidy up" to_port="example set input"/>
<connect from_op="Tidy up" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Hope that is useful!
0 -
Thanks guys. The simple fix is just forcing the attribute to TEXT after each CSV read, then converting it to polynomial (as mentioned above).
It's a bit of a hack, but such is life. ;D
-Gagi
0