Hello,
I have a few questions following up on my earlier post
(https://community.rapidminer.com/discussion/55414/how-can-i-classify-one-example-into-multiple-classes-if-necessary#latest). I have training data for my model in which some examples have multiple labels. How do I represent these in RapidMiner?
Say I have this example: "I like cats and dogs", and the labels are cat and dog.
Do I put them in as separate examples? "I like cats and dogs" -> cat, "I like cats and dogs" -> dog
Or do I need to make a second label attribute for this? "I like cats and dogs" -> label1: cat, label2: dog.
I've also managed to build the following model, which makes a second prediction when the highest confidence is lower than 0.7. However, I would like it to decide more accurately when a second prediction is needed. Is it possible for the model to know whether an example probably has one label or two? Or do I just have to base it on a margin, for instance a confidence of 0.4-0.6?
My final question: how can I make the attribute selection in Generate Aggregation dynamic instead of having to select a fixed subset? I know it's possible with a regular expression, but I can't figure out the syntax. It just needs to select all the attributes with "confidence" in their name.
<?xml version="1.0" encoding="UTF-8"?>
<process version="9.2.001">
  <!--
    Trains Naive Bayes on one partition of the Iris sample data, applies the model to the
    other partition, and adds a ranked second prediction for every example whose highest
    class confidence is below 0.7. Both groups of examples are recombined by Union.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">
      <!-- Load the Iris sample data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="9.2.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Two partitions: 0.66 is wired to the learner, 0.34 to Apply Model. -->
      <operator activated="true" class="split_data" compatibility="9.2.001" expanded="true" height="103" name="Split Data" width="90" x="179" y="85">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.66"/>
          <parameter key="ratio" value="0.34"/>
        </enumeration>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="naive_bayes" compatibility="9.2.001" expanded="true" height="82" name="Naive Bayes" width="90" x="313" y="34">
        <parameter key="laplace_correction" value="true"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="9.2.001" expanded="true" height="82" name="Apply Model" width="90" x="313" y="136">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <!--
        Adds the attribute "Maximum" holding the largest confidence value per example.
        The confidence columns are selected by regular expression instead of a fixed subset,
        so the process no longer depends on the Iris class names. The expression has to match
        the whole attribute name, e.g. confidence(Iris-setosa); use ".*confidence.*" instead
        to select every attribute that merely contains the word.
        include_special_attributes must stay "true": confidence columns have a special role
        and would otherwise be skipped by the filter.
      -->
      <operator activated="true" class="generate_aggregation" compatibility="9.2.001" expanded="true" height="82" name="Generate Aggregation (3)" width="90" x="447" y="238">
        <parameter key="attribute_name" value="Maximum"/>
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="regular_expression" value="confidence\(.*\)"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="aggregation_function" value="maximum"/>
        <parameter key="concatenation_separator" value="|"/>
        <parameter key="keep_all" value="true"/>
        <parameter key="ignore_missings" value="true"/>
        <parameter key="ignore_missing_attributes" value="false"/>
      </operator>
      <!--
        Examples with Maximum below 0.7 leave through "example set output" and receive a
        second prediction; all others leave through "unmatched example set" unchanged.
      -->
      <operator activated="true" class="filter_examples" compatibility="9.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="581" y="136">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Maximum.lt.0\.7"/>
        </list>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <!-- Rank the predictions of the low-confidence examples, keeping the top two. -->
      <operator activated="true" class="generate_prediction_ranking" compatibility="9.2.001" expanded="true" height="82" name="Generate Prediction Ranking" width="90" x="581" y="34">
        <parameter key="number_of_ranks" value="2"/>
        <parameter key="remove_old_predictions" value="true"/>
      </operator>
      <!-- Strip the "_1" suffix so the first-ranked prediction is named prediction(label) again. -->
      <operator activated="true" class="rename_by_replacing" compatibility="9.2.001" expanded="true" height="82" name="Rename by Replacing" width="90" x="715" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="prediction(label)_1"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="replace_what" value="_1"/>
      </operator>
      <!-- Give the renamed attribute the prediction role before the two branches are merged. -->
      <operator activated="true" class="set_role" compatibility="9.2.001" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
        <parameter key="attribute_name" value="prediction(label)"/>
        <parameter key="target_role" value="prediction"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="union" compatibility="9.2.001" expanded="true" height="82" name="Union" width="90" x="849" y="187"/>
      <!-- Wiring: training and scoring. -->
      <connect from_op="Retrieve Iris" from_port="output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/>
      <!-- Wiring: confidence threshold and second prediction. -->
      <connect from_op="Apply Model" from_port="labelled data" to_op="Generate Aggregation (3)" to_port="example set input"/>
      <connect from_op="Generate Aggregation (3)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Generate Prediction Ranking" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="unmatched example set" to_op="Union" to_port="example set 2"/>
      <connect from_op="Generate Prediction Ranking" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
      <connect from_op="Rename by Replacing" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Union" to_port="example set 1"/>
      <connect from_op="Union" from_port="union" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Thanks a lot
-Prentice